
Commit b403b10

wolegechu authored and facebook-github-bot committed
Fix #11752: fix numerical issue in log_softmax (#21672)
Summary: #11866 corrected this issue in `host_softmax` (aten/src/ATen/native/SoftMax.cpp), but when I tried the example proposed in #11752, `log_softmax` still failed for big logits. Looking into the source code, I found that the example calls `vec_host_softmax_lastdim`, not `host_softmax`. This change fixes the issue in `_vec_log_softmax_lastdim` and adds a test for `log_softmax`.

Pull Request resolved: #21672
Differential Revision: D15856327
Pulled By: VitalyFedyunin
fbshipit-source-id: 7a1fd3c0a03d366c99eb873e235361e4fcfa7567
1 parent 0f675f9 commit b403b10

File tree

2 files changed: +15 −4 lines changed

aten/src/ATen/native/cpu/SoftMaxKernel.cpp

Lines changed: 10 additions & 4 deletions

@@ -65,19 +65,25 @@ inline void _vec_log_softmax_lastdim(
   }
   // See [Note AVX-SSE transitions] for why this should call the
   // vectorized version (aside from perf improvements).
-  vec256::map2(
-      [](Vec x, Vec y) { return x.log() + y; },
+  vec256::map(
+      [](Vec x) { return x.log(); },
       tmp_sum_scalar,
       tmp_sum_scalar,
-      max_input_arr,
       loop_end);
   for (int64_t j = 0; j < loop_end; j++) {
     int64_t i = ii + j;
     scalar_t* input_data = input_data_base + i * dim_size;
     scalar_t* output_data = output_data_base + i * dim_size;
     scalar_t tmp_sum = tmp_sum_scalar[j];
+    scalar_t max_input = max_input_arr[j];
+
+    // It's necessary to keep the order of the operations below.
+    // In some cases that input is large digits and the difference
+    // is small, if we compute `max_input` plus `tmp_sum` before,
+    // there would be a numerical problem. See an example in
+    // https://github.com/pytorch/pytorch/issues/11752#issuecomment-422883379
     vec256::map(
-        [tmp_sum](Vec x) { return x - Vec(tmp_sum); },
+        [tmp_sum, max_input](Vec x) { return x - Vec(max_input) - Vec(tmp_sum); },
         output_data,
         input_data,
         dim_size);
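The ordering constraint the diff enforces can be demonstrated outside ATen. Below is a minimal pure-Python sketch (hypothetical helper names, not PyTorch API) contrasting the two subtraction orders in float64: adding the large `max` to the small log-sum first rounds the log-sum term away entirely, which is exactly the failure mode from #11752.

```python
import math

def log_softmax_unstable(xs):
    # Pre-commit order: fold max and log-sum together before subtracting.
    # For huge logits, m + s rounds back to m and the log-sum term is lost.
    m = max(xs)
    s = math.log(sum(math.exp(x - m) for x in xs))
    return [x - (m + s) for x in xs]

def log_softmax_stable(xs):
    # Post-commit order: subtract the large max first, then the small
    # log-sum, mirroring `x - Vec(max_input) - Vec(tmp_sum)` in the diff.
    m = max(xs)
    s = math.log(sum(math.exp(x - m) for x in xs))
    return [(x - m) - s for x in xs]

big = [1.0 + 1e16, 1.0 + 1e16]        # large logits, as in the new test
print(log_softmax_unstable(big))      # [0.0, 0.0]: the log(2) term vanished
print(log_softmax_stable(big))        # [-0.693..., -0.693...]: correct
```

Note that at magnitude 1e16 the float64 spacing is 2.0, so `1e16 + log(2)` rounds straight back to `1e16`; only the stable order preserves the `-log(2)` each entry should carry.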

test/test_nn.py

Lines changed: 5 additions & 0 deletions
@@ -8153,6 +8153,11 @@ def test_softmin(self):
         self.assertEqual(F.softmin(x, 1), F.softmax(-x, 1))
         self.assertEqual(F.softmin(x, 0), F.softmax(-x, 0))
 
+    def test_log_softmax(self):
+        x_small = torch.ones(1, 2, dtype=torch.float32)
+        x_big = x_small + 1e16
+        self.assertEqual(F.log_softmax(x_small, -1), F.log_softmax(x_big, -1))
+
     def test_adaptive_log_softmax(self):
         # args validation
         with self.assertRaises(ValueError):
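The new test leans on log-softmax being shift-invariant: adding any constant to every logit must leave the result unchanged. A small pure-Python sketch of that property (hypothetical helper, not part of the test suite), using the same `+ 1e16` shift as `x_big`:

```python
import math

def log_softmax(xs):
    # Stable log-softmax: subtract the max first, then the small
    # log-sum, in the order the fixed kernel uses.
    m = max(xs)
    s = math.log(sum(math.exp(x - m) for x in xs))
    return [(x - m) - s for x in xs]

x_small = [1.0, 1.0]                  # mirrors torch.ones(1, 2)
x_big = [x + 1e16 for x in x_small]   # same shift as the new test
print(log_softmax(x_small))           # [-0.693..., -0.693...]
print(log_softmax(x_big))             # identical: shift invariance holds
```

Because each input is reduced by its own row max before exponentiation, both calls see the same shifted values, which is why the test can demand exact agreement between the two results.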
