
Commit fae6c67

tunz authored and soumith committed
Configurable flushing denormal numbers on CPU (#5294)
* Configurable flushing denormal numbers on CPU
* Formatting
* Update docs
* Minor doc changes
1 parent 6279367 commit fae6c67

File tree

7 files changed: +87 lines, −0 lines

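Before the file-by-file diff, a minimal usage sketch of the API this commit introduces, based on the docstring and test added below (a hypothetical session; torch.set_flush_denormal returns False on CPUs without SSE3, so actual output varies by machine):

    import torch

    t = torch.DoubleTensor([1e-323])     # denormal in double precision
    print(t[0])                          # ~9.88e-324 under default IEEE handling

    if torch.set_flush_denormal(True):   # False (and a no-op) without SSE3
        print(t[0])                      # 0.0 while flushing is enabled
        torch.set_flush_denormal(False)  # restore default denormal handling
        print(t[0])                      # ~9.88e-324 again; stored bits unchanged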

aten/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -261,6 +261,7 @@ ENDIF()
 IF(C_SSE3_FOUND)
   MESSAGE(STATUS "SSE3 Found")
   SET(CMAKE_C_FLAGS "${C_SSE3_FLAGS} -DUSE_SSE3 ${CMAKE_C_FLAGS}")
+  SET(CMAKE_CXX_FLAGS "${C_SSE3_FLAGS} -DUSE_SSE3 ${CMAKE_CXX_FLAGS}")
 ENDIF(C_SSE3_FOUND)

 # we don't set -mavx and -mavx2 flags globally, but only for specific files

aten/src/ATen/Context.cpp

Lines changed: 19 additions & 0 deletions
@@ -15,6 +15,10 @@
 #endif
 #include "ATen/CPUGenerator.h"

+#ifdef USE_SSE3
+#include <pmmintrin.h>
+#endif
+
 namespace at {

 static inline void errorHandler(const char * msg, void * data) {

@@ -118,4 +122,19 @@ int64_t Context::current_device() const {
   return -1;
 }

+bool Context::setFlushDenormal(bool on) {
+#ifdef USE_SSE3
+  // Setting flush-to-zero (FTZ) flag
+  _MM_SET_FLUSH_ZERO_MODE(on ? _MM_FLUSH_ZERO_ON
+                             : _MM_FLUSH_ZERO_OFF);
+
+  // Setting denormals-are-zero (DAZ) flag
+  _MM_SET_DENORMALS_ZERO_MODE(on ? _MM_DENORMALS_ZERO_ON
+                                 : _MM_DENORMALS_ZERO_OFF);
+  return true;
+#else
+  return false;
+#endif
+}
+
 }
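The two intrinsics above set distinct MXCSR flags: FTZ (flush-to-zero) makes operations that would produce a denormal result return zero instead, while DAZ (denormals-are-zero) makes denormal inputs read as zero without modifying the stored bits. A short sketch of the difference as observed through the Python API this commit adds (assumes an SSE3-capable x86 CPU):

    import torch

    t = torch.FloatTensor([1e-42])  # 1e-42 is already denormal in single precision
    x = torch.FloatTensor([1e-20])  # normal, but x * x (~1e-40) is denormal

    torch.set_flush_denormal(True)
    print(t[0])                     # 0.0 -- DAZ zeroes the denormal input on read
    print((x * x)[0])               # 0.0 -- FTZ zeroes the denormal product
    torch.set_flush_denormal(False)
    print(t[0])                     # ~1e-42 again; memory was never modified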

aten/src/ATen/Context.h

Lines changed: 2 additions & 0 deletions
@@ -56,6 +56,8 @@ class AT_API Context {
   cudaStream_t getCurrentCUDAStream() const;
   cudaDeviceProp* getCurrentDeviceProperties() const;

+  bool setFlushDenormal(bool on);
+
   // NB: This method is *purely* whether or not a user requested
   // that CuDNN was enabled, it doesn't actually say anything about
   // whether or not CuDNN is actually usable. Use cudnn_is_acceptable

docs/source/torch.rst

Lines changed: 1 addition & 0 deletions
@@ -9,6 +9,7 @@ Tensors
 .. autofunction:: set_default_tensor_type
 .. autofunction:: numel
 .. autofunction:: set_printoptions
+.. autofunction:: set_flush_denormal


 Creation Ops

test/test_torch.py

Lines changed: 23 additions & 0 deletions
@@ -5471,6 +5471,29 @@ def test_offset_scalar_cast(self):
         y = x[2:]
         self.assertEqual(int(y), 3)

+    @unittest.skipIf(not torch.set_flush_denormal(False),
+                     "flush_denormal not supported")
+    def test_set_flush_denormal(self):
+        tiny_float = 1e-42
+        tiny_double = 1e-320
+        float_tensor = torch.FloatTensor([1.0, tiny_float])
+        double_tensor = torch.DoubleTensor([1.0, tiny_float, tiny_double])
+
+        self.assertEqual(float_tensor[0], 1.0, prec=0.0)
+        self.assertEqual(float_tensor[1], tiny_float, prec=tiny_float / 16)
+        self.assertEqual(double_tensor[0], 1.0, prec=0.0)
+        self.assertEqual(double_tensor[1], tiny_float, prec=0.0)
+        self.assertEqual(double_tensor[2], tiny_double, prec=0.0)
+
+        torch.set_flush_denormal(True)
+        self.assertEqual(float_tensor[0], 1.0, prec=0.0)
+        self.assertEqual(float_tensor[1], 0.0, prec=0.0)  # tiny_float to zero
+        self.assertEqual(double_tensor[0], 1.0, prec=0.0)
+        # tiny_float is not converted to zero in double type
+        self.assertEqual(double_tensor[1], tiny_float, prec=0.0)
+        self.assertEqual(double_tensor[2], 0.0, prec=0.0)  # tiny_double to zero
+        torch.set_flush_denormal(False)
+
 # Functions to test negative dimension wrapping
 METHOD = 1
 INPLACE_METHOD = 2

torch/_torch_docs.py

Lines changed: 31 additions & 0 deletions
@@ -3932,6 +3932,37 @@

 """)

+add_docstr(torch._C.set_flush_denormal,
+           r"""
+set_flush_denormal(mode) -> bool
+
+Disables denormal floating numbers on CPU.
+
+Returns ``True`` if your system supports flushing denormal numbers and it
+successfully configures flush denormal mode. :meth:`~torch.set_flush_denormal`
+is only supported on x86 architectures supporting SSE3.
+
+Args:
+    mode (bool): Controls whether to enable flush denormal mode or not
+
+Example::
+
+    >>> torch.set_flush_denormal(True)
+    True
+    >>> torch.DoubleTensor([1e-323])
+
+     0
+    [torch.DoubleTensor of size 1]
+
+    >>> torch.set_flush_denormal(False)
+    True
+    >>> torch.DoubleTensor([1e-323])
+
+     9.88131e-324 *
+       1.0000
+    [torch.DoubleTensor of size 1]
+""")
+
 add_docstr(torch._C.set_num_threads,
            r"""
 set_num_threads(int)

torch/csrc/Module.cpp

Lines changed: 10 additions & 0 deletions
@@ -586,6 +586,15 @@ PyObject *THPModule_benchmarkCuDNN(PyObject *_unused)
   else Py_RETURN_FALSE;
 }

+PyObject *THPModule_setFlushDenormal(PyObject *_unused, PyObject *arg) {
+  THPUtils_assert(PyBool_Check(arg), "flush_denormal expects a bool, "
+                  "but got %s", THPUtils_typename(arg));
+  if (!at::globalContext().setFlushDenormal(arg == Py_True)) {
+    Py_RETURN_FALSE;
+  }
+  Py_RETURN_TRUE;
+}
+
 #ifdef WITH_CUDA
 extern PyObject * THCSPModule_initExtension(PyObject *self);
 #endif

@@ -619,6 +628,7 @@ static PyMethodDef TorchMethods[] = {
   {"from_numpy", (PyCFunction)THPModule_fromNumpy, METH_O, NULL},
   {"_to_dlpack", (PyCFunction)THPModule_toDLPack, METH_O, NULL},
   {"_from_dlpack", (PyCFunction)THPModule_fromDLPack, METH_O, NULL},
+  {"set_flush_denormal", (PyCFunction)THPModule_setFlushDenormal, METH_O, NULL},

   {"sigmoid", (PyCFunction)THPModule_sigmoid, METH_VARARGS | METH_KEYWORDS, NULL},
   {"log", (PyCFunction)THPModule_log, METH_VARARGS | METH_KEYWORDS, NULL},
