70 changes: 70 additions & 0 deletions aten/src/ATen/native/Dropout.cpp
@@ -0,0 +1,70 @@
#include "ATen/ATen.h"
#include "ATen/Error.h"
#include "ATen/NativeFunctions.h"
#include "ATen/CPUGenerator.h"
#include "ATen/CheckGenerator.h"
#include "ATen/Generator.h"

namespace at {
namespace native {

Tensor dropout(const Tensor& self, double p, bool featurewise, bool train, Generator *gen) {
  AT_CHECK(0 <= p && p <= 1, "dropout() expects 0 <= p <= 1, but got p = ", p);
  if (train && p > 0) {
    if (p == 1) {
      return at::zeros_like(self);
    } else {
      auto keep_p = 1. - p;
      if (featurewise) {
        auto dim = self.dim();
        AT_CHECK(dim > 2,
                 "feature_dropout() expects input to have at least 3 "
                 "dimensions, but got input with size ", self.sizes());
        // One Bernoulli draw per (batch, channel); broadcast over trailing dims.
        auto noise_shape = self.sizes().vec();
        for (int64_t i = 2; i < dim; i++) {
          noise_shape[i] = 1;
        }
        auto noise = at::empty(noise_shape, self.type()).bernoulli_(keep_p, gen).div_(keep_p);
        return self * noise;
      } else {
        auto noise = at::empty_like(self).bernoulli_(keep_p, gen).div_(keep_p);
        return self * noise;
      }
    }
  } else {
    return self.clone();
  }
}

Tensor& dropout_(Tensor& self, double p, bool featurewise, bool train, Generator *gen) {
  AT_CHECK(0 <= p && p <= 1, "dropout() expects 0 <= p <= 1, but got p = ", p);
  if (train && p > 0) {
    if (p == 1) {
      return self.zero_();
    } else {
      auto keep_p = 1. - p;
      if (featurewise) {
        auto dim = self.dim();
        AT_CHECK(dim > 2,
                 "feature_dropout() expects input to have at least 3 "
                 "dimensions, but got input with size ", self.sizes());
        auto noise_shape = self.sizes().vec();
        for (int64_t i = 2; i < dim; i++) {
          noise_shape[i] = 1;
        }
        auto noise = at::empty(noise_shape, self.type()).bernoulli_(keep_p, gen).div_(keep_p);
        return self.mul_(noise);
      } else {
        auto noise = at::empty_like(self).bernoulli_(keep_p, gen).div_(keep_p);
        return self.mul_(noise);
      }
    }
  } else {
    return self;
  }
}

} // namespace native
} // namespace at
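The scaling above is the usual "inverted dropout": survivors are divided by keep_p = 1 - p, so the output matches the input in expectation and no rescaling is needed at eval time. A minimal Python sketch of the same scheme (illustrative only, not part of this diff):

    import torch

    def inverted_dropout_reference(x, p=0.5):
        # Keep each element with probability 1 - p, then rescale by
        # 1 / (1 - p): E[noise] = (1 - p) * 1 / (1 - p) = 1.
        keep_p = 1.0 - p
        noise = torch.empty_like(x).bernoulli_(keep_p).div_(keep_p)
        return x * noise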
4 changes: 4 additions & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -439,6 +439,10 @@
- func: dot_out(Tensor result, Tensor self, Tensor tensor) -> Tensor
  variants: function

- func: dropout(Tensor self, double p=0.5, bool featurewise=false, bool train=false, Generator* generator=nullptr) -> Tensor

- func: dropout_(Tensor self, double p=0.5, bool featurewise=false, bool train=false, Generator* generator=nullptr) -> Tensor

- func: einsum(std::string equation, TensorList tensors) -> Tensor
  variants: function

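With no explicit variants line, declarations like these should generate both namespace functions and Tensor methods; the wrappers in torch/nn/functional.py below rely on the method form. A hedged sketch of how the bound methods would read from Python, with arguments in schema order (p, featurewise, train):

    import torch

    x = torch.randn(4, 3, 8, 8)
    y = x.dropout(0.5, False, True)   # elementwise, out of place
    x.dropout_(0.5, True, True)       # featurewise (channel) variant, in place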
12 changes: 5 additions & 7 deletions test/test_nn.py
@@ -1895,22 +1895,20 @@ def test_Dropout(self):
         input = torch.Tensor(1000)
         self._test_dropout(nn.Dropout, input)

-    def test_Dropout2d(self):
+    def test_FeatureDropout(self):
         b = random.randint(1, 5)
         w = random.randint(1, 5)
         h = random.randint(1, 5)
         num_features = 1000
         input = torch.Tensor(num_features, b, w, h)
-        self._test_dropout(nn.Dropout2d, input)
-
-    def test_Dropout3d(self):
+        self._test_dropout(nn.FeatureDropout, input)
         b = random.randint(1, 5)
         w = random.randint(1, 5)
         h = random.randint(1, 5)
         d = random.randint(1, 2)
         num_features = 1000
         input = torch.Tensor(num_features, b, d, w, h)
-        self._test_dropout(nn.Dropout3d, input)
+        self._test_dropout(nn.FeatureDropout, input)

     def test_AlphaDropout(self):
         # generate random tensor with zero mean and unit std
@@ -3407,8 +3405,8 @@ def test_invalid_dropout_p(self):
         self.assertRaises(ValueError, lambda: nn.Dropout2d(1.1))
         self.assertRaises(ValueError, lambda: nn.Dropout3d(-0.1))
         self.assertRaises(ValueError, lambda: nn.Dropout3d(1.1))
-        self.assertRaises(ValueError, lambda: F.dropout(v, -0.1))
-        self.assertRaises(ValueError, lambda: F.dropout(v, 1.1))
+        self.assertRaises(RuntimeError, lambda: F.dropout(v, -0.1))
+        self.assertRaises(RuntimeError, lambda: F.dropout(v, 1.1))

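The expected exception type changes because the p-range check now lives in ATen (AT_CHECK), and ATen errors surface in Python as RuntimeError rather than ValueError. A quick sketch of the new behavior:

    import torch
    import torch.nn.functional as F

    v = torch.randn(3)
    try:
        F.dropout(v, p=1.1)  # p outside [0, 1]
    except RuntimeError as e:
        print(e)  # dropout() expects 0 <= p <= 1, but got p = 1.1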
     def test_pad_sequence(self):
         def pad(tensor, length):
68 changes: 0 additions & 68 deletions torch/nn/_functions/dropout.py

This file was deleted.

4 changes: 0 additions & 4 deletions torch/nn/backends/thnn.py
@@ -22,16 +22,12 @@ def _initialize_backend():
     from .._functions.thnn import _all_functions as _thnn_functions
     from .._functions.rnn import RNN, \
         RNNTanhCell, RNNReLUCell, GRUCell, LSTMCell
-    from .._functions.dropout import Dropout, FeatureDropout

     backend.register_function('RNN', RNN)
     backend.register_function('RNNTanhCell', RNNTanhCell)
     backend.register_function('RNNReLUCell', RNNReLUCell)
     backend.register_function('LSTMCell', LSTMCell)
     backend.register_function('GRUCell', GRUCell)
-    backend.register_function('Dropout', Dropout)
-    backend.register_function('Dropout2d', FeatureDropout)
-    backend.register_function('Dropout3d', FeatureDropout)
     for cls in _thnn_functions:
         name = cls.__name__
         backend.register_function(name, cls)
60 changes: 55 additions & 5 deletions torch/nn/functional.py
@@ -551,7 +551,20 @@ def adaptive_avg_pool3d(input, output_size):

 # Activation functions
 def dropout(input, p=0.5, training=False, inplace=False):
-    return _functions.dropout.Dropout.apply(input, p, training, inplace)
+    r"""
+    Applies elementwise dropout with drop probability :attr:`p`.
+
+    See :class:`~torch.nn.Dropout` for details.
+
+    Args:
+        p (float, optional): the drop probability. Default: 0.5
+        training (bool, optional): switch between training and evaluation mode. Default: ``False``
+        inplace (bool, optional): whether to apply dropout inplace. Default: ``False``
+    """
+    if inplace:
+        return input.dropout_(p, False, training)
+    else:
+        return input.dropout(p, False, training)

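A quick usage sketch of the rewritten wrapper; the exact zero pattern is random:

    import torch
    import torch.nn.functional as F

    x = torch.ones(6)
    y = F.dropout(x, p=0.5, training=True)
    # y holds a random mix of 0.0 and 2.0: dropped entries are zeroed,
    # survivors are scaled by 1 / (1 - p) = 2.
    z = F.dropout(x, p=0.5, training=False)  # identity (a clone) in eval mode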

def alpha_dropout(input, p=0.5, training=False):
@@ -585,12 +598,49 @@ def alpha_dropout(input, p=0.5, training=False):
     return output.mul_(a).add_(b)


-def dropout2d(input, p=0.5, training=False, inplace=False):
-    return _functions.dropout.FeatureDropout.apply(input, p, training, inplace)
+def feature_dropout(input, p=0.5, training=False, inplace=False):
+    r"""
+    Applies featurewise dropout. Each full channel is dropped out with
+    probability :attr:`p`.
+
+    See :class:`~torch.nn.FeatureDropout` for details.
+
+    Args:
+        p (float, optional): the drop probability. Default: 0.5
+        training (bool, optional): switch between training and evaluation mode. Default: ``False``
+        inplace (bool, optional): whether to apply dropout inplace. Default: ``False``
+    """
+    if inplace:
+        return input.dropout_(p, True, training)
+    else:
+        return input.dropout(p, True, training)

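Because the noise tensor is shaped (N, C, 1, ..., 1), one Bernoulli draw is shared by every element of a channel. An illustrative sketch:

    import torch
    import torch.nn.functional as F

    x = torch.ones(1, 4, 2, 2)
    y = F.feature_dropout(x, p=0.5, training=True)
    # Each of the 4 channels is either all zeros or all 2.0 (scaled by
    # 1 / (1 - p)); elements within a channel share a single coin flip.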

+def _make_deprecated_dropoutNd(N):
+    def deprecated_dropoutNd(*args, **kwargs):
+        warnings.warn(
+            "dropout{}d is deprecated. Please use nn.FeatureDropout (module) "
+            "or nn.functional.feature_dropout (functional) instead.".format(N))
+        return feature_dropout(*args, **kwargs)
+
+    deprecated_dropoutNd.__doc__ = r"""
+    dropout{}d(input, p=0.5, training=False, inplace=False) -> Tensor
+
+    .. warning::
+        This method is now deprecated in favor of :func:`torch.nn.functional.feature_dropout`.
+
+    Args:
+        p (float, optional): the drop probability. Default: 0.5
+        training (bool, optional): switch between training and evaluation mode. Default: ``False``
+        inplace (bool, optional): whether to apply dropout inplace. Default: ``False``
+
+    See :func:`~torch.nn.functional.feature_dropout` for details.""".format(N)
+    deprecated_dropoutNd.__name__ = "dropout{}d".format(N)
+    return deprecated_dropoutNd

-def dropout3d(input, p=0.5, training=False, inplace=False):
-    return _functions.dropout.FeatureDropout.apply(input, p, training, inplace)
+dropout1d = _make_deprecated_dropoutNd(1)
+dropout2d = _make_deprecated_dropoutNd(2)
+dropout3d = _make_deprecated_dropoutNd(3)

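The deprecated aliases still work but emit a warning on every call; a sketch of what a caller sees:

    import warnings
    import torch
    import torch.nn.functional as F

    x = torch.randn(2, 3, 4, 4)
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        y = F.dropout2d(x, p=0.5, training=True)
    print(caught[0].message)
    # dropout2d is deprecated. Please use nn.FeatureDropout (module) or
    # nn.functional.feature_dropout (functional) instead.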

def threshold(input, threshold, value, inplace=False):
1 change: 1 addition & 0 deletions torch/nn/init.py
@@ -417,6 +417,7 @@ def deprecated_init(*args, **kwargs):

     See :func:`~torch.nn.init.{new_name}` for details.""".format(
         old_name=old_name, new_name=new_name)
+    deprecated_init.__name__ = old_name
     return deprecated_init


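Setting __name__ makes each deprecated alias report its own name instead of the shared inner name deprecated_init. A hedged sketch, assuming an alias such as nn.init.uniform created by this factory:

    import torch.nn.init as init

    print(init.uniform.__name__)  # 'uniform' with this fix; 'deprecated_init' without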
4 changes: 2 additions & 2 deletions torch/nn/modules/__init__.py
@@ -16,7 +16,7 @@
 from .batchnorm import BatchNorm1d, BatchNorm2d, BatchNorm3d
 from .instancenorm import InstanceNorm1d, InstanceNorm2d, InstanceNorm3d
 from .normalization import LocalResponseNorm, CrossMapLRN2d, LayerNorm, GroupNorm
-from .dropout import Dropout, Dropout2d, Dropout3d, AlphaDropout
+from .dropout import Dropout, Dropout2d, Dropout3d, AlphaDropout, FeatureDropout
 from .padding import ReflectionPad1d, ReflectionPad2d, ReplicationPad1d, ReplicationPad2d, \
     ReplicationPad3d, ZeroPad2d, ConstantPad1d, ConstantPad2d, ConstantPad3d
 from .sparse import Embedding, EmbeddingBag
@@ -41,7 +41,7 @@
     'MaxPool3d', 'MaxUnpool1d', 'MaxUnpool2d', 'MaxUnpool3d', 'FractionalMaxPool2d',
     'LPPool1d', 'LPPool2d', 'LocalResponseNorm', 'BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d', 'InstanceNorm1d',
     'InstanceNorm2d', 'InstanceNorm3d', 'LayerNorm', 'GroupNorm', 'Dropout', 'Dropout2d', 'Dropout3d', 'AlphaDropout',
-    'ReflectionPad1d', 'ReflectionPad2d', 'ReplicationPad2d', 'ReplicationPad1d', 'ReplicationPad3d',
+    'ReflectionPad1d', 'ReflectionPad2d', 'ReplicationPad2d', 'ReplicationPad1d', 'ReplicationPad3d', 'FeatureDropout',
     'CrossMapLRN2d', 'Embedding', 'EmbeddingBag', 'RNNBase', 'RNN', 'LSTM', 'GRU', 'RNNCell', 'LSTMCell', 'GRUCell',
     'PixelShuffle', 'Upsample', 'UpsamplingNearest2d', 'UpsamplingBilinear2d', 'PairwiseDistance',
     'AdaptiveMaxPool1d', 'AdaptiveMaxPool2d', 'AdaptiveMaxPool3d', 'AdaptiveAvgPool1d', 'AdaptiveAvgPool2d',
45 changes: 45 additions & 0 deletions torch/nn/modules/dropout.py
@@ -53,6 +53,45 @@ def forward(self, input):
         return F.dropout(input, self.p, self.training, self.inplace)


+class FeatureDropout(_DropoutNd):
+    r"""Randomly zeroes whole channels of the input tensor.
+    The channels to zero out are randomized on every forward call.
+
+    Usually the input comes from :class:`nn.Conv2d` modules.
+
+    As described in the paper
+    `Efficient Object Localization Using Convolutional Networks`_ ,
+    if adjacent pixels within feature maps are strongly correlated
+    (as is normally the case in early convolution layers) then i.i.d. dropout
+    will not regularize the activations and will otherwise just result
+    in an effective learning rate decrease.
+
+    In this case, :func:`nn.FeatureDropout` will help promote independence between
+    feature maps and should be used instead.
+
+    Args:
+        p (float, optional): probability of an element to be zeroed.
+        inplace (bool, optional): If set to ``True``, will do this operation
+            in-place
+
+    Shape:
+        - Input: :math:`(N, C, *)`, where :math:`*` represents an arbitrary
+          number of extra dimensions
+        - Output: :math:`(N, C, *)` (same shape as input)
+
+    Examples::
+
+        >>> m = nn.FeatureDropout(p=0.2)
+        >>> input = torch.randn(20, 16, 32, 32)
+        >>> output = m(input)
+
+    .. _Efficient Object Localization Using Convolutional Networks:
+       http://arxiv.org/abs/1411.4280
+    """
+
+    def forward(self, input):
+        return F.feature_dropout(input, self.p, self.training, self.inplace)

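Since FeatureDropout keys only on the first two dimensions, one module covers the cases that previously needed separate classes; a sketch:

    import torch
    import torch.nn as nn

    m = nn.FeatureDropout(p=0.2)
    # One Bernoulli draw per (sample, channel) pair, for any input with
    # at least 3 dimensions:
    out4d = m(torch.randn(20, 16, 32, 32))     # replaces nn.Dropout2d
    out5d = m(torch.randn(20, 16, 4, 32, 32))  # replaces nn.Dropout3d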
 class Dropout2d(_DropoutNd):
     r"""Randomly zeroes whole channels of the input tensor.
     The channels to zero-out are randomized on every forward call.
@@ -69,6 +108,9 @@ class Dropout2d(_DropoutNd):
     In this case, :func:`nn.Dropout2d` will help promote independence between
     feature maps and should be used instead.

+    .. warning::
+        This module is deprecated in favor of :class:`torch.nn.FeatureDropout`.
+
     Args:
         p (float, optional): probability of an element to be zero-ed.
         inplace (bool, optional): If set to ``True``, will do this operation
@@ -108,6 +150,9 @@ class Dropout3d(_DropoutNd):
     In this case, :func:`nn.Dropout3d` will help promote independence between
     feature maps and should be used instead.

+    .. warning::
+        This module is deprecated in favor of :class:`torch.nn.FeatureDropout`.
+
     Args:
         p (float, optional): probability of an element to be zeroed.
         inplace (bool, optional): If set to ``True``, will do this operation
11 changes: 11 additions & 0 deletions torch/onnx/symbolic.py
@@ -580,6 +580,17 @@ def batch_norm(g, input, weight, bias, running_mean, running_var, training, mome
     return res


+def dropout(g, input, p, featurewise, train, generator):
+    # See Note [Export inplace]
+    if not featurewise:
+        r, _ = g.op("Dropout", input, ratio_f=p, is_test_i=not train, outputs=2)
+        return r
+    elif train:
+        return _unimplemented("FeatureDropout", "training mode")
+    # NB: In inference mode, FeatureDropout is exported as an identity op.
+    return input

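In inference mode the featurewise branch exports as an identity, so a model containing FeatureDropout should export cleanly once it is in eval(); exporting in training mode hits the _unimplemented path instead. A hedged sketch (model and file name illustrative):

    import torch
    import torch.nn as nn

    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.FeatureDropout(p=0.5))
    model.eval()  # featurewise dropout becomes a no-op in the exported graph
    dummy = torch.randn(1, 3, 16, 16)
    torch.onnx.export(model, dummy, "dropout_model.onnx")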

 def unfold(g, input, dimension, size, step):
     return g.op("ATen", input, operator_s="unfold", dimension_i=dimension, size_i=size, step_i=step)
