
Commit 5c695e3

tippisum authored and facebook-github-bot committed
Implement 2D and 3D alpha_dropout (#9073)
Summary: Implements per-channel (2D/3D) alpha dropout. It also adds the corresponding autograd function classes and unifies the code paths of dropout and alpha_dropout.

Pull Request resolved: #9073
Differential Revision: D8727008
Pulled By: ezyang
fbshipit-source-id: 9d509f9c5db4e98f7b698cdfc4443505a4d2b331
1 parent 6116954 commit 5c695e3
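
For orientation, a minimal usage sketch of the API this commit adds (shapes and `p` values are illustrative, not taken from the diff):

```python
import torch
import torch.nn as nn

# Element-wise alpha dropout: preserves the mean and std of
# SELU-normalized inputs (reimplemented below as a function class).
m = nn.AlphaDropout(p=0.2)
x = torch.randn(16, 1000)   # roughly zero mean, unit std
y = m(x)                    # in training mode, y keeps mean ~0 and std ~1

# Per-channel ("feature") alpha dropout, new in this commit: entire
# channels of a (N, C, ...) tensor are dropped together, in the spirit
# of Dropout2d/Dropout3d.
fm = nn.FeatureAlphaDropout(p=0.2)
y3d = fm(torch.randn(4, 32, 8, 16, 16))   # e.g. (N, C, D, H, W)
```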

File tree

5 files changed: +102 −50 lines

test/test_nn.py

Lines changed: 23 additions & 12 deletions
```diff
@@ -733,6 +733,20 @@ def _test_dropout(self, cls, input):
         module.__repr__()
         str(module)
 
+    def _test_alpha_dropout(self, cls, input):
+        mean = input.mean()
+        std = input.std()
+
+        for p in [0.2, 0.5, 0.8]:
+            module = cls(p)
+            input_var = torch.tensor(input, requires_grad=True)
+            output = module(input_var)
+            # output mean should be close to input mean
+            self.assertLess(abs(output.data.mean() - mean), 0.1)
+            # output std should be close to input std
+            self.assertLess(abs(output.data.std() - std), 0.1)
+            output.backward(input)
+
     def test_parameters(self):
         def num_params(module):
             return len(list(module.parameters()))
@@ -2086,19 +2100,16 @@ def test_Dropout3d(self):
     def test_AlphaDropout(self):
         # generate random tensor with zero mean and unit std
         input = torch.randn(5000)
+        self._test_alpha_dropout(nn.AlphaDropout, input)
 
-        mean = input.mean()
-        std = input.std()
-
-        for p in [0.2, 0.5, 0.8]:
-            module = nn.AlphaDropout(p)
-            input_var = torch.tensor(input, requires_grad=True)
-            output = module(input_var)
-            # output mean should be close to input mean
-            self.assertLess(abs(output.data.mean() - mean), 0.1)
-            # output std should be close to input std
-            self.assertLess(abs(output.data.std() - std), 0.1)
-            output.backward(input)
+    def test_FeatureAlphaDropout(self):
+        b = random.randint(1, 5)
+        w = random.randint(1, 5)
+        h = random.randint(1, 5)
+        d = random.randint(1, 2)
+        num_features = 1000
+        input = torch.randn(num_features, b, d, w, h)
+        self._test_alpha_dropout(nn.FeatureAlphaDropout, input)
 
     def _test_InstanceNorm_general(self, cls, input, device="cpu", dtype=torch.float):
         # default case track_running_stats=False
```

torch/nn/_functions/dropout.py

Lines changed: 62 additions & 0 deletions
```diff
@@ -66,3 +66,65 @@ def symbolic(g, input, p=0.5, train=False, inplace=False):
     def _make_noise(input):
         return input.new().resize_(input.size(0), input.size(1),
                                    *repeat(1, input.dim() - 2))
+
+
+class AlphaDropout(Dropout):
+
+    @staticmethod
+    def symbolic(g, input, p=0.5, train=False, inplace=False):
+        # See Note [Export inplace]
+        # NB: In inference mode, AlphaDropout is exported as an identity op.
+        from torch.onnx.symbolic import _unimplemented
+        if train:
+            return _unimplemented("AlphaDropout", "training mode")
+        return input
+
+    @classmethod
+    def forward(cls, ctx, input, p=0.5, train=False, inplace=False):
+        if p < 0 or p > 1:
+            raise ValueError("dropout probability has to be between 0 and 1, "
+                             "but got {}".format(p))
+        ctx.p = p
+        ctx.train = train
+        ctx.inplace = inplace
+
+        if ctx.p == 0 or not ctx.train:
+            return input
+
+        if ctx.inplace:
+            ctx.mark_dirty(input)
+            output = input
+        else:
+            output = input.clone()
+
+        ctx.noise = cls._make_noise(input)
+        if ctx.p == 1:
+            a = 0
+            b = ctx.noise
+        else:
+            ctx.noise.bernoulli_(1 - ctx.p)
+            alpha = 1.7580993408473766
+            a = ((alpha ** 2 * ctx.p + 1) * (1 - ctx.p)) ** (-0.5)
+            b = ctx.noise.add(-1).mul_(alpha * a).add_(alpha * a * ctx.p)
+        ctx.noise = ctx.noise.mul_(a).expand_as(input)
+        b = b.expand_as(input)
+        output.mul_(ctx.noise).add_(b)
+
+        return output
+
+
+class FeatureAlphaDropout(AlphaDropout):
+
+    @staticmethod
+    def symbolic(g, input, p=0.5, train=False, inplace=False):
+        # See Note [Export inplace]
+        # NB: In inference mode, FeatureAlphaDropout is exported as an identity op.
+        from torch.onnx.symbolic import _unimplemented
+        if train:
+            return _unimplemented("FeatureAlphaDropout", "training mode")
+        return input
+
+    @staticmethod
+    def _make_noise(input):
+        return input.new().resize_(input.size(0), input.size(1),
+                                   *repeat(1, input.dim() - 2))
```
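
The constants above come from the Self-Normalizing Neural Networks paper: a dropped unit is pushed to the SELU negative-saturation value −α′, where α′ = λ·α ≈ 1.0507 · 1.6733 = 1.7580993408473766, and the affine correction a·x + b with a = ((α′²·p + 1)(1 − p))^(−1/2) and b = α′·a·p restores zero mean and unit variance. A quick Monte Carlo sanity check of those formulas (a standalone sketch, not part of the diff):

```python
import torch

p, alpha = 0.2, 1.7580993408473766
a = ((alpha ** 2 * p + 1) * (1 - p)) ** -0.5
b = alpha * a * p   # shift for kept units; dropped units land at alpha * a * (p - 1)

x = torch.randn(1000000)                            # zero mean, unit variance
keep = torch.bernoulli(torch.full_like(x, 1 - p))   # 1 = keep, 0 = drop
y = a * torch.where(keep > 0, x, torch.full_like(x, -alpha)) + b
print(y.mean().item(), y.std().item())              # both close to 0 and 1
```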

torch/nn/functional.py

Lines changed: 6 additions & 25 deletions
```diff
@@ -595,35 +595,12 @@ def dropout(input, p=0.5, training=False, inplace=False):
     return _functions.dropout.Dropout.apply(input, p, training, inplace)
 
 
-def alpha_dropout(input, p=0.5, training=False):
+def alpha_dropout(input, p=0.5, training=False, inplace=False):
     r"""Applies alpha dropout to the input.
 
     See :class:`~torch.nn.AlphaDropout` for details.
-
-    Args:
-        p (float, optional): the drop probability. Default: 0.5
-        training (bool, optional): switch between training and evaluation mode. Default: ``False``
     """
-    if p < 0 or p > 1:
-        raise ValueError("dropout probability has to be between 0 and 1, "
-                         "but got {}".format(p))
-
-    if p == 0 or not training:
-        return input
-
-    alpha = -1.7580993408473766
-    keep_prob = 1 - p
-    # TODO avoid casting to byte after resize
-    noise = input.data.new().resize_(input.size())
-    noise.bernoulli_(p)
-    noise = noise.byte()
-
-    output = input.masked_fill(noise, alpha)
-
-    a = (keep_prob + alpha ** 2 * keep_prob * (1 - keep_prob)) ** (-0.5)
-    b = -a * alpha * (1 - keep_prob)
-
-    return output.mul_(a).add_(b)
+    return _functions.dropout.AlphaDropout.apply(input, p, training, inplace)
 
 
 def dropout2d(input, p=0.5, training=False, inplace=False):
@@ -634,6 +611,10 @@ def dropout3d(input, p=0.5, training=False, inplace=False):
     return _functions.dropout.FeatureDropout.apply(input, p, training, inplace)
 
 
+def feature_alpha_dropout(input, p=0.5, training=False, inplace=False):
+    return _functions.dropout.FeatureAlphaDropout.apply(input, p, training, inplace)
+
+
 def threshold(input, threshold, value, inplace=False):
     r"""Thresholds each element of the input Tensor.
```
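
To see what the new `feature_alpha_dropout` entry point does, a small sketch (shapes and `p` are illustrative): a dropped channel is replaced wholesale by a single constant, while a kept channel remains an affine map of the input.

```python
import torch
import torch.nn.functional as F

x = torch.randn(2, 8, 4, 4)   # (N, C, H, W)
y = F.feature_alpha_dropout(x, p=0.5, training=True)

# Dropped channels collapse to one constant (zero spread), kept channels
# become a * x + b and keep their spread, so per-channel std separates them.
print(y.view(2, 8, -1).std(dim=-1))
```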

torch/nn/modules/__init__.py

Lines changed: 3 additions & 2 deletions
```diff
@@ -16,7 +16,7 @@
 from .batchnorm import BatchNorm1d, BatchNorm2d, BatchNorm3d
 from .instancenorm import InstanceNorm1d, InstanceNorm2d, InstanceNorm3d
 from .normalization import LocalResponseNorm, CrossMapLRN2d, LayerNorm, GroupNorm
-from .dropout import Dropout, Dropout2d, Dropout3d, AlphaDropout
+from .dropout import Dropout, Dropout2d, Dropout3d, AlphaDropout, FeatureAlphaDropout
 from .padding import ReflectionPad1d, ReflectionPad2d, ReplicationPad1d, ReplicationPad2d, \
     ReplicationPad3d, ZeroPad2d, ConstantPad1d, ConstantPad2d, ConstantPad3d
 from .sparse import Embedding, EmbeddingBag
@@ -40,7 +40,8 @@
     'ParameterList', 'ParameterDict', 'AvgPool1d', 'AvgPool2d', 'AvgPool3d', 'MaxPool1d', 'MaxPool2d',
     'MaxPool3d', 'MaxUnpool1d', 'MaxUnpool2d', 'MaxUnpool3d', 'FractionalMaxPool2d',
     'LPPool1d', 'LPPool2d', 'LocalResponseNorm', 'BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d', 'InstanceNorm1d',
-    'InstanceNorm2d', 'InstanceNorm3d', 'LayerNorm', 'GroupNorm', 'Dropout', 'Dropout2d', 'Dropout3d', 'AlphaDropout',
+    'InstanceNorm2d', 'InstanceNorm3d', 'LayerNorm', 'GroupNorm',
+    'Dropout', 'Dropout2d', 'Dropout3d', 'AlphaDropout', 'FeatureAlphaDropout',
     'ReflectionPad1d', 'ReflectionPad2d', 'ReplicationPad2d', 'ReplicationPad1d', 'ReplicationPad3d',
     'CrossMapLRN2d', 'Embedding', 'EmbeddingBag', 'RNNBase', 'RNN', 'LSTM', 'GRU', 'RNNCell', 'LSTMCell', 'GRUCell',
     'PixelShuffle', 'Upsample', 'UpsamplingNearest2d', 'UpsamplingBilinear2d', 'PairwiseDistance',
```

torch/nn/modules/dropout.py

Lines changed: 8 additions & 11 deletions
```diff
@@ -131,7 +131,7 @@ def forward(self, input):
         return F.dropout3d(input, self.p, self.training, self.inplace)
 
 
-class AlphaDropout(Module):
+class AlphaDropout(_DropoutNd):
     r"""Applies Alpha Dropout over the input.
 
     Alpha Dropout is a type of Dropout that maintains the self-normalizing
@@ -153,6 +153,8 @@ class AlphaDropout(Module):
 
     Args:
         p (float): probability of an element to be dropped. Default: 0.5
+        inplace (bool, optional): If set to ``True``, will do this operation
+            in-place
 
     Shape:
         - Input: `Any`. Input can be of any shape
@@ -167,16 +169,11 @@ class AlphaDropout(Module):
     .. _Self-Normalizing Neural Networks: https://arxiv.org/abs/1706.02515
     """
 
-    def __init__(self, p=0.5):
-        super(AlphaDropout, self).__init__()
-        if p < 0 or p > 1:
-            raise ValueError("dropout probability has to be between 0 and 1, "
-                             "but got {}".format(p))
-        self.p = p
-
     def forward(self, input):
         return F.alpha_dropout(input, self.p, self.training)
 
-    def __repr__(self):
-        return self.__class__.__name__ + '(' \
-            + 'p=' + str(self.p) + ')'
+
+class FeatureAlphaDropout(_DropoutNd):
+
+    def forward(self, input):
+        return F.feature_alpha_dropout(input, self.p, self.training)
```
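
Since the docstring ties alpha dropout to SELU and self-normalizing networks, a brief sketch of the intended pairing (the architecture is illustrative, not from the diff):

```python
import torch
import torch.nn as nn

# AlphaDropout is designed to follow SELU activations: both preserve the
# zero-mean / unit-variance fixed point that self-normalizing nets rely on.
snn = nn.Sequential(
    nn.Linear(784, 256),
    nn.SELU(),
    nn.AlphaDropout(p=0.1),
    nn.Linear(256, 10),
)
out = snn(torch.randn(32, 784))
```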
