6 changes: 6 additions & 0 deletions docs/source/nn.rst
@@ -383,6 +383,12 @@ Non-linear activations (other)
.. autoclass:: LogSoftmax
:members:

:hidden:`AdaptiveLogSoftmaxWithLoss`
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autoclass:: AdaptiveLogSoftmaxWithLoss
:members:

Normalization layers
----------------------------------

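For orientation, a minimal usage sketch of the new module, based on the API exercised by the tests below; the constructor values, batch size, and variable names here are illustrative, not taken from the patch:

import torch
import torch.nn as nn

# 1000-way classifier over 64-dim features: a shortlist of the 100 most frequent
# classes in the head, plus two tail clusters covering classes 100-499 and 500-999
asfm = nn.AdaptiveLogSoftmaxWithLoss(64, 1000, cutoffs=[100, 500], div_value=4.)

x = torch.randn(128, 64)             # (batch, in_features)
y = torch.randint(0, 1000, (128,))   # target class indices

out, loss = asfm(x, y)               # out: log-probability of each sample's target, shape (128,)
loss.backward()

log_probs = asfm.log_prob(x)         # full (128, 1000) log-probability matrix
preds = asfm.predict(x)              # per-sample argmax, equivalent to log_prob(x).argmax(dim=1)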
112 changes: 104 additions & 8 deletions test/test_nn.py
@@ -494,14 +494,10 @@ def _backward_criterion(self, criterion, input, target, gradOutput=None):
return input.grad.data

def _zero_grad_parameters(self, module):
if hasattr(module, 'weight') and module.weight is not None:
if module.weight.grad is not None:
module.weight.grad.data.zero_()
module.weight.grad.detach_()
if hasattr(module, 'bias') and module.bias is not None:
if module.bias.grad is not None:
module.bias.grad.data.zero_()
module.bias.grad.detach_()
for p in module.parameters():
if p.grad is not None:
p.grad.data.zero_()
p.grad.detach_()

def _get_parameters(self, module):
params = []
@@ -4927,6 +4923,93 @@ def test_grad_conv3d_input(self):
def test_grad_conv3d_weight(self):
self.run_grad_conv_test(F.conv3d, F.grad.conv3d_weight, 3, 'weight')

def test_adaptive_log_softmax(self):
# args validation
with self.assertRaises(ValueError):
_ = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 15, 15], div_value=2.)

with self.assertRaises(ValueError):
_ = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 15, 10], div_value=2.)

with self.assertRaises(ValueError):
_ = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 25], div_value=2.)

# input shapes
with self.assertRaisesRegex(RuntimeError, "Input and target should have the same size"):
asfm = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 15], div_value=2.)
x = torch.randn(2, 16)
y = torch.tensor([0, 5, 10])
asfm(x, y)

# out-of-bound targets
with self.assertRaisesRegex(RuntimeError, "Target values should be in"):
asfm = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 15], div_value=2.)
x = torch.randn(2, 16)
y = torch.tensor([0, 20])
asfm(x, y)

# cluster sizes
asfm = nn.AdaptiveLogSoftmaxWithLoss(16, 20, [5, 10, 15], div_value=2.)
x = torch.randn(2, 16)
y = torch.tensor([0, 17])

self.assertEqual(asfm.head.weight.size(), (5 + 3, 16)) # 5 targets in head, 3 clusters, dimensionality 16
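# with div_value=2., each successive tail cluster projects to half the previous size: 16 -> 8 -> 4 -> 2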
self.assertEqual(asfm.tail[0][1].weight.size(), (5, 8)) # 5 targets in this cluster, dimensionality 8
self.assertEqual(asfm.tail[1][1].weight.size(), (5, 4))
self.assertEqual(asfm.tail[2][1].weight.size(), (5, 2))

self.assertEqual(asfm(x, y).output.size(), (2, ))

# log_prob actually returns log-probabilities (rows of exp sum to 1)
asfm = nn.AdaptiveLogSoftmaxWithLoss(8, 4, [2], div_value=2.)
x = torch.randn(4, 8)
logprob_out = asfm.log_prob(x)

self.assertEqual(torch.exp(logprob_out).data.sum(1), torch.ones(4))

# forward returns the same values as log_prob
for v in [0, 1, 2, 3]:
y = torch.full((4,), v, dtype=torch.long)
out, loss = asfm(x, y)

self.assertEqual(out, logprob_out.gather(1, y.unsqueeze(1)).squeeze())
self.assertEqual(loss, F.nll_loss(logprob_out, y))

# predict
x = torch.randn(64, 8).abs_()

# argmax in shortlist
asfm = nn.AdaptiveLogSoftmaxWithLoss(8, 10, [4, 8], div_value=2., head_bias=True)
asfm.head.weight.data.abs_()
asfm.head.bias.data.abs_()
asfm.head.weight.data[asfm.shortlist_size:, :].zero_()
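# the head rows that score the tail clusters are zeroed, so with this non-negative
# input the largest head score is expected to come from a shortlist class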

out = asfm.predict(x)
self.assertEqual(out, asfm.log_prob(x).argmax(dim=1))

# argmax outside of shortlist
asfm = nn.AdaptiveLogSoftmaxWithLoss(8, 10, [4, 8], div_value=2., head_bias=True)
asfm.head.weight.data.abs_()
asfm.head.bias.data.abs_()
asfm.head.weight.data[:asfm.shortlist_size, :].zero_()

out = asfm.predict(x)
self.assertEqual(out, asfm.log_prob(x).argmax(dim=1))

# half of the argmax in shortlist, half in clusters
asfm = nn.AdaptiveLogSoftmaxWithLoss(8, 10, [4, 8], div_value=2., head_bias=True)
asfm.head.weight.data.abs_()
asfm.head.bias.data.abs_()

x[:32, :asfm.shortlist_size].zero_()
x[32:, asfm.shortlist_size:].zero_()

asfm.head.weight.data[:asfm.shortlist_size, asfm.shortlist_size:].zero_()
asfm.head.weight.data[asfm.shortlist_size:, :asfm.shortlist_size].zero_()

out = asfm.predict(x)
self.assertEqual(out, asfm.log_prob(x).argmax(dim=1))


class TestNNInit(TestCase):
def setUp(self):
@@ -7302,5 +7385,18 @@ def forward(self, input):
fullname='MaxUnpool3d_net',
check_gradgrad=False,))


# NewModuleTest drives modules with a single input tensor, so this wrapper
# supplies a fixed target and returns only the log-probability output
class _AdaptiveLogSoftmaxWithLoss(nn.AdaptiveLogSoftmaxWithLoss):
def __call__(self, input):
t = torch.tensor([0, 1, 4, 8]).to(input.device)
return nn.AdaptiveLogSoftmaxWithLoss.__call__(self, input, t).output


add_test(NewModuleTest(
constructor=lambda: _AdaptiveLogSoftmaxWithLoss(16, 10, [2, 6]),
input_size=(4, 16),
fullname='AdaptiveLogSoftmax'))


if __name__ == '__main__':
run_tests()
2 changes: 2 additions & 0 deletions torch/nn/modules/__init__.py
@@ -26,6 +26,7 @@
from .upsampling import UpsamplingNearest2d, UpsamplingBilinear2d, Upsample
from .distance import PairwiseDistance, CosineSimilarity
from .fold import Fold, Unfold
from .adaptive import AdaptiveLogSoftmaxWithLoss

__all__ = [
'Module', 'Linear', 'Conv1d', 'Conv2d', 'Conv3d', 'ConvTranspose1d',
@@ -46,4 +47,5 @@
'AdaptiveMaxPool1d', 'AdaptiveMaxPool2d', 'AdaptiveMaxPool3d', 'AdaptiveAvgPool1d', 'AdaptiveAvgPool2d',
'AdaptiveAvgPool3d', 'TripletMarginLoss', 'ZeroPad2d', 'ConstantPad1d', 'ConstantPad2d',
'ConstantPad3d', 'Bilinear', 'CosineSimilarity', 'Unfold', 'Fold',
'AdaptiveLogSoftmaxWithLoss',
]