8 changes: 4 additions & 4 deletions aten/src/ATen/cudnn/README.md
@@ -1,4 +1,4 @@
All files living in this directory are written with the assumption that cuDNN is available,
which means that this code is not guarded by `#if AT_CUDNN_ENABLED()`. Therefore, whenever
you need to use definitions from here, please guard the `#include <ATen/cudnn/*.h>` and
definition usages with the `#if AT_CUDNN_ENABLED()` macro, e.g. [BatchNorm.cpp](native/cudnn/BatchNorm.cpp).
All files living in this directory are written with the assumption that cuDNN is available,
which means that this code is not guarded by `#if AT_CUDNN_ENABLED()`. Therefore, whenever
you need to use definitions from here, please guard the `#include <ATen/cudnn/*.h>` and
definition usages with the `#if AT_CUDNN_ENABLED()` macro, e.g. [native/cudnn/BatchNorm.cpp](native/cudnn/BatchNorm.cpp).
(changes to a second file, the ATen native normalization source; per-file header missing)
@@ -6,6 +6,7 @@
#include "THC/THC.h"
#include "ATen/cudnn/cudnn-wrapper.h"
#endif
#include <vector>

namespace at { namespace native {

@@ -69,4 +70,58 @@ Tensor batch_norm(
running_mean, running_var, training, momentum, eps);
}

Tensor group_norm(const Tensor& input, int64_t num_groups,
const Tensor& weight /* optional */, const Tensor& bias /* optional */,
double eps) {

auto input_shape = input.sizes();
int64_t b = input.size(0);
int64_t c = input.size(1);

if (c % num_groups != 0) {
std::stringstream ss;
ss << "Expected number of channels in input to be divisible by "
<< "num_groups, but got " << input.sizes() << " input and num_groups="
<< num_groups;
throw std::runtime_error(ss.str());
}

if (weight.defined() && (weight.dim() != 1 || weight.numel() != c)) {
std::stringstream ss;
ss << "Expected weight to be a vector of size equal to the number of "
<< "channels in input, but got " << weight.sizes() << " weight and "
<< input.sizes() << " input";
throw std::runtime_error(ss.str());
}

if (bias.defined() && (bias.dim() != 1 || bias.numel() != c)) {
std::stringstream ss;
ss << "Expected bias to be a vector of size equal to the number of "
<< "channels in input, but got " << bias.sizes() << " bias and "
<< input.sizes() << " input";
throw std::runtime_error(ss.str());
}

// Apply group norm
auto input_reshaped = input.contiguous().view({1, b * num_groups, -1});

auto out = at::batch_norm(input_reshaped, {}, {}, {}, {}, true, 0, eps, true);
out = out.view(input_shape);

if (!weight.defined() && !bias.defined()) {
return out;
}

std::vector<int64_t> affine_param_shape(input.dim(), 1);
affine_param_shape[1] = c;

if (weight.defined() && bias.defined()) {
return bias.view(affine_param_shape).addcmul(out, weight.view(affine_param_shape), 1);
} else if (weight.defined()) {
return out.mul(weight.view(affine_param_shape));
} else {
return out.add(bias.view(affine_param_shape));
}
}

}} // at::native
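To make the reshape trick above easier to follow: the implementation folds the group dimension into the batch dimension, runs `batch_norm` in training mode with no running statistics so that each of the `b * num_groups` slices is whitened independently, and then applies the optional per-channel affine parameters. Below is a minimal Python reference of the same computation using plain tensor ops; the helper name `group_norm_reference` is ours and not part of the PR.

```python
import torch

def group_norm_reference(x, num_groups, weight=None, bias=None, eps=1e-5):
    # x has shape (N, C, *); C must be divisible by num_groups.
    n, c = x.shape[0], x.shape[1]
    assert c % num_groups == 0, "channels must be divisible by num_groups"

    # Whiten each of the N * num_groups groups independently, which is what
    # the view into (1, b * num_groups, -1) plus training-mode batch_norm does.
    grouped = x.contiguous().view(n, num_groups, -1)
    mean = grouped.mean(-1, keepdim=True)
    var = grouped.var(-1, unbiased=False, keepdim=True)
    out = ((grouped - mean) / (var + eps).sqrt()).view(x.shape)

    # Optional per-channel affine transform, broadcast over all other dims.
    if weight is None and bias is None:
        return out
    affine_shape = [1] * x.dim()
    affine_shape[1] = c
    if weight is not None:
        out = out * weight.view(affine_shape)
    if bias is not None:
        out = out + bias.view(affine_shape)
    return out
```

Up to floating-point differences this should agree with the ATen path; it documents the algorithm rather than replacing it.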
3 changes: 3 additions & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -291,6 +291,9 @@
- func: ger_out(Tensor result, Tensor self, Tensor vec2) -> Tensor
variants: function

- func: group_norm(Tensor input, int64_t num_groups, Tensor? weight={}, Tensor? bias={}, double eps=1e-5) -> Tensor
variants: function

- func: index(Tensor self, TensorList indices) -> Tensor
# NB: This function is special-cased in tools/autograd/gen_variable_type.py

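Assuming the usual code generation for `native_functions.yaml` entries, a declaration with `variants: function` makes the native implementation above callable as `torch.group_norm`, which is what the Python wrapper in `torch/nn/functional.py` later in this diff relies on. A small illustrative call, with arbitrary shapes:

```python
import torch

x = torch.randn(2, 6, 4)        # (N, C, L) with C divisible by num_groups
weight = torch.ones(6)          # optional per-channel scale
bias = torch.zeros(6)           # optional per-channel shift

# Mirrors the call made by torch.nn.functional.group_norm further down.
y = torch.group_norm(x, 3, weight, bias, 1e-5)
y_plain = torch.group_norm(x, 3)  # weight, bias and eps fall back to their defaults
```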
116 changes: 113 additions & 3 deletions test/test_nn.py
@@ -1732,7 +1732,7 @@ def test_InstanceNorm3d_general_cuda(self):
def _test_LayerNorm_general(self, type):
for i in range(2, 6):
shape = torch.LongTensor(i).random_(3, 6).tolist()
x = Variable(type(*shape).uniform_(0, 10))
x = type(*shape).uniform_(0, 10)

normalized_ndim = random.randint(1, i - 1) # inclusive
normalized_shape = shape[-normalized_ndim:]
unnormalized_shape = shape[:-normalized_ndim]
@@ -1779,8 +1779,7 @@ def _test_LayerNorm_general(self, type):
self.assertEqual(old_running_var, ln.running_var)

def _test_LayerNorm_cuda_half(self):
# just THNN, LayerNorm has no cuDNN path
input = Variable(torch.rand(2, 3, 3, 2).cuda().half().random_(1, 10), requires_grad=True)
input = torch.zeros(2, 3, 3, 2, requires_grad=True).cuda().half().random_(1, 10)

m = nn.LayerNorm([3, 2]).cuda().half()
output = m(input)
output.sum().backward()
@@ -1794,6 +1793,69 @@ def test_LayerNorm_general_cuda(self):
self._test_LayerNorm_general(torch.cuda.FloatTensor)
self._test_LayerNorm_cuda_half()

def _test_GroupNorm_general(self, type):
good_shape_g = {
(1, 2, 3, 4): 2,
(2, 3, 10): 3,
(3, 1, 1, 1, 2): 1,
(2, 6, 4, 2, 2): 3,
}
for shape, g in good_shape_g.items():
x = type(*shape).uniform_(0, 10)
b = shape[0]
c = shape[1]

# test that GN normalizes to mean 0 and stddev 1
gn = nn.GroupNorm(g, c, eps=0).type(type)
gn.weight.data.fill_(1)
gn.bias.data.fill_(0)
output = gn(x)
out_reshaped = output.view(b, g, -1)
mean = out_reshaped.mean(-1)
var = out_reshaped.var(-1, unbiased=False)
self.assertAlmostEqual(torch.abs(mean).mean(), 0, delta=1e-5)
self.assertAlmostEqual(torch.abs(var).mean(), 1, delta=1e-5)

# test that GN applies weight and bias correctly
scale = type(c).uniform_(0.2, 2)
bias = type(c).uniform_(0.2, 2)
gn.weight.data.copy_(scale)
gn.bias.data.copy_(bias)
output = gn(x)
out_reshaped = output.view(b, c, -1)
out_normed = (out_reshaped - bias.view(c, 1)) / scale.view(c, 1)
out_normed_reshaped = out_normed.view(b, g, -1)
mean = out_normed_reshaped.mean(-1)
var = out_normed_reshaped.var(-1, unbiased=False)
self.assertAlmostEqual(torch.abs(mean).mean(), 0, delta=1e-5)
self.assertAlmostEqual(torch.abs(var).mean(), 1, delta=1e-5)

bad_shape_g = {
(1, 2, 3, 4): 3,
(2, 3, 10): 2,
(3, 1, 1, 1, 2): 10,
(2, 6, 4, 2, 2): 4,
}
for shape, g in bad_shape_g.items():
gn = nn.GroupNorm(g, shape[1])
input = type(*shape).uniform_(0, 10)
self.assertRaises(RuntimeError, lambda: gn(input))

def _test_GroupNorm_cuda_half(self):
input = torch.zeros(2, 4, 3, 2, requires_grad=True).cuda().half().random_(1, 10)
m = nn.GroupNorm(2, 4).cuda().half()
output = m(input)
output.sum().backward()
self.assertEqual(output.type(), input.type())

def test_GroupNorm_general(self):
self._test_GroupNorm_general(torch.FloatTensor)

@unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
def test_GroupNorm_general_cuda(self):
self._test_GroupNorm_general(torch.cuda.FloatTensor)
self._test_GroupNorm_cuda_half()

def test_pad(self):
inputs = Variable(torch.randn(1, 3, 4, 4), requires_grad=True)
_assertGradAndGradgradChecks(self, lambda x: F.pad(x, (1, 1, 1, 1)), (inputs,))
@@ -5880,6 +5942,54 @@ def multimarginloss_weights_no_reduce_test():
check_eval=True,
desc='3d_elementwise_affine_tracking_stats',
),
dict(
module_name='GroupNorm',
constructor_args=(3, 6, 1e-3),
input_size=(4, 6, 5),
cudnn=True,
check_eval=True,
desc='1d_affine',
),
dict(
module_name='GroupNorm',
constructor_args=(5, 5, 1e-3, False),
input_size=(4, 5, 5),
cudnn=True,
check_eval=True,
desc='1d_no_affine_IN', # this setting is equivalent with InstanceNorm
),
dict(
module_name='GroupNorm',
constructor_args=(1, 5, 1e-3, False),
input_size=(4, 5, 5),
cudnn=True,
check_eval=True,
desc='1d_no_affine_LN', # this setting is equivalent with LayerNorm
),
dict(
module_name='GroupNorm',
constructor_args=(3, 6, 1e-3),
input_size=(4, 6, 2, 3),
cudnn=True,
check_eval=True,
desc='2d_affine',
),
dict(
module_name='GroupNorm',
constructor_args=(3, 3, 1e-3, False),
input_size=(4, 3, 2, 3),
cudnn=True,
check_eval=True,
desc='2d_no_affine_IN', # this setting is equivalent with InstanceNorm
),
dict(
module_name='GroupNorm',
constructor_args=(1, 3, 1e-3, False),
input_size=(4, 3, 2, 3),
cudnn=True,
check_eval=True,
desc='2d_no_affine_LN', # this setting is equivalent with LayerNorm
),
dict(
module_name='Conv1d',
constructor_args=(4, 5, 3),
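As a usage note for the `GroupNorm` test entries above: the constructor arguments are `(num_groups, num_channels, eps, affine)`, and the `_IN` / `_LN` descriptions refer to the degenerate settings where each group holds a single channel (InstanceNorm-like) or all channels share one group (LayerNorm-like). A short sketch consistent with those configurations, with an input shape chosen to match the 2d cases:

```python
import torch
import torch.nn as nn

x = torch.randn(4, 6, 2, 3)                 # (N, C, H, W); C must be divisible by num_groups

gn = nn.GroupNorm(3, 6, eps=1e-3)           # 3 groups of 2 channels, learnable affine
out = gn(x)

# Degenerate settings exercised by the test entries above:
in_like = nn.GroupNorm(6, 6, affine=False)  # one channel per group, InstanceNorm-like
ln_like = nn.GroupNorm(1, 6, affine=False)  # a single group over C, H, W, LayerNorm-like
```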
26 changes: 22 additions & 4 deletions torch/nn/functional.py
@@ -1207,8 +1207,8 @@ def batch_norm(input, running_mean, running_var, weight=None, bias=None,
)


def instance_norm(input, running_mean, running_var, weight=None, bias=None,
use_input_stats=True, momentum=0.1, eps=1e-5):
def instance_norm(input, running_mean=None, running_var=None, weight=None,
bias=None, use_input_stats=True, momentum=0.1, eps=1e-5):
r"""Applies Instance Normalization for each channel in each data sample in a
batch.

@@ -1244,7 +1244,7 @@ def _instance_norm(input, running_mean=None, running_var=None, weight=None,
input_reshaped, running_mean, running_var, weight=weight, bias=bias,
training=use_input_stats, momentum=momentum, eps=eps)

# Reshape back
# Reshape and copy back
if running_mean is not None:
running_mean_orig.copy_(running_mean.view(b, c).mean(0, keepdim=False))
if running_var is not None:
@@ -1257,7 +1257,7 @@ def _instance_norm(input, running_mean=None, running_var=None, weight=None,
eps=eps)


def layer_norm(input, normalized_shape, running_mean, running_var,
def layer_norm(input, normalized_shape, running_mean=None, running_var=None,
weight=None, bias=None, use_input_stats=True,
momentum=0.1, eps=1e-5):
r"""Applies Layer Normalization for last certain number of dimensions.
@@ -1267,6 +1267,16 @@ def layer_norm(input, normalized_shape, running_mean, running_var,
if not use_input_stats and (running_mean is None or running_var is None):
raise ValueError('Expected running_mean and running_var to be not None when use_input_stats=False')

if weight is not None and weight.size() != normalized_shape:
raise ValueError('Expected weight to be of same shape as '
'normalized_shape, but got {} weight and '
'normalized_shape={}'.format(weight.size(), normalized_shape))

if bias is not None and bias.size() != normalized_shape:
raise ValueError('Expected bias to be of same shape as '
'normalized_shape, but got {} bias and '
'normalized_shape={}'.format(bias.size(), normalized_shape))

normalized_ndim = len(normalized_shape)
input_shape = input.size()

@@ -1309,6 +1319,14 @@ def layer_norm(input, normalized_shape, running_mean, running_var,
return out


def group_norm(input, num_groups, weight=None, bias=None, eps=1e-5):
r"""Applies Group Normalization for last certain number of dimensions.

See :class:`~torch.nn.GroupNorm` for details.
"""
return torch.group_norm(input, num_groups, weight, bias, eps)


def local_response_norm(input, size, alpha=1e-4, beta=0.75, k=1):
r"""Applies local response normalization over an input signal composed of
several input planes, where channels occupy the second dimension.
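Finally, a brief sketch of the functional entry point added above; when given, `weight` and `bias` are per-channel vectors of length C, matching the shape checks in the ATen implementation earlier in this diff:

```python
import torch
import torch.nn.functional as F

x = torch.randn(2, 6, 5)        # (N, C, L)
weight = torch.rand(6) + 0.5    # per-channel scale
bias = torch.rand(6)            # per-channel shift

y = F.group_norm(x, 3, weight=weight, bias=bias, eps=1e-5)
y_plain = F.group_norm(x, 3)    # normalization only, no affine parameters
```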
4 changes: 2 additions & 2 deletions torch/nn/modules/__init__.py
@@ -15,7 +15,7 @@
AdaptiveMaxPool2d, AdaptiveMaxPool3d, AdaptiveAvgPool1d, AdaptiveAvgPool2d, AdaptiveAvgPool3d
from .batchnorm import BatchNorm1d, BatchNorm2d, BatchNorm3d
from .instancenorm import InstanceNorm1d, InstanceNorm2d, InstanceNorm3d
from .normalization import LocalResponseNorm, CrossMapLRN2d, LayerNorm
from .normalization import LocalResponseNorm, CrossMapLRN2d, LayerNorm, GroupNorm
from .dropout import Dropout, Dropout2d, Dropout3d, AlphaDropout
from .padding import ReflectionPad1d, ReflectionPad2d, ReplicationPad1d, ReplicationPad2d, \
ReplicationPad3d, ZeroPad2d, ConstantPad1d, ConstantPad2d, ConstantPad3d
@@ -39,7 +39,7 @@
'ParameterList', 'AvgPool1d', 'AvgPool2d', 'AvgPool3d', 'MaxPool1d', 'MaxPool2d',
'MaxPool3d', 'MaxUnpool1d', 'MaxUnpool2d', 'MaxUnpool3d', 'FractionalMaxPool2d',
'LPPool1d', 'LPPool2d', 'LocalResponseNorm', 'BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d', 'InstanceNorm1d',
'InstanceNorm2d', 'InstanceNorm3d', 'LayerNorm', 'Dropout', 'Dropout2d', 'Dropout3d', 'AlphaDropout',
'InstanceNorm2d', 'InstanceNorm3d', 'LayerNorm', 'GroupNorm', 'Dropout', 'Dropout2d', 'Dropout3d', 'AlphaDropout',
'ReflectionPad1d', 'ReflectionPad2d', 'ReplicationPad2d', 'ReplicationPad1d', 'ReplicationPad3d',
'CrossMapLRN2d', 'Embedding', 'EmbeddingBag', 'RNNBase', 'RNN', 'LSTM', 'GRU', 'RNNCell', 'LSTMCell', 'GRUCell',
'PixelShuffle', 'Upsample', 'UpsamplingNearest2d', 'UpsamplingBilinear2d', 'PairwiseDistance',
9 changes: 6 additions & 3 deletions torch/nn/modules/batchnorm.py
@@ -105,7 +105,8 @@ class BatchNorm1d(_BatchNorm):
- Input: :math:`(N, C)` or :math:`(N, C, L)`
- Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input)

Examples:
Examples::

>>> # With Learnable Parameters
>>> m = nn.BatchNorm1d(100)
>>> # Without Learnable Parameters
@@ -174,7 +175,8 @@ class BatchNorm2d(_BatchNorm):
- Input: :math:`(N, C, H, W)`
- Output: :math:`(N, C, H, W)` (same shape as input)

Examples:
Examples::

>>> # With Learnable Parameters
>>> m = nn.BatchNorm2d(100)
>>> # Without Learnable Parameters
@@ -244,7 +246,8 @@ class BatchNorm3d(_BatchNorm):
- Input: :math:`(N, C, D, H, W)`
- Output: :math:`(N, C, D, H, W)` (same shape as input)

Examples:
Examples::

>>> # With Learnable Parameters
>>> m = nn.BatchNorm3d(100)
>>> # Without Learnable Parameters
9 changes: 6 additions & 3 deletions torch/nn/modules/instancenorm.py
@@ -64,7 +64,8 @@ class InstanceNorm1d(_InstanceNorm):
- Input: :math:`(N, C, L)`
- Output: :math:`(N, C, L)` (same shape as input)

Examples:
Examples::

>>> # Without Learnable Parameters
>>> m = nn.InstanceNorm1d(100)
>>> # With Learnable Parameters
@@ -127,7 +128,8 @@ class InstanceNorm2d(_InstanceNorm):
- Input: :math:`(N, C, H, W)`
- Output: :math:`(N, C, H, W)` (same shape as input)

Examples:
Examples::

>>> # Without Learnable Parameters
>>> m = nn.InstanceNorm2d(100)
>>> # With Learnable Parameters
@@ -190,7 +192,8 @@ class InstanceNorm3d(_InstanceNorm):
- Input: :math:`(N, C, D, H, W)`
- Output: :math:`(N, C, D, H, W)` (same shape as input)

Examples:
Examples::

>>> # Without Learnable Parameters
>>> m = nn.InstanceNorm3d(100)
>>> # With Learnable Parameters