41 changes: 36 additions & 5 deletions aten/src/ATen/native/TensorShape.cpp
@@ -44,11 +44,42 @@ Tensor diagflat(const Tensor& self, int64_t offset) {
return self.contiguous().view(-1).diag(offset);
}

Tensor diagonal(const Tensor& self, int64_t offset) {
if (self.dim() != 2) {
throw std::runtime_error("diagonal expects a 2-dimensional tensor");
}
return self.diag(offset);
Tensor diagonal(const Tensor& self, int64_t offset, int64_t dim1_, int64_t dim2_) {
int64_t nDims = self.dim();
int64_t dim1 = maybe_wrap_dim(dim1_, nDims);
int64_t dim2 = maybe_wrap_dim(dim2_, nDims);
AT_ASSERT(dim1 != dim2, "diagonal dimensions cannot be identical %zd, %zd", dim1_, dim2_);
int64_t diag_size;
int64_t storage_offset = self.storage_offset();
// compute storage offset and size for the diagonal
// for positive values of offset (above the main diagonal)
// "leftmost columns" (along dim2) are dropped
// for negative values of offset (below the main diagonal)
// "topmost rows" (along dim1) are dropped.
// Note that we invert +/- in the second case to absorb the
// negative sign of the offset.
if (offset >= 0) {
diag_size = std::min(self.size(dim1), self.size(dim2)-offset);
storage_offset += offset * self.stride(dim2);
} else {
diag_size = std::min(self.size(dim1)+offset, self.size(dim2));
storage_offset -= offset * self.stride(dim1);
}
AT_ASSERT(diag_size > 0, "invalid diagonal offset %zd", offset); // the diagonal offset was too large in magnitude

// construct new size and stride: we drop dim1 and dim2 (erasing the larger index first so the smaller index stays valid)
// the new ("joint") dimension is appended to the end of the shape / stride to match numpy semantics
auto sizes = std::vector<int64_t>(self.sizes());
auto strides = std::vector<int64_t>(self.strides());
sizes.erase(sizes.begin() + std::max(dim1, dim2));
strides.erase(strides.begin() + std::max(dim1, dim2));
sizes.erase(sizes.begin() + std::min(dim1, dim2));
strides.erase(strides.begin() + std::min(dim1, dim2));
sizes.push_back(diag_size);
strides.push_back(self.stride(dim1)+self.stride(dim2));

// return view with new parameters
return self.as_strided(sizes, strides, storage_offset);
}
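
For reference, the same view construction can be sketched in Python with as_strided. This is a sketch only: it assumes the dims are valid after wrapping, and the helper name diagonal_view is illustrative rather than part of the patch.

import torch

def diagonal_view(t, offset=0, dim1=0, dim2=1):
    dim1, dim2 = dim1 % t.dim(), dim2 % t.dim()        # wrap negative dims
    sizes, strides = list(t.size()), list(t.stride())
    storage_offset = t.storage_offset()
    if offset >= 0:
        # above the main diagonal: drop the leftmost columns along dim2
        diag_size = min(sizes[dim1], sizes[dim2] - offset)
        storage_offset += offset * strides[dim2]
    else:
        # below the main diagonal: drop the topmost rows along dim1
        diag_size = min(sizes[dim1] + offset, sizes[dim2])
        storage_offset -= offset * strides[dim1]
    # drop dim1/dim2 (larger index first) and append the joint diagonal dim
    for d in sorted((dim1, dim2), reverse=True):
        del sizes[d], strides[d]
    sizes.append(diag_size)
    strides.append(t.stride(dim1) + t.stride(dim2))
    return t.as_strided(sizes, strides, storage_offset)

x = torch.randn(4, 5, 6)
print(torch.equal(diagonal_view(x, 1, 0, 2), torch.diagonal(x, 1, 0, 2)))  # True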

Tensor expand(const Tensor& self, IntList size) {
3 changes: 1 addition & 2 deletions aten/src/ATen/native/native_functions.yaml
@@ -277,8 +277,7 @@
- func: diagflat(Tensor self, int64_t offset=0) -> Tensor
variants: function

- func: diagonal(Tensor self, int64_t offset=0) -> Tensor
variants: function
- func: diagonal(Tensor self, int64_t offset=0, int64_t dim1=0, int64_t dim2=1) -> Tensor

- func: dot(Tensor self, Tensor tensor) -> Tensor
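
Since dim1 and dim2 default to 0 and 1, the previous 2-D behavior of diagonal is preserved; a quick sanity sketch, not part of the patch:

import torch

x = torch.randn(3, 3)
# with the defaults, diagonal matches diag on a 2-D input
print(torch.equal(torch.diagonal(x, 1), torch.diag(x, 1)))  # True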

24 changes: 24 additions & 0 deletions test/test_autograd.py
@@ -2116,6 +2116,18 @@ def test_mul_out_result_requires_grad(self):
# we should throw an exception if the output requires grad
self.assertRaisesRegex(RuntimeError, 'out=', lambda: torch.mul(a, b, out=x))

def test_diagonal_derivative_requires_grad(self):
# test that the backward requires grad
# we do this because diagonal_backward uses in-place
# operations and gradgradcheck does not catch whether
# they work as expected (it will succeed even if
# the gradient has requires_grad == False)
a = torch.randn(5, 6, requires_grad=True)
b = torch.diagonal(a)**2
c = b.sum()
d, = torch.autograd.grad(c, a, retain_graph=True, create_graph=True)
self.assertTrue(d.requires_grad)
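
For reference, gradgradcheck can still be run against the new op; it verifies the second derivative numerically but, as noted in the comment above, it cannot detect a gradient that lost requires_grad. A sketch, assuming double-precision inputs:

import torch
from torch.autograd import gradgradcheck

x = torch.randn(5, 6, dtype=torch.double, requires_grad=True)
print(gradgradcheck(lambda t: torch.diagonal(t, 1), (x,)))  # True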


def index_variable(shape, max_indices):
if not isinstance(shape, tuple):
@@ -2630,6 +2642,18 @@ class dont_convert(tuple):
('diag', (M,), NO_ARGS, '1d'),
('diag', (M, M), (1,), '2d_1'),
('diag', (M, M), (2,), '2d_2'),
('diagonal', (M, M), NO_ARGS, '2d'),
('diagonal', (3, 5), NO_ARGS, '2d_wide'),
('diagonal', (3, 5), (2,), '2d_wide_pos'),
('diagonal', (3, 5), (-2,), '2d_wide_neg'),
('diagonal', (5, 3), NO_ARGS, '2d_tall'),
('diagonal', (5, 3), (2,), '2d_tall_pos'),
('diagonal', (5, 3), (-2,), '2d_tall_neg'),
('diagonal', (M, M), (1,), '2d_1'),
('diagonal', (M, M), (2,), '2d_2'),
('diagonal', (M, M, M), (1, 1, 2), '3d_1'),
('diagonal', (M, M, M), (2, 0, 1), '3d_2'),
('diagonal', (M, M, M), (-2, 0, 1), '3d_3'),
('tril', (M, M), NO_ARGS),
('tril', (M, M), (2,), 'idx'),
('triu', (M, M), NO_ARGS),
19 changes: 19 additions & 0 deletions test/test_torch.py
@@ -1909,6 +1909,25 @@ def _test_diagonal(self, dtype, device):
def test_diagonal(self):
self._test_diagonal(self, dtype=torch.float32, device='cpu')

@unittest.skipIf(not TEST_NUMPY, 'Numpy not found')
def test_diagonal_multidim(self):
x = torch.randn(10, 11, 12, 13)

xn = x.numpy()
for args in [(2, 2, 3),
(2,),
(-2, 1, 2),
(0, -2, -1)]:
result = torch.diagonal(x, *args)
expected = xn.diagonal(*args)
self.assertEqual(expected.shape, result.shape)
self.assertTrue(np.allclose(expected, result.numpy()))
# test non-contiguous
xp = x.permute(1, 2, 3, 0)
result = torch.diagonal(xp, 0, -2, -1)
expected = xp.numpy().diagonal(0, -2, -1)
self.assertEqual(expected.shape, result.shape)
self.assertTrue(np.allclose(expected, result.numpy()))

@staticmethod
def _test_diagflat(self, dtype, device):
# Basic sanity test
3 changes: 3 additions & 0 deletions tools/autograd/derivatives.yaml
@@ -200,6 +200,9 @@
- name: diag(Tensor self, int64_t diagonal)
self: diag_backward(grad, self.sizes(), diagonal)

- name: diagonal(Tensor self, int64_t offset, int64_t dim1, int64_t dim2)
self: diagonal_backward(grad, self.sizes(), offset, dim1, dim2)

- name: dist(Tensor self, Tensor other, Scalar p)
self: norm_backward(grad, self - other, p, result)
other: -norm_backward(grad, self - other, p, result)
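
The new diagonal entry above can be exercised end to end with gradcheck; a sketch assuming double-precision inputs, not part of the patch:

import torch
from torch.autograd import gradcheck

x = torch.randn(4, 5, 6, dtype=torch.double, requires_grad=True)
# numerically checks diagonal_backward against finite differences
print(gradcheck(lambda t: torch.diagonal(t, 1, 0, 2), (x,)))  # True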
2 changes: 1 addition & 1 deletion tools/autograd/gen_autograd.py
@@ -19,7 +19,7 @@
deprecated_path = os.path.join(os.path.dirname(__file__), 'deprecated.yaml')

VIEW_FUNCTIONS = {
'alias', 'as_strided', 'expand', 'narrow', 'permute', 'select', 'slice',
'alias', 'as_strided', 'diagonal', 'expand', 'narrow', 'permute', 'select', 'slice',
'squeeze', 't', 'transpose', 'unfold', 'unsqueeze', 'view',
}
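
Registering 'diagonal' in VIEW_FUNCTIONS means autograd treats its output as sharing storage with the input. A small sketch of the user-visible consequence (assumed behavior of the new view):

import torch

x = torch.zeros(3, 3)
d = torch.diagonal(x)   # a view into x, not a copy
d.fill_(1.0)            # writes through to the base tensor
print(x)                # x is now the 3x3 identity matrix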

7 changes: 7 additions & 0 deletions tools/autograd/templates/Functions.cpp
@@ -717,6 +717,13 @@ Tensor diag_backward(const Tensor & grad, IntList input_sizes, int64_t diagonal)
return grad_input;
}

Tensor diagonal_backward(const Tensor & grad, IntList input_sizes, int64_t offset, int64_t dim1, int64_t dim2) {
auto grad_input = at::zeros(grad.type(), input_sizes);
auto diag = grad_input.diagonal(offset, dim1, dim2);
diag.copy_(grad);
return grad_input;
}
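
A Python rendering of the same backward, for reference; diagonal_backward here is a hypothetical helper mirroring the C++ above, not part of the patch:

import torch

def diagonal_backward(grad, input_sizes, offset, dim1, dim2):
    # scatter grad back into a zero tensor of the original shape,
    # writing through the same diagonal view
    grad_input = torch.zeros(input_sizes, dtype=grad.dtype)
    grad_input.diagonal(offset, dim1, dim2).copy_(grad)
    return grad_input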

Tensor mse_loss_double_backward(const Tensor & grad, const Tensor & input, bool size_average, bool reduce) {
auto grad_input = 2 * grad;
if (size_average && reduce) {
25 changes: 22 additions & 3 deletions torch/_torch_docs.py
@@ -1267,9 +1267,11 @@

add_docstr(torch.diagonal,
r"""
diagonal(input, offset=0) -> Tensor
diagonal(input, offset=0, dim1=0, dim2=1) -> Tensor
Returns a 1-D tensor with the diagonal elements of :attr:`input`.
Returns a partial view of :attr:`input` with its diagonal elements
with respect to :attr:`dim1` and :attr:`dim2` appended as a dimension
at the end of the shape.
The argument :attr:`offset` controls which diagonal to consider:
@@ -1278,9 +1280,15 @@
- If :attr:`offset` < 0, it is below the main diagonal.
Args:
input (Tensor): the input tensor. Must be 2-dimensional.
input (Tensor): the input tensor. Must be at least 2-dimensional.
offset (int, optional): which diagonal to consider. Default: 0
(main diagonal).
dim1 (int, optional): first dimension with respect to which to
take diagonal. Default: 0.
dim2 (int, optional): second dimension with respect to which to
take diagonal. Default: 1.
.. note:: To take a batch diagonal, pass in dim1=-2, dim2=-1.
Examples::
@@ -1305,6 +1313,17 @@
-0.2239
[torch.FloatTensor of size 2]
>>> x = torch.randn(2, 5, 4, 2)
>>> torch.diagonal(x, offset=-1, dim1=1, dim2=2)
(0 ,.,.) =

-0.6806 -0.0281 -0.6595 -0.4199
0.8741 -0.1793 -0.6997 0.6265
(1 ,.,.) =
0.6182 1.3069 1.6503 1.7627
-0.2122 -0.2250 0.0990 -2.6433
[torch.FloatTensor of size (2,2,4)]
""")

add_docstr(torch.dist,