2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -188,7 +188,7 @@ If you are working on the CUDA code, here are some useful CUDA debugging tips:
1. `CUDA_DEBUG=1` will enable CUDA debugging symbols (-g -G). This is particularly
helpful in debugging device code. However, it will slow down the build process,
so use wisely.
2. `cuda-gdb` and `cuda-memcheck` are your best CUDA debuging friends. Unlike`gdb`,
2. `cuda-gdb` and `cuda-memcheck` are your best CUDA debugging friends. Unlike `gdb`,
`cuda-gdb` can display actual values in a CUDA tensor (rather than all zeros).


1 change: 1 addition & 0 deletions aten/src/ATen/Declarations.cwrap
@@ -3504,6 +3504,7 @@
- Double
backends:
- CPU
- CUDA
variants:
- method
- function
46 changes: 46 additions & 0 deletions aten/src/ATen/native/NativeFunctions.cpp
@@ -268,6 +268,52 @@ Tensor & unsqueeze_(Tensor& self, int64_t dim) {
return self.as_strided_(std::get<0>(g), std::get<1>(g));
}

// For backward, we save the SVD:
// http://www.ics.forth.gr/cvrl/publications/conferences/2000_eccv_SVD_jacobian.pdf
// But instead of the gesvd SVD A = U(A) Sig(A) V(A)^T, which doesn't specify the
// signs of det(U) and det(V), we consider det(A) = \prod Sig_(A), where
// 1. A = U_(A) Sig_(A) V(A)^T
// 2. Sig_(A) and U_(A) may differ from their gesvd counterparts by a sign flip
//    in the first column/entry, chosen so that det(U_(A)) * det(V(A)) = +1
std::tuple<Tensor, Tensor, Tensor, Tensor> _det_with_svd(const Tensor& self) {
if (!at::isFloatingType(self.type().scalarType()) ||
self.dim() != 2 || self.size(0) != self.size(1)) {
std::ostringstream ss;
ss << "det(" << self.type() << "{" << self.sizes() << "}): expected a 2D"
<< "square tensor of floating types";
throw std::runtime_error(ss.str());
}
// check symmetric
bool symmetric = self.equal(self.transpose(0, 1));

auto svd = self.svd(true);
auto sigma = std::get<1>(svd);
auto u = std::get<0>(svd);
auto v = std::get<2>(svd);
auto det = sigma.prod();
if (!symmetric) {
auto qr = self.geqrf();
auto a = std::get<0>(qr);
auto tau = std::get<1>(qr);
// non-zero values in tau represent Householder reflectors, each of which has det -1
int64_t num_reflectors = tau.nonzero().size(0);
auto qr_det = a.diag().prod();
if (num_reflectors % 2 == 1) {
qr_det = -qr_det;
}
if ((qr_det < 0).any() ^ (det < 0).any()) { // signs differ, so flip the saved SVD to match
u.narrow(1, 0, 1).mul_(-1);
sigma.narrow(0, 0, 1).mul_(-1);
}
det = qr_det; // QR is more stable than SVD, so use it anyway
}
return std::make_tuple(det, u, sigma, v);
}

Tensor det(const Tensor& self) {
return std::get<0>(self._det_with_svd());
}

Tensor stack(TensorList tensors, int64_t dim) {
if (tensors.size() == 0) {
throw std::runtime_error("stack expects a non-empty TensorList");
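The new _det_with_svd reads the determinant's magnitude off the singular values and pins down its sign with a geqrf factorization: det(R) is the product of R's diagonal, and every nonzero Householder scalar in tau contributes a factor of -1 to det(Q). The SVD itself is saved because the backward pass follows the SVD-Jacobian reference linked in the comment (for invertible A, d det(A)/dA = det(A) A^{-T}, and the SVD-based backward also covers the rank-deficient inputs exercised by the tests below). A minimal Python sketch of the same sign logic, assuming the standard torch.svd and torch.geqrf bindings; the function name is illustrative and not part of this PR:

import torch

def det_via_svd_and_qr(a):
    # Illustrative sketch of the sign logic in _det_with_svd; not the ATen kernel.
    # a is assumed to be a square 2D floating-point tensor.
    u, sigma, v = torch.svd(a)          # saved for backward in the real op
    qr_a, tau = torch.geqrf(a)          # A = Q R in compact LAPACK form
    qr_det = qr_a.diag().prod()         # det(R) = product of R's diagonal
    if tau.nonzero().size(0) % 2 == 1:  # each Householder reflector has det -1
        qr_det = -qr_det                # so det(Q) = (-1)^(number of reflectors)
    return qr_det                       # signed determinant, as in the C++ code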
4 changes: 4 additions & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -76,6 +76,10 @@

- func: unsqueeze_(Tensor self, int64_t dim) -> Tensor

- func: _det_with_svd(Tensor self) -> (Tensor, Tensor, Tensor, Tensor)

- func: det(Tensor self) -> Tensor

- func: stack(TensorList tensors, int64_t dim=0) -> Tensor
variants: function

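These yaml entries register the two native functions above with ATen's code generation: det becomes the user-facing operator (documented as torch.det later in this diff), and _det_with_svd is the internal helper that returns (det, U, Sigma, V) so autograd can reuse the saved SVD. A small usage sketch, assuming a build that includes this PR:

import torch

# torch.det on a square matrix; the _det_with_svd helper declared above is
# internal and returns (det, U, Sigma, V) for the backward pass.
x = torch.randn(5, 5)
print(torch.det(x))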
50 changes: 50 additions & 0 deletions aten/src/THC/generic/THCTensorMathMagma.cu
@@ -584,6 +584,51 @@ THC_API void THCTensor_(potrs)(THCState *state, THCTensor *rb_, THCTensor *b, TH
#endif
}

THC_API void THCTensor_(geqrf)(THCState *state, THCTensor *ra_, THCTensor *rtau_, THCTensor *a_)
{
#ifdef USE_MAGMA
THArgCheck(a_->nDimension == 2, 2, "A should be 2 dimensional");

THCTensor *a = THCTensor_(newColumnMajor)(state, ra_, a_);
int64_t m = a->size[0];
int64_t n = a->size[1];
int64_t k = (m < n ? m : n);

#ifdef MAGMA_V2
#if defined(THC_REAL_IS_FLOAT)
int64_t nb = magma_get_sgeqrf_nb(m, n);
#else
int64_t nb = magma_get_dgeqrf_nb(m, n);
#endif
#else
#if defined(THC_REAL_IS_FLOAT)
int64_t nb = magma_get_sgeqrf_nb(m);
#else
int64_t nb = magma_get_dgeqrf_nb(m);
#endif
#endif

real *rtau_data = th_magma_malloc_pinned<real>(k);
real *a_data = THCTensor_(data)(state, a);

int info;
#if defined(THC_REAL_IS_FLOAT)
magma_sgeqrf2_gpu(m, n, a_data, m, rtau_data, &info);
#else
magma_dgeqrf2_gpu(m, n, a_data, m, rtau_data, &info);
#endif

if (info != 0)
THError("MAGMA geqrf2 : Argument %d : illegal value.", -info);

THCTensor_(freeCopyTo)(state, a, ra_);
THCTensor_(copyArray1d)(state, rtau_, rtau_data, k);
magma_free_pinned(rtau_data);
#else
THError(NoMagma(geqrf));
#endif
}

THC_API void THCTensor_(qr)(THCState *state, THCTensor *rq_, THCTensor *rr_, THCTensor *a_)
{
#ifdef USE_MAGMA
@@ -614,6 +659,11 @@ THC_API void THCTensor_(qr)(THCState *state, THCTensor *rq_, THCTensor *rr_, THC
real *work_data = THCTensor_(data)(state, work);

int info;
// We need to call two different versions of ?geqrf:
// ?geqrf_gpu allows fast computation of Q via ?orgqr_gpu, but doesn't give
// R properly. Note that the MAGMA documentation for this method is wrong.
// http://icl.cs.utk.edu/magma/forum/viewtopic.php?f=2&t=1015&p=2800&hilit=geqrf_gpu#p2800
// ?geqrf2_gpu gives the correct R, but doesn't allow computation of Q via ?orgqr_gpu.
#if defined(THC_REAL_IS_FLOAT)
magma_sgeqrf2_gpu(m, n, a_data, m, tau_data, &info);
#else
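The new THCTensor_(geqrf) kernel stores the factorization the same way LAPACK does: R sits in the upper triangle of the returned matrix and rtau holds the Householder scalars from which Q can be expanded with orgqr. A quick sanity-check sketch, assuming a MAGMA-enabled CUDA build and that orgqr is available for the device (otherwise the same check runs on CPU):

import torch

# geqrf returns the compact (a, tau) form; triu(a) is R and orgqr rebuilds Q.
a = torch.randn(20, 20).cuda()
ra, tau = torch.geqrf(a)
r = ra.triu()                      # R lives in the upper triangle
q = torch.orgqr(ra, tau)           # expand the Householder reflectors into Q
print((q.mm(r) - a).abs().max())   # should be ~0 up to floating-point error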
2 changes: 1 addition & 1 deletion aten/src/THC/generic/THCTensorMathMagma.h
@@ -15,9 +15,9 @@ THC_API void THCTensor_(getri)(THCState *state, THCTensor *ra_, THCTensor *a);
THC_API void THCTensor_(potri)(THCState *state, THCTensor *ra_, THCTensor *a, const char *uplo);
THC_API void THCTensor_(potrf)(THCState *state, THCTensor *ra_, THCTensor *a, const char *uplo);
THC_API void THCTensor_(potrs)(THCState *state, THCTensor *rb_, THCTensor *a, THCTensor *b, const char *uplo);
THC_API void THCTensor_(geqrf)(THCState *state, THCTensor *ra_, THCTensor *rtau_, THCTensor *a_);
THC_API void THCTensor_(qr)(THCState *state, THCTensor *rq_, THCTensor *rr_, THCTensor *a);


#endif // defined(THC_REAL_IS_FLOAT) || defined(THC_REAL_IS_DOUBLE)

#endif
1 change: 1 addition & 0 deletions docs/source/torch.rst
@@ -188,6 +188,7 @@ BLAS and LAPACK Operations
.. autofunction:: ger
.. autofunction:: gesv
.. autofunction:: inverse
.. autofunction:: det
.. autofunction:: matmul
.. autofunction:: mm
.. autofunction:: mv
60 changes: 54 additions & 6 deletions test/test_autograd.py
@@ -1898,6 +1898,30 @@ def _make_cov(S):
return torch.mm(L, L.t())


def random_square_matrix_of_rank(l, rank):
assert rank <= l
A = torch.randn(l, l)
u, s, v = A.svd()
for i in range(l):
if i >= rank:
s[i] = 0
elif s[i] == 0:
s[i] = 1
return u.mm(torch.diag(s)).mm(v.transpose(0, 1))


def random_symmetric_matrix(l):
A = torch.randn(l, l)
return A.mm(A.transpose(0, 1))


def random_fullrank_matrix_distinct_singular_value(l):
A = torch.randn(l, l)
u, _, v = A.svd()
s = torch.arange(1, l + 1).mul_(1.0 / (l + 1))
return u.mm(torch.diag(s)).mm(v.transpose(0, 1))


class dont_convert(tuple):
pass

@@ -1906,7 +1930,6 @@ class dont_convert(tuple):
M = 10
S = 5


# (name, size, args...)
method_tests = [
('add', (S, S, S), ((S, S, S),)),
@@ -2166,6 +2189,13 @@ class dont_convert(tuple):
('index_copy', (S, S), (0, index_perm_variable(2, S), (2, S)), 'dim', [0]),
('index_fill', (S, S), (0, index_variable(2, S), 2), 'dim', [0]),
('inverse', (S, S), (), '', (), [skipIfNoLapack]),
('det', (S, S), (), '', (), [skipIfNoLapack]),
('det', lambda: random_symmetric_matrix(S), (), 'symmetric', (), [skipIfNoLapack]),
('det', lambda: random_square_matrix_of_rank(S, S - 2), (), 'dim2_null', (), [skipIfNoLapack]),
('det', lambda: random_square_matrix_of_rank(S, 1), (), 'rank1', (), [skipIfNoLapack]),
('det', lambda: random_square_matrix_of_rank(S, 2), (), 'rank2', (), [skipIfNoLapack]),
('det', lambda: random_fullrank_matrix_distinct_singular_value(S), (), 'distinct_postive_s', (), [skipIfNoLapack]),
('svd', lambda: random_fullrank_matrix_distinct_singular_value(S), (), '', (), [skipIfNoLapack]),
('gesv', (S, S), ((S, S),), '', (), [skipIfNoLapack]),
('potrf', _make_cov(S), (True,), '', (), [skipIfNoLapack]),
('eq', (S, S, S), ((S, S, S),)),
@@ -2303,6 +2333,8 @@ def maybe_non_contig(tensor):
return Variable(maybe_non_contig(arg), requires_grad=requires_grad)
elif isinstance(arg, Variable) and non_contiguous:
return Variable(maybe_non_contig(arg.data), requires_grad=arg.requires_grad)
elif callable(arg):
return map_arg(arg())
else:
return arg
return tuple(map_arg(arg) for arg in call_args)
@@ -2339,6 +2371,19 @@ def maybe_non_contig(tensor):
EXCLUDE_GRADCHECK = {
'potrf'
}
EXCLUDE_GRADGRADCHECK = {
'svd'
}
EXCLUDE_GRADGRADCHECK_BY_TEST_NAME = {
# Some of the following det tests pass only because a random matrix has full
# rank with high probability, which we can't rely on, so gradgrad is only
# tested on test_det_distinct_postive_s.
'test_det',
'test_det_symmetric',
'test_det_dim2_null',
'test_det_rank1',
'test_det_rank2'
}


def exclude_tensor_method(name, test_name):
@@ -2359,6 +2404,7 @@ def exclude_tensor_method(name, test_name):
'resize_as',
'scatter',
'scatter_add',
'det',
}
if test_name in exclude_all_tensor_method_by_test_name:
return True
@@ -2390,17 +2436,19 @@ def gradgradcheck_method_precision_override(test_name):
return override


def run_grad_and_gradgrad_checks(test_case, test_name, apply_method, output_variable, input_variables):
def run_grad_and_gradgrad_checks(test_case, name, test_name, apply_method, output_variable,
input_variables, run_gradgradcheck=True):
test_case.assertTrue(gradcheck(apply_method, input_variables, eps=1e-6, atol=PRECISION))

if name in EXCLUDE_GRADGRADCHECK or test_name in EXCLUDE_GRADGRADCHECK_BY_TEST_NAME:
return
grad_y = generate_gradoutput(output_variable, non_contiguous=True)
gradgradcheck_precision_override = gradgradcheck_method_precision_override(test_name)
if gradgradcheck_precision_override is not None:
atol = gradgradcheck_precision_override['atol']
rtol = gradgradcheck_precision_override['rtol']
test_case.assertTrue(gradgradcheck(apply_method, input_variables, grad_y, atol=atol, rtol=rtol))
else:
test_case.assertTrue(gradgradcheck(apply_method, input_variables, grad_y,))
test_case.assertTrue(gradgradcheck(apply_method, input_variables, grad_y))


def run_functional_checks(test_case, test_name, name, apply_fn, run_grad_checks,
@@ -2413,7 +2461,7 @@ def run_functional_checks(test_case, test_name, name, apply_fn, run_grad_checks,
test_case.assertEqual(unpack_variables(output_variable), output_tensor)

if run_grad_checks:
run_grad_and_gradgrad_checks(test_case, test_name, apply_fn,
run_grad_and_gradgrad_checks(test_case, name, test_name, apply_fn,
output_variable, f_args_variable)

self_variable = f_args_variable[0]
@@ -2457,7 +2505,7 @@ def check(name):
# TODO: check that both have changed after adding all inplace ops

if not is_inplace and name not in EXCLUDE_GRADCHECK:
run_grad_and_gradgrad_checks(self, test_name,
run_grad_and_gradgrad_checks(self, name, test_name,
lambda *inputs: getattr(inputs[0], name)(*inputs[1:]),
output_variable, (self_variable,) + args_variable)

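For det, the harness above runs gradcheck on every variant but restricts gradgradcheck to the full-rank case with distinct, positive singular values, since second derivatives through the saved SVD are only well behaved there. A rough sketch of what that amounts to, reusing the random_fullrank_matrix_distinct_singular_value helper defined earlier in this file (double precision is an assumption here, chosen so the finite differences are accurate):

import torch
from torch.autograd import Variable, gradcheck, gradgradcheck

# What the include/exclude lists above boil down to for 'det'.
a = random_fullrank_matrix_distinct_singular_value(5).double()
x = Variable(a, requires_grad=True)
assert gradcheck(lambda m: m.det(), (x,), eps=1e-6, atol=1e-5)
# Only run the double-backward check on well-conditioned, full-rank input.
assert gradgradcheck(lambda m: m.det(), (x,))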
7 changes: 6 additions & 1 deletion test/test_cuda.py
@@ -316,7 +316,8 @@ def tmp(t):
('qr', small_2d_lapack_fat, lambda t: [], 'fat', float_types),
('qr', large_2d_lapack, lambda t: [], 'big', float_types),
('inverse', new_t(20, 20), lambda t: [], None, float_types),

('geqrf', new_t(20, 20), lambda t: [], None, float_types),
# TODO: add det here once Variable and Tensor are the same thing
]

# TODO: random functions, cat, gather, scatter, index*, masked*,
@@ -938,6 +939,10 @@ def test_caching_pinned_memory_multi_gpu(self):
def _select_broadcastable_dims(dims_full=None):
return TestTorch._select_broadcastable_dims(dims_full)

@unittest.skipIf(not HAS_MAGMA, "no MAGMA library detected")
def test_det(self):
TestTorch._test_det(self, lambda t: t.cuda())

def test_broadcast(self):
TestTorch._test_broadcast(self, lambda t: t.cuda())
