27 changes: 0 additions & 27 deletions aten/src/ATen/Declarations.cwrap
@@ -2323,33 +2323,6 @@
- THTensor* self
- THTensor* A
]]
[[
name: _th_symeig
cname: syev
types:
- Float
- Double
backends:
- CPU
- CUDA
variants:
- function
return: argument 0,1
arguments:
- arg: THTensor* res1
output: True
- arg: THTensor* res2
output: True
- THTensor* self
- arg: bool eigenvectors
if_true: V
if_false: N
default: N
- arg: bool upper
if_true: U
if_false: L
default: U
]]
[[
name: _th_eig
cname: geev
100 changes: 100 additions & 0 deletions aten/src/ATen/native/BatchLinearAlgebra.cpp
@@ -46,6 +46,10 @@ extern "C" void sgeqrf_(int *m, int *n, float *a, int *lda, float *tau, float *w
// orgqr
extern "C" void dorgqr_(int *m, int *n, int *k, double *a, int *lda, double *tau, double *work, int *lwork, int *info);
extern "C" void sorgqr_(int *m, int *n, int *k, float *a, int *lda, float *tau, float *work, int *lwork, int *info);

// syev
extern "C" void dsyev_(char *jobz, char *uplo, int *n, double *a, int *lda, double *w, double *work, int *lwork, int *info);
extern "C" void ssyev_(char *jobz, char *uplo, int *n, float *a, int *lda, float *w, float *work, int *lwork, int *info);
#endif

namespace at {
@@ -93,6 +97,11 @@ void lapackOrgqr(int m, int n, int k, scalar_t *a, int lda, scalar_t *tau, scala
AT_ERROR("orgqr only takes float or double Tensors");
}

template<class scalar_t>
void lapackSymeig(char jobz, char uplo, int n, scalar_t *a, int lda, scalar_t *w, scalar_t *work, int lwork, int *info) {
AT_ERROR("symeig only takes float or double Tensors");
}

#ifdef USE_LAPACK
template<> void lapackSolve<double>(int n, int nrhs, double *a, int lda, int *ipiv, double *b, int ldb, int *info) {
dgesv_(&n, &nrhs, a, &lda, ipiv, b, &ldb, info);
@@ -157,6 +166,14 @@ template<> void lapackOrgqr<double>(int m, int n, int k, double *a, int lda, dou
template<> void lapackOrgqr<float>(int m, int n, int k, float *a, int lda, float *tau, float *work, int lwork, int *info) {
sorgqr_(&m, &n, &k, a, &lda, tau, work, &lwork, info);
}

template<> void lapackSymeig<double>(char jobz, char uplo, int n, double *a, int lda, double *w, double *work, int lwork, int *info) {
dsyev_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, info);
}

template<> void lapackSymeig<float>(char jobz, char uplo, int n, float *a, int lda, float *w, float *work, int lwork, int *info) {
ssyev_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, info);
}
#endif

// Below are the definitions of the functions operating on a batch that are going to be dispatched
@@ -833,4 +850,87 @@ std::tuple<Tensor&,Tensor&> qr_out(Tensor& Q, Tensor& R, const Tensor& self, boo
return std::tuple<Tensor&, Tensor&>(Q, R);
}

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ symeig ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

template <typename scalar_t>
static void apply_symeig(Tensor& self, Tensor& eigvals, bool eigenvectors, bool upper, std::vector<int64_t>& infos) {
#ifndef USE_LAPACK
AT_ERROR("symeig: LAPACK library not found in compilation");
#else
auto self_data = self.data<scalar_t>();
auto eigvals_data = eigvals.data<scalar_t>();
auto self_matrix_stride = matrixStride(self);
auto eigvals_stride = eigvals.size(-1);
auto batch_size = batchCount(self);
auto n = self.size(-1);

char uplo = upper ? 'U' : 'L';
char jobz = eigenvectors ? 'V' : 'N';

int info;
// Run once first to get the optimal workspace size.
// Since we deal with batches of matrices with the same dimensions, doing this outside
// the loop saves (batch_size - 1) workspace queries which would provide the same result
// and (batch_size - 1) calls to allocate and deallocate workspace using at::empty()
int lwork = -1;
scalar_t wkopt;
lapackSymeig<scalar_t>(jobz, uplo, n, self_data, n, eigvals_data, &wkopt, lwork, &info);
lwork = static_cast<int>(wkopt);
Tensor work = at::empty({lwork}, self.options());

for (int64_t i = 0; i < batch_size; i++) {
scalar_t* self_working_ptr = &self_data[i * self_matrix_stride];
scalar_t* eigvals_working_ptr = &eigvals_data[i * eigvals_stride];

// now compute the eigenvalues and, optionally, the eigenvectors
lapackSymeig<scalar_t>(jobz, uplo, n, self_working_ptr, n, eigvals_working_ptr, work.data<scalar_t>(), lwork, &info);
infos[i] = info;
if (info != 0) {
return;
}
}
#endif
}

std::tuple<Tensor, Tensor> _symeig_helper_cpu(const Tensor& self, bool eigenvectors, bool upper) {
std::vector<int64_t> infos(batchCount(self), 0);

auto self_sizes = self.sizes().vec();
self_sizes.pop_back();
auto eigvals = at::empty(self_sizes, self.options());

if (self.numel() == 0) {
return std::tuple<Tensor, Tensor>(eigvals, at::empty_like(self));
}

auto self_working_copy = cloneBatchedColumnMajor(self);
AT_DISPATCH_FLOATING_TYPES(self.scalar_type(), "symeig_cpu", [&]{
apply_symeig<scalar_t>(self_working_copy, eigvals, eigenvectors, upper, infos);
});

if (!eigenvectors) {
self_working_copy.zero_();
}
if (self.dim() > 2) {
batchCheckErrors(infos, "symeig_cpu");
} else {
singleCheckErrors(infos[0], "symeig_cpu");
}
return std::tuple<Tensor, Tensor>(eigvals, self_working_copy);
}

std::tuple<Tensor, Tensor> symeig(const Tensor& self, bool eigenvectors, bool upper) {
squareCheckInputs(self);
return at::_symeig_helper(self, eigenvectors, upper);
}

std::tuple<Tensor&, Tensor&> symeig_out(Tensor& vals, Tensor& vecs, const Tensor& self, bool eigenvectors, bool upper) {
squareCheckInputs(self);
Tensor vals_tmp, vecs_tmp;
std::tie(vals_tmp, vecs_tmp) = at::_symeig_helper(self, eigenvectors, upper);
vals.resize_as_(vals_tmp).copy_(vals_tmp);
vecs.resize_as_(vecs_tmp).copy_(vecs_tmp);
return std::tuple<Tensor&, Tensor&>(vals, vecs);
}

}} // namespace at::native
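
The `lwork = -1` call in `apply_symeig` above is the standard LAPACK workspace-query idiom: the routine is invoked once in query mode to learn the optimal scratch size, then once for real. A minimal standalone sketch of the same two-phase pattern, independent of ATen — the 3x3 matrix and build line are illustrative only, and it assumes a Fortran LAPACK (e.g. `-llapack`) is available:

```cpp
// Two-phase LAPACK workspace query, mirroring apply_symeig above.
// Build sketch: g++ syev_demo.cpp -llapack
#include <cstdio>
#include <vector>

extern "C" void dsyev_(char *jobz, char *uplo, int *n, double *a, int *lda,
                       double *w, double *work, int *lwork, int *info);

int main() {
  int n = 3, lda = 3, info = 0;
  char jobz = 'V';  // 'V': eigenvalues and eigenvectors, 'N': eigenvalues only
  char uplo = 'U';  // read the upper triangle of the (column-major) matrix
  std::vector<double> a = {2, -1, 0, -1, 2, -1, 0, -1, 2};  // symmetric 3x3
  std::vector<double> w(n);  // eigenvalues, returned in ascending order

  // Phase 1: lwork = -1 makes dsyev_ only write the optimal size into wkopt.
  double wkopt;
  int lwork = -1;
  dsyev_(&jobz, &uplo, &n, a.data(), &lda, w.data(), &wkopt, &lwork, &info);

  // Phase 2: allocate once, then do the actual decomposition.
  lwork = static_cast<int>(wkopt);
  std::vector<double> work(lwork);
  dsyev_(&jobz, &uplo, &n, a.data(), &lda, w.data(), work.data(), &lwork, &info);

  if (info == 0)
    std::printf("eigenvalues: %g %g %g\n", w[0], w[1], w[2]);
  return info;  // info != 0 signals a bad argument or failed convergence
}
```

Doing the query once outside the batch loop, as the PR does, is safe because every matrix in the batch has identical dimensions and therefore an identical optimal workspace.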
17 changes: 14 additions & 3 deletions aten/src/ATen/native/LinearAlgebraUtils.h
@@ -3,6 +3,7 @@
#include <ATen/TensorUtils.h>
#include <limits>
#include <sstream>
#include <cstring>

namespace at { namespace native {

@@ -110,7 +111,7 @@ static inline void linearSolveCheckInputs(const Tensor& self, const Tensor& A) {
" but each b matrix is ", self.size(-2), " by ", self.size(-1));
}

-// Validates input shapes for operations on batches of square matrices (inverse, cholesky, lu)
+// Validates input shapes for operations on batches of square matrices (inverse, cholesky, lu, symeig)
static inline void squareCheckInputs(const Tensor& self) {
TORCH_CHECK(self.size(-1) == self.size(-2),
"A must be batches of square matrices, "
@@ -145,7 +146,12 @@ static inline void batchCheckErrors(const Tensor& infos, const char* name) {
if (info < 0) {
AT_ERROR(name, ": For batch ", i, ": Argument ", -info, " has illegal value");
} else if (info > 0) {
AT_ERROR(name, ": For batch ", i, ": U(", info, ",", info, ") is zero, singular U.");
if (strstr(name, "symeig")) {
AT_ERROR(name, ": For batch ", i, ": the algorithm failed to converge; ", info,
" off-diagonal elements of an intermediate tridiagonal form did not converge to zero.")
} else {
AT_ERROR(name, ": For batch ", i, ": U(", info, ",", info, ") is zero, singular U.");
}
}
}
}
@@ -158,7 +164,12 @@ static inline void singleCheckErrors(int64_t info, const char* name) {
if (info < 0) {
AT_ERROR(name, ": Argument ", -info, " has illegal value");
} else if (info > 0) {
AT_ERROR(name, ": U(", info, ",", info, ") is zero, singular U.");
if (strstr(name, "symeig")) {
AT_ERROR(name, ": the algorithm failed to converge; ", info,
" off-diagonal elements of an intermediate tridiagonal form did not converge to zero.")
} else {
AT_ERROR(name, ": U(", info, ",", info, ") is zero, singular U.");
}
}
}

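
For context: LAPACK routines report status through a single `info` integer, and the `strstr` branch added above is what maps it to a routine-appropriate message. A simplified stand-in showing the same mapping — `check_lapack_info` is a hypothetical name for illustration, not the helper defined in this header:

```cpp
#include <cstring>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for singleCheckErrors: maps a LAPACK info code to an error.
static void check_lapack_info(int info, const char* name) {
  if (info < 0) {
    // Negative info: argument number -info was invalid (a caller-side bug).
    throw std::runtime_error(std::string(name) + ": Argument " +
                             std::to_string(-info) + " has illegal value");
  }
  if (info > 0) {
    if (std::strstr(name, "symeig") != nullptr) {
      // syev-family: info counts off-diagonal elements that failed to converge.
      throw std::runtime_error(std::string(name) + ": " + std::to_string(info) +
                               " off-diagonal elements did not converge to zero");
    }
    // LU-style factorizations: U(info, info) is an exactly-zero pivot.
    throw std::runtime_error(std::string(name) + ": U(" + std::to_string(info) +
                             "," + std::to_string(info) + ") is zero, singular U");
  }
}
```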
113 changes: 112 additions & 1 deletion aten/src/ATen/native/cuda/BatchLinearAlgebra.cu
@@ -143,7 +143,15 @@ template<class scalar_t>
void magmaOrgqr(
magma_int_t m, magma_int_t n, magma_int_t k, scalar_t* dA,
magma_int_t ldda, scalar_t* tau, scalar_t* dT, magma_int_t nb, magma_int_t* info) {
AT_ERROR("orgqr only takes float or doule Tensors");
AT_ERROR("orgqr only takes float or double Tensors");
}

template<class scalar_t>
void magmaSymeig(
magma_vec_t jobz, magma_uplo_t uplo, magma_int_t n, scalar_t* dA, magma_int_t ldda,
scalar_t* w, scalar_t* wA, magma_int_t ldwa, scalar_t* work, magma_int_t lwork,
magma_int_t* iwork, magma_int_t liwork, magma_int_t* info) {
AT_ERROR("symeig only takes float or double Tensors");
}

template<>
@@ -405,6 +413,22 @@ void magmaOrgqr<float>(
float* tau, float* dT, magma_int_t nb, magma_int_t* info) {
magma_sorgqr_gpu(m, n, k, dA, ldda, tau, dT, nb, info);
}

template<>
void magmaSymeig<double>(
magma_vec_t jobz, magma_uplo_t uplo, magma_int_t n, double* dA, magma_int_t ldda,
double* w, double* wA, magma_int_t ldwa, double* work, magma_int_t lwork,
magma_int_t* iwork, magma_int_t liwork, magma_int_t* info) {
magma_dsyevd_gpu(jobz, uplo, n, dA, ldda, w, wA, ldwa, work, lwork, iwork, liwork, info);
}

template<>
void magmaSymeig<float>(
magma_vec_t jobz, magma_uplo_t uplo, magma_int_t n, float* dA, magma_int_t ldda,
float* w, float* wA, magma_int_t ldwa, float* work, magma_int_t lwork,
magma_int_t* iwork, magma_int_t liwork, magma_int_t* info) {
magma_ssyevd_gpu(jobz, uplo, n, dA, ldda, w, wA, ldwa, work, lwork, iwork, liwork, info);
}
#endif

#define ALLOCATE_ARRAY(name, type, size, dummy_tensor) \
@@ -1123,6 +1147,93 @@ std::tuple<Tensor,Tensor> _qr_helper_cuda(const Tensor& self, bool some) {
r_working_copy.narrow_copy(-2, 0, n_columns_q).triu_());
}

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ symeig ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

template <typename scalar_t>
static void apply_symeig(Tensor& self, Tensor& eigvals, bool eigenvectors, bool upper, std::vector<int64_t>& infos) {
#ifndef USE_MAGMA
AT_ERROR("symeig: MAGMA library not found in "
"compilation. Please rebuild with MAGMA.");
#else
auto self_data = self.data<scalar_t>();
auto eigvals_data = eigvals.data<scalar_t>();
auto self_matrix_stride = matrixStride(self);
auto eigvals_stride = eigvals.size(-1);
int64_t batch_size = batchCount(self);
magma_int_t n = magma_int_cast(self.size(-1), "n");

magma_uplo_t uplo = upper ? MagmaUpper : MagmaLower;
magma_vec_t jobz = eigenvectors ? MagmaVec : MagmaNoVec;

scalar_t* wA;
ALLOCATE_ARRAY(wA, scalar_t, n * n, self);

magma_int_t info;
// Run once first to get the optimal workspace sizes.
// Since we deal with batches of matrices with the same dimensions, doing this outside
// the loop saves (batch_size - 1) workspace queries which would provide the same result
// and (batch_size - 1) calls to allocate and deallocate workspace using at::empty()
magma_int_t lwork = -1;
scalar_t wkopt;
magma_int_t liwork = -1;
magma_int_t iwkopt;
magmaSymeig<scalar_t>(jobz, uplo, n, self_data, n, eigvals_data, wA, n, &wkopt, lwork, &iwkopt, liwork, &info);

scalar_t* work;
magma_int_t* iwork;
lwork = magma_int_cast(wkopt, "work_size");
liwork = magma_int_cast(iwkopt, "iwork_size");
ALLOCATE_ARRAY(work, scalar_t, lwork, self);
ALLOCATE_ARRAY(iwork, magma_int_t, liwork, self);

for (int64_t i = 0; i < batch_size; i++) {
scalar_t* self_working_ptr = &self_data[i * self_matrix_stride];
scalar_t* eigvals_working_ptr = &eigvals_data[i * eigvals_stride];
magmaSymeig<scalar_t>(jobz, uplo, n, self_working_ptr, n, eigvals_working_ptr,
wA, n, work, lwork, iwork, liwork, &info);
infos[i] = info;
if (info != 0) {
return;
}
}
#endif
}

std::tuple<Tensor, Tensor> _symeig_helper_cuda(const Tensor& self, bool eigenvectors, bool upper) {
std::vector<int64_t> infos(batchCount(self), 0);

auto self_sizes = self.sizes().vec();
self_sizes.pop_back();

// We create the eigenvalues tensor on the CPU, because GPU tensors cause a
// segfault when passed to magmaSymeig (MAGMA expects the eigenvalue array in
// host memory). The data is later moved to the appropriate device.
// In the case where self.numel() == 0, we just return an empty tensor of the
// appropriate dimensions directly on CUDA (to avoid an unnecessary to(at::kCUDA))
auto eigvals_working_copy = self.numel() == 0
? at::empty(self_sizes, self.options())
: at::empty(self_sizes, self.options().device(at::kCPU));

if (self.numel() == 0) {
return std::tuple<Tensor, Tensor>(eigvals_working_copy, at::empty_like(self));
}

auto self_working_copy = cloneBatchedColumnMajor(self);
AT_DISPATCH_FLOATING_TYPES(self.scalar_type(), "symeig_cuda", [&]{
apply_symeig<scalar_t>(self_working_copy, eigvals_working_copy, eigenvectors, upper, infos);
});

if (!eigenvectors) {
self_working_copy.zero_();
}
if (self.dim() > 2) {
batchCheckErrors(infos, "symeig_cuda");
} else {
singleCheckErrors(infos[0], "symeig_cuda");
}
return std::tuple<Tensor, Tensor>(eigvals_working_copy.to(self.device()), self_working_copy);
}

}} // namespace at::native

#undef ALLOCATE_ARRAY
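
Both `apply_symeig` loops (CPU and CUDA) step through the batch with `matrixStride` and `batchCount` from LinearAlgebraUtils.h. A simplified sketch of what those helpers compute for a tensor of shape `[..., n, n]` — these are stand-ins over a raw shape vector, not the real Tensor-based helpers:

```cpp
#include <cstdint>
#include <vector>

// For shape [..., m, n]: elements occupied by one matrix in a contiguous batch.
static int64_t matrix_stride(const std::vector<int64_t>& sizes) {
  auto d = sizes.size();
  return sizes[d - 2] * sizes[d - 1];
}

// For shape [..., m, n]: the number of matrices in the batch, i.e. the product
// of all dimensions except the last two (1 for a plain 2-D matrix).
static int64_t batch_count(const std::vector<int64_t>& sizes) {
  int64_t count = 1;
  for (std::size_t i = 0; i + 2 < sizes.size(); ++i)
    count *= sizes[i];
  return count;
}

// Matrix i of the batch then starts at data + i * matrix_stride(sizes), which
// is exactly how self_working_ptr is formed in both apply_symeig loops.
```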
10 changes: 5 additions & 5 deletions aten/src/ATen/native/native_functions.yaml
@@ -3603,15 +3603,15 @@
CUDA: _triangular_solve_helper_cuda

- func: symeig(Tensor self, bool eigenvectors=False, bool upper=True, *, Tensor(a!) e, Tensor(b!) V) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)
-  dispatch:
-    CPU: legacy::cpu::_th_symeig_out
-    CUDA: legacy::cuda::_th_symeig_out

- func: symeig(Tensor self, bool eigenvectors=False, bool upper=True) -> (Tensor eigenvalues, Tensor eigenvectors)
  variants: method, function

+- func: _symeig_helper(Tensor self, bool eigenvectors, bool upper) -> (Tensor, Tensor)
+  variants: function
  dispatch:
-    CPU: legacy::cpu::_th_symeig
-    CUDA: legacy::cuda::_th_symeig
+    CPU: _symeig_helper_cpu
+    CUDA: _symeig_helper_cuda

- func: eig(Tensor self, bool eigenvectors=False, *, Tensor(a!) e, Tensor(b!) v) -> (Tensor(a!) eigenvalues, Tensor(b!) eigenvectors)
dispatch:
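
With this wiring, `at::symeig` (and `torch.symeig` above it) routes through `at::_symeig_helper` to the new native kernels. A hedged end-to-end usage sketch, assuming an ATen build that contains this change; shapes and values are illustrative:

```cpp
#include <ATen/ATen.h>
#include <tuple>

int main() {
  // Batch of 4 symmetric 5x5 matrices; symmetrize explicitly, since symeig
  // assumes symmetry and only reads the triangle selected by `upper`.
  at::Tensor a = at::randn({4, 5, 5});
  a = 0.5 * (a + a.transpose(-2, -1));

  // Functional form: dispatches to _symeig_helper_cpu here
  // (or _symeig_helper_cuda for CUDA tensors).
  at::Tensor e, v;
  std::tie(e, v) = at::symeig(a, /*eigenvectors=*/true, /*upper=*/true);
  // e: [4, 5] eigenvalues in ascending order; v: [4, 5, 5] eigenvectors.

  // out= form, matching the symeig out-variant declared above.
  at::Tensor e_out = at::empty({0}), v_out = at::empty({0});
  at::symeig_out(e_out, v_out, a, /*eigenvectors=*/true, /*upper=*/true);
  return 0;
}
```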
17 changes: 0 additions & 17 deletions aten/src/TH/generic/THLapack.cpp
@@ -5,8 +5,6 @@

TH_EXTERNC void dgels_(char *trans, int *m, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, double *work, int *lwork, int *info);
TH_EXTERNC void sgels_(char *trans, int *m, int *n, int *nrhs, float *a, int *lda, float *b, int *ldb, float *work, int *lwork, int *info);
TH_EXTERNC void dsyev_(char *jobz, char *uplo, int *n, double *a, int *lda, double *w, double *work, int *lwork, int *info);
TH_EXTERNC void ssyev_(char *jobz, char *uplo, int *n, float *a, int *lda, float *w, float *work, int *lwork, int *info);
TH_EXTERNC void dgeev_(char *jobvl, char *jobvr, int *n, double *a, int *lda, double *wr, double *wi, double* vl, int *ldvl, double *vr, int *ldvr, double *work, int *lwork, int *info);
TH_EXTERNC void sgeev_(char *jobvl, char *jobvr, int *n, float *a, int *lda, float *wr, float *wi, float* vl, int *ldvl, float *vr, int *ldvr, float *work, int *lwork, int *info);
TH_EXTERNC void dgesdd_(char *jobz, int *m, int *n, double *a, int *lda, double *s, double *u, int *ldu, double *vt, int *ldvt, double *work, int *lwork, int *iwork, int *info);
@@ -40,21 +38,6 @@ void THLapack_(gels)(char trans, int m, int n, int nrhs, scalar_t *a, int lda, s
#endif
}

/* Compute all eigenvalues and, optionally, eigenvectors of a real symmetric
matrix A */
void THLapack_(syev)(char jobz, char uplo, int n, scalar_t *a, int lda, scalar_t *w, scalar_t *work, int lwork, int *info)
{
#ifdef USE_LAPACK
#if defined(TH_REAL_IS_DOUBLE)
dsyev_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, info);
#else
ssyev_(&jobz, &uplo, &n, a, &lda, w, work, &lwork, info);
#endif
#else
THError("syev : Lapack library not found in compile time\n");
#endif
}

/* Compute for an N-by-N real nonsymmetric matrix A, the eigenvalues and,
optionally, the left and/or right eigenvectors */
void THLapack_(geev)(char jobvl, char jobvr, int n, scalar_t *a, int lda, scalar_t *wr, scalar_t *wi, scalar_t* vl, int ldvl, scalar_t *vr, int ldvr, scalar_t *work, int lwork, int *info)
2 changes: 0 additions & 2 deletions aten/src/TH/generic/THLapack.h
@@ -4,8 +4,6 @@

/* ||AX-B|| */
TH_API void THLapack_(gels)(char trans, int m, int n, int nrhs, scalar_t *a, int lda, scalar_t *b, int ldb, scalar_t *work, int lwork, int *info);
/* Eigenvals */
TH_API void THLapack_(syev)(char jobz, char uplo, int n, scalar_t *a, int lda, scalar_t *w, scalar_t *work, int lwork, int *info);
/* Non-sym eigenvals */
TH_API void THLapack_(geev)(char jobvl, char jobvr, int n, scalar_t *a, int lda, scalar_t *wr, scalar_t *wi, scalar_t* vl, int ldvl, scalar_t *vr, int ldvr, scalar_t *work, int lwork, int *info);
/* svd */