Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 0 additions & 19 deletions aten/src/ATen/Declarations.cwrap
Original file line number Diff line number Diff line change
Expand Up @@ -2431,25 +2431,6 @@
${THTensor}_pstrf(res1_, res2_, self_, (upper) ? "U" : "L", tol_);
res2 -= 1; // LAPACK returns 1-indexed pivots
]]
[[
name: _th_qr
cname: qr
types:
- Float
- Double
backends:
- CPU
- CUDA
variants:
- function
return: argument 0,1
arguments:
- arg: THTensor* res1
output: True
- arg: THTensor* res2
output: True
- THTensor* self
]]
[[
name: _th_geqrf
cname: geqrf
Expand Down
2 changes: 1 addition & 1 deletion aten/src/ATen/core/Tensor.h
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,7 @@ class CAFFE2_API Tensor {
std::tuple<Tensor,Tensor> solve(const Tensor & A) const;
Tensor cholesky_inverse(bool upper=false) const;
std::tuple<Tensor,Tensor> pstrf(bool upper=true, Scalar tol=-1) const;
std::tuple<Tensor,Tensor> qr() const;
std::tuple<Tensor,Tensor> qr(bool some=true) const;
std::tuple<Tensor,Tensor> geqrf() const;
Tensor orgqr(const Tensor & input2) const;
Tensor ormqr(const Tensor & input2, const Tensor & input3, bool left=true, bool transpose=false) const;
Expand Down
4 changes: 2 additions & 2 deletions aten/src/ATen/core/TensorMethods.h
Original file line number Diff line number Diff line change
Expand Up @@ -1194,8 +1194,8 @@ inline Tensor Tensor::cholesky_inverse(bool upper) const {
inline std::tuple<Tensor,Tensor> Tensor::pstrf(bool upper, Scalar tol) const {
return dispatch_type().pstrf(*this, upper, tol);
}
inline std::tuple<Tensor,Tensor> Tensor::qr() const {
return dispatch_type().qr(*this);
inline std::tuple<Tensor,Tensor> Tensor::qr(bool some) const {
return dispatch_type().qr(*this, some);
}
inline std::tuple<Tensor,Tensor> Tensor::geqrf() const {
return dispatch_type().geqrf(*this);
Expand Down
2 changes: 1 addition & 1 deletion aten/src/ATen/core/Type.h
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ struct CAFFE2_API Type {
virtual std::tuple<Tensor,Tensor> solve(const Tensor & self, const Tensor & A) const = 0;
virtual Tensor cholesky_inverse(const Tensor & self, bool upper) const = 0;
virtual std::tuple<Tensor,Tensor> pstrf(const Tensor & self, bool upper, Scalar tol) const = 0;
virtual std::tuple<Tensor,Tensor> qr(const Tensor & self) const = 0;
virtual std::tuple<Tensor,Tensor> qr(const Tensor & self, bool some) const = 0;
virtual std::tuple<Tensor,Tensor> geqrf(const Tensor & self) const = 0;
virtual Tensor orgqr(const Tensor & self, const Tensor & input2) const = 0;
virtual Tensor ormqr(const Tensor & self, const Tensor & input2, const Tensor & input3, bool left, bool transpose) const = 0;
Expand Down
185 changes: 185 additions & 0 deletions aten/src/ATen/native/BatchLinearAlgebra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ extern "C" void spotrf_(char *uplo, int *n, float *a, int *lda, int *info);
// trtrs
extern "C" void dtrtrs_(char *uplo, char *trans, char *diag, int *n, int *nrhs, double *a, int *lda, double *b, int *ldb, int *info);
extern "C" void strtrs_(char *uplo, char *trans, char *diag, int *n, int *nrhs, float *a, int *lda, float *b, int *ldb, int *info);

// geqrf
extern "C" void dgeqrf_(int *m, int *n, double *a, int *lda, double *tau, double *work, int *lwork, int *info);
extern "C" void sgeqrf_(int *m, int *n, float *a, int *lda, float *tau, float *work, int *lwork, int *info);

// orgqr
extern "C" void dorgqr_(int *m, int *n, int *k, double *a, int *lda, double *tau, double *work, int *lwork, int *info);
extern "C" void sorgqr_(int *m, int *n, int *k, float *a, int *lda, float *tau, float *work, int *lwork, int *info);
#endif

namespace at {
Expand Down Expand Up @@ -75,6 +83,16 @@ void lapackTriangularSolve(char uplo, char trans, char diag, int n, int nrhs, sc
AT_ERROR("triangular_solve only takes float or double Tensors");
}

template<class scalar_t>
void lapackGeqrf(int m, int n, scalar_t *a, int lda, scalar_t *tau, scalar_t *work, int lwork, int *info) {
AT_ERROR("geqrf only takes float or double Tensors");
}

template<class scalar_t>
void lapackOrgqr(int m, int n, int k, scalar_t *a, int lda, scalar_t *tau, scalar_t *work, int lwork, int *info) {
AT_ERROR("orgqr only takes float or double Tensors");
}

#ifdef USE_LAPACK
template<> void lapackSolve<double>(int n, int nrhs, double *a, int lda, int *ipiv, double *b, int ldb, int *info) {
dgesv_(&n, &nrhs, a, &lda, ipiv, b, &ldb, info);
Expand Down Expand Up @@ -123,6 +141,22 @@ template<> void lapackTriangularSolve<double>(char uplo, char trans, char diag,
template<> void lapackTriangularSolve<float>(char uplo, char trans, char diag, int n, int nrhs, float *a, int lda, float *b, int ldb, int *info) {
strtrs_(&uplo, &trans, &diag, &n, &nrhs, a, &lda, b, &ldb, info);
}

template<> void lapackGeqrf<double>(int m, int n, double *a, int lda, double *tau, double *work, int lwork, int *info) {
dgeqrf_(&m, &n, a, &lda, tau, work, &lwork, info);
}

template<> void lapackGeqrf<float>(int m, int n, float *a, int lda, float *tau, float *work, int lwork, int *info) {
sgeqrf_(&m, &n, a, &lda, tau, work, &lwork, info);
}

template<> void lapackOrgqr<double>(int m, int n, int k, double *a, int lda, double *tau, double *work, int lwork, int *info) {
dorgqr_(&m, &n, &k, a, &lda, tau, work, &lwork, info);
}

template<> void lapackOrgqr<float>(int m, int n, int k, float *a, int lda, float *tau, float *work, int lwork, int *info) {
sorgqr_(&m, &n, &k, a, &lda, tau, work, &lwork, info);
}
#endif

// Below of the definitions of the functions operating on a batch that are going to be dispatched
Expand Down Expand Up @@ -648,4 +682,155 @@ std::tuple<Tensor&, Tensor&> triangular_solve_out(Tensor& result, Tensor& clone_
return std::tuple<Tensor&, Tensor&>(result, clone_A);
}

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ qr ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

template<typename scalar_t>
static void apply_geqrf(Tensor& self, Tensor& tau, int64_t m, int64_t n,
std::vector<int64_t>& infos) {
#ifndef USE_LAPACK
AT_ERROR("qr: LAPACK library not found in compilation");
#else
auto self_data = self.data<scalar_t>();
auto tau_data = tau.data<scalar_t>();
auto self_matrix_stride = matrixStride(self);
auto tau_stride = tau.size(-1);
auto batch_size = batchCount(self);

int info;
// Run once, first to get the optimum work size.
// Since we deal with batches of matrices with the same dimensions, doing this outside
// the loop saves (batch_size - 1) workspace queries which would provide the same result
// and (batch_size - 1) calls to allocate and deallocate workspace using at::empty()
int lwork = -1;
scalar_t wkopt;
lapackGeqrf<scalar_t>(m, n, self_data, m, tau_data, &wkopt, lwork, &info);
lwork = static_cast<int>(wkopt);
Tensor work = at::empty({lwork}, self.options());

for (int64_t i = 0; i < batch_size; i++) {
scalar_t* self_working_ptr = &self_data[i * self_matrix_stride];
scalar_t* tau_working_ptr = &tau_data[i * tau_stride];

// now compute the actual R and TAU
lapackGeqrf<scalar_t>(m, n, self_working_ptr, m, tau_working_ptr, work.data<scalar_t>(), lwork, &info);
infos[i] = info;
if (info != 0) {
return;
}
}
#endif
}

template<typename scalar_t>
static void apply_orgqr(Tensor& self, const Tensor& tau, int64_t m, int64_t n_columns,
int64_t k, std::vector<int64_t>& infos) {
#ifndef USE_LAPACK
AT_ERROR("qr: LAPACK library not found in compilation");
#else
auto self_data = self.data<scalar_t>();
auto tau_data = tau.data<scalar_t>();
auto self_matrix_stride = matrixStride(self);
auto tau_stride = tau.size(-1);
auto batch_size = batchCount(self);

int info;
// Run once, first to get the optimum work size.
// Since we deal with batches of matrices with the same dimensions, doing this outside
// the loop saves (batch_size - 1) workspace queries which would provide the same result
// and (batch_size - 1) calls to allocate and deallocate workspace using at::empty()
int lwork = -1;
scalar_t wkopt;
lapackOrgqr<scalar_t>(m, n_columns, k, self_data, m, tau_data, &wkopt, lwork, &info);
lwork = static_cast<int>(wkopt);
Tensor work = at::empty({lwork}, self.options());

for (int64_t i = 0; i < batch_size; i++) {
scalar_t* self_working_ptr = &self_data[i * self_matrix_stride];
scalar_t* tau_working_ptr = &tau_data[i * tau_stride];

// now compute the actual Q
lapackOrgqr<scalar_t>(m, n_columns, k, self_working_ptr, m, tau_working_ptr, work.data<scalar_t>(), lwork, &info);
infos[i] = info;
if (info != 0) {
return;
}
}
#endif
}

std::tuple<Tensor, Tensor> _qr_helper_cpu(const Tensor& self, bool some) {
std::vector<int64_t> infos(batchCount(self), 0);
int64_t m = self.size(-2), n = self.size(-1);

// Setup inputs for apply_geqrf
auto self_sizes = self.sizes().vec();
self_sizes.pop_back();
self_sizes[self.dim() - 2] = std::min(m, n);
auto tau_working_copy = at::empty(self_sizes, self.options());
Tensor q_working_copy;

// Setup input geometry for apply_orgqr
std::vector<int64_t> q_sizes, q_strides;
int64_t n_columns_q;
Tensor R;
std::tie(q_sizes, q_strides, n_columns_q) = _compute_geometry_for_Q(self, some);

// If there are no elements, then we simply return a pair of tensors of required dimensions
if (self.numel() == 0) {
// Fix the number of columns of q appropriately
q_sizes[self.dim() - 1] = n_columns_q;
q_working_copy = at::eye(q_sizes[self.dim() - 2], q_sizes[self.dim() - 1], self.options());
q_working_copy = q_working_copy.expand_as(q_working_copy);

// We repurpose the same q_sizes for R
// Fix the number of rows and columns of q_working_copy appropriately
q_sizes[self.dim() - 1] = n;
q_sizes[self.dim() - 2] = n_columns_q;
R = at::empty(q_sizes, self.options());
return std::make_tuple(q_working_copy, R);
}

// First perform GEQRF for R and TAU (the elementary reflectors)
// We will need to generate R from the upper triangular matrix from the
// matrix input to GEQRF.
q_working_copy = at::empty_strided(q_sizes, q_strides, self.options());
q_working_copy.narrow(-1, 0, n).copy_(self);

AT_DISPATCH_FLOATING_TYPES(self.scalar_type(), "qr_cpu", [&]{
apply_geqrf<scalar_t>(q_working_copy, tau_working_copy, m, n, infos);
});
if (self.dim() > 2) {
batchCheckErrors(infos, "qr_cpu");
} else {
singleCheckErrors(infos[0], "qr_cpu");
}

R = q_working_copy.slice(-2, 0, n_columns_q).slice(-1, 0, n).triu();

// Next perform ORGQR for Q using the results (both raw R and TAU) from GEQRF
AT_DISPATCH_FLOATING_TYPES(self.scalar_type(), "qr_cpu", [&]{
apply_orgqr<scalar_t>(q_working_copy, tau_working_copy, m, n_columns_q, std::min(m, n), infos);
});
if (self.dim() > 2) {
batchCheckErrors(infos, "qr_cpu");
} else {
singleCheckErrors(infos[0], "qr_cpu");
}
return std::make_tuple(q_working_copy.narrow_copy(-1, 0, n_columns_q), R);
}

std::tuple<Tensor,Tensor> qr(const Tensor& self, bool some) {
TORCH_CHECK(self.dim() >= 2,
"self should have at least 2 dimensions, but has ", self.dim(), " dimensions instead");
return at::_qr_helper(self, some);
}

std::tuple<Tensor&,Tensor&> qr_out(Tensor& Q, Tensor& R, const Tensor& self, bool some) {
Tensor Q_tmp, R_tmp;
std::tie(Q_tmp, R_tmp) = at::_qr_helper(self, some);
Q.resize_as_(Q_tmp).copy_(Q_tmp);
R.resize_as_(R_tmp).copy_(R_tmp);
return std::tuple<Tensor&, Tensor&>(Q, R);
}

}} // namespace at::native
27 changes: 27 additions & 0 deletions aten/src/ATen/native/LinearAlgebraUtils.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <ATen/ATen.h>
#include <ATen/ExpandUtils.h>
#include <ATen/TensorUtils.h>
#include <limits>
#include <sstream>

Expand Down Expand Up @@ -187,4 +188,30 @@ static inline std::tuple<Tensor,Tensor> _linear_solve_broadcast_args(const Tenso
return std::make_tuple(arg1_broadcasted, arg2_broadcasted);
}

// Function to compute sizes, strides and the extra columns for the Q matrix in the QR Decomposition
static inline std::tuple<std::vector<int64_t>,
std::vector<int64_t>,
int64_t> _compute_geometry_for_Q(const Tensor& input, bool some) {
int64_t m = input.size(-2), n = input.size(-1);
int64_t n_columns_q;

// We need to compute the required size of Q based on the `some` option
auto q_sizes = input.sizes().vec();
if (!some && m > n) {
q_sizes[input.dim() - 1] = m;
n_columns_q = m;
} else {
q_sizes[input.dim() - 1] = n;
n_columns_q = std::min(m, n);
}
auto q_strides = at::detail::defaultStrides(q_sizes);

// Q should be a column-major or a batch of column-major matrices
// ... x m x n will have strides: ...., n, 1
// We require: ...., 1, m
q_strides[input.dim() - 1] = m;
q_strides[input.dim() - 2] = 1;
return std::make_tuple(q_sizes, q_strides, n_columns_q);
}

}} // namespace at::native
Loading