aten/src/TH/generic/THBlas.c (20 changes: 13 additions & 7 deletions)
@@ -201,11 +201,12 @@ void THBlas_(gemv)(char trans, int64_t m, int64_t n, real alpha, real *a, int64_
     lda = m;
 
 #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
-  if( (m <= INT_MAX) && (n <= INT_MAX) &&
-      (lda >= THMax(1, m)) && (lda <= INT_MAX) &&
+  if( (m <= INT_MAX) && (n <= INT_MAX) && (lda <= INT_MAX) &&
       (incx > 0) && (incx <= INT_MAX) &&
       (incy > 0) && (incy <= INT_MAX) )
   {
+    THArgCheck(lda >= THMax(1, m), 6,
+      "lda should be at least max(1, m=%d), but have %d", m, lda);
     int i_m = (int)m;
     int i_n = (int)n;
     int i_lda = (int)lda;
@@ -259,11 +260,12 @@ void THBlas_(ger)(int64_t m, int64_t n, real alpha, real *x, int64_t incx, real
     lda = m;
 
 #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
-  if( (m <= INT_MAX) && (n <= INT_MAX) &&
-      (lda >= THMax(1, m)) && (lda <= INT_MAX) &&
+  if( (m <= INT_MAX) && (n <= INT_MAX) && (lda <= INT_MAX) &&
       (incx > 0) && (incx <= INT_MAX) &&
       (incy > 0) && (incy <= INT_MAX) )
   {
+    THArgCheck(lda >= THMax(1, m), 9,
+      "lda should be at least max(1, m=%d), but have %d", m, lda);
     int i_m = (int)m;
     int i_n = (int)n;
     int i_lda = (int)lda;
@@ -322,10 +324,14 @@ void THBlas_(gemm)(char transa, char transb, int64_t m, int64_t n, int64_t k, re
 
 #if defined(USE_BLAS) && (defined(TH_REAL_IS_DOUBLE) || defined(TH_REAL_IS_FLOAT))
   if( (m <= INT_MAX) && (n <= INT_MAX) && (k <= INT_MAX) &&
-      (lda >= THMax(1, (transa_ ? k : m))) && (lda <= INT_MAX) &&
-      (ldb >= THMax(1, (transb_ ? n : k))) && (ldb <= INT_MAX) &&
-      (ldc >= THMax(1, m)) && (ldc <= INT_MAX) )
+      (lda <= INT_MAX) && (ldb <= INT_MAX) && (ldc <= INT_MAX) )
   {
+    THArgCheck(lda >= THMax(1, (transa_ ? k : m)), 8,
+      "lda should be at least max(1, %d), but have %d", (transa_ ? k : m), lda);
+    THArgCheck(ldb >= THMax(1, (transb_ ? n : k)), 10,
+      "ldb should be at least max(1, %d), but have %d", (transb_ ? n : k), ldb);
+    THArgCheck(ldc >= THMax(1, m), 13,
+      "ldc should be at least max(1, m=%d), but have %d", m, ldc);
     int i_m = (int)m;
     int i_n = (int)n;
     int i_k = (int)k;
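Note on the THBlas.c change: the lda lower bound used to sit inside the if condition, so an invalid leading dimension silently routed execution to the slow non-BLAS fallback loop; moving it into THArgCheck turns it into a caller-visible error. Below is a standalone sketch of the same validate-then-call pattern (not from this PR; it assumes a CBLAS backend and cblas.h, and the wrapper name is illustrative):

/* For column-major, non-transposed GEMV, BLAS requires lda >= max(1, m).
 * Violating it is a caller bug, so fail loudly instead of falling back. */
#include <assert.h>
#include <cblas.h>

static void checked_dgemv(int m, int n, double alpha,
                          const double *a, int lda,
                          const double *x, int incx,
                          double beta, double *y, int incy)
{
  assert(lda >= (m > 1 ? m : 1));  /* report the bad argument, don't hide it */
  cblas_dgemv(CblasColMajor, CblasNoTrans, m, n,
              alpha, a, lda, x, incx, beta, y, incy);
}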
aten/src/TH/generic/THTensorMath.c (59 changes: 42 additions & 17 deletions)
@@ -1331,14 +1331,17 @@ void THTensor_(addmv)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor
     THTensor_(copy)(r_, t);
   }
 
-  if(mat->stride[0] == 1)
+  // n == 1 || lda >= max(1, m)
+  #define LDA_COND(M, N, LDA) ((N) == 1 || (LDA) >= THMax(1, (M)))
+
+  if(mat->stride[0] == 1 && LDA_COND(mat->size[0], mat->size[1], mat->stride[1]))
   {
     THBlas_(gemv)('n', mat->size[0], mat->size[1],
                   alpha, THTensor_(data)(mat), mat->stride[1],
                   THTensor_(data)(vec), vec->stride[0],
                   beta, THTensor_(data)(r_), r_->stride[0]);
   }
-  else if(mat->stride[1] == 1)
+  else if(mat->stride[1] == 1 && LDA_COND(mat->size[1], mat->size[0], mat->stride[0]))
   {
     THBlas_(gemv)('t', mat->size[1], mat->size[0],
                   alpha, THTensor_(data)(mat), mat->stride[0],
@@ -1356,6 +1359,8 @@ void THTensor_(addmv)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor
 
     THTensor_(free)(cmat);
   }
+
+  #undef LDA_COND
 }
 
 void THTensor_(match)(THTensor *r_, THTensor *m1, THTensor *m2, real gain)
@@ -1434,15 +1439,18 @@ void THTensor_(addmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor
     }
   }
 
+  // n == 1 || ldc >= max(1, m)
+  #define LDC_COND(M, N, LDC) ((N) == 1 || (LDC) >= THMax(1, M))
+
   /* r_ */
   if(r_->stride[0] == 1 &&
-     r_->stride[1] != 0)
+     LDC_COND(r_->size[0], r_->size[1], r_->stride[1]))
   {
     transpose_r = 'n';
     r__ = r_;
   }
   else if(r_->stride[1] == 1 &&
-          r_->stride[0] != 0)
+          LDC_COND(r_->size[1], r_->size[0], r_->stride[0]))
   {
     THTensor *swap = m2;
     m2 = m1;
@@ -1453,22 +1461,30 @@ void THTensor_(addmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor
   else
   {
     transpose_r = 'n';
 
+    // make r__ FORTRAN contiguous
     THTensor *transp_r_ = THTensor_(newTranspose)(r_, 0, 1);
     r__ = THTensor_(newClone)(transp_r_);
     THTensor_(free)(transp_r_);
     THTensor_(transpose)(r__, NULL, 0, 1);
   }
 
+  #undef LDC_COND
+
+  int64_t m = r__->size[(transpose_r == 'n' ? 0 : 1)];
+  int64_t n = r__->size[(transpose_r == 'n' ? 1 : 0)];
+  int64_t k = m1->size[(transpose_r == 'n' ? 1 : 0)];
+  int64_t ldr__ = r__->stride[(transpose_r == 'n' ? 1 : 0)];
+
   /* m1 */
+  /* Need ldm1_ >= max(1, (transpose_m1 == 't' ? m : k)) */
   if(m1->stride[(transpose_r == 'n' ? 0 : 1)] == 1 &&
-     m1->stride[(transpose_r == 'n' ? 1 : 0)] != 0)
+     m1->stride[(transpose_r == 'n' ? 1 : 0)] >= THMax(1, k))
   {
     transpose_m1 = 'n';
     m1_ = m1;
   }
   else if(m1->stride[(transpose_r == 'n' ? 1 : 0)] == 1 &&
-          m1->stride[(transpose_r == 'n' ? 0 : 1)] != 0)
+          m1->stride[(transpose_r == 'n' ? 0 : 1)] >= THMax(1, m))
   {
     transpose_m1 = 't';
     m1_ = m1;
@@ -1481,14 +1497,15 @@ void THTensor_(addmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor
   }
 
   /* m2 */
+  /* Need ldm2_ >= max(1, (transpose_m2 == 't' ? n : k)) */
   if(m2->stride[(transpose_r == 'n' ? 0 : 1)] == 1 &&
-     m2->stride[(transpose_r == 'n' ? 1 : 0)] != 0)
+     m2->stride[(transpose_r == 'n' ? 1 : 0)] >= THMax(1, k))
   {
     transpose_m2 = 'n';
     m2_ = m2;
   }
   else if(m2->stride[(transpose_r == 'n' ? 1 : 0)] == 1 &&
-          m2->stride[(transpose_r == 'n' ? 0 : 1)] != 0)
+          m2->stride[(transpose_r == 'n' ? 0 : 1)] >= THMax(1, n))
   {
     transpose_m2 = 't';
     m2_ = m2;
@@ -1500,21 +1517,24 @@ void THTensor_(addmm)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor
     free_m2 = 1;
   }
 
+  int64_t ldm1_ = (transpose_m1 == 'n' ? m1_->stride[(transpose_r == 'n' ? 1 : 0)] : m1_->stride[(transpose_r == 'n' ? 0 : 1)]);
+  int64_t ldm2_ = (transpose_m2 == 'n' ? m2_->stride[(transpose_r == 'n' ? 1 : 0)] : m2_->stride[(transpose_r == 'n' ? 0 : 1)]);
+
 #pragma omp critical(blasgemm)
   /* do the operation */
   THBlas_(gemm)(transpose_m1,
                 transpose_m2,
-                r__->size[(transpose_r == 'n' ? 0 : 1)],
-                r__->size[(transpose_r == 'n' ? 1 : 0)],
-                m1_->size[(transpose_r == 'n' ? 1 : 0)],
+                m,
+                n,
+                k,
                 alpha,
                 THTensor_(data)(m1_),
-                (transpose_m1 == 'n' ? m1_->stride[(transpose_r == 'n' ? 1 : 0)] : m1_->stride[(transpose_r == 'n' ? 0 : 1)]),
+                ldm1_,
                 THTensor_(data)(m2_),
-                (transpose_m2 == 'n' ? m2_->stride[(transpose_r == 'n' ? 1 : 0)] : m2_->stride[(transpose_r == 'n' ? 0 : 1)]),
+                ldm2_,
                 beta,
                 THTensor_(data)(r__),
-                r__->stride[(transpose_r == 'n' ? 1 : 0)]);
+                ldr__);
 
   /* free intermediate variables */
   if(free_m1)
@@ -1555,14 +1575,17 @@ void THTensor_(addr)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor
   else if(beta != 1)
     THTensor_(mul)(r_, r_, beta);
 
-  if(r_->stride[0] == 1)
+  // n == 1 || lda >= max(1, m)
+  #define LDA_COND(M, N, LDA) ((N) == 1 || (LDA) >= THMax(1, (M)))
+
+  if(r_->stride[0] == 1 && LDA_COND(vec1->size[0], vec2->size[0], r_->stride[1]))
   {
     THBlas_(ger)(vec1->size[0], vec2->size[0],
                  alpha, THTensor_(data)(vec1), vec1->stride[0],
                  THTensor_(data)(vec2), vec2->stride[0],
                  THTensor_(data)(r_), r_->stride[1]);
   }
-  else if(r_->stride[1] == 1)
+  else if(r_->stride[1] == 1 && LDA_COND(vec2->size[0], vec1->size[0], r_->stride[0]))
   {
     THBlas_(ger)(vec2->size[0], vec1->size[0],
                  alpha, THTensor_(data)(vec2), vec2->stride[0],
@@ -1580,6 +1603,8 @@ void THTensor_(addr)(THTensor *r_, real beta, THTensor *t, real alpha, THTensor
 
     THTensor_(freeCopyTo)(cr, r_);
   }
+
+  #undef LDA_COND
 }
 
 void THTensor_(addbmm)(THTensor *result, real beta, THTensor *t, real alpha, THTensor *batch1, THTensor *batch2)
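Note on LDA_COND/LDC_COND: the n == 1 special case exists because with a single column BLAS never advances by the leading dimension, so a one-column view sliced out of a wider matrix stays usable even when its stride is smaller than m. A small illustration of the predicate (not from the PR; plain C, names illustrative):

#include <stdio.h>

/* n == 1 || lda >= max(1, m), the same condition the new macros encode */
static int lda_cond(long m, long n, long lda)
{
  return n == 1 || lda >= (m > 1 ? m : 1);
}

int main(void)
{
  printf("%d\n", lda_cond(5, 1, 1));  /* 1: single column, lda is never used */
  printf("%d\n", lda_cond(5, 3, 4));  /* 0: columns would overlap; take the copy path */
  return 0;
}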
aten/src/THNN/generic/SpatialDilatedConvolution.c (28 changes: 18 additions & 10 deletions)
@@ -82,7 +82,11 @@ void THNN_(SpatialDilatedConvolution_updateOutput)(
 
   input = THTensor_(newContiguous)(input);
   weight = THTensor_(newContiguous)(weight);
-  bias = bias ? THTensor_(newContiguous)(bias) : bias;
+  THArgCheck(THTensor_(isContiguous)(columns), 5, "columns needs to be contiguous");
+  if (bias) {
+    bias = THTensor_(newContiguous)(bias);
+    THArgCheck(THTensor_(isContiguous)(ones), 6, "ones needs to be contiguous");
+  }
   int batch = 1;
   if (input->nDimension == 3) {
     // Force batch
@@ -107,7 +111,8 @@ void THNN_(SpatialDilatedConvolution_updateOutput)(
   // Define a buffer of ones, for bias accumulation
   // Note: this buffer can be shared with other modules, it only ever gets increased,
   // and always contains ones.
-  if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
+  if (!THTensor_(isContiguous)(ones) || ones->nDimension != 2 ||
+      ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
     // Resize plane and fill with ones...
     THTensor_(resize2d)(ones, outputHeight, outputWidth);
     THTensor_(fill)(ones, 1);
@@ -207,6 +212,7 @@ void THNN_(SpatialDilatedConvolution_updateGradInput)(
   input = THTensor_(newContiguous)(input);
   weight = THTensor_(newContiguous)(weight);
   gradOutput = THTensor_(newContiguous)(gradOutput);
+  THArgCheck(THTensor_(isContiguous)(gradColumns), 5, "gradColumns needs to be contiguous");
   int batch = 1;
   if (input->nDimension == 3) {
     // Force batch
@@ -310,8 +316,11 @@ void THNN_(SpatialDilatedConvolution_accGradParameters)(
   input = THTensor_(newContiguous)(input);
   gradOutput = THTensor_(newContiguous)(gradOutput);
   THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous");
-  if (gradBias)
+  THArgCheck(THTensor_(isContiguous)(columns), 6, "columns needs to be contiguous");
+  if (gradBias) {
     THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous");
+    THArgCheck(THTensor_(isContiguous)(ones), 7, "ones needs to be contiguous");
+  }
   int batch = 1;
   if (input->nDimension == 3) {
     // Force batch
@@ -329,13 +338,6 @@ void THNN_(SpatialDilatedConvolution_accGradParameters)(
   // Batch size + input planes
   int64_t batchSize = input->size[0];
 
-  // Define a buffer of ones, for bias accumulation
-  if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
-    // Resize plane and fill with ones...
-    THTensor_(resize2d)(ones, outputHeight, outputWidth);
-    THTensor_(fill)(ones, 1);
-  }
-
   // Resize temporary columns
   THTensor_(resize2d)(columns, nInputPlane*kW*kH, outputHeight*outputWidth);
 
@@ -380,6 +382,12 @@ void THNN_(SpatialDilatedConvolution_accGradParameters)(
 
     // Do GEMV (note: this is a bit confusing because gemv assumes column-major matrices)
     if (gradBias) {
+      // Define a buffer of ones, for bias accumulation
+      if (ones->nDimension != 2 || ones->size[0]*ones->size[1] < outputHeight*outputWidth) {
+        // Resize plane and fill with ones...
+        THTensor_(resize2d)(ones, outputHeight, outputWidth);
+        THTensor_(fill)(ones, 1);
+      }
      THBlas_(gemv)(
          't',
          k_, m_,
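Note on the new contiguity checks: these functions hand raw data pointers to im2col/GEMM and index them as dense arrays, so a non-contiguous columns/ones/grad buffer would be read with the wrong layout. A sketch of the invariant being checked (illustrative, not the TH implementation; the helper name is an assumption):

#include <stdint.h>

/* A dense layout requires each stride to equal the product of the sizes of
 * all later dimensions (size-1 dimensions are exempt); anything else means
 * data() + i does not address element i. */
static int is_contiguous(const int64_t *size, const int64_t *stride, int ndim)
{
  int64_t expected = 1;
  for (int d = ndim - 1; d >= 0; --d) {
    if (size[d] != 1 && stride[d] != expected)
      return 0;
    expected *= size[d];
  }
  return 1;
}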
aten/src/THNN/generic/SpatialFullDilatedConvolution.c (12 changes: 10 additions & 2 deletions)
@@ -138,7 +138,11 @@ void THNN_(SpatialFullDilatedConvolution_updateOutput)(
 
   input = THTensor_(newContiguous)(input);
   weight = THTensor_(newContiguous)(weight);
-  bias = bias ? THTensor_(newContiguous)(bias) : bias;
+  THArgCheck(THTensor_(isContiguous)(columns), 5, "columns needs to be contiguous");
+  if (bias) {
+    bias = THTensor_(newContiguous)(bias);
+    THArgCheck(THTensor_(isContiguous)(ones), 6, "ones needs to be contiguous");
+  }
   int batch = 1;
   if (input->nDimension == 3) {
     // Force batch
@@ -265,6 +269,7 @@ void THNN_(SpatialFullDilatedConvolution_updateGradInput)(
   input = THTensor_(newContiguous)(input);
   gradOutput = THTensor_(newContiguous)(gradOutput);
   weight = THTensor_(newContiguous)(weight);
+  THArgCheck(THTensor_(isContiguous)(gradColumns), 5, "gradColumns needs to be contiguous");
   int batch = 1;
   if (input->nDimension == 3) {
     // Force batch
@@ -370,8 +375,11 @@ void THNN_(SpatialFullDilatedConvolution_accGradParameters)(
   input = THTensor_(newContiguous)(input);
   gradOutput = THTensor_(newContiguous)(gradOutput);
   THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous");
-  if (gradBias)
+  THArgCheck(THTensor_(isContiguous)(columns), 6, "columns needs to be contiguous");
+  if (gradBias) {
     THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous");
+    THArgCheck(THTensor_(isContiguous)(ones), 7, "ones needs to be contiguous");
+  }
   int batch = 1;
   if (input->nDimension == 3) {
     // Force batch
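Note on the ones buffer being checked (and, in SpatialDilatedConvolution above, resized lazily inside if (gradBias)): it is a shared, grow-only scratch buffer that only matters when a bias term is present. A hedged sketch of that idiom in plain C (not the TH code; names and the malloc-based handling are assumptions):

#include <stdlib.h>

/* Grow-only buffer of ones, (re)created lazily right before the bias GEMV
 * that consumes it; never shrinks, so it can be shared across calls. */
static double *ensure_ones(double *buf, size_t *cap, size_t need)
{
  if (buf == NULL || *cap < need) {
    free(buf);
    buf = malloc(need * sizeof *buf);  /* error handling omitted for brevity */
    for (size_t i = 0; i < need; ++i)
      buf[i] = 1.0;
    *cap = need;
  }
  return buf;
}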
aten/src/THNN/generic/VolumetricDilatedConvolution.c (19 changes: 15 additions & 4 deletions)
@@ -85,7 +85,11 @@ void THNN_(VolumetricDilatedConvolution_updateOutput)(
 
   input = THTensor_(newContiguous)(input);
   weight = THTensor_(newContiguous)(weight);
-  bias = bias ? THTensor_(newContiguous)(bias) : bias;
+  THArgCheck(THTensor_(isContiguous)(columns), 5, "columns needs to be contiguous");
+  if (bias) {
+    bias = THTensor_(newContiguous)(bias);
+    THArgCheck(THTensor_(isContiguous)(ones), 6, "ones needs to be contiguous");
+  }
   int batch = 1;
   if (input->nDimension == 4) {
     // Force batch
@@ -189,7 +193,7 @@ void THNN_(VolumetricDilatedConvolution_updateOutput)(
 
   THTensor_(free)(input);
   THTensor_(free)(weight);
-   if (bias) THTensor_(free)(bias);
+  if (bias) THTensor_(free)(bias);
 }
 
 void THNN_(VolumetricDilatedConvolution_updateGradInput)(
@@ -216,7 +220,8 @@ void THNN_(VolumetricDilatedConvolution_updateGradInput)(
   input = THTensor_(newContiguous)(input);
   gradOutput = THTensor_(newContiguous)(gradOutput);
   weight = THTensor_(newContiguous)(weight);
-
+  THArgCheck(THTensor_(isContiguous)(gradColumns), 5, "gradColumns needs to be contiguous");
+
   int batch = 1;
   if (input->nDimension == 4) {
     // Force batch
@@ -321,7 +326,13 @@ void THNN_(VolumetricDilatedConvolution_accGradParameters)(
 
   input = THTensor_(newContiguous)(input);
   gradOutput = THTensor_(newContiguous)(gradOutput);
-
+  THArgCheck(THTensor_(isContiguous)(gradWeight), 4, "gradWeight needs to be contiguous");
+  THArgCheck(THTensor_(isContiguous)(columns), 6, "columns needs to be contiguous");
+  if (gradBias) {
+    THArgCheck(THTensor_(isContiguous)(gradBias), 5, "gradBias needs to be contiguous");
+    THArgCheck(THTensor_(isContiguous)(ones), 7, "ones needs to be contiguous");
+  }
+
   int batch = 1;
   if (input->nDimension == 4) {
     // Force batch
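Note on why these files keep a buffer of ones at all: accumulating the bias gradient gradBias[p] += scale * sum_i gradOutput[p][i] over all output positions is exactly a GEMV against a ones vector, which is what the gemv('t', ...) calls in these kernels compute. A CBLAS sketch of the same reduction (illustrative, not the TH call; the function name is an assumption):

#include <cblas.h>

static void acc_grad_bias(int planes, int positions, double scale,
                          const double *grad_output, /* planes x positions, row-major */
                          const double *ones,        /* >= positions entries, all 1.0 */
                          double *grad_bias)         /* planes entries, accumulated into */
{
  /* grad_bias := scale * grad_output * ones + 1.0 * grad_bias */
  cblas_dgemv(CblasRowMajor, CblasNoTrans, planes, positions,
              scale, grad_output, positions, ones, 1, 1.0, grad_bias, 1);
}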