Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/backend/opencl/kernel/ireduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ void ireduce_dim_launcher(Param out, cl::Buffer *oidx, Param in,
ToNumStr<T> toNumStr;

std::ostringstream options;
options << " -D T=" << dtype_traits<T>::getName() << " -D dim=" << dim
options << " -D T=" << dtype_traits<T>::getName() << " -D kDim=" << dim
<< " -D DIMY=" << threads_y << " -D THREADS_X=" << THREADS_X
<< " -D init=" << toNumStr(Binary<T, op>::init()) << " -D "
<< binOpName<op>() << " -D CPLX=" << af::iscplx<T>()
Expand Down
16 changes: 8 additions & 8 deletions src/backend/opencl/kernel/ireduce_dim.cl
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,15 @@ __kernel void ireduce_dim_kernel(__global T *oData, KParam oInfo,

// There is only one element per group for out
// There are get_local_size(1) elements per group for in
// Hence increment ids[dim] just after offseting out and before offsetting
// Hence increment ids[kDim] just after offseting out and before offsetting
// in
oData += ids[3] * oInfo.strides[3] + ids[2] * oInfo.strides[2] +
ids[1] * oInfo.strides[1] + ids[0] + oInfo.offset;
olData += ids[3] * oInfo.strides[3] + ids[2] * oInfo.strides[2] +
ids[1] * oInfo.strides[1] + ids[0] + oInfo.offset;
const uint id_dim_out = ids[dim];
const uint id_dim_out = ids[kDim];

ids[dim] = ids[dim] * get_local_size(1) + lidy;
ids[kDim] = ids[kDim] * get_local_size(1) + lidy;

iData += ids[3] * iInfo.strides[3] + ids[2] * iInfo.strides[2] +
ids[1] * iInfo.strides[1] + ids[0] + iInfo.offset;
Expand All @@ -44,8 +44,8 @@ __kernel void ireduce_dim_kernel(__global T *oData, KParam oInfo,
ids[1] * iInfo.strides[1] + ids[0] + iInfo.offset;
}

const uint id_dim_in = ids[dim];
const uint istride_dim = iInfo.strides[dim];
const uint id_dim_in = ids[kDim];
const uint istride_dim = iInfo.strides[kDim];

bool is_valid = (ids[0] < iInfo.dims[0]) && (ids[1] < iInfo.dims[1]) &&
(ids[2] < iInfo.dims[2]) && (ids[3] < iInfo.dims[3]);
Expand All @@ -56,14 +56,14 @@ __kernel void ireduce_dim_kernel(__global T *oData, KParam oInfo,
T out_val = init;
uint out_idx = id_dim_in;

if (is_valid && id_dim_in < iInfo.dims[dim]) {
if (is_valid && id_dim_in < iInfo.dims[kDim]) {
out_val = *iData;
if (!IS_FIRST) out_idx = *ilData;
}

const uint id_dim_in_start = id_dim_in + group_dim * get_local_size(1);

for (int id = id_dim_in_start; is_valid && (id < iInfo.dims[dim]);
for (int id = id_dim_in_start; is_valid && (id < iInfo.dims[kDim]);
id += group_dim * get_local_size(1)) {
iData = iData + group_dim * get_local_size(1) * istride_dim;

Expand Down Expand Up @@ -112,7 +112,7 @@ __kernel void ireduce_dim_kernel(__global T *oData, KParam oInfo,
barrier(CLK_LOCAL_MEM_FENCE);
}

if (lidy == 0 && is_valid && (id_dim_out < oInfo.dims[dim])) {
if (lidy == 0 && is_valid && (id_dim_out < oInfo.dims[kDim])) {
*oData = *s_vptr;
*olData = *s_iptr;
}
Expand Down
2 changes: 1 addition & 1 deletion src/backend/opencl/kernel/join.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ void join(Param out, const Param in, const af::dim4 offset) {
std::ostringstream options;
options << " -D To=" << dtype_traits<To>::getName()
<< " -D Ti=" << dtype_traits<Ti>::getName()
<< " -D dim=" << dim;
<< " -D kDim=" << dim;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

join.cl OpenCL kernel doesn't use this one at all. I wonder why it's being passed to this at all.


if (std::is_same<To, double>::value ||
std::is_same<To, cdouble>::value) {
Expand Down
5 changes: 3 additions & 2 deletions src/backend/opencl/kernel/mean.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,9 @@ void mean_dim_launcher(Param out, Param owt, Param in, Param inWeight,
std::ostringstream options;
options << " -D Ti=" << dtype_traits<Ti>::getName()
<< " -D Tw=" << dtype_traits<Tw>::getName()
<< " -D To=" << dtype_traits<To>::getName() << " -D dim=" << dim
<< " -D DIMY=" << threads_y << " -D THREADS_X=" << THREADS_X
<< " -D To=" << dtype_traits<To>::getName()
<< " -D kDim=" << dim << " -D DIMY=" << threads_y
<< " -D THREADS_X=" << THREADS_X
<< " -D init_To=" << toNumStr(Binary<To, af_add_t>::init())
<< " -D init_Tw=" << twNumStr(transform_weight(0))
<< " -D one_Tw=" << twNumStr(transform_weight(1));
Expand Down
18 changes: 9 additions & 9 deletions src/backend/opencl/kernel/mean_dim.cl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ __kernel void mean_dim_kernel(__global To *oData, KParam oInfo,

// There is only one element per group for out
// There are get_local_size(1) elements per group for in
// Hence increment ids[dim] just after offseting out and before offsetting
// Hence increment ids[kDim] just after offseting out and before offsetting
// in
oData += ids[3] * oInfo.strides[3] + ids[2] * oInfo.strides[2] +
ids[1] * oInfo.strides[1] + ids[0] + oInfo.offset;
Expand All @@ -40,9 +40,9 @@ __kernel void mean_dim_kernel(__global To *oData, KParam oInfo,
owData += ids[3] * oInfo.strides[3] + ids[2] * oInfo.strides[2] +
ids[1] * oInfo.strides[1] + ids[0] + oInfo.offset;
#endif
const uint id_dim_out = ids[dim];
const uint id_dim_out = ids[kDim];

ids[dim] = ids[dim] * get_local_size(1) + lidy;
ids[kDim] = ids[kDim] * get_local_size(1) + lidy;

iData += ids[3] * iInfo.strides[3] + ids[2] * iInfo.strides[2] +
ids[1] * iInfo.strides[1] + ids[0] + iInfo.offset;
Expand All @@ -52,8 +52,8 @@ __kernel void mean_dim_kernel(__global To *oData, KParam oInfo,
ids[1] * iInfo.strides[1] + ids[0] + iInfo.offset;
#endif

const uint id_dim_in = ids[dim];
const uint istride_dim = iInfo.strides[dim];
const uint id_dim_in = ids[kDim];
const uint istride_dim = iInfo.strides[kDim];

bool is_valid = (ids[0] < iInfo.dims[0]) && (ids[1] < iInfo.dims[1]) &&
(ids[2] < iInfo.dims[2]) && (ids[3] < iInfo.dims[3]);
Expand All @@ -64,7 +64,7 @@ __kernel void mean_dim_kernel(__global To *oData, KParam oInfo,
To out_val = init_To;
Tw out_wt = init_Tw;

if (is_valid && id_dim_in < iInfo.dims[dim]) {
if (is_valid && id_dim_in < iInfo.dims[kDim]) {
out_val = transform(*iData);
#ifdef INPUT_WEIGHT
out_wt = *iwData;
Expand All @@ -76,14 +76,14 @@ __kernel void mean_dim_kernel(__global To *oData, KParam oInfo,
const uint id_dim_in_start = id_dim_in + group_dim * get_local_size(1);

#ifdef INPUT_WEIGHT
for (int id = id_dim_in_start; is_valid && (id < iInfo.dims[dim]);
for (int id = id_dim_in_start; is_valid && (id < iInfo.dims[kDim]);
id += group_dim * get_local_size(1)) {
iData = iData + group_dim * get_local_size(1) * istride_dim;
iwData = iwData + group_dim * get_local_size(1) * istride_dim;
binOp(&out_val, &out_wt, transform(*iData), *iwData);
}
#else
for (int id = id_dim_in_start; is_valid && (id < iInfo.dims[dim]);
for (int id = id_dim_in_start; is_valid && (id < iInfo.dims[kDim]);
id += group_dim * get_local_size(1)) {
iData = iData + group_dim * get_local_size(1) * istride_dim;
binOp(&out_val, &out_wt, transform(*iData), one_Tw);
Expand Down Expand Up @@ -127,7 +127,7 @@ __kernel void mean_dim_kernel(__global To *oData, KParam oInfo,
barrier(CLK_LOCAL_MEM_FENCE);
}

if (lidy == 0 && is_valid && (id_dim_out < oInfo.dims[dim])) {
if (lidy == 0 && is_valid && (id_dim_out < oInfo.dims[kDim])) {
*oData = *s_vptr;
#ifdef OUTPUT_WEIGHT
*owData = *s_wptr;
Expand Down
2 changes: 1 addition & 1 deletion src/backend/opencl/kernel/reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ void reduce_dim_launcher(Param out, Param in, const int dim,
std::ostringstream options;
options << " -D To=" << dtype_traits<To>::getName()
<< " -D Ti=" << dtype_traits<Ti>::getName() << " -D T=To"
<< " -D dim=" << dim << " -D DIMY=" << threads_y
<< " -D kDim=" << dim << " -D DIMY=" << threads_y
<< " -D THREADS_X=" << THREADS_X
<< " -D init=" << toNumStr(Binary<To, op>::init()) << " -D "
<< binOpName<op>() << " -D CPLX=" << af::iscplx<Ti>();
Expand Down
14 changes: 7 additions & 7 deletions src/backend/opencl/kernel/reduce_dim.cl
Original file line number Diff line number Diff line change
Expand Up @@ -26,26 +26,26 @@ __kernel void reduce_dim_kernel(__global To *oData, KParam oInfo,

// There is only one element per group for out
// There are get_local_size(1) elements per group for in
// Hence increment ids[dim] just after offseting out and before offsetting
// Hence increment ids[kDim] just after offseting out and before offsetting
// in
oData += ids[3] * oInfo.strides[3] + ids[2] * oInfo.strides[2] +
ids[1] * oInfo.strides[1] + ids[0] + oInfo.offset;
const uint id_dim_out = ids[dim];
const uint id_dim_out = ids[kDim];

ids[dim] = ids[dim] * get_local_size(1) + lidy;
ids[kDim] = ids[kDim] * get_local_size(1) + lidy;
iData += ids[3] * iInfo.strides[3] + ids[2] * iInfo.strides[2] +
ids[1] * iInfo.strides[1] + ids[0] + iInfo.offset;
const uint id_dim_in = ids[dim];
const uint id_dim_in = ids[kDim];

const uint istride_dim = iInfo.strides[dim];
const uint istride_dim = iInfo.strides[kDim];

bool is_valid = (ids[0] < iInfo.dims[0]) && (ids[1] < iInfo.dims[1]) &&
(ids[2] < iInfo.dims[2]) && (ids[3] < iInfo.dims[3]);

__local To s_val[THREADS_X * DIMY];

To out_val = init;
for (int id = id_dim_in; is_valid && (id < iInfo.dims[dim]);
for (int id = id_dim_in; is_valid && (id < iInfo.dims[kDim]);
id += group_dim * get_local_size(1)) {
To in_val = transform(*iData);
if (change_nan) in_val = !IS_NAN(in_val) ? in_val : nanval;
Expand Down Expand Up @@ -73,7 +73,7 @@ __kernel void reduce_dim_kernel(__global To *oData, KParam oInfo,
barrier(CLK_LOCAL_MEM_FENCE);
}

if (lidy == 0 && is_valid && (id_dim_out < oInfo.dims[dim])) {
if (lidy == 0 && is_valid && (id_dim_out < oInfo.dims[kDim])) {
*oData = *s_ptr;
}
}
32 changes: 16 additions & 16 deletions src/backend/opencl/kernel/scan_dim.cl
Original file line number Diff line number Diff line change
Expand Up @@ -27,27 +27,27 @@ __kernel void scan_dim_kernel(__global To *oData, KParam oInfo,

// There is only one element per group for out
// There are DIMY elements per group for in
// Hence increment ids[dim] just after offseting out and before offsetting
// Hence increment ids[kDim] just after offseting out and before offsetting
// in
tData += ids[3] * tInfo.strides[3] + ids[2] * tInfo.strides[2] +
ids[1] * tInfo.strides[1] + ids[0];
const int groupId_dim = ids[dim];
const int groupId_dim = ids[kDim];

ids[dim] = ids[dim] * DIMY * lim + lidy;
ids[kDim] = ids[kDim] * DIMY * lim + lidy;
oData += ids[3] * oInfo.strides[3] + ids[2] * oInfo.strides[2] +
ids[1] * oInfo.strides[1] + ids[0];
iData += ids[3] * iInfo.strides[3] + ids[2] * iInfo.strides[2] +
ids[1] * iInfo.strides[1] + ids[0];
iData += iInfo.offset;

int id_dim = ids[dim];
const int out_dim = oInfo.dims[dim];
int id_dim = ids[kDim];
const int out_dim = oInfo.dims[kDim];

bool is_valid = (ids[0] < oInfo.dims[0]) && (ids[1] < oInfo.dims[1]) &&
(ids[2] < oInfo.dims[2]) && (ids[3] < oInfo.dims[3]);

const int ostride_dim = oInfo.strides[dim];
const int istride_dim = iInfo.strides[dim];
const int ostride_dim = oInfo.strides[kDim];
const int istride_dim = iInfo.strides[kDim];

__local To l_val0[THREADS_X * DIMY];
__local To l_val1[THREADS_X * DIMY];
Expand Down Expand Up @@ -95,7 +95,7 @@ __kernel void scan_dim_kernel(__global To *oData, KParam oInfo,
barrier(CLK_LOCAL_MEM_FENCE);
}

if (!isFinalPass && is_valid && (groupId_dim < tInfo.dims[dim]) && isLast) {
if (!isFinalPass && is_valid && (groupId_dim < tInfo.dims[kDim]) && isLast) {
*tData = val;
}
}
Expand All @@ -116,34 +116,34 @@ __kernel void bcast_dim_kernel(__global To *oData, KParam oInfo,
const int yid = groupId_y;

int ids[4] = {xid, yid, zid, wid};
const int groupId_dim = ids[dim];
const int groupId_dim = ids[kDim];

if (groupId_dim != 0) {
// There is only one element per group for out
// There are DIMY elements per group for in
// Hence increment ids[dim] just after offseting out and before
// Hence increment ids[kDim] just after offseting out and before
// offsetting in
tData += ids[3] * tInfo.strides[3] + ids[2] * tInfo.strides[2] +
ids[1] * tInfo.strides[1] + ids[0];

ids[dim] = ids[dim] * DIMY * lim + lidy;
ids[kDim] = ids[kDim] * DIMY * lim + lidy;
oData += ids[3] * oInfo.strides[3] + ids[2] * oInfo.strides[2] +
ids[1] * oInfo.strides[1] + ids[0];

// Shift broadcast one step to the right for exclusive scan (#2366)
int offset = inclusive_scan ? 0 : oInfo.strides[dim];
int offset = inclusive_scan ? 0 : oInfo.strides[kDim];
oData += offset;

const int id_dim = ids[dim];
const int out_dim = oInfo.dims[dim];
const int id_dim = ids[kDim];
const int out_dim = oInfo.dims[kDim];

bool is_valid = (ids[0] < oInfo.dims[0]) && (ids[1] < oInfo.dims[1]) &&
(ids[2] < oInfo.dims[2]) && (ids[3] < oInfo.dims[3]);

if (is_valid) {
To accum = *(tData - tInfo.strides[dim]);
To accum = *(tData - tInfo.strides[kDim]);

const int ostride_dim = oInfo.strides[dim];
const int ostride_dim = oInfo.strides[kDim];

for (int k = 0, id = id_dim; is_valid && k < lim && (id < out_dim);
k++, id += DIMY) {
Expand Down
2 changes: 1 addition & 1 deletion src/backend/opencl/kernel/scan_dim.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ static Kernel get_scan_dim_kernels(int kerIdx, int dim, bool isFinalPass,
std::ostringstream options;
options << " -D To=" << dtype_traits<To>::getName()
<< " -D Ti=" << dtype_traits<Ti>::getName() << " -D T=To"
<< " -D dim=" << dim << " -D DIMY=" << threads_y
<< " -D kDim=" << dim << " -D DIMY=" << threads_y
<< " -D THREADS_X=" << THREADS_X
<< " -D init=" << toNumStr(Binary<To, op>::init()) << " -D "
<< binOpName<op>() << " -D CPLX=" << af::iscplx<Ti>()
Expand Down
Loading