Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 66 additions & 49 deletions src/backend/oneapi/jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,61 +218,75 @@ __kernel void )JIT";
thread_local stringstream outOffsetStream;
thread_local stringstream inOffsetsStream;
thread_local stringstream opsStream;
thread_local stringstream kerStream;

int oid{0};
for (size_t i{0}; i < full_nodes.size(); i++) {
const auto& node{full_nodes[i]};
const auto& ids_curr{full_ids[i]};
// Generate input parameters, only needs current id
node->genParams(inParamStream, ids_curr.id, is_linear);
// Generate input offsets, only needs current id
node->genOffsets(inOffsetsStream, ids_curr.id, is_linear);
// Generate the core function body, needs children ids as well
node->genFuncs(opsStream, ids_curr);
for (auto outIt{begin(output_ids)}, endIt{end(output_ids)};
(outIt = find(outIt, endIt, ids_curr.id)) != endIt; ++outIt) {
// Generate also output parameters
outParamStream << "__global "
<< full_nodes[ids_curr.id]->getTypeStr() << " *out"
<< oid << ", int offset" << oid << ",\n";
// Apply output offset
outOffsetStream << "\nout" << oid << " += offset" << oid << ';';
// Generate code to write the output
opsStream << "out" << oid << "[idx] = val" << ids_curr.id << ";\n";
++oid;
string ret;
try {
int oid{0};
for (size_t i{0}; i < full_nodes.size(); i++) {
const auto& node{full_nodes[i]};
const auto& ids_curr{full_ids[i]};
// Generate input parameters, only needs current id
node->genParams(inParamStream, ids_curr.id, is_linear);
// Generate input offsets, only needs current id
node->genOffsets(inOffsetsStream, ids_curr.id, is_linear);
// Generate the core function body, needs children ids as well
node->genFuncs(opsStream, ids_curr);
for (size_t output_idx{0}; output_idx < output_ids.size();
++output_idx) {
if (output_ids[output_idx] == ids_curr.id) {
outParamStream
<< "__global " << full_nodes[ids_curr.id]->getTypeStr()
<< " *out" << oid << ", int offset" << oid << ",\n";
// Apply output offset
outOffsetStream << "\nout" << oid << " += offset" << oid
<< ';';
// Generate code to write the output
opsStream << "out" << output_idx << "[idx] = val"
<< ids_curr.id << ";\n";
++oid;
}
}
}
}

thread_local stringstream kerStream;
kerStream << DEFAULT_MACROS_STR << kernelVoid << funcName << "(\n"
<< inParamStream.str() << outParamStream.str() << dimParams << ")"
<< blockStart;
if (is_linear) {
kerStream << linearInit << inOffsetsStream.str()
<< outOffsetStream.str() << '\n';
if (loop0) kerStream << linearLoop0Start;
kerStream << "\n\n" << opsStream.str();
if (loop0) kerStream << linearLoop0End;
kerStream << linearEnd;
} else {
if (loop0) {
kerStream << stridedLoop0Init << outOffsetStream.str() << '\n'
<< stridedLoop0Start;
kerStream << DEFAULT_MACROS_STR << kernelVoid << funcName << "(\n"
<< inParamStream.str() << outParamStream.str() << dimParams
<< ")" << blockStart;
if (is_linear) {
kerStream << linearInit << inOffsetsStream.str()
<< outOffsetStream.str() << '\n';
if (loop0) kerStream << linearLoop0Start;
kerStream << "\n\n" << opsStream.str();
if (loop0) kerStream << linearLoop0End;
kerStream << linearEnd;
} else {
kerStream << stridedLoopNInit << outOffsetStream.str() << '\n';
if (loop3) kerStream << stridedLoop3Init;
if (loop1) kerStream << stridedLoop1Init << stridedLoop1Start;
if (loop3) kerStream << stridedLoop3Start;
if (loop0) {
kerStream << stridedLoop0Init << outOffsetStream.str() << '\n'
<< stridedLoop0Start;
} else {
kerStream << stridedLoopNInit << outOffsetStream.str() << '\n';
if (loop3) kerStream << stridedLoop3Init;
if (loop1) kerStream << stridedLoop1Init << stridedLoop1Start;
if (loop3) kerStream << stridedLoop3Start;
}
kerStream << "\n\n" << inOffsetsStream.str() << opsStream.str();
if (loop3) kerStream << stridedLoop3End;
if (loop1) kerStream << stridedLoop1End;
if (loop0) kerStream << stridedLoop0End;
kerStream << stridedEnd;
}
kerStream << "\n\n" << inOffsetsStream.str() << opsStream.str();
if (loop3) kerStream << stridedLoop3End;
if (loop1) kerStream << stridedLoop1End;
if (loop0) kerStream << stridedLoop0End;
kerStream << stridedEnd;
kerStream << blockEnd;
ret = kerStream.str();
} catch (...) {
// Prepare for next round, limit memory
inParamStream.str("");
outParamStream.str("");
inOffsetsStream.str("");
outOffsetStream.str("");
opsStream.str("");
kerStream.str("");
throw;
}
kerStream << blockEnd;
const string ret{kerStream.str()};

// Prepare for next round, limit memory
inParamStream.str("");
outParamStream.str("");
Expand Down Expand Up @@ -381,9 +395,11 @@ void evalNodes(vector<Param<T>>& outputs, const vector<Node*>& output_nodes) {

bool is_linear{true};
dim_t numOutElems{1};
assert(outputs.size() == output_nodes.size());
KParam& out_info{outputs[0].info};
dim_t* outDims{out_info.dims};
dim_t* outStrides{out_info.strides};
// unsigned nrInputs{0};

dim_t ndims{outDims[3] > 1 ? 4
: outDims[2] > 1 ? 3
Expand All @@ -409,6 +425,7 @@ void evalNodes(vector<Param<T>>& outputs, const vector<Node*>& output_nodes) {
for (const Node* node : full_nodes) {
is_linear &= node->isLinear(outDims);
moddimsFound |= (node->getOp() == af_moddims_t);
// if (node->isBuffer()) { ++nrInputs; }
}

bool emptyColumnsFound{false};
Expand Down
178 changes: 167 additions & 11 deletions test/arrayfire_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -105,17 +105,16 @@ std::string readNextNonEmptyLine(std::ifstream &file) {

std::string getBackendName(bool lower) {
af::Backend backend = af::getActiveBackend();
switch(backend) {
case AF_BACKEND_CPU:
return lower ? std::string("cpu") : std::string("CPU");
case AF_BACKEND_CUDA:
return lower ? std::string("cuda") : std::string("CUDA");
case AF_BACKEND_OPENCL:
return lower ? std::string("opencl") : std::string("OpenCL");
case AF_BACKEND_ONEAPI:
return lower ? std::string("oneapi") : std::string("oneAPI");
default:
return lower ? std::string("unknown") : std::string("Unknown");
switch (backend) {
case AF_BACKEND_CPU:
return lower ? std::string("cpu") : std::string("CPU");
case AF_BACKEND_CUDA:
return lower ? std::string("cuda") : std::string("CUDA");
case AF_BACKEND_OPENCL:
return lower ? std::string("opencl") : std::string("OpenCL");
case AF_BACKEND_ONEAPI:
return lower ? std::string("oneapi") : std::string("oneAPI");
default: return lower ? std::string("unknown") : std::string("Unknown");
}
}

Expand Down Expand Up @@ -2046,6 +2045,163 @@ INSTANTIATE(std::complex<float>);
INSTANTIATE(std::complex<double>);
#undef INSTANTIATE

af::array toTempFormat(tempFormat form, const af::array &in) {
af::array ret;
const af::dim4 &dims = in.dims();
switch (form) {
case JIT_FORMAT:
switch (in.type()) {
case b8: ret = not(in); break;
default: ret = in * 2;
}
// Make sure that the base array is <> form original
ret.eval();
switch (in.type()) {
case b8: ret = not(ret); break;
default: ret /= 2;
}
break;
case SUB_FORMAT_dim0: {
af::dim4 pdims(dims);
pdims[0] += 2;
af::array parent = af::randu(pdims, in.type());
parent(af::seq(1, dims[0]), af::span, af::span, af::span) = in;
ret = parent(af::seq(1, dims[0]), af::span, af::span, af::span);
}; break;
case SUB_FORMAT_dim1: {
af::dim4 pdims(dims);
pdims[1] += 2;
af::array parent = af::randu(pdims, in.type());
parent(af::span, af::seq(1, dims[1]), af::span, af::span) = in;
ret = parent(af::span, af::seq(1, dims[1]), af::span, af::span);
}; break;
case SUB_FORMAT_dim2: {
af::dim4 pdims(dims);
pdims[2] += 2;
af::array parent = af::randu(pdims, in.type());
parent(af::span, af::span, af::seq(1, dims[2]), af::span) = in;
ret = parent(af::span, af::span, af::seq(1, dims[2]), af::span);
}; break;
case SUB_FORMAT_dim3: {
af::dim4 pdims(dims);
pdims[3] += 2;
af::array parent = af::randu(pdims, in.type());
parent(af::span, af::span, af::span, af::seq(1, dims[3])) = in;
ret = parent(af::span, af::span, af::span, af::seq(1, dims[3]));
}; break;
case REORDERED_FORMAT: {
const dim_t idxs[4] = {0, 3, 1, 2};
// idxs[0] has to be 0, to keep the same data in mem
dim_t rev_idxs[4];
for (dim_t i = 0; i < 4; ++i) { rev_idxs[idxs[i]] = i; };
ret = af::reorder(in, idxs[0], idxs[1], idxs[2], idxs[3]);
ret = ret.copy(); // make data linear
ret = af::reorder(ret, rev_idxs[0], rev_idxs[1], rev_idxs[2],
rev_idxs[3]);
// ret has same content as in, although data is stored in
// different order
}; break;
case LINEAR_FORMAT:
default: ret = in.copy();
};
return ret;
}

void toTempFormat(tempFormat form, af_array *out, const af_array &in) {
dim_t dims[4];
af_get_dims(dims, dims + 1, dims + 2, dims + 3, in);
unsigned numdims;
af_get_numdims(&numdims, in);
af_dtype ty;
af_get_type(&ty, in);
switch (form) {
case JIT_FORMAT: {
// af_array one = nullptr, min_one = nullptr, res = nullptr;
af_array res = nullptr, two = nullptr;
ASSERT_SUCCESS(af_constant(&two, 2, numdims, dims, ty));
switch (ty) {
case b8: af_not(&res, in); break;
default:
// ret = in + af::constant(1, dims, in.type());
ASSERT_SUCCESS(af_mul(&res, in, two, false));
}
// Make sure that the base array is <> form original
ASSERT_SUCCESS(af_eval(res));
switch (ty) {
case b8: af_not(out, res); break;
default:
ASSERT_SUCCESS(af_div(out, res, two, false)); // NO EVAL!!
}
ASSERT_SUCCESS(af_release_array(two));
two = nullptr;
ASSERT_SUCCESS(af_release_array(res));
res = nullptr;
}; break;
case SUB_FORMAT_dim0: {
const dim_t pdims[4] = {dims[0] + 2, dims[1], dims[2], dims[3]};
af_array parent = nullptr;
ASSERT_SUCCESS(af_randu(&parent, std::max(1u, numdims), pdims, ty));
const af_seq idxs[4] = {af_make_seq(1, dims[0], 1), af_span,
af_span, af_span};

ASSERT_SUCCESS(af_assign_seq(out, parent, numdims, idxs, in));
ASSERT_SUCCESS(af_index(out, parent, numdims, idxs));
ASSERT_SUCCESS(af_release_array(parent));
}; break;
case SUB_FORMAT_dim1: {
const dim_t pdims[4] = {dims[0], dims[1] + 2, dims[2], dims[3]};
af_array parent = nullptr;
ASSERT_SUCCESS(af_randu(&parent, std::max(2u, numdims), pdims, ty));
const af_seq idxs[4] = {af_span, af_make_seq(1, dims[1], 1),
af_span, af_span};
ASSERT_SUCCESS(af_assign_seq(out, parent, numdims, idxs, in));
ASSERT_SUCCESS(af_index(out, parent, numdims, idxs));
ASSERT_SUCCESS(af_release_array(parent));
parent = nullptr;
}; break;
case SUB_FORMAT_dim2: {
const dim_t pdims[4] = {dims[0], dims[1], dims[2] + 2, dims[3]};
af_array parent = nullptr;
ASSERT_SUCCESS(af_randu(&parent, std::max(3u, numdims), pdims, ty));
const af_seq idxs[4] = {af_span, af_span,
af_make_seq(1, dims[2], 1), af_span};
ASSERT_SUCCESS(af_assign_seq(out, parent, numdims, idxs, in));
ASSERT_SUCCESS(af_index(out, parent, numdims, idxs));
ASSERT_SUCCESS(af_release_array(parent));
parent = nullptr;
}; break;
case SUB_FORMAT_dim3: {
const dim_t pdims[4] = {dims[0], dims[1], dims[2], dims[3] + 2};
af_array parent = nullptr;
ASSERT_SUCCESS(af_randu(&parent, std::max(4u, numdims), pdims, ty));
const af_seq idxs[4] = {af_span, af_span, af_span,
af_make_seq(1, dims[3], 1)};
ASSERT_SUCCESS(af_assign_seq(out, parent, numdims, idxs, in));
ASSERT_SUCCESS(af_index(out, parent, numdims, idxs));
ASSERT_SUCCESS(af_release_array(parent));
parent = nullptr;
}; break;
case REORDERED_FORMAT: {
const unsigned idxs[4] = {0, 3, 1, 2};
// idxs[0] has to be 0, to keep the same data in mem
dim_t rev_idxs[4];
for (dim_t i = 0; i < 4; ++i) { rev_idxs[idxs[i]] = i; };
af_array rev = nullptr;
ASSERT_SUCCESS(
af_reorder(&rev, in, idxs[0], idxs[1], idxs[2], idxs[3]));
ASSERT_SUCCESS(af_copy_array(out, rev));
ASSERT_SUCCESS(af_reorder(out, rev, rev_idxs[0], rev_idxs[1],
rev_idxs[2], rev_idxs[3]));
// ret has same content as in, although data is stored in
// different order
ASSERT_SUCCESS(af_release_array(rev));
rev = nullptr;
}; break;
case LINEAR_FORMAT:
default: af_copy_array(out, in);
};
}

int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
Expand Down
25 changes: 22 additions & 3 deletions test/join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -280,9 +280,9 @@ TEST(Join, respect_parameters_order_ISSUE3511) {
const af::array jit2{buf2 + 2.0};
const std::array<af::array, 8> cases{jit1, -jit1, jit1 + 1.0, jit2,
-jit2, jit1 + jit2, buf1, buf2};
const std::array<char*, 8> cases_name{"JIT1", "-JIT1", "JIT1+1.0",
"JIT2", "-JIT2", "JIT1+JIT2",
"BUF1", "BUF2"};
const std::array<const char*, 8> cases_name{"JIT1", "-JIT1", "JIT1+1.0",
"JIT2", "-JIT2", "JIT1+JIT2",
"BUF1", "BUF2"};
assert(cases.size() == cases_name.size());
for (size_t cl0{0}; cl0 < cases.size(); ++cl0) {
for (size_t cl1{0}; cl1 < cases.size(); ++cl1) {
Expand Down Expand Up @@ -312,3 +312,22 @@ TEST(Join, respect_parameters_order_ISSUE3511) {
}
}
}

#define TEST_TEMP_FORMAT(form, d) \
TEST(TEMP_FORMAT, form##_dim##d) { \
const dim4 dims(2, 2, 2, 2); \
const array a(randu(dims)); \
const array b(randu(dims)); \
\
array out = join(d, toTempFormat(form, a), toTempFormat(form, b)); \
array gold = join(d, a, b); \
EXPECT_ARRAYS_EQ(gold, out); \
}

#define TEST_TEMP_FORMATS(form) \
TEST_TEMP_FORMAT(form, 0) \
TEST_TEMP_FORMAT(form, 1) \
TEST_TEMP_FORMAT(form, 2) \
TEST_TEMP_FORMAT(form, 3)

FOREACH_TEMP_FORMAT(TEST_TEMP_FORMATS)
Loading
Loading