I want to run a parallel reduction on my host device.
The code compiles without errors using `clang++ -fsycl`, but at run time it aborts with the following error:
terminate called after throwing an instance of 'cl::sycl::runtime_error'
what(): Group algorithms are not supported on host device. -33 (CL_INVALID_DEVICE)
This is my parallel reduction operation, which works fine on my GPU:
#include "ddot.hpp"
#include <CL/sycl.hpp>
using namespace sycl;
/// @brief Computes the dot product of two vectors with a SYCL reduction kernel.
///
/// @param q               Queue the reduction kernel is submitted to.
/// @param n               Number of elements to read from @p x and @p y.
/// @param x               First input vector (only read).
/// @param y               Second input vector (only read).
/// @param result          Receives the sum of x[i] * y[i] for i in [0, n);
///                        set to 0.0 when @p n is not positive.
/// @param time_allreduce  Not read or modified by this implementation.
/// @return Always 0.
///
/// @note Some DPC++ runtimes reject reductions on the legacy host device
///       ("Group algorithms are not supported on host device"). In that case
///       construct @p q with a real device selector (CPU or GPU) instead.
int ddot (sycl::queue& q, const int n, const double * const x, const double * const y,
          double * const result, double & time_allreduce)
{
  (void)time_allreduce; // Kept for interface compatibility; nothing to time here.

  // Nothing to reduce: avoid creating zero-sized buffers / an empty kernel
  // range, and avoid a negative n wrapping to a huge unsigned extent.
  if (n <= 0) {
    *result = 0.0;
    return 0;
  }

  const sycl::range<1> extent(static_cast<unsigned long>(n));

  // Buffer with just 1 element to get the reduction results.
  // It starts at 0.0 because the reduction combines with the initial value.
  double sumResult = 0.0;
  buffer<double> sumBuf { &sumResult, 1 };

  sycl::buffer X(x, extent);
  sycl::buffer Y(y, extent);

  q.submit([&](handler& cgh) {
    // Inputs are never written by the kernel, so request read-only access.
    sycl::accessor xAcc{X, cgh, sycl::read_only};
    sycl::accessor yAcc{Y, cgh, sycl::read_only};
    auto sumReduction = reduction(sumBuf, cgh, plus<>());

    cgh.parallel_for(extent, sumReduction,
                     [=](id<1> idx, auto& sum) {
                       sum += xAcc[idx] * yAcc[idx];
                     });
  });
  q.wait();

  // Read the reduced value back on the host.
  *result = sumBuf.get_host_access()[0];
  return 0;
}