#define __CL_ENABLE_EXCEPTIONS #include "cl.hpp" #include #include #include #include #define REPRODUCE_BUG 1 #define dim_t long long using namespace cl; using namespace std; int main(int argc, char* argv[]) { try { Context(CL_DEVICE_TYPE_GPU); static const unsigned elements = 10; vector data(elements, 5); Buffer a(begin(data), end(data), true, false); Buffer b(begin(data), end(data), true, false); Buffer c(CL_MEM_READ_WRITE, elements * sizeof(float)); typedef struct { dim_t dims[4]; dim_t strides[4]; dim_t offset; } KParam; printf("Sizeof KParam: %lu\n", sizeof(KParam)); #if REPRODUCE_BUG == 0 Program addProg(R"d( #define dim_t long typedef struct { dim_t dims[4]; dim_t strides[4]; dim_t offset; } KParam; kernel void add( global const float * restrict const a, global const float * restrict const b, global float * restrict const c ) { unsigned idx = get_global_id(0); c[idx] = sizeof(KParam); } )d", true); auto add = make_kernel(addProg, "add"); add(EnqueueArgs(elements), a, b, c); #else Program addProg(R"d( #define dim_t long typedef struct { dim_t dims[4]; dim_t strides[4]; dim_t offset; } KParam; kernel void add(KParam foo, global const float * restrict const a, global const float * restrict const b, global float * restrict const c ) { unsigned idx = get_global_id(0); c[idx] = sizeof(foo); } )d", true); KParam foo; auto add = make_kernel(addProg, "add"); add(EnqueueArgs(elements), foo, a, b, c); #endif vector result(elements); cl::copy(c, begin(result), end(result)); std::copy(begin(result), end(result), ostream_iterator(cout, ", ")); } catch(cl::Error &err) { std::cout << err.err() << " " << err.what() << std::endl; } return 0; }