#include #include #include #define CL_CHECK(fn) do { \ cl_int _err = fn; \ if (_err == CL_SUCCESS) break; \ printf("%s(%d) OpenCL Error: %d\n", \ __FILE__, __LINE__, _err); \ return _err; \ } while (0) \ const char fmod_ker_str[] = "" "__kernel\n" "void fmod_ker(__global const float * const lhs,\n" "float rhs,\n" "__global float * const out) {\n" "unsigned idx = get_global_id(0);\n" "out[idx] = fmod(lhs[idx], rhs);\n" "}\n"; int main(int argc, char **args) { int pid = argc < 2 ? 0 : atoi(args[1]); int did = argc < 3 ? 0 : atoi(args[2]); unsigned num_platforms = 0; CL_CHECK(clGetPlatformIDs(0, NULL, &num_platforms)); if (num_platforms <= pid) { printf("Invalid Platform ID\n"); return -pid; } cl_platform_id *platforms = (cl_platform_id *)malloc(num_platforms * sizeof(cl_platform_id)); CL_CHECK(clGetPlatformIDs(num_platforms, platforms, NULL)); cl_platform_id platform = platforms[pid]; unsigned num_devices = 0; CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 0, NULL, &num_devices)); if (num_devices <= did) { printf("Invalid Device ID\n"); return -did; } cl_device_id *devices = (cl_device_id *)malloc(num_devices * sizeof(cl_device_id)); CL_CHECK(clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, num_devices, devices, NULL)); cl_device_id device = devices[did]; char platform_name[64]; char device_name[256]; CL_CHECK(clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platform_name), &platform_name, NULL)); CL_CHECK(clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), &device_name, NULL)); printf("Platform: %s\n", platform_name); printf("Device: %s\n", device_name); printf("Kernel String:\n%s\n", fmod_ker_str); cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)(platform), 0}; cl_int err; cl_context ctx = clCreateContext(cps, 1, &device, NULL, NULL, &err); CL_CHECK(err); cl_command_queue queue = clCreateCommandQueue(ctx, device, 0, &err); CL_CHECK(err); const int elements = 256; int bytes = elements * sizeof(float); float *h_in = (float *)malloc(bytes); float *h_out = (float *)malloc(bytes); for (int i = 0; i < elements; i++) { h_in[i] = 3.0; h_out[i] = 0.3; //10101; } cl_mem b_in = clCreateBuffer(ctx, CL_MEM_READ_WRITE, bytes, NULL, &err); CL_CHECK(err); CL_CHECK(clEnqueueWriteBuffer(queue, b_in, true, 0, bytes, h_in, 0, NULL, NULL)); cl_mem b_out = clCreateBuffer(ctx, CL_MEM_READ_WRITE, bytes, NULL, &err); CL_CHECK(err); float val = 0.3; size_t local = elements; size_t global = elements; const char *ker_sources[] = {fmod_ker_str, NULL}; const size_t ker_lens[] = { sizeof(fmod_ker_str), 0 }; cl_program program = clCreateProgramWithSource(ctx, 1, ker_sources, ker_lens, &err); CL_CHECK(err); CL_CHECK(clBuildProgram(program, 0, NULL, "", NULL, NULL)); cl_kernel kernel = clCreateKernel(program, "fmod_ker", &err); CL_CHECK(err); CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &b_in )); CL_CHECK(clSetKernelArg(kernel, 1, sizeof(float ), &val )); CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &b_out)); CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL)); CL_CHECK(clFlush(queue)); CL_CHECK(clFinish(queue)); CL_CHECK(clEnqueueReadBuffer(queue, b_out, true, 0, bytes, h_out, 0, NULL, NULL)); for (int i = 0; i < 10; i++) { printf("Expected: %f, Calculated: %f\n", fmodf(h_in[i], val), h_out[i]); } free(platforms); free(devices); free(h_in); free(h_out); }