// includes, system
// NOTE(review): the header names on the #include lines were lost from this
// file. The ones below are reconstructed from the identifiers the code uses
// (malloc/free, printf/FILE, strcpy, pow, CUDA_SAFE_CALL / cut*Timer) --
// confirm against the original project.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

// includes, project
#include <cutil.h>

// Prints the message for the most recent CUDA runtime error (and clears it).
#define lastError() printf("%s\n", cudaGetErrorString(cudaGetLastError()))

// Threads per block used for the kernel launch.
#define NUM_THREADS 32

// GPU KERNEL
/**
 * Replaces every element array[i], i < size, with array[i]^2 * 123.4 / 44.3.
 *
 * Expects a 1-D grid of 1-D blocks with one thread per element; threads whose
 * global index is >= size do nothing, so the grid may overshoot the array.
 *
 * NOTE(review): 123.4 and 44.3 are double literals, so the multiply/divide is
 * evaluated in double precision exactly like the host loop in makeTest. Switch
 * both sides to 123.4f / 44.3f if float throughput matters more.
 *
 * @param array device pointer to at least `size` floats, updated in place
 * @param size  number of elements to process
 */
__global__ void process_array(float *array, int size){
    int ind = blockIdx.x * blockDim.x + threadIdx.x;
    if(ind < size)
        array[ind] = pow(array[ind], (float)2) * 123.4 /44.3;
}

/**
 * Appends one test's results to "device.txt" (type == 0) or "host.txt"
 * (any other type): a "TEST : <id>:" header followed by the array values.
 *
 * @param id    test number written in the header line
 * @param array host pointer to `size` floats
 * @param size  number of values to write
 * @param type  0 selects device.txt, anything else host.txt
 */
void write_file(int id, float *array, int size, char type){
    char file_name[20];

    if(type == 0)
        strcpy(file_name, "device.txt");
    else
        strcpy(file_name, "host.txt");

    FILE *fp = fopen(file_name, "a+");
    if(fp == NULL){
        // Without this check a failed open would crash in fprintf below.
        perror(file_name);
        return;
    }

    fprintf(fp, "TEST : %d:\n", id);

    // NOTE(review): the original loop body and everything up to the kernel
    // launch in makeTest were lost from the file. One value per line is a
    // reconstruction -- confirm the intended format.
    for(int i = 0; i < size; i++)
        fprintf(fp, "%f\n", array[i]);

    fclose(fp);
}

/**
 * Fills `array` with the input values for one test run.
 *
 * NOTE(review): the original body of this function was lost; only its call
 * sites init_array(h_array, ARRAY_SIZE) survive. It is called once before the
 * device run and again before the host run, so it has to produce the same
 * values each time; array[i] = i is a placeholder that satisfies that --
 * replace with the real initialisation if it is known.
 *
 * @param array host pointer to `size` floats to overwrite
 * @param size  number of elements
 */
void init_array(float *array, int size){
    for(int i = 0; i < size; i++)
        array[i] = (float)i;
}

/**
 * Runs one device-vs-host comparison: processes an ARRAY_SIZE-element array
 * with process_array on the GPU and with an equivalent loop on the CPU,
 * prints both timings and appends both result sets via write_file.
 *
 * @param id         test number, forwarded to write_file
 * @param ARRAY_SIZE number of float elements to process
 */
void makeTest(int id, int ARRAY_SIZE){
    if(ARRAY_SIZE <= 0){
        // A zero-block launch is invalid and malloc(0) is useless; bail early.
        fprintf(stderr, "makeTest %d: invalid array size %d\n", id, ARRAY_SIZE);
        return;
    }

    // NOTE(review): the setup below (allocation, upload, first timer) is
    // reconstructed from the surviving uses of mem_size, h_array, d_array and
    // timer -- confirm against the original.
    size_t mem_size = sizeof(float) * (size_t)ARRAY_SIZE;
    unsigned int timer = 0;
    float *d_array = NULL;
    float *h_array = (float*)malloc(mem_size);
    if(h_array == NULL){
        fprintf(stderr, "makeTest %d: out of host memory\n", id);
        return;
    }

    // DEVICE PROCESS!!
    init_array(h_array, ARRAY_SIZE);
    CUDA_SAFE_CALL( cudaMalloc((void**)&d_array, mem_size) );
    CUDA_SAFE_CALL( cudaMemcpy( d_array, h_array, mem_size, cudaMemcpyHostToDevice) );

    // Round up so a size that is not a multiple of NUM_THREADS is fully covered.
    int num_blocks = (ARRAY_SIZE + NUM_THREADS - 1) / NUM_THREADS;

    cutCreateTimer(&timer);
    cutStartTimer(timer);
    // The element count must be ARRAY_SIZE: the original passed a literal 32,
    // so only the first 32 elements were ever processed on the device.
    process_array<<<num_blocks, NUM_THREADS>>>(d_array, ARRAY_SIZE);
    cudaError_t launch_err = cudaGetLastError();     // bad launch configuration
    cudaError_t sync_err   = cudaThreadSynchronize(); // faults during execution
    cutStopTimer(timer);

    if(launch_err != cudaSuccess || sync_err != cudaSuccess){
        fprintf(stderr, "makeTest %d: kernel failed: %s\n", id,
                cudaGetErrorString(launch_err != cudaSuccess ? launch_err : sync_err));
        cutDeleteTimer(timer);
        cudaFree(d_array);
        free(h_array);
        return;
    }

    // cutGetTimerValue reports milliseconds, so label the value accordingly.
    printf("Processing Time of Device : %.3f ms.\n", cutGetTimerValue(timer));
    cutDeleteTimer(timer);

    CUDA_SAFE_CALL(cudaMemcpy( h_array, d_array, mem_size,cudaMemcpyDeviceToHost) );
    write_file(id, h_array, ARRAY_SIZE, 0);

    // HOST PROCESS!!
    // h_array now holds the device results, so reload the inputs first.
    init_array(h_array, ARRAY_SIZE);

    cutCreateTimer(&timer);
    cutStartTimer(timer);
    for(int i = 0; i < ARRAY_SIZE; i++)
        h_array[i] = pow(h_array[i], 2) * 123.4 /44.3;
    cutStopTimer(timer);

    printf("\nProcessing Time of Host : %.3f ms.\n", cutGetTimerValue(timer));
    cutDeleteTimer(timer);

    write_file(id, h_array, ARRAY_SIZE, 1);

    CUDA_SAFE_CALL( cudaFree(d_array) );
    // Release the host buffer (the original leaked it).
    free(h_array);

    printf("------------------------------------\n");
}

/**
 * Runs six comparisons with 1000, 2000, 3000, 4000, 5000 and 10000 elements.
 */
int main(){
    int size = 1000;

    makeTest(1, size);
    makeTest(2, size*2);
    makeTest(3, size*3);
    makeTest(4, size*4);
    makeTest(5, size*5);
    makeTest(6, size*10);

    return 0;
}