opencl/clexample.c - mendel-testing-tools - Git at Google

 /*
  * Copyright © 2020 Google LLC
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
  * to deal in the Software without restriction, including without limitation
  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  * and/or sell copies of the Software, and to permit persons to whom the
  * Software is furnished to do so, subject to the following conditions:
  *
  * The above copyright notice and this permission notice (including the next
  * paragraph) shall be included in all copies or substantial portions of the
  * Software.
  *
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */

 #define CL_TARGET_OPENCL_VERSION 120
 #include <CL/cl.h>

 #include <stdio.h>
 #include <stdlib.h>

 /* Max no of CL implementations, 5 should be enough to find CPU & GPU */
 #define MAX_PLATFORMS 5

 /* CL Program */
 const char *kKernels =
     "__kernel void square(__global int *ARR) {"
     "   ARR[get_global_id(0)] = ARR[get_global_id(0)] * "
     "   ARR[get_global_id(0)];}"
     "__kernel void add_arrays(__global int *ARR1, __global int *ARR2) {"
     "   ARR1[get_global_id(0)] = ARR1[get_global_id(0)] + "
     "   ARR2[get_global_id(0)];}"
     "__kernel void add_const(__global int *ARR, const int c) {"
     "   ARR[get_global_id(0)] = ARR[get_global_id(0)] + c;}";

 /* Constant used in the calculations */
 const cl_int kAdd = 2;
 const size_t kArraySize = 1024;

 /*
  * Function to carry out the CL elementwise calculations
  * (out_array = in_array*in_array + in_array + 2)
  */
 void Compute(cl_context ctx, cl_command_queue queue,
             int *in_array, int *out_array, size_t size) {
   cl_program program;
   cl_int err;
   cl_mem device_mem_1, device_mem_2;
   cl_kernel kernel_square, kernel_add_const, kernel_add_arrays;

   program = clCreateProgramWithSource(ctx, 1, (const char **) &kKernels,
                                       NULL, &err);
   err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
   kernel_square = clCreateKernel(program, "square", &err);
   kernel_add_const = clCreateKernel(program, "add_const", &err);
   kernel_add_arrays = clCreateKernel(program, "add_arrays", &err);

   /* Initialize the data buffers */
   device_mem_1 = clCreateBuffer(ctx, CL_MEM_READ_WRITE, size*sizeof(*in_array),
                         NULL, &err);
   device_mem_2 = clCreateBuffer(ctx, CL_MEM_READ_WRITE, size*sizeof(*in_array),
                         NULL, &err);

   /* Write and queue the input buffers */
   err = clEnqueueWriteBuffer(queue, device_mem_1, CL_TRUE, 0, size*sizeof(*in_array),
                               in_array, 0, NULL, NULL);
   err = clEnqueueWriteBuffer(queue, device_mem_2, CL_TRUE, 0, size*sizeof(*in_array),
                               in_array, 0, NULL, NULL);

   /* Calculate the square the elements of the array */
   clSetKernelArg(kernel_square, 0, sizeof(cl_mem), &device_mem_1);
   clEnqueueNDRangeKernel(queue, kernel_square, 1, NULL, &size, &size, 0, NULL, NULL);

   /* Add the current result to the original array */
   clSetKernelArg(kernel_add_arrays, 0, sizeof(cl_mem), &device_mem_1);
   clSetKernelArg(kernel_add_arrays, 1, sizeof(cl_mem), &device_mem_2);
   clEnqueueNDRangeKernel(queue, kernel_add_arrays, 1, NULL, &size, &size, 0, NULL, NULL);

   /* Add a constant to each element */
   clSetKernelArg(kernel_add_const, 0, sizeof(cl_mem), &device_mem_1);
   clSetKernelArg(kernel_add_const, 1, sizeof(int), &kAdd);
   clEnqueueNDRangeKernel(queue, kernel_add_const, 1, NULL, &size, &size, 0, NULL, NULL);


   /* Queue the output buffer, and kick off the calcuations */
   err = clEnqueueReadBuffer(queue, device_mem_1, CL_TRUE, 0, size*sizeof(*out_array),
                             out_array, 0, NULL, NULL);
   err = clFinish(queue);
   clReleaseMemObject(device_mem_1);
   clReleaseMemObject(device_mem_2);
 }

 /* Helper function to compare results */
 int CompareArrays(int *arr1, int *arr2, int no_elements) {
   int success = 1;

   for (int i=0; i<no_elements; i++) {
     if (arr1[i] != arr2[i]) {
       success = 0;
       break;
     }
   }
   return success;
 }

 int main( void ) {
   cl_int err;
   cl_platform_id platform;
   cl_platform_id platforms[MAX_PLATFORMS];
   cl_device_id device;
   cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};
   cl_context ctx_gpu, ctx_cpu;
   cl_command_queue queue_cpu;
   cl_command_queue queue_gpu;
   cl_event event = NULL;
   int i, num_platforms;

   int *input_arr, *result_cpu_arr, *gpu_result_arr, *expected_result_arr;

   input_arr = (int*) malloc(kArraySize * sizeof(*input_arr));
   gpu_result_arr = (int*) malloc(kArraySize * sizeof(*gpu_result_arr));
   result_cpu_arr = (int*) malloc(kArraySize * sizeof(*result_cpu_arr));
   expected_result_arr = (int*) malloc(kArraySize *
                                       sizeof(*expected_result_arr));

   /* Initialize input and expected arrays*/
   for (i=0; i<kArraySize; i++) {
     input_arr[i] = i;
     expected_result_arr[i] = i*i+i+kAdd;
   }

   /* Get the platforms. */
   err = clGetPlatformIDs(5, platforms, &num_platforms);

   /* Setup one CPU and one GPU device */
   device = NULL;
   ctx_cpu = ctx_gpu = NULL;
   for (i=0; i<num_platforms; i++) {
     props[1] = (cl_context_properties) platforms[i];
     if (ctx_cpu == NULL) {
       err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU,
                             1, &device, NULL);
       if (device != NULL) {
         ctx_cpu = clCreateContext(props, 1, &device, NULL, NULL, &err);
         queue_cpu = clCreateCommandQueue(ctx_cpu, device, 0, &err);
         device = NULL;
         continue;
       }
     }
     if (ctx_gpu == NULL) {
       err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU,
                             1, &device, NULL);
       if (device != NULL) {
         ctx_gpu = clCreateContext(props, 1, &device, NULL, NULL, &err);
         queue_gpu = clCreateCommandQueue(ctx_gpu, device, 0, &err);
         device = NULL;
         continue;
       }
     }
   }

   /* Run the GPU computation */
   if (ctx_gpu) {
     Compute(ctx_gpu, queue_gpu, input_arr, gpu_result_arr, kArraySize);
     clReleaseCommandQueue(queue_gpu);
     clReleaseContext(ctx_gpu);

     if (CompareArrays(expected_result_arr, gpu_result_arr, kArraySize))
       printf("GPU: Result as expected\n");
     else
       printf("GPU: FAIL Result NOT as expected\n");
   } else {
     printf("GPU: FAIL No OpenCL implementation found\n");
   }

   /* Run the CPU computation */
   if (ctx_cpu) {
     Compute(ctx_cpu, queue_cpu, input_arr, result_cpu_arr, kArraySize);
     clReleaseCommandQueue( queue_cpu );
     clReleaseContext( ctx_cpu );

     if (CompareArrays(expected_result_arr, result_cpu_arr, kArraySize))
       printf("CPU: Result as expected\n");
     else
       printf("CPU: FAIL Result NOT as expected\n");
   } else {
     printf("CPU: FAIL No OpenCL implementation found\n");
   }


   /* Free the allocated memory */
   free(input_arr);
   free(result_cpu_arr);
   free(gpu_result_arr);
   free(expected_result_arr);

   return 0;
 }
	/*
	* Copyright © 2020 Google LLC
	*
	* Permission is hereby granted, free of charge, to any person obtaining a
	* copy of this software and associated documentation files (the "Software"),
	* to deal in the Software without restriction, including without limitation
	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
	* and/or sell copies of the Software, and to permit persons to whom the
	* Software is furnished to do so, subject to the following conditions:
	*
	* The above copyright notice and this permission notice (including the next
	* paragraph) shall be included in all copies or substantial portions of the
	* Software.
	*
	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
	* DEALINGS IN THE SOFTWARE.
	*/

	#define CL_TARGET_OPENCL_VERSION 120
	#include <CL/cl.h>

	#include <stdio.h>
	#include <stdlib.h>

	/* Max no of CL implementations, 5 should be enough to find CPU & GPU */
	#define MAX_PLATFORMS 5

	/* CL Program */
	const char *kKernels =
	"__kernel void square(__global int *ARR) {"
	" ARR[get_global_id(0)] = ARR[get_global_id(0)] * "
	" ARR[get_global_id(0)];}"
	"__kernel void add_arrays(__global int ARR1, __global int ARR2) {"
	" ARR1[get_global_id(0)] = ARR1[get_global_id(0)] + "
	" ARR2[get_global_id(0)];}"
	"__kernel void add_const(__global int *ARR, const int c) {"
	" ARR[get_global_id(0)] = ARR[get_global_id(0)] + c;}";

	/* Constant used in the calculations */
	const cl_int kAdd = 2;
	const size_t kArraySize = 1024;

	/*
	* Function to carry out the CL elementwise calculations
	* (out_array = in_array*in_array + in_array + 2)
	*/
	void Compute(cl_context ctx, cl_command_queue queue,
	int in_array, int out_array, size_t size) {
	cl_program program;
	cl_int err;
	cl_mem device_mem_1, device_mem_2;
	cl_kernel kernel_square, kernel_add_const, kernel_add_arrays;

	program = clCreateProgramWithSource(ctx, 1, (const char **) &kKernels,
	NULL, &err);
	err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
	kernel_square = clCreateKernel(program, "square", &err);
	kernel_add_const = clCreateKernel(program, "add_const", &err);
	kernel_add_arrays = clCreateKernel(program, "add_arrays", &err);

	/* Initialize the data buffers */
	device_mem_1 = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizesizeof(in_array),
	NULL, &err);
	device_mem_2 = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizesizeof(in_array),
	NULL, &err);

	/* Write and queue the input buffers */
	err = clEnqueueWriteBuffer(queue, device_mem_1, CL_TRUE, 0, sizesizeof(in_array),
	in_array, 0, NULL, NULL);
	err = clEnqueueWriteBuffer(queue, device_mem_2, CL_TRUE, 0, sizesizeof(in_array),
	in_array, 0, NULL, NULL);

	/* Calculate the square the elements of the array */
	clSetKernelArg(kernel_square, 0, sizeof(cl_mem), &device_mem_1);
	clEnqueueNDRangeKernel(queue, kernel_square, 1, NULL, &size, &size, 0, NULL, NULL);

	/* Add the current result to the original array */
	clSetKernelArg(kernel_add_arrays, 0, sizeof(cl_mem), &device_mem_1);
	clSetKernelArg(kernel_add_arrays, 1, sizeof(cl_mem), &device_mem_2);
	clEnqueueNDRangeKernel(queue, kernel_add_arrays, 1, NULL, &size, &size, 0, NULL, NULL);

	/* Add a constant to each element */
	clSetKernelArg(kernel_add_const, 0, sizeof(cl_mem), &device_mem_1);
	clSetKernelArg(kernel_add_const, 1, sizeof(int), &kAdd);
	clEnqueueNDRangeKernel(queue, kernel_add_const, 1, NULL, &size, &size, 0, NULL, NULL);


	/* Queue the output buffer, and kick off the calcuations */
	err = clEnqueueReadBuffer(queue, device_mem_1, CL_TRUE, 0, sizesizeof(out_array),
	out_array, 0, NULL, NULL);
	err = clFinish(queue);
	clReleaseMemObject(device_mem_1);
	clReleaseMemObject(device_mem_2);
	}

	/* Helper function to compare results */
	int CompareArrays(int arr1, int arr2, int no_elements) {
	int success = 1;

	for (int i=0; i<no_elements; i++) {
	if (arr1[i] != arr2[i]) {
	success = 0;
	break;
	}
	}
	return success;
	}

	int main( void ) {
	cl_int err;
	cl_platform_id platform;
	cl_platform_id platforms[MAX_PLATFORMS];
	cl_device_id device;
	cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};
	cl_context ctx_gpu, ctx_cpu;
	cl_command_queue queue_cpu;
	cl_command_queue queue_gpu;
	cl_event event = NULL;
	int i, num_platforms;

	int input_arr, result_cpu_arr, gpu_result_arr, expected_result_arr;

	input_arr = (int) malloc(kArraySize sizeof(*input_arr));
	gpu_result_arr = (int) malloc(kArraySize sizeof(*gpu_result_arr));
	result_cpu_arr = (int) malloc(kArraySize sizeof(*result_cpu_arr));
	expected_result_arr = (int) malloc(kArraySize
	sizeof(*expected_result_arr));

	/* Initialize input and expected arrays*/
	for (i=0; i<kArraySize; i++) {
	input_arr[i] = i;
	expected_result_arr[i] = i*i+i+kAdd;
	}

	/* Get the platforms. */
	err = clGetPlatformIDs(5, platforms, &num_platforms);

	/* Setup one CPU and one GPU device */
	device = NULL;
	ctx_cpu = ctx_gpu = NULL;
	for (i=0; i<num_platforms; i++) {
	props[1] = (cl_context_properties) platforms[i];
	if (ctx_cpu == NULL) {
	err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU,
	1, &device, NULL);
	if (device != NULL) {
	ctx_cpu = clCreateContext(props, 1, &device, NULL, NULL, &err);
	queue_cpu = clCreateCommandQueue(ctx_cpu, device, 0, &err);
	device = NULL;
	continue;
	}
	}
	if (ctx_gpu == NULL) {
	err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU,
	1, &device, NULL);
	if (device != NULL) {
	ctx_gpu = clCreateContext(props, 1, &device, NULL, NULL, &err);
	queue_gpu = clCreateCommandQueue(ctx_gpu, device, 0, &err);
	device = NULL;
	continue;
	}
	}
	}

	/* Run the GPU computation */
	if (ctx_gpu) {
	Compute(ctx_gpu, queue_gpu, input_arr, gpu_result_arr, kArraySize);
	clReleaseCommandQueue(queue_gpu);
	clReleaseContext(ctx_gpu);

	if (CompareArrays(expected_result_arr, gpu_result_arr, kArraySize))
	printf("GPU: Result as expected\n");
	else
	printf("GPU: FAIL Result NOT as expected\n");
	} else {
	printf("GPU: FAIL No OpenCL implementation found\n");
	}

	/* Run the CPU computation */
	if (ctx_cpu) {
	Compute(ctx_cpu, queue_cpu, input_arr, result_cpu_arr, kArraySize);
	clReleaseCommandQueue( queue_cpu );
	clReleaseContext( ctx_cpu );

	if (CompareArrays(expected_result_arr, result_cpu_arr, kArraySize))
	printf("CPU: Result as expected\n");
	else
	printf("CPU: FAIL Result NOT as expected\n");
	} else {
	printf("CPU: FAIL No OpenCL implementation found\n");
	}


	/* Free the allocated memory */
	free(input_arr);
	free(result_cpu_arr);
	free(gpu_result_arr);
	free(expected_result_arr);

	return 0;
	}