▶ 事件的两种使用方法。第一种是用事件 a 标记进入命令队列的操作 A,于是后续进入命令队列的操作 B 可以被要求等到事件 a 完成(即操作 A 完成)以后才开始调度执行。第二种是使用用户自定义事件:由主机端创建事件,并在合适的时刻手动将其标记为完成,从而手动控制时机,阻塞依赖该事件的任务。
● 事件的使用代码(用两向量之和的代码改过来的)
#include <stdio.h>
#include <stdlib.h>
#include <cl.h>

/* Number of elements in each vector.
 * NOTE(review): the literal was lost from the original listing; 2048 is an
 * assumed value - confirm against the original source. */
const int nElement = 2048;

/* OpenCL C source of the kernel: each work-item adds one pair of elements. */
const char *programSource = " \
__kernel void vectorAdd(__global int *A, __global int *B, __global int *C) \
{ \
    int idx = get_global_id(0); \
    C[idx] = A[idx] + B[idx]; \
    return; \
} \
";

/* Abort with a message when an OpenCL call did not return CL_SUCCESS. */
static void checkStatus(cl_int status, const char *what)
{
    if (status != CL_SUCCESS)
    {
        fprintf(stderr, "%s failed with error %d\n", what, (int)status);
        exit(EXIT_FAILURE);
    }
}

/*
 * Vector addition that demonstrates event-based ordering.
 *
 * A user event (eventList[0]) gates both buffer writes; the writes produce
 * eventList[1] and eventList[2]; the kernel waits on all three events.
 * Nothing runs until the host marks the user event CL_COMPLETE.
 *
 * Returns 0 after printing whether the result is correct; exits with
 * EXIT_FAILURE if an allocation or an OpenCL call fails.
 */
int main()
{
    const size_t datasize = sizeof(int) * nElement;
    int i, *A, *B, *C;
    cl_int status;
    cl_event eventList[3];  /* [0] user event, [1] write of A, [2] write of B */

    A = (int*)malloc(datasize);
    B = (int*)malloc(datasize);
    C = (int*)malloc(datasize);
    if (A == NULL || B == NULL || C == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }
    for (i = 0; i < nElement; i++)
        A[i] = B[i] = i;

    /* Platform and device discovery: query the count first, then the list. */
    cl_uint nPlatform = 0;
    checkStatus(clGetPlatformIDs(0, NULL, &nPlatform), "clGetPlatformIDs(count)");
    if (nPlatform == 0)
    {
        fprintf(stderr, "no OpenCL platform found\n");
        return EXIT_FAILURE;
    }
    cl_platform_id *listPlatform = (cl_platform_id*)malloc(nPlatform * sizeof(cl_platform_id));
    if (listPlatform == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }
    checkStatus(clGetPlatformIDs(nPlatform, listPlatform, NULL), "clGetPlatformIDs(list)");

    cl_uint nDevice = 0;
    checkStatus(clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, 0, NULL, &nDevice), "clGetDeviceIDs(count)");
    if (nDevice == 0)
    {
        fprintf(stderr, "no OpenCL device found\n");
        return EXIT_FAILURE;
    }
    cl_device_id *listDevice = (cl_device_id*)malloc(nDevice * sizeof(cl_device_id));
    if (listDevice == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }
    checkStatus(clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, nDevice, listDevice, NULL), "clGetDeviceIDs(list)");

    /* Context, command queue (on the first device), program and kernel. */
    cl_context context = clCreateContext(NULL, nDevice, listDevice, NULL, NULL, &status);
    checkStatus(status, "clCreateContext");
    cl_command_queue cmdQueue = clCreateCommandQueue(context, listDevice[0], 0, &status);
    checkStatus(status, "clCreateCommandQueue");
    cl_program program = clCreateProgramWithSource(context, 1, (const char**)&programSource, NULL, &status);
    checkStatus(status, "clCreateProgramWithSource");
    status = clBuildProgram(program, nDevice, listDevice, NULL, NULL, NULL);
    checkStatus(status, "clBuildProgram");
    cl_kernel kernel = clCreateKernel(program, "vectorAdd", &status);
    checkStatus(status, "clCreateKernel");

    cl_mem bufferA, bufferB, bufferC;
    bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status);
    checkStatus(status, "clCreateBuffer(A)");
    bufferB = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status);
    checkStatus(status, "clCreateBuffer(B)");
    bufferC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize, NULL, &status);
    checkStatus(status, "clCreateBuffer(C)");

    /* User-defined event: stays incomplete until the host sets its status. */
    eventList[0] = clCreateUserEvent(context, &status);
    checkStatus(status, "clCreateUserEvent");

    /* Each non-blocking write waits on eventList[0] (wait list of length 1)
     * and is itself tagged with its own event. */
    status = clEnqueueWriteBuffer(cmdQueue, bufferA, CL_FALSE, 0, datasize, A, 1, &eventList[0], &eventList[1]);
    checkStatus(status, "clEnqueueWriteBuffer(A)");
    status = clEnqueueWriteBuffer(cmdQueue, bufferB, CL_FALSE, 0, datasize, B, 1, &eventList[0], &eventList[2]);
    checkStatus(status, "clEnqueueWriteBuffer(B)");

    checkStatus(clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA), "clSetKernelArg(0)");
    checkStatus(clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB), "clSetKernelArg(1)");
    checkStatus(clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufferC), "clSetKernelArg(2)");

    /* The work-group size literal was lost from the original listing, so the
     * local size is left to the implementation (NULL). */
    size_t globalSize[] = { (size_t)nElement };

    /* The kernel waits on the whole event list (length 3). */
    status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, globalSize, NULL, 3, eventList, NULL);
    checkStatus(status, "clEnqueueNDRangeKernel");

    /* Complete the user event so the writes, and then the kernel, may start.
     * Without this call the blocking read below never returns. */
    checkStatus(clSetUserEventStatus(eventList[0], CL_COMPLETE), "clSetUserEventStatus");

    /* Blocking read: returns once C holds the kernel's result. */
    status = clEnqueueReadBuffer(cmdQueue, bufferC, CL_TRUE, 0, datasize, C, 0, NULL, NULL);
    checkStatus(status, "clEnqueueReadBuffer");

    for (i = 0; i < nElement; i++)
    {
        if (C[i] != i + i)
            break;
    }
    printf("Output is %s.\n", (i == nElement) ? "correct" : "incorrect");

    /* Release events as well; every created or returned event holds a reference. */
    for (i = 0; i < 3; i++)
        clReleaseEvent(eventList[i]);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseMemObject(bufferA);
    clReleaseMemObject(bufferB);
    clReleaseMemObject(bufferC);
    clReleaseCommandQueue(cmdQueue);
    clReleaseContext(context);
    free(A);
    free(B);
    free(C);
    free(listPlatform);
    free(listDevice);
    getchar();
    return 0;
}
● 输出结果
Output is correct.
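● 若将 clSetUserEventStatus(eventList[0], CL_COMPLETE); 这一行(原代码第 58 行)注释掉,则用户事件永远不会完成,写缓冲区和内核都无法开始执行,阻塞式的 clEnqueueReadBuffer 不会返回,程序被挂起,不能结束。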
▶ 回调函数,是指当到达特定状态(用事件定义)时在主机端执行的程序(函数)。一般用于主机端在等待设备端执行的过程中调度其他任务或执行某些辅助计算,提高设备利用效率。
● 回调函数代码(用上面的事件代码改进得到的)
#include <stdio.h>
#include <stdlib.h>
#include <cl.h>

/* Number of elements in each vector.
 * NOTE(review): the literal was lost from the original listing; 2048 is an
 * assumed value - confirm against the original source. */
const int nElement = 2048;

/* OpenCL C source of the kernel: each work-item adds one pair of elements. */
const char *programSource = " \
__kernel void vectorAdd(__global int *A, __global int *B, __global int *C) \
{ \
    int idx = get_global_id(0); \
    C[idx] = A[idx] + B[idx]; \
    return; \
} \
";

/* Host-side work triggered by the callback: prints the value it is given. */
void hostFunction(int data)
{
    printf("<hostFunction> data = %d\n", data);
}

/* Event callback with the fixed signature required by clSetEventCallback.
 * userData must point to an int; the event and status arguments are unused. */
void CL_CALLBACK callbackFunction(cl_event eventIn, cl_int status, void *userData)
{
    (void)eventIn;
    (void)status;
    hostFunction(*(int*)userData);
}

/* Abort with a message when an OpenCL call did not return CL_SUCCESS. */
static void checkStatus(cl_int status, const char *what)
{
    if (status != CL_SUCCESS)
    {
        fprintf(stderr, "%s failed with error %d\n", what, (int)status);
        exit(EXIT_FAILURE);
    }
}

/*
 * Vector addition gated by a user event, plus a host callback that runs when
 * that user event reaches CL_COMPLETE.
 *
 * Returns 0 after printing whether the result is correct; exits with
 * EXIT_FAILURE if an allocation or an OpenCL call fails.
 */
int main()
{
    const size_t datasize = sizeof(int) * nElement;
    int i, *A, *B, *C;
    cl_int status;
    cl_event eventList[3];  /* [0] user event, [1] write of A, [2] write of B */

    /* Value handed to the callback. It lives in its own static object so the
     * callback never reads the loop counter i while main is still changing it. */
    static int callbackData;

    A = (int*)malloc(datasize);
    B = (int*)malloc(datasize);
    C = (int*)malloc(datasize);
    if (A == NULL || B == NULL || C == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }
    for (i = 0; i < nElement; i++)
        A[i] = B[i] = i;

    /* Platform and device discovery: query the count first, then the list. */
    cl_uint nPlatform = 0;
    checkStatus(clGetPlatformIDs(0, NULL, &nPlatform), "clGetPlatformIDs(count)");
    if (nPlatform == 0)
    {
        fprintf(stderr, "no OpenCL platform found\n");
        return EXIT_FAILURE;
    }
    cl_platform_id *listPlatform = (cl_platform_id*)malloc(nPlatform * sizeof(cl_platform_id));
    if (listPlatform == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }
    checkStatus(clGetPlatformIDs(nPlatform, listPlatform, NULL), "clGetPlatformIDs(list)");

    cl_uint nDevice = 0;
    checkStatus(clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, 0, NULL, &nDevice), "clGetDeviceIDs(count)");
    if (nDevice == 0)
    {
        fprintf(stderr, "no OpenCL device found\n");
        return EXIT_FAILURE;
    }
    cl_device_id *listDevice = (cl_device_id*)malloc(nDevice * sizeof(cl_device_id));
    if (listDevice == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        return EXIT_FAILURE;
    }
    checkStatus(clGetDeviceIDs(listPlatform[0], CL_DEVICE_TYPE_ALL, nDevice, listDevice, NULL), "clGetDeviceIDs(list)");

    /* Context, command queue (on the first device), program and kernel. */
    cl_context context = clCreateContext(NULL, nDevice, listDevice, NULL, NULL, &status);
    checkStatus(status, "clCreateContext");
    cl_command_queue cmdQueue = clCreateCommandQueue(context, listDevice[0], 0, &status);
    checkStatus(status, "clCreateCommandQueue");
    cl_program program = clCreateProgramWithSource(context, 1, (const char**)&programSource, NULL, &status);
    checkStatus(status, "clCreateProgramWithSource");
    status = clBuildProgram(program, nDevice, listDevice, NULL, NULL, NULL);
    checkStatus(status, "clBuildProgram");
    cl_kernel kernel = clCreateKernel(program, "vectorAdd", &status);
    checkStatus(status, "clCreateKernel");

    cl_mem bufferA, bufferB, bufferC;
    bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status);
    checkStatus(status, "clCreateBuffer(A)");
    bufferB = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize, NULL, &status);
    checkStatus(status, "clCreateBuffer(B)");
    bufferC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize, NULL, &status);
    checkStatus(status, "clCreateBuffer(C)");

    /* User-defined event: stays incomplete until the host sets its status. */
    eventList[0] = clCreateUserEvent(context, &status);
    checkStatus(status, "clCreateUserEvent");

    /* Each non-blocking write waits on eventList[0] (wait list of length 1)
     * and is itself tagged with its own event. */
    status = clEnqueueWriteBuffer(cmdQueue, bufferA, CL_FALSE, 0, datasize, A, 1, &eventList[0], &eventList[1]);
    checkStatus(status, "clEnqueueWriteBuffer(A)");
    status = clEnqueueWriteBuffer(cmdQueue, bufferB, CL_FALSE, 0, datasize, B, 1, &eventList[0], &eventList[2]);
    checkStatus(status, "clEnqueueWriteBuffer(B)");

    checkStatus(clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufferA), "clSetKernelArg(0)");
    checkStatus(clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufferB), "clSetKernelArg(1)");
    checkStatus(clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufferC), "clSetKernelArg(2)");

    /* The work-group size literal was lost from the original listing, so the
     * local size is left to the implementation (NULL). */
    size_t globalSize[] = { (size_t)nElement };

    /* The kernel waits on the whole event list (length 3). */
    status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, globalSize, NULL, 3, eventList, NULL);
    checkStatus(status, "clEnqueueNDRangeKernel");

    /* Register the callback before completing the event, so it is in place
     * when the status changes. It receives a snapshot of i, which equals
     * nElement at this point (the value the original code passed via &i). */
    callbackData = i;
    status = clSetEventCallback(eventList[0], CL_COMPLETE, callbackFunction, (void*)&callbackData);
    checkStatus(status, "clSetEventCallback");

    /* Complete the user event: triggers the callback and lets the writes,
     * and then the kernel, start. */
    checkStatus(clSetUserEventStatus(eventList[0], CL_COMPLETE), "clSetUserEventStatus");

    /* Blocking read: returns once C holds the kernel's result. */
    status = clEnqueueReadBuffer(cmdQueue, bufferC, CL_TRUE, 0, datasize, C, 0, NULL, NULL);
    checkStatus(status, "clEnqueueReadBuffer");

    for (i = 0; i < nElement; i++)
    {
        if (C[i] != i + i)
            break;
    }
    printf("Output is %s.\n", (i == nElement) ? "correct" : "incorrect");

    /* Release events as well; every created or returned event holds a reference. */
    for (i = 0; i < 3; i++)
        clReleaseEvent(eventList[i]);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseMemObject(bufferA);
    clReleaseMemObject(bufferB);
    clReleaseMemObject(bufferC);
    clReleaseCommandQueue(cmdQueue);
    clReleaseContext(context);
    free(A);
    free(B);
    free(C);
    free(listPlatform);
    free(listDevice);
    getchar();
    return 0;
}
● 输出结果
<hostFunction> data =
Output is correct.
● 用到的定义,cl.h 中
// 有关 windows 下接口函数和回调函数的标记
#if defined(_WIN32)
#define CL_API_ENTRY
#define CL_API_CALL __stdcall
#define CL_CALLBACK __stdcall
#else
#define CL_API_ENTRY
#define CL_API_CALL
#define CL_CALLBACK
#endif // 有关事件的定义
typedef struct _cl_event* cl_event; // 有关回调函数开始执行的条件的选项
#define CL_COMPLETE 0x0
#define CL_RUNNING 0x1
#define CL_SUBMITTED 0x2
#define CL_QUEUED 0x3 extern CL_API_ENTRY cl_event CL_API_CALL clCreateUserEvent( // 创建用户自定义的事件
cl_context, // 给定上下文
cl_int * // 返回错误代码
) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clSetUserEventStatus(// 改变用户自定义事件状态
cl_event, // 给定事件
cl_int // 返回错误代码
) CL_API_SUFFIX__VERSION_1_1; extern CL_API_ENTRY cl_int CL_API_CALL clSetEventCallback( // 调用回调函数
cl_event, // 给定回调函数相关的状态(用事件定义)
cl_int, // 给定开始执行回调函数的条件(即给定事件达到某个状态)
void (CL_CALLBACK *)(cl_event, cl_int, void *), // 回调函数指针(注意参数格式)
void * // 传给回调函数的参数(上述函数指针的第三个参数)
) CL_API_SUFFIX__VERSION_1_1;