{ Click handler: builds the OpenCL program stored in 'VectorAdd.cl' for the
  platform/device pair selected in ListCouple, runs its 'VectorAdd' kernel on
  MYSIZE work-items, and shows element 0 of the result vector in ListOutput.

  Sender is the control that fired the event; it is not used.

  Every OpenCL call is checked. On failure a message box is shown and the
  handler returns. All OpenCL handles created so far are released and the
  mouse cursor is restored on every exit path, including exceptions. }
procedure TFormMain.BtnTestProgramClick(Sender: TObject);
const
  MYSIZE=5;
  // Some interesting data for the vectors
  // (longer alternative: 0,3,6,7,9,11,13,14,15,17,22,33,44,66)
  InitialData1: array[0..6] of cl_int = (0,3,6,7,9,11,22);
  InitialData2: array[0..0] of cl_int = (11);
var
  i: integer;
  HostVector1, HostVector2, HostOutputVector: array[0..MYSIZE-1] of cl_int;
  sourceStr: string;
  sourceSize: size_t;
  sourcePChar: PChar;
  OpenCLProgram: cl_program;
  OpenCLVectorAdd: cl_kernel;
  CommandQueue: cl_command_queue;
  GPUVector1, GPUVector2, GPUOutputVector: cl_mem;
  globalThreads: array[0..0] of size_t;
  s, error_string: string;
  returned_size: size_t;

  // Returns True (after telling the user) when the last OpenCL status stored
  // in errcode_ret signals a failure of the call named by What.
  function Failed(const What: string): Boolean;
  begin
    if errcode_ret<>CL_SUCCESS then begin
      ShowMessage('Error: '+What+' failed!');
      Failed:=True;
    end else
      Failed:=False;
  end;

begin
  if ListCouple.ItemIndex<0 then begin
    ShowMessage('Select couple!');
    exit;
  end;

  Cursor:=crHourGlass;
  try
    ////////////////
    // Init Context
    // Get compute devices from platform: first query the count, then the ids
    num_devices_returned:=0;
    errcode_ret:=clGetDeviceIDs(platform_devices[ListCouple.ItemIndex].platform_id,
      platform_devices[ListCouple.ItemIndex].device_type, 0, nil, @num_devices_returned);
    // Guard the zero-device case: indexing device_ids[0] on an empty array is invalid
    if (errcode_ret<>CL_SUCCESS) or (num_devices_returned=0) then begin
      ShowMessage('Error: Failed to create a device group!');
      exit;
    end;
    SetLength(device_ids, num_devices_returned);
    errcode_ret:=clGetDeviceIDs(platform_devices[ListCouple.ItemIndex].platform_id,
      platform_devices[ListCouple.ItemIndex].device_type, num_devices_returned,
      @device_ids[0], @num_devices_returned);
    if (errcode_ret<>CL_SUCCESS) then begin
      ShowMessage('Error: Failed to create a device group!');
      exit;
    end;

    // Create a compute context
    context:=clCreateContext(nil, num_devices_returned, @device_ids[0], nil, nil, @errcode_ret);
    if (errcode_ret<>CL_SUCCESS) then begin
      ShowMessage('Error: Failed to create a compute context!!');
      exit;
    end;
    // End (Init Context)
    ////////////////

    // Handles start out as nil so the finally block knows what to release
    OpenCLProgram:=nil;
    OpenCLVectorAdd:=nil;
    CommandQueue:=nil;
    GPUVector1:=nil;
    GPUVector2:=nil;
    GPUOutputVector:=nil;
    try
      // Initialize with some interesting repeating data; wrap on the real
      // table length so a larger MYSIZE can never index out of bounds
      for i:=0 to MYSIZE-1 do
        HostVector1[i]:=InitialData1[i mod Length(InitialData1)];
      // The whole HostVector2 array is uploaded below, so clear it first
      // instead of sending uninitialised stack contents to the device.
      // NOTE(review): the kernel source is not visible here; zero is assumed
      // to be an acceptable filler for elements 1..MYSIZE-1 - confirm.
      FillChar(HostVector2, SizeOf(HostVector2), 0);
      HostVector2[0]:=InitialData2[0];

      // Create OpenCL program with source code
      sourceStr:=convertToString('VectorAdd.cl');
      sourceSize:=Length(sourceStr);
      sourcePChar:=PChar(sourceStr);
      OpenCLProgram:=clCreateProgramWithSource(context, 1, @sourcePChar, @sourceSize, @errcode_ret);
      if Failed('clCreateProgramWithSource') then begin
        OpenCLProgram:=nil; // nothing valid to release
        exit;
      end;

      // Build the program (OpenCL JIT compilation)
      errcode_ret:=clBuildProgram(OpenCLProgram, 0, nil, nil, nil, nil);
      if errcode_ret<>CL_SUCCESS then begin
        error_string:='Error: clBuildProgram failed! ';
        // Fetch the build log: ask for its size, then for its contents
        returned_size:=0;
        if (clGetProgramBuildInfo(OpenCLProgram, device_ids[0], CL_PROGRAM_BUILD_LOG,
              0, nil, @returned_size)=CL_SUCCESS) and (returned_size>0) then begin
          SetLength(s, returned_size);
          if clGetProgramBuildInfo(OpenCLProgram, device_ids[0], CL_PROGRAM_BUILD_LOG,
               returned_size, PChar(s), nil)=CL_SUCCESS then begin
            // Cut the log at its terminating NUL, if one is present
            i:=Pos(#0, s);
            if i>0 then SetLength(s, i-1);
            error_string:=error_string+s;
          end;
        end;
        ShowMessage(error_string);
        exit;
      end;

      // Create a handle to the compiled OpenCL function (Kernel)
      OpenCLVectorAdd:=clCreateKernel(OpenCLProgram, PChar('VectorAdd'), @errcode_ret);
      if Failed('clCreateKernel') then begin
        OpenCLVectorAdd:=nil;
        exit;
      end;

      // Create a command-queue on the first CPU or GPU device
      CommandQueue:=clCreateCommandQueue(context, device_ids[0], 0, @errcode_ret);
      if Failed('clCreateCommandQueue') then begin
        CommandQueue:=nil;
        exit;
      end;

      // Allocate GPU memory for source vectors AND initialize from CPU memory
      GPUVector1:=clCreateBuffer(context, CL_MEM_READ_ONLY or CL_MEM_COPY_HOST_PTR,
        sizeof(HostVector1[0]) * MYSIZE, @HostVector1[0], @errcode_ret);
      if Failed('clCreateBuffer (vector 1)') then begin
        GPUVector1:=nil;
        exit;
      end;
      GPUVector2:=clCreateBuffer(context, CL_MEM_READ_ONLY or CL_MEM_COPY_HOST_PTR,
        sizeof(HostVector2[0]) * MYSIZE, @HostVector2[0], @errcode_ret);
      if Failed('clCreateBuffer (vector 2)') then begin
        GPUVector2:=nil;
        exit;
      end;
      // Allocate output memory on GPU
      GPUOutputVector:=clCreateBuffer(context, CL_MEM_WRITE_ONLY,
        sizeof(HostOutputVector[0]) * MYSIZE, nil, @errcode_ret);
      if Failed('clCreateBuffer (output)') then begin
        GPUOutputVector:=nil;
        exit;
      end;

      // In the next step we associate the GPU memory with the Kernel arguments
      errcode_ret:=clSetKernelArg(OpenCLVectorAdd, 0, sizeof(cl_mem), @GPUOutputVector);
      if Failed('clSetKernelArg (0)') then exit;
      errcode_ret:=clSetKernelArg(OpenCLVectorAdd, 1, sizeof(cl_mem), @GPUVector1);
      if Failed('clSetKernelArg (1)') then exit;
      errcode_ret:=clSetKernelArg(OpenCLVectorAdd, 2, sizeof(cl_mem), @GPUVector2);
      if Failed('clSetKernelArg (2)') then exit;

      // Launch the Kernel on the GPU (work-group size left to the runtime)
      globalThreads[0]:=MYSIZE;
      errcode_ret:=clEnqueueNDRangeKernel(CommandQueue, OpenCLVectorAdd, 1, nil,
        @globalThreads, nil, 0, nil, nil);
      if Failed('clEnqueueNDRangeKernel') then exit;

      // Copy the output in GPU memory back to CPU memory (blocking read)
      errcode_ret:=clEnqueueReadBuffer(CommandQueue, GPUOutputVector, CL_TRUE, 0,
        sizeof(HostOutputVector[0]) * MYSIZE, @HostOutputVector[0], 0, nil, nil);
      if Failed('clEnqueueReadBuffer') then exit;

      // List results
      ListOutput.Clear;
      ListOutput.Items.Add( inttostr(HostOutputVector[0])); // result
      //for i:=0 to MYSIZE-1 do ListOutput.Items.Add(Format('%d + %d = %d', [HostVector1[i], HostVector2[0], HostOutputVector[i]]));
    finally
      // Free memory - only what was actually created, context last
      if GPUVector1<>nil then clReleaseMemObject(GPUVector1);
      if GPUVector2<>nil then clReleaseMemObject(GPUVector2);
      if GPUOutputVector<>nil then clReleaseMemObject(GPUOutputVector);
      if CommandQueue<>nil then clReleaseCommandQueue(CommandQueue);
      if OpenCLVectorAdd<>nil then clReleaseKernel(OpenCLVectorAdd);
      if OpenCLProgram<>nil then clReleaseProgram(OpenCLProgram);
      clReleaseContext(context);
    end;
  finally
    Cursor:=crDefault;
  end;
end;