{ Click handler that runs the 'VectorAdd' kernel from VectorAdd.cl on the
  platform/device pair selected in ListCouple and shows element 0 of the
  result vector in ListOutput.

  Steps: query the devices of the selected pair, create a context on them,
  compile the kernel source, upload two MYSIZE-element input vectors, launch
  MYSIZE work-items, read the output vector back and display its first element.

  Every OpenCL call is checked; on failure a message is shown and the handler
  returns. All OpenCL objects created here are released and the cursor is
  restored on every exit path via try..finally.

  Uses state declared outside this procedure: platform_devices, device_ids,
  num_devices_returned, errcode_ret and context.

  Sender is not used. }
procedure TFormMain.BtnTestProgramClick(Sender: TObject);
const
  MYSIZE = 5;
  // Some interesting data for the vectors
  InitialData1: array[0..6] of cl_int = (0, 3, 6, 7, 9, 11, 22);
  InitialData2: array[0..0] of cl_int = (11);
var
  i: integer;
  HostVector1, HostVector2, HostOutputVector: array[0..MYSIZE-1] of cl_int;
  // OpenCL consumes byte strings, so the source and the build log are kept
  // as AnsiString regardless of what the default "string" type is.
  sourceStr, buildLog: AnsiString;
  sourceSize: size_t;
  sourcePChar: PAnsiChar;
  OpenCLProgram: cl_program;
  OpenCLVectorAdd: cl_kernel;
  CommandQueue: cl_command_queue;
  GPUVector1, GPUVector2, GPUOutputVector: cl_mem;
  globalThreads: array[0..0] of size_t;
  error_string: string;
  returned_size: size_t;
  terminatorPos: integer;

  // Shows Msg and returns True when Code is not CL_SUCCESS.
  function Failed(Code: cl_int; const Msg: string): boolean;
  begin
    Result := Code <> CL_SUCCESS;
    if Result then
      ShowMessage(Msg);
  end;

begin
  Cursor := crHourGlass;
  try
    ////////////////
    // Init Context
    if ListCouple.ItemIndex < 0 then
    begin
      ShowMessage('Select couple!');
      Exit;
    end;

    // Get compute devices from platform: first ask how many there are ...
    num_devices_returned := 0;
    errcode_ret := clGetDeviceIDs(platform_devices[ListCouple.ItemIndex].platform_id,
      platform_devices[ListCouple.ItemIndex].device_type, 0, nil, @num_devices_returned);
    // ... and stop here if there are none, because device_ids[0] is
    // dereferenced below and must exist.
    if (errcode_ret <> CL_SUCCESS) or (num_devices_returned = 0) then
    begin
      ShowMessage('Error: Failed to create a device group!');
      Exit;
    end;
    SetLength(device_ids, num_devices_returned);
    errcode_ret := clGetDeviceIDs(platform_devices[ListCouple.ItemIndex].platform_id,
      platform_devices[ListCouple.ItemIndex].device_type, num_devices_returned,
      @device_ids[0], @num_devices_returned);
    if Failed(errcode_ret, 'Error: Failed to create a device group!') then
      Exit;

    // Create a compute context
    context := clCreateContext(nil, num_devices_returned, @device_ids[0], nil, nil, @errcode_ret);
    if Failed(errcode_ret, 'Error: Failed to create a compute context!!') then
      Exit;
    // End (Init Context)
    ////////////////

    // From here on the context exists; nil handles mark "not created yet"
    // so the finally block releases exactly what was created.
    OpenCLProgram := nil;
    OpenCLVectorAdd := nil;
    CommandQueue := nil;
    GPUVector1 := nil;
    GPUVector2 := nil;
    GPUOutputVector := nil;
    try
      // Initialize with some interesting repeating data. The modulus is the
      // real table length so the index stays in range for any MYSIZE.
      for i := 0 to MYSIZE - 1 do
        HostVector1[i] := InitialData1[i mod Length(InitialData1)];
      // Only element 0 of the second vector carries data. The remaining
      // elements are zeroed because the whole MYSIZE-element array is
      // uploaded below and must not contain uninitialized stack memory.
      FillChar(HostVector2, SizeOf(HostVector2), 0);
      HostVector2[0] := InitialData2[0];

      // Create OpenCL program with source code
      // NOTE(review): convertToString presumably returns the file's text;
      // confirm how it reports a missing file.
      sourceStr := convertToString('VectorAdd.cl');
      if sourceStr = '' then
      begin
        ShowMessage('Error: kernel source VectorAdd.cl is empty or could not be read!');
        Exit;
      end;
      sourceSize := Length(sourceStr); // length in bytes
      sourcePChar := PAnsiChar(sourceStr);
      OpenCLProgram := clCreateProgramWithSource(context, 1, @sourcePChar, @sourceSize, @errcode_ret);
      if Failed(errcode_ret, 'Error: clCreateProgramWithSource failed!') then
        Exit;

      // Build the program (OpenCL JIT compilation)
      if clBuildProgram(OpenCLProgram, 0, nil, nil, nil, nil) <> CL_SUCCESS then
      begin
        error_string := 'Error: clBuildProgram failed! ';
        // Fetch the build log of the first device: query its size, then its text.
        returned_size := 0;
        if (clGetProgramBuildInfo(OpenCLProgram, device_ids[0], CL_PROGRAM_BUILD_LOG,
              0, nil, @returned_size) = CL_SUCCESS) and (returned_size > 0) then
        begin
          SetLength(buildLog, returned_size);
          if clGetProgramBuildInfo(OpenCLProgram, device_ids[0], CL_PROGRAM_BUILD_LOG,
               returned_size, PAnsiChar(buildLog), @returned_size) = CL_SUCCESS then
          begin
            // Cut the log at its first NUL terminator, if any.
            terminatorPos := Pos(#0, buildLog);
            if terminatorPos > 0 then
              SetLength(buildLog, terminatorPos - 1);
            error_string := error_string + string(buildLog);
          end;
        end;
        ShowMessage(error_string);
        Exit;
      end;

      // Create a handle to the compiled OpenCL function (Kernel)
      OpenCLVectorAdd := clCreateKernel(OpenCLProgram, PAnsiChar('VectorAdd'), @errcode_ret);
      if Failed(errcode_ret, 'Error: clCreateKernel failed!') then
        Exit;

      // Create a command-queue on the first device of the context
      CommandQueue := clCreateCommandQueue(context, device_ids[0], 0, @errcode_ret);
      if Failed(errcode_ret, 'Error: clCreateCommandQueue failed!') then
        Exit;

      // Allocate device memory for source vectors AND initialize from host memory
      GPUVector1 := clCreateBuffer(context, CL_MEM_READ_ONLY or CL_MEM_COPY_HOST_PTR,
        SizeOf(HostVector1), @HostVector1[0], @errcode_ret);
      if Failed(errcode_ret, 'Error: clCreateBuffer failed for input vector 1!') then
        Exit;
      GPUVector2 := clCreateBuffer(context, CL_MEM_READ_ONLY or CL_MEM_COPY_HOST_PTR,
        SizeOf(HostVector2), @HostVector2[0], @errcode_ret);
      if Failed(errcode_ret, 'Error: clCreateBuffer failed for input vector 2!') then
        Exit;
      // Allocate output memory on the device
      GPUOutputVector := clCreateBuffer(context, CL_MEM_WRITE_ONLY,
        SizeOf(HostOutputVector), nil, @errcode_ret);
      if Failed(errcode_ret, 'Error: clCreateBuffer failed for the output vector!') then
        Exit;

      // Associate the device memory with the kernel arguments:
      // 0 = output vector, 1 = first input, 2 = second input.
      errcode_ret := clSetKernelArg(OpenCLVectorAdd, 0, SizeOf(cl_mem), @GPUOutputVector);
      if errcode_ret = CL_SUCCESS then
        errcode_ret := clSetKernelArg(OpenCLVectorAdd, 1, SizeOf(cl_mem), @GPUVector1);
      if errcode_ret = CL_SUCCESS then
        errcode_ret := clSetKernelArg(OpenCLVectorAdd, 2, SizeOf(cl_mem), @GPUVector2);
      if Failed(errcode_ret, 'Error: clSetKernelArg failed!') then
        Exit;

      // Launch the kernel with one work-item per vector element; the local
      // work size is left to the OpenCL implementation (nil).
      globalThreads[0] := MYSIZE;
      errcode_ret := clEnqueueNDRangeKernel(CommandQueue, OpenCLVectorAdd, 1, nil,
        @globalThreads, nil, 0, nil, nil);
      if Failed(errcode_ret, 'Error: clEnqueueNDRangeKernel failed!') then
        Exit;

      // Copy the output in device memory back to host memory (blocking read)
      errcode_ret := clEnqueueReadBuffer(CommandQueue, GPUOutputVector, CL_TRUE, 0,
        SizeOf(HostOutputVector), @HostOutputVector[0], 0, nil, nil);
      if Failed(errcode_ret, 'Error: clEnqueueReadBuffer failed!') then
        Exit;

      // List results
      ListOutput.Clear;
      ListOutput.Items.Add(IntToStr(HostOutputVector[0])); // result
      //for i:=0 to MYSIZE-1 do ListOutput.Items.Add(Format('%d + %d = %d', [HostVector1[i], HostVector2[0], HostOutputVector[i]]));
    finally
      // Free everything that was created, then the context itself.
      if GPUVector1 <> nil then clReleaseMemObject(GPUVector1);
      if GPUVector2 <> nil then clReleaseMemObject(GPUVector2);
      if GPUOutputVector <> nil then clReleaseMemObject(GPUOutputVector);
      if CommandQueue <> nil then clReleaseCommandQueue(CommandQueue);
      if OpenCLVectorAdd <> nil then clReleaseKernel(OpenCLVectorAdd);
      if OpenCLProgram <> nil then clReleaseProgram(OpenCLProgram);
      clReleaseContext(context);
    end;
  finally
    Cursor := crDefault;
  end;
end;