I wrote an OpenCL program that adds 1 to every element of an array,
using https://peta.okechan.net/blog/archives/2538
as a reference.
int index=get_global_id(0);
int index2 = get_global_id(1);
I would like to use these two indices to update every element with
data[index*M+index2] += 1.0;
int index=get_global_id(0);
int index2 = get_global_id(1);
However, index and index2 always contain the same value as each other,
so most elements never get 1 added to them.
//
// main3.cpp
//
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <numeric>
#include <vector>

#include <OpenCL/opencl.h>
// Capacities of the fixed-size arrays that receive platform IDs and device IDs.
#define PLATFORM_MAX 4
#define DEVICE_MAX 4
// Reports a failed OpenCL call on standard output.
//   result: status code returned by the call being checked
//   title:  label printed so the failing call can be identified
// Prints nothing when result is CL_SUCCESS. The function only reports;
// it neither aborts nor returns a value.
void EC(cl_int result, const char* title)
{
    if (result != CL_SUCCESS) {
        std::cout << "Error:" << title << "(" << result << ")\n";
    }
}
cl_interr = CL_SUCCESS;
void EC2(const char*title)
{
if(err!=CL_SUCCESS){
std::cout<<"Error:"<<title<"("<err<")\n";
}
err = CL_SUCCESS;
}
int main(int argc, const char*argv[])
{
// Get Platform List
cl_platform_id platforms [PLATFORM_MAX];
cl_uint platformCount;
EC (clGetPlatformIDs (PLATFORM_MAX, platforms, & platformCount), "clGetPlatformIDs");
if(platformCount==0){
std::cerr<<"No platform.\n";
return EXIT_FAILURE;
}
// Print found platform information
for(inti=0;i<platformCount;i++){
char vendor [100] = {0};
char version [100] = {0};
EC(clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(vendor), vendor, nullptr), "clGetPlatformInfo";
EC(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, sizeof(version), version, nullptr), "clGetPlatformInfo";
std::cout<<"Platform id:"<<platforms[i]<", Vendor:"<vendor<<", Version:"<version<<"\n";
}
// Get device list
cl_device_id devices [DEVICE_MAX];
cl_uint deviceCount;
EC(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, DEVICE_MAX, devices, & deviceCount), "clGetDeviceIDs");
if(deviceCount==0){
std::cerr<<"No device.\n";
return EXIT_FAILURE;
}
// Print information about found devices
std::cout<<deviceCount<<"device(s)found.\n";
for(inti=0;i<deviceCount;i++){
char name [100] = {0};
size_tlen;
EC(clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(name), name, &len), "clGetDeviceInfo");
std::cout<<"Device id:"<<i<<", Name:"<<name<"\n";
}
// Creating Contexts
cl_context ctx = clCreateContext(nullptr, 1, devices, nullptr, nullptr, & err);
EC2("clCreateContext";
// Loading Compiled Cl Programs
const char*bitcode_path="kernel2.cl.gpu_32.bc";
size_tlen=strlen(bitcode_path);
cl_program program = clCreateProgramWithBinary(ctx, 1, devices, &len, (constructed char**) & bitcode_path, nullptr, & err);
EC2 ("clCreateProgramWithBinary");
// Program Build
EC(clBuildProgram(program, 1, devices, nullptr, nullptr, nullptr), "clBuildProgram";
// Creating a Kernel
cl_kernel kernel=clCreateKernel(program, "addone", & err);
EC2 ("clCreateKernel");
// Get Your Data
int n = 10;
std::vector<float>data(n*n,0.0f);
// Copy data while securing device memory
cl_mem device_mem = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size of (float)*n*n, data.data(), & err);
EC2 ("clCreateBuffer");
// Set kernel arguments
EC(clSetKernelArg(kernel, 0, sizeof(cl_mem), & device_mem), "clSetKernelArg");
EC(clSetKernelArg(kernel, 1, sizeof(int), & n), "clSetKernelArg");
// Creating a Command Queue
cl_command_queue q = clCreateCommandQueue(ctx, devices[0], 0, & err);
EC2("clCreateCommandQueue";
// Running the Kernel
size_t global[2], local[2], offset[2];
offset [0] = 0;
offset [0] = 0;
global[0] = n;
global[1] = n;
local[0] = 1;
local[1] = 1;
EC(clEnqueueNDRangeKernel(q, kernel, 2, offset, global, NULL, 0, nullptr, nullptr), "clEnqueueNDRangeKernel");
// Read Results
EC(clEnqueueReadBuffer(q, device_mem, CL_TRUE, 0, sizeof(float)*n*n, data.data(), 0, nullptr, nullptr), "clEnqueueReadBuffer");
// result printing
for(inti=0;i<n*n;i++){
std::cout<<data[i]<";
}
std::cout<<"\n";
float total=std::accumulate(data.begin(),data.end(),0.0);
std::cout<<total<<std::endl;
// Releasing the Command Queue
EC(clReleaseCommandQueue(q), "clReleaseCommandQueue";
// Free up device memory
EC(clReleaseMemObject(device_mem), "clReleaseMemObject";
// kernel release
EC(clReleaseKernel(kernel), "clReleaseKernel";
// program release
EC(clReleaseProgram(program), "clReleaseProgram";
// Releasing Contexts
EC(clReleaseContext(ctx), "clReleaseContext";
std::cout<<"Done.\n";
return EXIT_SUCCESS;
}
// Kernel portion from here
// kernel2.cl
// Adds 1.0f to one element of an n x n float array.
//   data: buffer holding n*n floats
//   n:    edge length of the grid
// Meant for a 2-D launch: the work-item with global ids (index, index2)
// updates data[index * n + index2].
__kernel
void addone(__global float* data, const int n)
{
    const int index = get_global_id(0);
    const int index2 = get_global_id(1);

    // Debug output: launch dimensionality and this work-item's ids.
    const int dim = get_work_dim();
    printf("get_work_dim=%d\n", dim);
    printf("index=%d, index2=%d\n", index, index2);

    // Ignore work-items outside the grid so that a launch range larger than
    // n x n cannot write past the end of the buffer.
    if (index < n && index2 < n) {
        data[index * n + index2] += 1.0f;
    }
}
EC (clEnqueueNDRangeKernel(q, kernel, 2, offset, global, local, 0, nullptr, nullptr), "clEnqueueNDRangeKernel");
I also tried passing local explicitly, as in the call above, and changing
the values of global, local,
and so on.
The machine is a 13" MacBook Pro running OS X El Capitan.
I build and run the program in Terminal with the following commands:
/System/Library/Frameworks/OpenCL.framework/Libraries/openclc -c -o kernel2.cl.gpu_32.bc -arch gpu_32 -emit-llvm kernel2.cl
g++ -O3 -std=c++11 -framework opencl main3.cpp -o test
./test
c++ c
I have not been able to run this myself because I don't have an OpenCL environment, but look at the part that launches the kernel:
size_t global[2], local[2], offset[2];
offset [0] = 0;
offset[0] = 0;//<- What about offset[1] = 0?
offset[1]
is never initialized, so its value is indeterminate.
To prevent this kind of omission, variables should be initialized at the point of declaration whenever possible.
In this case, you could write
size_t offset[2] = {0, 0};  // every element is initialised at declaration
and so on.
© 2023 OneMinuteCode. All rights reserved.