How OpenCL handles two-dimensional arrays

Asked 2 months ago, Updated 2 months ago, 1 views

What do you want to do

I wrote an OpenCL program that adds 1 to every element of an array,
using https://peta.okechan.net/blog/archives/2538
as a reference.

int index=get_global_id(0); 
int index2 = get_global_id(1);

Using these two indices, I would like to update each element with data[index*M+index2] += 1.0;

Problems you are having

int index=get_global_id(0); 
int index2 = get_global_id(1); 

index and index2 always contain the same value,
so most elements are not incremented by 1.

Source Codes Affected

//
//  main3.cpp
//
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <numeric>
#include <vector>

#include <OpenCL/opencl.h>

// Capacity of the fixed-size arrays that receive platform / device IDs.
// The name and the value must be separated by whitespace; written as
// "PLATFORM_MAX4" the preprocessor defines an empty macro with that name
// and PLATFORM_MAX itself stays undefined.
#define PLATFORM_MAX 4
#define DEVICE_MAX 4


// Error check for OpenCL calls that return their status directly.
//
// result: status code returned by the OpenCL call.
// title:  label (normally the API name) printed with the error code.
//
// Prints "Error:<title>(<code>)" to std::cout when result is not CL_SUCCESS;
// does nothing otherwise. Execution always continues after the report.
void EC(cl_int result, const char* title)
{
    if (result != CL_SUCCESS) {
        // Every operand must be chained with <<; a lone < is a comparison
        // and does not compile against a stream.
        std::cout << "Error:" << title << "(" << result << ")\n";
    }
}


cl_interr = CL_SUCCESS;
void EC2(const char*title)
{
    if(err!=CL_SUCCESS){
        std::cout<<"Error:"<<title<"("<err<")\n";
    }
    err = CL_SUCCESS;
}


int main(int argc, const char*argv[])
{
    // Get Platform List
    cl_platform_id platforms [PLATFORM_MAX];
    cl_uint platformCount;
    EC (clGetPlatformIDs (PLATFORM_MAX, platforms, & platformCount), "clGetPlatformIDs");
    if(platformCount==0){
        std::cerr<<"No platform.\n";
        return EXIT_FAILURE;
    }

    // Print found platform information
    for(inti=0;i<platformCount;i++){
        char vendor [100] = {0};
        char version [100] = {0};
        EC(clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(vendor), vendor, nullptr), "clGetPlatformInfo";
        EC(clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, sizeof(version), version, nullptr), "clGetPlatformInfo";
        std::cout<<"Platform id:"<<platforms[i]<", Vendor:"<vendor<<", Version:"<version<<"\n";
    }

    // Get device list
    cl_device_id devices [DEVICE_MAX];
    cl_uint deviceCount;
    EC(clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_GPU, DEVICE_MAX, devices, & deviceCount), "clGetDeviceIDs");
    if(deviceCount==0){
        std::cerr<<"No device.\n";
        return EXIT_FAILURE;
    }

    // Print information about found devices
    std::cout<<deviceCount<<"device(s)found.\n";
    for(inti=0;i<deviceCount;i++){
        char name [100] = {0};
        size_tlen;
        EC(clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(name), name, &len), "clGetDeviceInfo");
        std::cout<<"Device id:"<<i<<", Name:"<<name<"\n";
    }

    // Creating Contexts
    cl_context ctx = clCreateContext(nullptr, 1, devices, nullptr, nullptr, & err);
    EC2("clCreateContext";

    // Loading Compiled Cl Programs
    const char*bitcode_path="kernel2.cl.gpu_32.bc";
    size_tlen=strlen(bitcode_path);
    cl_program program = clCreateProgramWithBinary(ctx, 1, devices, &len, (constructed char**) & bitcode_path, nullptr, & err);
    EC2 ("clCreateProgramWithBinary");

    // Program Build
    EC(clBuildProgram(program, 1, devices, nullptr, nullptr, nullptr), "clBuildProgram";

    // Creating a Kernel
    cl_kernel kernel=clCreateKernel(program, "addone", & err);
    EC2 ("clCreateKernel");

    // Get Your Data
    int n = 10;
    std::vector<float>data(n*n,0.0f);
    // Copy data while securing device memory
    cl_mem device_mem = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size of (float)*n*n, data.data(), & err);
    EC2 ("clCreateBuffer");

    // Set kernel arguments
    EC(clSetKernelArg(kernel, 0, sizeof(cl_mem), & device_mem), "clSetKernelArg");
    EC(clSetKernelArg(kernel, 1, sizeof(int), & n), "clSetKernelArg");

    // Creating a Command Queue
    cl_command_queue q = clCreateCommandQueue(ctx, devices[0], 0, & err);
    EC2("clCreateCommandQueue";

    // Running the Kernel
    size_t global[2], local[2], offset[2];
    offset [0] = 0;
    offset [0] = 0;
    global[0] = n;
    global[1] = n;
    local[0] = 1;
    local[1] = 1;
    EC(clEnqueueNDRangeKernel(q, kernel, 2, offset, global, NULL, 0, nullptr, nullptr), "clEnqueueNDRangeKernel");

    // Read Results
    EC(clEnqueueReadBuffer(q, device_mem, CL_TRUE, 0, sizeof(float)*n*n, data.data(), 0, nullptr, nullptr), "clEnqueueReadBuffer");

    // result printing
    for(inti=0;i<n*n;i++){
        std::cout<<data[i]<";
    }
    std::cout<<"\n";
    float total=std::accumulate(data.begin(),data.end(),0.0);
    std::cout<<total<<std::endl;
    // Releasing the Command Queue
    EC(clReleaseCommandQueue(q), "clReleaseCommandQueue";

    // Free up device memory
    EC(clReleaseMemObject(device_mem), "clReleaseMemObject";

    // kernel release
    EC(clReleaseKernel(kernel), "clReleaseKernel";

    // program release
    EC(clReleaseProgram(program), "clReleaseProgram";

    // Releasing Contexts
    EC(clReleaseContext(ctx), "clReleaseContext";

    std::cout<<"Done.\n";
    return EXIT_SUCCESS;
}



// Kernel portion from here
// kernel2.cl
// Adds 1.0f to one element of an n x n row-major float matrix.
//
// data: device buffer holding n * n floats.
// n:    number of rows and of columns.
//
// Intended to be launched as a two-dimensional NDRange: global id 0 is used
// as the row and global id 1 as the column. The two printf calls are
// diagnostics that show the work dimension and the indices of each work-item.
__kernel
void addone(__global float* data, const int n)
{
    const int index = get_global_id(0);
    const int index2 = get_global_id(1);
    const int dim = get_work_dim();
    printf("get_work_dim=%d\n", dim);
    printf("index=%d, index2=%d\n", index, index2);
    // Skip work-items outside the matrix (for example when a non-zero global
    // offset or a padded global size is used) instead of writing out of bounds.
    if (index < n && index2 < n) {
        data[index * n + index2] += 1.0f;
    }
}

Tried

EC (clEnqueueNDRangeKernel(q, kernel, 2, offset, global, local, 0, nullptr, nullptr), "clEnqueueNDRangeKernel");

I tried changing the global and local work sizes, among other things.

Run Environment

The machine is a 13-inch MacBook Pro running OS X El Capitan.

In Terminal, I ran:
/System/Library/Frameworks/OpenCL.framework/Libraries/openclc -c -o kernel2.cl.gpu_32.bc -arch gpu_32 -emit-llvm kernel2.cl
g++ -O3 -std=c++11 -framework opencl main3.cpp -o test
./test

c++ c

2022-09-30 13:51

1 Answers

I haven't been able to confirm the execution because I don't have an OpenCL execution environment, but the kernel execution part

size_t global[2], local[2], offset[2];
offset [0] = 0;
offset[0] = 0;//<- What about offset[1] = 0?

Here offset[1] is never initialized, so its value is indeterminate.

To prevent this kind of omission of initialization, initialization should be done at the time of declaration as much as possible.
In this case,

size_t offset[2] = {0, 0};

and so on.


2022-09-30 13:51

If you have any answers or tips


© 2022 OneMinuteCode. All rights reserved.