bunchen 发表于 2015-10-9 09:46:03

Firefly rk3288 OpenCL

本帖最后由 bunchen 于 2015-10-9 10:20 编辑

Firefly rk3288采用Mali-T764的GPU,该GPU支持OpenCL 1.1。下面一步一步介绍Android下的OpenCL开发。

1.首先我们需要OpenCL的头文件和库。头文件可以在khronos的网站上下载:https://www.khronos.org/registry/cl/ 由于Mali-T764支持OpenCL 1.1,所以我们下载1.1版的头文件并放对位置。新建include目录,把opencl.h放在include目录下,把cl_d3d10.h 、 cl_ext.h 、 cl_gl_ext.h 、 cl_gl.h 、 cl.h 、 cl.hpp 、 cl_platform.h放到include/CL目录下。OpenCL的库文件位于firefly rk3288源代码的device/rockchip/common/gpu/libMali-T760/libGLES_mali.so。

2.然后就可以在此基础上进行开发了。下面给出一个打印OpenCL信息的Demo。由于libGLES_mali.so并没有放入Android的/system/lib路径下,所以我们要在Demo的Android.mk文件中对libGLES_mali.so做预编译,Android.mk文件如下:
LOCAL_PATH := $(call my-dir)


# Module 1: declare the Mali driver library as a prebuilt, because it is not
# already present under /system/lib on the target.
include $(CLEAR_VARS)
LOCAL_PREBUILT_LIBS := \
    ../opencl/lib/libGLES_mali.so
include $(BUILD_MULTI_PREBUILT)

# Module 2: the opencl-info command-line executable.
include $(CLEAR_VARS)

# Location of opencl.h and the CL/ header directory.
LOCAL_C_INCLUDES := \
      $(LOCAL_PATH)/../opencl/include

LOCAL_SRC_FILES := \
      opencl-info.c
      
# Link against the prebuilt driver library declared above.
LOCAL_SHARED_LIBRARIES := \
      libGLES_mali

LOCAL_MODULE := opencl-info

include $(BUILD_EXECUTABLE)
在opencl-info.c中我们打印一些OpenCL的信息:
#include <stdio.h>
#include <stdlib.h>
#include <opencl.h>

/*
 * Print one platform attribute, or a readable reason when the query failed.
 *
 * ret  - status returned by clGetPlatformInfo() for this attribute
 * i    - index of the platform in the enumerated list
 * part - human-readable attribute name ("profile", "version", ...)
 * buf  - NUL-terminated attribute value; only read when ret == CL_SUCCESS
 */
void printPlatformInfo(cl_int ret ,cl_uint i, const char* part , char* buf)
{
    const char* reason;

    if(ret==CL_SUCCESS){
        /* cl_uint is unsigned, so it is printed with %u rather than %d. */
        printf("\t platform index=%u %s : %s\n",(unsigned)i,part,buf);
        return;
    }

    switch(ret){
    case CL_INVALID_PLATFORM:
        reason = "invalid platform.";
        break;
    case CL_INVALID_VALUE:
        reason = "invalid value.";
        break;
    case CL_OUT_OF_HOST_MEMORY:
        reason = "out of host memory.";
        break;
    default:
        reason = "i don't know why.";
        break;
    }
    printf("\t platform index=%u %s : %s\n",(unsigned)i,part,reason);
}

/*
 * Print name, type, vendor, driver version and profile of one OpenCL device.
 * Each attribute is queried independently; a failed query prints a
 * "get ... fail" line and the remaining attributes are still attempted.
 */
void printDevice(cl_device_id device)
{
    /* Must be a real array: the posted `char buf;` handed a single byte to
     * calls that were told they may write 128 bytes. */
    char buf[128];
    size_t size = 0 ;
    cl_device_type type;
    cl_int ret = CL_SUCCESS;

    printf("\t   ######################\n");

    ret = clGetDeviceInfo(device,CL_DEVICE_NAME,sizeof(buf),buf,&size);
    if(ret==CL_SUCCESS){
        printf("\t   device name=%s\n",buf);
    }else{
        printf("\t   get device name fail !\n");
    }

    ret = clGetDeviceInfo(device,CL_DEVICE_TYPE,sizeof(cl_device_type),&type,&size);
    if(ret==CL_SUCCESS){
        /* cl_device_type is a 64-bit bitfield; %lu is too narrow where
         * unsigned long is 32 bits, so widen explicitly. */
        printf("\t   device type=%llu\n",(unsigned long long)type);
    }else{
        printf("\t   get device type fail !\n");
    }

    ret = clGetDeviceInfo(device,CL_DEVICE_VENDOR,sizeof(buf),buf,&size);
    if(ret==CL_SUCCESS){
        printf("\t   device vendor=%s\n",buf);
    }else{
        printf("\t   get device vendor fail !\n");
    }

    /* Note: the value queried here is CL_DRIVER_VERSION, although the label
     * printed says "device version". */
    ret = clGetDeviceInfo(device,CL_DRIVER_VERSION,sizeof(buf),buf,&size);
    if(ret==CL_SUCCESS){
        printf("\t   device version=%s\n",buf);
    }else{
        printf("\t   get device version fail !\n");
    }

    ret = clGetDeviceInfo(device,CL_DEVICE_PROFILE,sizeof(buf),buf,&size);
    if(ret==CL_SUCCESS){
        printf("\t   device profile=%s\n",buf);
    }else{
        printf("\t   get device profile fail !\n");
    }

    printf("\t   ######################\n");
}

/*
 * Enumerate the devices of the given type on a platform and print each one.
 *
 * platform    - platform to query
 * device_type - CL_DEVICE_TYPE_* selector passed through to clGetDeviceIDs()
 */
void printDevices(cl_platform_id platform , cl_device_type device_type)
{
    enum { MAX_DEVICES = 8 };
    cl_device_id devices[MAX_DEVICES];
    cl_uint i ;
    cl_uint num = 0 ;
    cl_uint shown ;
    cl_int ret = clGetDeviceIDs(platform,device_type,MAX_DEVICES,devices,&num);

    if(ret!=CL_SUCCESS){
        printf("\tgetDeviceIDs fail !\n");
        return;
    }

    printf("\tgetDeviceIDs success ! num=%u\n",(unsigned)num);

    /* num reports every matching device, which may be more than the
     * MAX_DEVICES entries that were actually filled in. */
    shown = num>MAX_DEVICES ? MAX_DEVICES : num;
    for(i=0;i<shown;i++){
        printDevice(devices[i]);
    }
}

void printOpenCL()
{
      cl_platform_id platforms;
      cl_uint num = 0 ;
      cl_uint i=0;
      cl_int err = clGetPlatformIDs(8,platforms,&num);
      if(err==CL_SUCCESS){
                printf("printOpenCL got %d platforms : \n",num);
                for(i=0;i<num;i++){
                  char buf;
                  size_t size = 0 ;
                  cl_int ret = 0 ;
                  
                  ret = clGetPlatformInfo(platforms,CL_PLATFORM_PROFILE,128,buf,&size);
                  printPlatformInfo(ret,i,"profile",buf);
               
                ret = clGetPlatformInfo(platforms,CL_PLATFORM_VERSION,128,buf,&size);
                  printPlatformInfo(ret,i,"version",buf);
                  
                ret = clGetPlatformInfo(platforms,CL_PLATFORM_NAME,128,buf,&size);
                printPlatformInfo(ret,i,"name",buf);
                  
                  ret = clGetPlatformInfo(platforms,CL_PLATFORM_VENDOR,128,buf,&size);
                printPlatformInfo(ret,i,"vendor",buf);
               
                ret = clGetPlatformInfo(platforms,CL_PLATFORM_EXTENSIONS,128,buf,&size);
                printPlatformInfo(ret,i,"extensions",buf);
               
                printf("Device cpu:\n");
                printDevices(platforms,CL_DEVICE_TYPE_CPU);
               
                printf("Device gpu:\n");
                printDevices(platforms,CL_DEVICE_TYPE_GPU);
               
                printf("Device accelerator:\n");
                printDevices(platforms,CL_DEVICE_TYPE_ACCELERATOR);
               
                printf("Device default:\n");
                printDevices(platforms,CL_DEVICE_TYPE_DEFAULT);
               
                printf("Device all :\n");
                printDevices(platforms,CL_DEVICE_TYPE_ALL);
               
                }
      }else if(err==CL_INVALID_VALUE){
                printf("printOpenCL invalid value.\n");
      }else if(err==CL_OUT_OF_HOST_MEMORY){
                printf("printOpenCL out of host memory.\n");
      }else{
                printf("printOpenCL i don't know why.\n");
      }

}


int main(){
      printOpenCL();
      return 0;
}
这里打印的信息请参考OpenCL文档 : https://www.khronos.org/registry/cl/specs/opencl-1.1.pdf
把编译好的opencl-info push到/system/bin目录下,把libGLES_mali.so push到/system/lib目录下,就可以在adb shell中运行opencl-info了。


从打印的信息可以看到,rk3288支持OpenCL 1.1,支持的设备是gpu的Mali-T764。

bunchen 发表于 2015-10-9 10:08:44

本帖最后由 bunchen 于 2015-10-28 16:23 编辑

这里贴出一个简单的向量加法的例子,并说明容易出错的地方。先上代码:
#include <stdio.h>
#include <stdlib.h>
#include <opencl.h>

/*
 * Print the work-partitioning limits of a device: number of compute units,
 * number of work-item dimensions, the per-dimension work-item limits and the
 * maximum work-group size. Attributes whose query fails are silently skipped.
 */
void printDeviceWorkInfo(cl_device_id device)
{
    cl_uint nMaxComputeUnits = 0;
    cl_uint nMaxWorkItemDims = 0;
    cl_uint i = 0;
    size_t* nMaxWorkItemSizes = NULL;
    size_t nMaxWorkGroupSize = 0;
    size_t size = 0 ;
    cl_int err ;

    err = clGetDeviceInfo(device,CL_DEVICE_MAX_COMPUTE_UNITS,sizeof(cl_uint),&nMaxComputeUnits,&size);
    if(err==CL_SUCCESS){
        printf("nMaxComputeUnits=%u\n",(unsigned)nMaxComputeUnits);
    }

    err = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(cl_uint),&nMaxWorkItemDims,&size);
    if(err==CL_SUCCESS){
        printf("nMaxWorkItemDims=%u\n",(unsigned)nMaxWorkItemDims);
        nMaxWorkItemSizes = (size_t*)malloc(sizeof(size_t)*nMaxWorkItemDims);
        if(nMaxWorkItemSizes){
            err = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_SIZES,sizeof(size_t)*nMaxWorkItemDims,nMaxWorkItemSizes,&size);
            if(err==CL_SUCCESS){
                for(i=0;i<nMaxWorkItemDims;i++){
                    /* Print the i-th element (not the array pointer), using
                     * %zu because the values are size_t. */
                    printf("nMaxWorkItemSizes[%u]=%zu\n",(unsigned)i,nMaxWorkItemSizes[i]);
                }
            }
        }
        free(nMaxWorkItemSizes);
    }

    err = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_GROUP_SIZE,sizeof(size_t),&nMaxWorkGroupSize,&size);
    if(err==CL_SUCCESS){
        printf("nMaxWorkGroupSize=%zu\n",nMaxWorkGroupSize);
    }
}

/*
 * OpenCL C source of the vector-addition kernel.
 *
 * Each work-item handles one element: res[idx] = src_a[idx] + src_b[idx].
 * The `idx<num` guard is needed because the global work size is rounded up
 * to a multiple of the local work size and may therefore exceed `num`.
 */
const char* program_src = ""
"__kernel void vector_add_gpu (__global const float* src_a,\n"
"   __global const float* src_b,\n"
"   __global float* res,\n"
"   const int num)\n"
"{\n"
"   int idx = get_global_id(0);\n"
"   if(idx<num){\n"
"       res[idx]=src_a[idx]+src_b[idx];\n"
"   }\n"
"}\n"
;

/* Number of float elements in each vector; also passed to the kernel as its
 * element-count argument. */
static const cl_int vect_len = 10000000;

/* Host-side vectors: vect_a and vect_b are the inputs, vect_c receives the
 * result read back from the device. Allocated by initVects(), freed by
 * releaseVects(); NULL while not allocated. */
static float* vect_a = NULL ;
static float* vect_b = NULL ;
static float* vect_c = NULL ;

/*
 * Allocate the three host vectors and fill them: vect_a and vect_b with
 * pseudo-random values in [0,1], vect_c with zeros.
 * Exits the process if any allocation fails.
 */
void initVects()
{
    cl_int i;
    vect_a = (float*)malloc(sizeof(float)*vect_len);
    vect_b = (float*)malloc(sizeof(float)*vect_len);
    vect_c = (float*)malloc(sizeof(float)*vect_len);
    if(!vect_a || !vect_b || !vect_c){
        printf("allocate vectors fail !\n");
        exit(1);
    }
    for(i=0;i<vect_len;i++){
        /* Write the i-th element; assigning to the bare pointer (as posted)
         * would discard the allocation instead of filling it. */
        vect_a[i]=(float)rand()/RAND_MAX;
        vect_b[i]=(float)rand()/RAND_MAX;
        vect_c[i]=0.0f;
    }
}

/*
 * Print the first four and last four rows of (vect_a, vect_b, vect_c).
 * Does nothing unless all three vectors are allocated.
 */
void printVects()
{
    cl_int i;
    if(!vect_a || !vect_b || !vect_c){
        return;
    }

    printf("######################\n");
    for(i=0;i<4;i++){
        /* %f needs the element values, not the array pointers. */
        printf("%08d : %f,%f,%f\n",i,vect_a[i],vect_b[i],vect_c[i]);
    }
    printf("    ...    \n");
    for(i=vect_len-4;i<vect_len;i++){
        printf("%08d : %f,%f,%f\n",i,vect_a[i],vect_b[i],vect_c[i]);
    }
    printf("######################\n");
}

/*
 * Free the three host vectors and reset their pointers to NULL so that a
 * later printVects() or a second releaseVects() is harmless.
 */
void releaseVects()
{
    /* free(NULL) is a no-op, so unallocated vectors need no special case. */
    free(vect_a);
    free(vect_b);
    free(vect_c);

    vect_a = NULL;
    vect_b = NULL;
    vect_c = NULL;
}

/*
 * Round s up to the nearest multiple of f.
 *
 * f - the factor (must be non-zero)
 * s - the value to round
 * Returns the smallest multiple of f that is >= s.
 */
size_t shrRoundUp(size_t f , size_t s)
{
    /* Number of f-sized groups needed to cover s (ceiling division). */
    const size_t groups = (s + f - 1) / f;
    return groups * f;
}



void test()
{
    cl_int error = 0 ;
    cl_platform_id platform;
    cl_context context;
    cl_command_queue queue;
    cl_device_id device;
    cl_mem inbuf_a ;
    cl_mem inbuf_b ;
    cl_mem outbuf_r ;
    const cl_int size = vect_len;
    cl_int i ;
    const size_t mem_size = sizeof(float)*size;
    size_t program_len = strlen(program_src);
    char build_log;
    size_t log_size;
    size_t local_ws;
    size_t global_ws;
    cl_kernel vector_add_kernel;
   
    error = clGetPlatformIDs(1,&platform,NULL);
    if(error != CL_SUCCESS){
      printf("get platform id fail !\n");
      exit(1);
    }
   
    error = clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,1,&device,NULL);
    if(error != CL_SUCCESS){
      printf("get gpu device fail !\n");
      exit(1);
    }
   
    printDeviceWorkInfo(device);
   
    cl_context_properties properties[]={
      CL_CONTEXT_PLATFORM,
      (cl_context_properties)platform,
      0
    };
   
    // 这里要配置properties
    context = clCreateContext(properties,1,&device,NULL,NULL,&error);
    if(error != CL_SUCCESS){
      printf("create context fail !\n");
      exit(1);
    }
   
    queue = clCreateCommandQueue(context,device,CL_QUEUE_PROFILING_ENABLE,&error);
    if(error != CL_SUCCESS){
      printf("create command queue fail !\n");
      exit(1);
    }
   
    initVects();
    printVects();
   
    inbuf_a = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,mem_size,vect_a,&error);
    if(error!=CL_SUCCESS){
      printf("create buffer inbuf_a fail !\n");
      exit(1);
    }
    inbuf_b = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,mem_size,vect_b,&error);
    if(error!=CL_SUCCESS){
      printf("create buffer inbuf_b fail !\n");
      exit(1);
    }
    outbuf_r = clCreateBuffer(context,CL_MEM_WRITE_ONLY,mem_size,NULL,&error);
    if(error!=CL_SUCCESS){
      printf("create buffer outbuf_r fail !\n");
      exit(1);
    }
   
    cl_program program = clCreateProgramWithSource(context,1,&program_src,&program_len,&error);
    if(error!=CL_SUCCESS){
      printf("create program fail !\n");
      exit(1);
    }
    error = clBuildProgram(program,1,&device,NULL,NULL,NULL);
    if(error!=CL_SUCCESS){
      printf("build program fail !\n");
      clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,1024,build_log,&log_size);
      printf("build_log : %s\n",build_log);
      exit(1);
    }
   
    vector_add_kernel = clCreateKernel(program,"vector_add_gpu",&error);
    if(error!=CL_SUCCESS){
      printf("create kernel fail !\n");
      exit(1);
    }
   
    error = clSetKernelArg(vector_add_kernel,0,sizeof(cl_mem),&inbuf_a);
    error |= clSetKernelArg(vector_add_kernel,1,sizeof(cl_mem),&inbuf_b);
    error |= clSetKernelArg(vector_add_kernel,2,sizeof(cl_mem),&outbuf_r);
    error |= clSetKernelArg(vector_add_kernel,3,sizeof(cl_int),&size);
    if(error!=CL_SUCCESS){
      printf("set kernel arg fail !\n");
      exit(1);
    }
   
    local_ws = 256;//我们使用一维的clEnqueueNDRangeKernel,这里local_ws选择nMaxWorkItemSizes=256
    global_ws = shrRoundUp(local_ws,size); //这里是线程总数,应该是local_ws的倍数。
    printf("local_ws=%d,global_ws=%d\n",local_ws,global_ws);
   
    error = clEnqueueNDRangeKernel(queue,vector_add_kernel,1,NULL,&global_ws,&local_ws,0,NULL,NULL);
    if(error!=CL_SUCCESS){
      printf("enqueue kernel fail !\n");
      exit(1);
    }
   
    clEnqueueReadBuffer(queue,outbuf_r,CL_TRUE,0,mem_size,vect_c,0,NULL,NULL);
    printVects();
   
    clReleaseKernel(vector_add_kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);
    clReleaseMemObject(inbuf_a);
    clReleaseMemObject(inbuf_b);
    clReleaseMemObject(outbuf_r);
    releaseVects();
}

/* Program entry point: runs the vector-addition demo once. */
int main(void)
{
    test();
    return 0;
}
这里说一下容易出错的地方:
(1)clCreateContext,网上有些例子把第一个参数置成0,经试验这样不行,要设置properties。
(2)clEnqueueNDRangeKernel的global_work_size和local_work_size。我们使用一维的clEnqueueNDRangeKernel,这里local_work_size选择nMaxWorkItemSizes[0]=256,global_work_size是线程总数,应该是local_work_size的倍数。
一维情况下:


二维情况下:



(3)kernel代码中
size_t get_global_id (uint dimindx)
返回这个线程的global_id,参数是维度索引,我们一维的情况下参数是0。


最后运行结果:


再加一个nv12转rgb的例子:
#include <stdio.h>
#include <stdlib.h>
#include <opencl.h>
#include <sys/time.h>

/*
 * Print the work-partitioning limits of a device: number of compute units,
 * number of work-item dimensions, the per-dimension work-item limits and the
 * maximum work-group size. Attributes whose query fails are silently skipped.
 */
void printDeviceWorkInfo(cl_device_id device)
{
    cl_uint nMaxComputeUnits = 0;
    cl_uint nMaxWorkItemDims = 0;
    cl_uint i = 0;
    size_t* nMaxWorkItemSizes = NULL;
    size_t nMaxWorkGroupSize = 0;
    size_t size = 0 ;
    cl_int err ;

    err = clGetDeviceInfo(device,CL_DEVICE_MAX_COMPUTE_UNITS,sizeof(cl_uint),&nMaxComputeUnits,&size);
    if(err==CL_SUCCESS){
        printf("nMaxComputeUnits=%u\n",(unsigned)nMaxComputeUnits);
    }

    err = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(cl_uint),&nMaxWorkItemDims,&size);
    if(err==CL_SUCCESS){
        printf("nMaxWorkItemDims=%u\n",(unsigned)nMaxWorkItemDims);
        nMaxWorkItemSizes = (size_t*)malloc(sizeof(size_t)*nMaxWorkItemDims);
        if(nMaxWorkItemSizes){
            err = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_SIZES,sizeof(size_t)*nMaxWorkItemDims,nMaxWorkItemSizes,&size);
            if(err==CL_SUCCESS){
                for(i=0;i<nMaxWorkItemDims;i++){
                    /* Print the i-th element (not the array pointer), using
                     * %zu because the values are size_t. */
                    printf("nMaxWorkItemSizes[%u]=%zu\n",(unsigned)i,nMaxWorkItemSizes[i]);
                }
            }
        }
        free(nMaxWorkItemSizes);
    }

    err = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_GROUP_SIZE,sizeof(size_t),&nMaxWorkGroupSize,&size);
    if(err==CL_SUCCESS){
        printf("nMaxWorkGroupSize=%zu\n",nMaxWorkGroupSize);
    }
}

/*
 * OpenCL C source of the NV12 -> packed RGB kernel.
 *
 * One work-item converts one pixel (idi = column, idj = row). The luma
 * sample is read from the full-resolution Y plane; the interleaved U,V pair
 * shared by each 2x2 pixel block is read from the plane that starts at
 * width*height. Each output channel is clamped to 0..255 and written to
 * rgb[(idj*width+idi)*3 + {0,1,2}].
 *
 * NOTE(review): the array subscripts were lost in the posted listing and
 * are reconstructed here from the offsets the kernel computes into `k` and
 * from the NV12 layout (Y plane followed by interleaved U,V) - confirm
 * against the author's original file.
 */
const char* program_src = ""
"__kernel void nv12_to_rgb (__global const unsigned char* nv12,\n"
"   __global unsigned char* rgb,\n"
"   const int width,\n"
"   const int height)\n"
"{\n"
"   int idi = get_global_id(0);\n"
"   int idj = get_global_id(1);\n"
"   int k = 0 ;\n"
"   int y = 0 ;\n"
"   int u = 0 ;\n"
"   int v = 0 ;\n"
"   int t = 0 ;\n"
"   if(idi<width && idj<height){\n"
"       y=nv12[idj*width+idi];\n"
"       k=width*height+((idj/2)*(width/2)+idi/2)*2;\n"
"       u=nv12[k];\n"
"       v=nv12[k+1];\n"
"       k=(idj*width+idi)*3;\n"
"       t=(int)(y+1.370705*v-175.4502);\n"
"       rgb[k]=t>255?255:t<0?0:t;\n"
"       t=(int)(y-0.698001*v-0.337633*u+132.56124);\n"
"       rgb[k+1]=t>255?255:t<0?0:t;\n"
"       t=(int)(y+1.732446*u-221.7531);\n"
"       rgb[k+2]=t>255?255:t<0?0:t;\n"
"   }\n"
"}\n"
;

/*
 * Round s up to the nearest multiple of f (f must be non-zero).
 * Used to make a global work size a whole number of work-groups.
 */
size_t shrRoundUp(size_t f , size_t s)
{
    size_t padded = s + f - 1;   /* push s past the next multiple boundary */
    size_t whole = padded / f;   /* count of complete f-sized groups       */
    return whole * f;
}

void nv12_to_rgb(uint8_t* rgb , uint8_t* nv12 , int width , int height)
{
    cl_int error = 0 ;
    cl_platform_id platform;
    cl_context context;
    cl_command_queue queue;
    cl_device_id device;
    cl_mem inbuf_nv12 ;
    cl_mem outbuf_rgb ;
    cl_int i ;
    size_t program_len = strlen(program_src);
    char build_log;
    size_t log_size;
    size_t local_ws;
    size_t global_ws;
    cl_kernel nv12_to_rgb;
   
    cl_int nv12_size = width*height*3/2;
    cl_int rgb_size = width*height*3;
   
    error = clGetPlatformIDs(1,&platform,NULL);
    if(error != CL_SUCCESS){
      printf("get platform id fail !\n");
      exit(1);
    }
   
    error = clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,1,&device,NULL);
    if(error != CL_SUCCESS){
      printf("get gpu device fail !\n");
      exit(1);
    }
   
    printDeviceWorkInfo(device);
   
   
    cl_context_properties properties[]={
      CL_CONTEXT_PLATFORM,
      (cl_context_properties)platform,
      0
    };
   
    // 这里要配置properties
    context = clCreateContext(properties,1,&device,NULL,NULL,&error);
    if(error != CL_SUCCESS){
      printf("create context fail !\n");
      exit(1);
    }
   
    queue = clCreateCommandQueue(context,device,CL_QUEUE_PROFILING_ENABLE,&error);
    if(error != CL_SUCCESS){
      printf("create command queue fail !\n");
      exit(1);
    }
   
    cl_program program = clCreateProgramWithSource(context,1,&program_src,&program_len,&error);
    if(error!=CL_SUCCESS){
      printf("create program fail !\n");
      exit(1);
    }
    error = clBuildProgram(program,1,&device,NULL,NULL,NULL);
    if(error!=CL_SUCCESS){
      printf("build program fail !\n");
      clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,1024,build_log,&log_size);
      printf("build_log : %s\n",build_log);
      exit(1);
    }
   
    nv12_to_rgb = clCreateKernel(program,"nv12_to_rgb",&error);
    if(error!=CL_SUCCESS){
      printf("create kernel fail !\n");
      exit(1);
    }

    struct timeval val;
    gettimeofday(&val,NULL);
    long t1 = val.tv_sec*1000000 + val.tv_usec;

    inbuf_nv12 = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,nv12_size,nv12,&error);
    if(error!=CL_SUCCESS){
      printf("create buffer inbuf_a fail !\n");
      exit(1);
    }
    outbuf_rgb = clCreateBuffer(context,CL_MEM_WRITE_ONLY,rgb_size,NULL,&error);
    if(error!=CL_SUCCESS){
      printf("create buffer outbuf_r fail !\n");
      exit(1);
    }
   
    error = clSetKernelArg(nv12_to_rgb,0,sizeof(cl_mem),&inbuf_nv12);
    error |= clSetKernelArg(nv12_to_rgb,1,sizeof(cl_mem),&outbuf_rgb);
    error |= clSetKernelArg(nv12_to_rgb,2,sizeof(cl_int),&width);
    error |= clSetKernelArg(nv12_to_rgb,3,sizeof(cl_int),&height);
   
    if(error!=CL_SUCCESS){
      printf("set kernel arg fail !\n");
      exit(1);
    }
   
    size_t lws = {16,16};
    size_t gws;
    gws = shrRoundUp(lws,width);
    gws = shrRoundUp(lws,height);
    printf("lws={%d,%d},gws={%d,%d}\n",lws,lws,gws,gws);
   
    error = clEnqueueNDRangeKernel(queue,nv12_to_rgb,2,NULL,gws,lws,0,NULL,NULL);
    if(error!=CL_SUCCESS){
      printf("enqueue kernel fail !\n");
      exit(1);
    }
   
    clEnqueueReadBuffer(queue,outbuf_rgb,CL_TRUE,0,rgb_size,rgb,0,NULL,NULL);
   

    clReleaseMemObject(inbuf_nv12);
    clReleaseMemObject(outbuf_rgb);   

   
    gettimeofday(&val,NULL);
    long t2 = val.tv_sec*1000000 + val.tv_usec;

    printf("nv12_to_rgb spend %ld (us)\n",t2-t1);

    clReleaseKernel(nv12_to_rgb);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);
}

/*
 * Program entry point: converts one all-zero 1920x1080 NV12 frame to RGB,
 * prints the total time taken, then dumps the top-left and bottom-right
 * 8x8 pixel corners of the result as hexadecimal (r,g,b) triples.
 * Returns 1 if the image buffers cannot be allocated, 0 otherwise.
 */
int main(){
    uint8_t* nv12;
    uint8_t* rgb;
    int width=1920;
    int height=1080;
    int i,j,k;
    size_t nv12_size = (size_t)width*height*3/2;
    size_t rgb_size = (size_t)width*height*3;
    struct timeval start;
    struct timeval stop;
    long elapsed_us;

    nv12=(uint8_t*)malloc(nv12_size);
    rgb=(uint8_t*)malloc(rgb_size);
    if(!nv12 || !rgb){
        printf("allocate image buffers fail !\n");
        free(nv12);
        free(rgb);
        return 1;
    }

    memset(nv12,0,nv12_size);
    memset(rgb,0,rgb_size);

    gettimeofday(&start,NULL);
    nv12_to_rgb(rgb,nv12,width,height);
    gettimeofday(&stop,NULL);
    /* Subtract before scaling: tv_sec*1000000 on its own overflows a 32-bit
     * long, while the difference of two nearby timestamps does not. */
    elapsed_us = (long)(stop.tv_sec-start.tv_sec)*1000000L
               + (long)(stop.tv_usec-start.tv_usec);
    printf("nv12_to_rgb spend %ld (us)\n",elapsed_us);

    /* Top-left 8x8 pixels. */
    for(j=0;j<8;j++){
        for(i=0;i<8;i++){
            /* k is the byte offset of pixel (i,j); its three channels are
             * at k, k+1 and k+2. */
            k=(j*width+i)*3;
            printf("(%02x,%02x,%02x) ",rgb[k],rgb[k+1],rgb[k+2]);
        }
        printf("\n");
    }
    printf("......\n");
    /* Bottom-right 8x8 pixels. */
    for(j=height-8;j<height;j++){
        for(i=width-8;i<width;i++){
            k=(j*width+i)*3;
            printf("(%02x,%02x,%02x) ",rgb[k],rgb[k+1],rgb[k+2]);
        }
        printf("\n");
    }

    free(nv12);
    free(rgb);
    return 0;
}
这是一个二维的例子,local_work_item[0]*local_work_item[1]不能超过最大的workitem数,即 local_work_item[0]*local_work_item[1]<=256,所以取
local_work_item[0]=local_work_item[1]=16

duoduomu 发表于 2015-10-9 10:15:07

bunchen 发表于 2015-10-9 10:08
二楼待用

不错,不过还可以参考http://malideveloper.arm.com/downloads/tools/oclsdk/Mali_OpenCL_SDK_v1.1.0.0a36a7_Linux.tgz 这个SDK,里面都有,还有demo!

bunchen 发表于 2015-10-9 10:25:31

duoduomu 发表于 2015-10-9 10:15
不错,不过还可以参考http://malideveloper.arm.com/downloads/tools/oclsdk/Mali_OpenCL_SDK_v1.1.0.0a3 ...

谢谢,之前一直想下载mali的sdk来看,奈何下载页面总是进不去,谢谢分享。

duoduomu 发表于 2015-10-9 11:01:37

bunchen 发表于 2015-10-9 10:25
谢谢,之前一直想下载mali的sdk来看,奈何下载页面总是进不去,谢谢分享。

哈哈 的确你按常理下载是不行的 !

fxlsunny 发表于 2015-10-15 14:50:24

不错正打算做opencl的事,不过想在Ubuntu下做,不知3288是否可行?

ff_20150814 发表于 2015-12-7 15:30:09

楼主大神你好,请问你的那个NV12转RGB耗时多少呢,GPU的频率是跑的多少M?

tk1user 发表于 2016-3-23 09:36:41

太厉害了,顶起!

总有刁民想害朕 发表于 2016-4-26 09:43:15

printOpenCL got 1 platforms :
         platform index=0 profile : FULL_PROFILE
         platform index=0 version : OpenCL 1.1 v1.r6p0-02rel0.0f4218be5cc66c20a4
f31b6cc856ee46
         platform index=0 name : ARM Platform
         platform index=0 vendor : ARM
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range , but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
         platform index=0 extensions : out of host memory.
Device cpu:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range , but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
          getDeviceIDs fail !
Device gpu:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range , but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
          getDeviceIDs fail !
Device accelerator:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range , but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
          getDeviceIDs fail !
Device default:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range , but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
          getDeviceIDs fail !
Device all :
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range , but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
          getDeviceIDs fail !

Eric.y 发表于 2016-4-30 16:59:30

duoduomu 发表于 2015-10-9 10:15
不错,不过还可以参考http://malideveloper.arm.com/downloads/tools/oclsdk/Mali_OpenCL_SDK_v1.1.0.0a3 ...

Android-OpenCL-v1.1.zip和ARM官网的SDK一样吗,我没灯泡下载不了
页: [1] 2 3
查看完整版本: Firefly rk3288 OpenCL