Firefly rk3288 OpenCL

bunchen 发表于 2015-10-9 09:46:03

本帖最后由 bunchen 于 2015-10-9 10:20 编辑

Firefly rk3288采用Mali-T764的GPU，该GPU支持OpenCL 1.1。下面一步一步介绍android下OpenCL开发。

1.首先我们需要OpenCL的头文件和库。头文件可以在khronos的网站上下载：https://www.khronos.org/registry/cl/ 。由于Mali-T764支持OpenCL 1.1，所以我们下载1.1版的头文件并放到对应位置。新建include目录，把opencl.h放在include目录下，把cl_d3d10.h 、 cl_ext.h 、 cl_gl_ext.h 、 cl_gl.h 、 cl.h 、 cl.hpp 、 cl_platform.h放到include/CL目录下。OpenCL的库文件位于firefly rk3288源代码的device/rockchip/common/gpu/libMali-T760/libGLES_mali.so。

2.然后就可以在此基础上开发了。下面给出一个打印OpenCL信息的Demo。由于libGLES_mali.so并没有放入android的/system/lib路径下，所以我们要在Demo的Android.mk文件中对libGLES_mali.so做预编译，Android.mk文件如下：
LOCAL_PATH := $(call my-dir)

# Module 1: register the prebuilt Mali library so the executable below can
# link against it (the library is not in /system/lib on the target).
include $(CLEAR_VARS)
LOCAL_PREBUILT_LIBS := \
../opencl/lib/libGLES_mali.so
include $(BUILD_MULTI_PREBUILT)

# Module 2: the opencl-info command-line executable.
include $(CLEAR_VARS)

# Directory holding opencl.h and the CL/ header sub-directory.
LOCAL_C_INCLUDES := \
   $(LOCAL_PATH)/../opencl/include

LOCAL_SRC_FILES := \
   opencl-info.c

# Module name of the prebuilt library declared above (file name without .so).
LOCAL_SHARED_LIBRARIES := \
   libGLES_mali

LOCAL_MODULE := opencl-info

include $(BUILD_EXECUTABLE)
在opencl-info.c中我们打印一些OpenCL的信息：
#include <stdio.h>
#include <stdlib.h>
#include <opencl.h>

/**
 * Print the result of one clGetPlatformInfo query, or a readable reason
 * when the query failed.
 *
 * @param ret   status code returned by the query
 * @param i     index of the platform being described
 * @param part  label of the queried property (e.g. "profile")
 * @param buf   NUL-terminated property value; only read when ret is CL_SUCCESS
 */
void printPlatformInfo(cl_int ret ,cl_uint i, const char* part , char* buf)
{
    /* i is unsigned, so it is printed with %u rather than %d. */
    switch (ret) {
    case CL_SUCCESS:
        printf("\t platform index=%u %s : %s\n", i, part, buf);
        break;
    case CL_INVALID_PLATFORM:
        printf("\t platform index=%u %s : invalid platform.\n", i, part);
        break;
    case CL_INVALID_VALUE:
        printf("\t platform index=%u %s : invalid value.\n", i, part);
        break;
    case CL_OUT_OF_HOST_MEMORY:
        printf("\t platform index=%u %s : out of host memory.\n", i, part);
        break;
    default:
        printf("\t platform index=%u %s : i don't know why.\n", i, part);
        break;
    }
}

void printDevice(cl_device_id device)
{
char buf;
size_t size = 0 ;
cl_device_type type;
cl_int ret = CL_SUCCESS;
printf("\t ######################\n");
ret = clGetDeviceInfo(device,CL_DEVICE_NAME,128,buf,&size);
if(ret==CL_SUCCESS){
   printf("\t device name=%s\n",buf);
}else{
   printf("\t get device name fail !\n");
}

ret = clGetDeviceInfo(device,CL_DEVICE_TYPE,sizeof(cl_device_type),&type,&size);
if(ret==CL_SUCCESS){
   printf("\t device type=%lu\n",type);
}else{
   printf("\t get device type fail !\n");
}

ret = clGetDeviceInfo(device,CL_DEVICE_VENDOR,128,buf,&size);
if(ret==CL_SUCCESS){
   printf("\t device vendor=%s\n",buf);
}else{
   printf("\t get device vendor fail !\n");
}
ret = clGetDeviceInfo(device,CL_DRIVER_VERSION,128,buf,&size);
if(ret==CL_SUCCESS){
   printf("\t device version=%s\n",buf);
}else{
   printf("\t get device version fail !\n");
}
ret = clGetDeviceInfo(device,CL_DEVICE_PROFILE,128,buf,&size);
if(ret==CL_SUCCESS){
   printf("\t device profile=%s\n",buf);
}else{
   printf("\t get device profile fail !\n");
}
printf("\t ######################\n");
}

/**
 * Enumerate the devices of `device_type` on `platform` and print each one
 * with printDevice(). At most 8 devices are listed.
 *
 * @param platform     platform to enumerate
 * @param device_type  CL_DEVICE_TYPE_* selector
 */
void printDevices(cl_platform_id platform , cl_device_type device_type)
{
    enum { MAX_DEVICES = 8 };
    cl_device_id devices[MAX_DEVICES];
    cl_uint num = 0;
    cl_uint i;
    cl_int ret = clGetDeviceIDs(platform, device_type, MAX_DEVICES, devices, &num);

    if (ret != CL_SUCCESS) {
        printf("\tgetDeviceIDs fail !\n");
        return;
    }

    printf("\tgetDeviceIDs success ! num=%u\n", num);

    /* num is the number of matching devices, which may be larger than the
     * number of entries actually written into the array. */
    if (num > MAX_DEVICES) {
        num = MAX_DEVICES;
    }
    for (i = 0; i < num; i++) {
        printDevice(devices[i]);
    }
}

void printOpenCL()
{
   cl_platform_id platforms;
   cl_uint num = 0 ;
   cl_uint i=0;
   cl_int err = clGetPlatformIDs(8,platforms,&num);
   if(err==CL_SUCCESS){
            printf("printOpenCL got %d platforms : \n",num);
            for(i=0;i<num;i++){
               char buf;
               size_t size = 0 ;
               cl_int ret = 0 ;

               ret = clGetPlatformInfo(platforms,CL_PLATFORM_PROFILE,128,buf,&size);
               printPlatformInfo(ret,i,"profile",buf);

            ret = clGetPlatformInfo(platforms,CL_PLATFORM_VERSION,128,buf,&size);
               printPlatformInfo(ret,i,"version",buf);

            ret = clGetPlatformInfo(platforms,CL_PLATFORM_NAME,128,buf,&size);
            printPlatformInfo(ret,i,"name",buf);

               ret = clGetPlatformInfo(platforms,CL_PLATFORM_VENDOR,128,buf,&size);
            printPlatformInfo(ret,i,"vendor",buf);

            ret = clGetPlatformInfo(platforms,CL_PLATFORM_EXTENSIONS,128,buf,&size);
            printPlatformInfo(ret,i,"extensions",buf);

            printf("Device cpu:\n");
            printDevices(platforms,CL_DEVICE_TYPE_CPU);

            printf("Device gpu:\n");
            printDevices(platforms,CL_DEVICE_TYPE_GPU);

            printf("Device accelerator:\n");
            printDevices(platforms,CL_DEVICE_TYPE_ACCELERATOR);

            printf("Device default:\n");
            printDevices(platforms,CL_DEVICE_TYPE_DEFAULT);

            printf("Device all :\n");
            printDevices(platforms,CL_DEVICE_TYPE_ALL);

            }
   }else if(err==CL_INVALID_VALUE){
            printf("printOpenCL invalid value.\n");
   }else if(err==CL_OUT_OF_HOST_MEMORY){
            printf("printOpenCL out of host memory.\n");
   }else{
            printf("printOpenCL i don't know why.\n");
   }

}

int main(){
   printOpenCL();
   return 0;
}
这里打印的信息请参考OpenCL文档：https://www.khronos.org/registry/cl/specs/opencl-1.1.pdf
把编译好的opencl-info push到/system/bin目录下，把libGLES_mali.so push到/system/lib目录下，就可以在adb shell中运行opencl-info了。

从打印的信息可以看到，rk3288支持OpenCL 1.1，支持的设备是gpu的Mali-T764。

bunchen 发表于 2015-10-9 10:08:44

本帖最后由 bunchen 于 2015-10-28 16:23 编辑

这里贴出一个简单的向量加法的例子，并说明容易出错的地方。先上代码：
#include <stdio.h>
#include <stdlib.h>
#include <opencl.h>

/**
 * Print the work-size limits of `device`: number of compute units, number
 * of work-item dimensions, the maximum work-item count per dimension and
 * the maximum work-group size. A limit whose query fails is silently
 * skipped.
 *
 * @param device  device to query
 */
void printDeviceWorkInfo(cl_device_id device)
{
    cl_uint nMaxComputeUnits = 0;
    cl_uint nMaxWorkItemDims = 0;
    size_t nMaxWorkGroupSize = 0;
    cl_uint i;
    cl_int err;

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS,
                          sizeof nMaxComputeUnits, &nMaxComputeUnits, NULL);
    if (err == CL_SUCCESS) {
        printf("nMaxComputeUnits=%u\n", nMaxComputeUnits);
    }

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                          sizeof nMaxWorkItemDims, &nMaxWorkItemDims, NULL);
    if (err == CL_SUCCESS) {
        printf("nMaxWorkItemDims=%u\n", nMaxWorkItemDims);

        /* One size_t per dimension; the dimension count is only known now. */
        size_t *nMaxWorkItemSizes = malloc(sizeof *nMaxWorkItemSizes * nMaxWorkItemDims);
        if (nMaxWorkItemSizes != NULL) {
            err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
                                  sizeof *nMaxWorkItemSizes * nMaxWorkItemDims,
                                  nMaxWorkItemSizes, NULL);
            if (err == CL_SUCCESS) {
                for (i = 0; i < nMaxWorkItemDims; i++) {
                    printf("nMaxWorkItemSizes[%u]=%zu\n", i, nMaxWorkItemSizes[i]);
                }
            }
        }
        free(nMaxWorkItemSizes);
    }

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
                          sizeof nMaxWorkGroupSize, &nMaxWorkGroupSize, NULL);
    if (err == CL_SUCCESS) {
        printf("nMaxWorkGroupSize=%zu\n", nMaxWorkGroupSize);
    }
}

/*
 * OpenCL C source of the vector_add_gpu kernel: work-item idx writes
 * res[idx] = src_a[idx] + src_b[idx]. Work-items with idx >= num do nothing,
 * so the global size may be rounded up past the vector length.
 */
const char* program_src = ""
"__kernel void vector_add_gpu (__global const float* src_a,\n"
" __global const float* src_b,\n"
" __global float* res,\n"
" const int num)\n"
"{\n"
" int idx = get_global_id(0);\n"
" if(idx<num){\n"
"    res[idx]=src_a[idx]+src_b[idx];\n"
" }\n"
"}\n"
;

/* Number of float elements in each demo vector; test() also passes it to
 * the kernel as the `num` argument. */
static const cl_int vect_len = 10000000;

/* Host-side vectors, allocated by initVects() and freed by releaseVects().
 * test() uses vect_a and vect_b as the host memory of the two input buffers
 * and reads the kernel result back into vect_c. */
static float* vect_a = NULL ;
static float* vect_b = NULL ;
static float* vect_c = NULL ;

/**
 * Allocate the three host vectors (vect_len floats each). vect_a and vect_b
 * are filled with pseudo-random values in [0, 1], vect_c with zeros.
 * Terminates the process if any allocation fails.
 */
void initVects()
{
    cl_int i;

    vect_a = malloc(sizeof *vect_a * vect_len);
    vect_b = malloc(sizeof *vect_b * vect_len);
    vect_c = malloc(sizeof *vect_c * vect_len);
    if (vect_a == NULL || vect_b == NULL || vect_c == NULL) {
        printf("alloc vectors fail !\n");
        exit(1);
    }

    for (i = 0; i < vect_len; i++) {
        vect_a[i] = (float)rand() / RAND_MAX;
        vect_b[i] = (float)rand() / RAND_MAX;
        vect_c[i] = 0.0f;
    }
}

/* Print rows [from, to) of the three vectors, one "index : a,b,c" line each. */
static void printVectRows(cl_int from, cl_int to)
{
    cl_int i;

    for (i = from; i < to; i++) {
        printf("%08d : %f,%f,%f\n", i, vect_a[i], vect_b[i], vect_c[i]);
    }
}

/**
 * Print the first four and the last four elements of vect_a, vect_b and
 * vect_c between separator lines. Does nothing unless all three vectors
 * have been allocated.
 */
void printVects()
{
    if (!(vect_a && vect_b && vect_c)) {
        return;
    }

    printf("######################\n");
    printVectRows(0, 4);
    printf(" ... \n");
    printVectRows(vect_len - 4, vect_len);
    printf("######################\n");
}

/**
 * Free the three host vectors and reset their pointers to NULL, so that a
 * second call (or a call before initVects()) is harmless.
 */
void releaseVects()
{
    float **slots[] = { &vect_a, &vect_b, &vect_c };
    size_t n;

    for (n = 0; n < sizeof slots / sizeof slots[0]; n++) {
        /* free(NULL) is a no-op, so no per-pointer guard is required. */
        free(*slots[n]);
        *slots[n] = NULL;
    }
}

/**
 * Round s up to the nearest multiple of f.
 *
 * @param f  granularity (the multiple to round to); must be non-zero
 * @param s  value to round up
 * @return   smallest multiple of f that is >= s
 */
size_t shrRoundUp(size_t f , size_t s)
{
    const size_t groups = (s + f - 1) / f;   /* ceil(s / f) */

    return groups * f;
}

void test()
{
cl_int error = 0 ;
cl_platform_id platform;
cl_context context;
cl_command_queue queue;
cl_device_id device;
cl_mem inbuf_a ;
cl_mem inbuf_b ;
cl_mem outbuf_r ;
const cl_int size = vect_len;
cl_int i ;
const size_t mem_size = sizeof(float)*size;
size_t program_len = strlen(program_src);
char build_log;
size_t log_size;
size_t local_ws;
size_t global_ws;
cl_kernel vector_add_kernel;

error = clGetPlatformIDs(1,&platform,NULL);
if(error != CL_SUCCESS){
   printf("get platform id fail !\n");
   exit(1);
}

error = clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,1,&device,NULL);
if(error != CL_SUCCESS){
   printf("get gpu device fail !\n");
   exit(1);
}

printDeviceWorkInfo(device);

cl_context_properties properties[]={
   CL_CONTEXT_PLATFORM,
   (cl_context_properties)platform,
   0
};

// 这里要配置properties
context = clCreateContext(properties,1,&device,NULL,NULL,&error);
if(error != CL_SUCCESS){
   printf("create context fail !\n");
   exit(1);
}

queue = clCreateCommandQueue(context,device,CL_QUEUE_PROFILING_ENABLE,&error);
if(error != CL_SUCCESS){
   printf("create command queue fail !\n");
   exit(1);
}

initVects();
printVects();

inbuf_a = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,mem_size,vect_a,&error);
if(error!=CL_SUCCESS){
   printf("create buffer inbuf_a fail !\n");
   exit(1);
}
inbuf_b = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,mem_size,vect_b,&error);
if(error!=CL_SUCCESS){
   printf("create buffer inbuf_b fail !\n");
   exit(1);
}
outbuf_r = clCreateBuffer(context,CL_MEM_WRITE_ONLY,mem_size,NULL,&error);
if(error!=CL_SUCCESS){
   printf("create buffer outbuf_r fail !\n");
   exit(1);
}

cl_program program = clCreateProgramWithSource(context,1,&program_src,&program_len,&error);
if(error!=CL_SUCCESS){
   printf("create program fail !\n");
   exit(1);
}
error = clBuildProgram(program,1,&device,NULL,NULL,NULL);
if(error!=CL_SUCCESS){
   printf("build program fail !\n");
   clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,1024,build_log,&log_size);
   printf("build_log : %s\n",build_log);
   exit(1);
}

vector_add_kernel = clCreateKernel(program,"vector_add_gpu",&error);
if(error!=CL_SUCCESS){
   printf("create kernel fail !\n");
   exit(1);
}

error = clSetKernelArg(vector_add_kernel,0,sizeof(cl_mem),&inbuf_a);
error |= clSetKernelArg(vector_add_kernel,1,sizeof(cl_mem),&inbuf_b);
error |= clSetKernelArg(vector_add_kernel,2,sizeof(cl_mem),&outbuf_r);
error |= clSetKernelArg(vector_add_kernel,3,sizeof(cl_int),&size);
if(error!=CL_SUCCESS){
   printf("set kernel arg fail !\n");
   exit(1);
}

local_ws = 256;//我们使用一维的clEnqueueNDRangeKernel，这里local_ws选择nMaxWorkItemSizes=256
global_ws = shrRoundUp(local_ws,size); //这里是线程总数，应该是local_ws的倍数。
printf("local_ws=%d,global_ws=%d\n",local_ws,global_ws);

error = clEnqueueNDRangeKernel(queue,vector_add_kernel,1,NULL,&global_ws,&local_ws,0,NULL,NULL);
if(error!=CL_SUCCESS){
   printf("enqueue kernel fail !\n");
   exit(1);
}

clEnqueueReadBuffer(queue,outbuf_r,CL_TRUE,0,mem_size,vect_c,0,NULL,NULL);
printVects();

clReleaseKernel(vector_add_kernel);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
clReleaseMemObject(inbuf_a);
clReleaseMemObject(inbuf_b);
clReleaseMemObject(outbuf_r);
releaseVects();
}

/* Program entry point: runs the vector-addition demo once. */
int main()
{
    test();

    return 0;
}
这里说一下容易出错的地方：
（1）clCreateContext，网上有些例子把第一个参数置成0，经试验这样不行，要设置properties。
（2）clEnqueueNDRangeKernel的global_work_size和local_work_size。我们使用一维的clEnqueueNDRangeKernel，这里local_work_size选择nMaxWorkItemSizes[0]=256，global_work_size是线程总数，应该是local_work_size的倍数。
一维情况下：

二维情况下：

（3）kernel代码中
size_t get_global_id (uint dimindx)
返回这个线程的global_id,参数是维度索引，我们一维的情况下参数是0。

最后运行结果：

再加一个nv12转rgb的例子：
#include <stdio.h>
#include <stdlib.h>
#include <opencl.h>
#include <sys/time.h>

/**
 * Print the work-size limits of `device`: number of compute units, number
 * of work-item dimensions, the maximum work-item count per dimension and
 * the maximum work-group size. A limit whose query fails is silently
 * skipped.
 *
 * @param device  device to query
 */
void printDeviceWorkInfo(cl_device_id device)
{
    cl_uint nMaxComputeUnits = 0;
    cl_uint nMaxWorkItemDims = 0;
    size_t nMaxWorkGroupSize = 0;
    cl_uint i;
    cl_int err;

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS,
                          sizeof nMaxComputeUnits, &nMaxComputeUnits, NULL);
    if (err == CL_SUCCESS) {
        printf("nMaxComputeUnits=%u\n", nMaxComputeUnits);
    }

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                          sizeof nMaxWorkItemDims, &nMaxWorkItemDims, NULL);
    if (err == CL_SUCCESS) {
        printf("nMaxWorkItemDims=%u\n", nMaxWorkItemDims);

        /* One size_t per dimension; the dimension count is only known now. */
        size_t *nMaxWorkItemSizes = malloc(sizeof *nMaxWorkItemSizes * nMaxWorkItemDims);
        if (nMaxWorkItemSizes != NULL) {
            err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
                                  sizeof *nMaxWorkItemSizes * nMaxWorkItemDims,
                                  nMaxWorkItemSizes, NULL);
            if (err == CL_SUCCESS) {
                for (i = 0; i < nMaxWorkItemDims; i++) {
                    printf("nMaxWorkItemSizes[%u]=%zu\n", i, nMaxWorkItemSizes[i]);
                }
            }
        }
        free(nMaxWorkItemSizes);
    }

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
                          sizeof nMaxWorkGroupSize, &nMaxWorkGroupSize, NULL);
    if (err == CL_SUCCESS) {
        printf("nMaxWorkGroupSize=%zu\n", nMaxWorkGroupSize);
    }
}

/*
 * OpenCL C source of the nv12_to_rgb kernel. Work-item (idi, idj) converts
 * the pixel at column idi, row idj:
 *   - y is read from the luma plane at idj*width+idi;
 *   - u and v are read as an adjacent byte pair from the chroma data that
 *     starts at width*height, one pair per 2x2 pixel block;
 *   - the three results are clamped to 0..255 and stored at
 *     rgb[k], rgb[k+1], rgb[k+2] with k = (idj*width+idi)*3.
 * NOTE(review): the array indices were lost in the published listing and
 * have been reconstructed; the luma index in particular is inferred from
 * the layout implied by the chroma offset - confirm against the original.
 */
const char* program_src = ""
"__kernel void nv12_to_rgb (__global const unsigned char* nv12,\n"
" __global unsigned char* rgb,\n"
" const int width,\n"
" const int height)\n"
"{\n"
" int idi = get_global_id(0);\n"
" int idj = get_global_id(1);\n"
" int k = 0 ;\n"
" int y = 0 ;\n"
" int u = 0 ;\n"
" int v = 0 ;\n"
" int t = 0 ;\n"
" if(idi<width && idj<height){\n"
"    y=nv12[idj*width+idi];\n"
"    k=width*height+((idj/2)*(width/2)+idi/2)*2;\n"
"    u=nv12[k];\n"
"    v=nv12[k+1];\n"
"    k=(idj*width+idi)*3;\n"
"    t=(int)(y+1.370705*v-175.4502);\n"
"    rgb[k]=t>255?255:t<0?0:t;\n"
"    t=(int)(y-0.698001*v-0.337633*u+132.56124);\n"
"    rgb[k+1]=t>255?255:t<0?0:t;\n"
"    t=(int)(y+1.732446*u-221.7531);\n"
"    rgb[k+2]=t>255?255:t<0?0:t;\n"
" }\n"
"}\n"
;

/**
 * Round s up to the nearest multiple of f.
 *
 * @param f  granularity (the multiple to round to); must be non-zero
 * @param s  value to round up
 * @return   smallest multiple of f that is >= s
 */
size_t shrRoundUp(size_t f , size_t s)
{
    const size_t chunks = (s + f - 1) / f;   /* ceil(s / f) */

    return chunks * f;
}

void nv12_to_rgb(uint8_t* rgb , uint8_t* nv12 , int width , int height)
{
cl_int error = 0 ;
cl_platform_id platform;
cl_context context;
cl_command_queue queue;
cl_device_id device;
cl_mem inbuf_nv12 ;
cl_mem outbuf_rgb ;
cl_int i ;
size_t program_len = strlen(program_src);
char build_log;
size_t log_size;
size_t local_ws;
size_t global_ws;
cl_kernel nv12_to_rgb;

cl_int nv12_size = width*height*3/2;
cl_int rgb_size = width*height*3;

error = clGetPlatformIDs(1,&platform,NULL);
if(error != CL_SUCCESS){
   printf("get platform id fail !\n");
   exit(1);
}

error = clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,1,&device,NULL);
if(error != CL_SUCCESS){
   printf("get gpu device fail !\n");
   exit(1);
}

printDeviceWorkInfo(device);

cl_context_properties properties[]={
   CL_CONTEXT_PLATFORM,
   (cl_context_properties)platform,
   0
};

// 这里要配置properties
context = clCreateContext(properties,1,&device,NULL,NULL,&error);
if(error != CL_SUCCESS){
   printf("create context fail !\n");
   exit(1);
}

queue = clCreateCommandQueue(context,device,CL_QUEUE_PROFILING_ENABLE,&error);
if(error != CL_SUCCESS){
   printf("create command queue fail !\n");
   exit(1);
}

cl_program program = clCreateProgramWithSource(context,1,&program_src,&program_len,&error);
if(error!=CL_SUCCESS){
   printf("create program fail !\n");
   exit(1);
}
error = clBuildProgram(program,1,&device,NULL,NULL,NULL);
if(error!=CL_SUCCESS){
   printf("build program fail !\n");
   clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,1024,build_log,&log_size);
   printf("build_log : %s\n",build_log);
   exit(1);
}

nv12_to_rgb = clCreateKernel(program,"nv12_to_rgb",&error);
if(error!=CL_SUCCESS){
   printf("create kernel fail !\n");
   exit(1);
}

struct timeval val;
gettimeofday(&val,NULL);
long t1 = val.tv_sec*1000000 + val.tv_usec;

inbuf_nv12 = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,nv12_size,nv12,&error);
if(error!=CL_SUCCESS){
   printf("create buffer inbuf_a fail !\n");
   exit(1);
}
outbuf_rgb = clCreateBuffer(context,CL_MEM_WRITE_ONLY,rgb_size,NULL,&error);
if(error!=CL_SUCCESS){
   printf("create buffer outbuf_r fail !\n");
   exit(1);
}

error = clSetKernelArg(nv12_to_rgb,0,sizeof(cl_mem),&inbuf_nv12);
error |= clSetKernelArg(nv12_to_rgb,1,sizeof(cl_mem),&outbuf_rgb);
error |= clSetKernelArg(nv12_to_rgb,2,sizeof(cl_int),&width);
error |= clSetKernelArg(nv12_to_rgb,3,sizeof(cl_int),&height);

if(error!=CL_SUCCESS){
   printf("set kernel arg fail !\n");
   exit(1);
}

size_t lws = {16,16};
size_t gws;
gws = shrRoundUp(lws,width);
gws = shrRoundUp(lws,height);
printf("lws={%d,%d},gws={%d,%d}\n",lws,lws,gws,gws);

error = clEnqueueNDRangeKernel(queue,nv12_to_rgb,2,NULL,gws,lws,0,NULL,NULL);
if(error!=CL_SUCCESS){
   printf("enqueue kernel fail !\n");
   exit(1);
}

clEnqueueReadBuffer(queue,outbuf_rgb,CL_TRUE,0,rgb_size,rgb,0,NULL,NULL);

clReleaseMemObject(inbuf_nv12);
clReleaseMemObject(outbuf_rgb);

gettimeofday(&val,NULL);
long t2 = val.tv_sec*1000000 + val.tv_usec;

printf("nv12_to_rgb spend %ld (us)\n",t2-t1);

clReleaseKernel(nv12_to_rgb);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
}

/* Print the 8x8 block of RGB triplets whose top-left pixel is (x0, y0). */
static void printPixelBlock(const uint8_t *rgb, int width, int x0, int y0)
{
    int i;
    int j;

    for (j = y0; j < y0 + 8; j++) {
        for (i = x0; i < x0 + 8; i++) {
            size_t k = ((size_t)j * width + i) * 3;
            printf("(%02x,%02x,%02x) ", rgb[k], rgb[k + 1], rgb[k + 2]);
        }
        printf("\n");
    }
}

/**
 * Demo driver: converts an all-zero 1920x1080 NV12 frame with
 * nv12_to_rgb(), prints the total time of the call in microseconds, then
 * dumps the top-left and bottom-right 8x8 pixel blocks of the result.
 *
 * @return 0 on success, 1 if the frame buffers cannot be allocated
 */
int main(){
    const int width = 1920;
    const int height = 1080;
    const size_t nv12_size = (size_t)width * height * 3 / 2;
    const size_t rgb_size = (size_t)width * height * 3;

    /* calloc provides the zero-filled frames the demo starts from. */
    uint8_t *nv12 = calloc(nv12_size, 1);
    uint8_t *rgb = calloc(rgb_size, 1);
    if (nv12 == NULL || rgb == NULL) {
        printf("alloc image buffers fail !\n");
        free(nv12);
        free(rgb);
        return 1;
    }

    struct timeval start;
    struct timeval end;
    gettimeofday(&start, NULL);
    nv12_to_rgb(rgb, nv12, width, height);
    gettimeofday(&end, NULL);

    /* Subtract before scaling: tv_sec*1000000 does not fit a 32-bit long. */
    long elapsed_us = (long)(end.tv_sec - start.tv_sec) * 1000000L
                    + (long)(end.tv_usec - start.tv_usec);
    printf("nv12_to_rgb spend %ld (us)\n", elapsed_us);

    printPixelBlock(rgb, width, 0, 0);
    printf("......\n");
    printPixelBlock(rgb, width, width - 8, height - 8);

    free(nv12);
    free(rgb);
    return 0;
}
这是一个二维的例子，local_work_size[0]*local_work_size[1]不能超过最大的work-group大小（nMaxWorkGroupSize），即 local_work_size[0]*local_work_size[1]<=256，所以取
local_work_size[0]=local_work_size[1]=16

duoduomu 发表于 2015-10-9 10:15:07

bunchen 发表于 2015-10-9 10:08
二楼待用

不错，不过还可以参考http://malideveloper.arm.com/downloads/tools/oclsdk/Mali_OpenCL_SDK_v1.1.0.0a36a7_Linux.tgz 这个SDK，里面都有，还有demo！

bunchen 发表于 2015-10-9 10:25:31

duoduomu 发表于 2015-10-9 10:15
不错，不过还可以参考http://malideveloper.arm.com/downloads/tools/oclsdk/Mali_OpenCL_SDK_v1.1.0.0a3 ...

谢谢，之前一直想下载mali的sdk来看，奈何下载页面总是进不去，谢谢分享。

duoduomu 发表于 2015-10-9 11:01:37

bunchen 发表于 2015-10-9 10:25
谢谢，之前一直想下载mali的sdk来看，奈何下载页面总是进不去，谢谢分享。

哈哈的确你按常理下载是不行的！

fxlsunny 发表于 2015-10-15 14:50:24

不错正打算做opencl的事，不过想在Ubuntu下做，不知3288是否可行？

ff_20150814 发表于 2015-12-7 15:30:09

楼主大神你好，请问你的那个NV12转RGB耗时多少呢，GPU的频率是跑的多少M?

tk1user 发表于 2016-3-23 09:36:41

太厉害了，顶起！

总有刁民想害朕 发表于 2016-4-26 09:43:15

printOpenCL got 1 platforms :
      platform index=0 profile : FULL_PROFILE
      platform index=0 version : OpenCL 1.1 v1.r6p0-02rel0.0f4218be5cc66c20a4
f31b6cc856ee46
      platform index=0 name : ARM Platform
      platform index=0 vendor : ARM
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range , but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
      platform index=0 extensions : out of host memory.
Device cpu:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range , but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
      getDeviceIDs fail !
Device gpu:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range , but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
      getDeviceIDs fail !
Device accelerator:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range , but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
      getDeviceIDs fail !
Device default:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range , but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
      getDeviceIDs fail !
Device all :
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range , but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
      getDeviceIDs fail !

Eric.y 发表于 2016-4-30 16:59:30

duoduomu 发表于 2015-10-9 10:15
不错，不过还可以参考http://malideveloper.arm.com/downloads/tools/oclsdk/Mali_OpenCL_SDK_v1.1.0.0a3 ...

Android-OpenCL-v1.1.zip和ARM官网的SDK一样吗，我没灯泡下载不了

页: [1] 2 3

Firefly开源社区's Archiver

Firefly rk3288 OpenCL