Firefly rk3288 OpenCL

bunchen · 发表于 2015-10-9 09:46:03

本帖最后由 bunchen 于 2015-10-9 10:20 编辑

Firefly rk3288采用Mali-T764的GPU，该GPU支持OpenCL 1.1。下面一步一步介绍android下OpenCL开发。

1.首先我们需要OpenCL的头文件和库。头文件可以在khronos的网站上下载：https://www.khronos.org/registry/cl/ 由于Mali-T764支持OpenCL 1.1，所以我们下载1.1版的头文件并放对位置。新建include目录，把opencl.h放在include目录下，把cl_d3d10.h 、 cl_ext.h 、 cl_gl_ext.h 、 cl_gl.h 、 cl.h 、 cl.hpp 、 cl_platform.h放到include/CL目录下。OpenCL的库文件在firefly rk3288源代码的device/rockchip/common/gpu/libMali-T760/libGLES_mali.so。

2.然后可以在这基础上开发了。下面给出一个打印OpenCL信息的Demo。由于libGLES_mali.so并没有放入android的/system/lib路径下，所以我们要在Demo的Android.mk文件中对libGLES_mali.so做预编译，Android.mk文件如下：

# Android.mk for the opencl-info demo.
# It first registers libGLES_mali.so as a prebuilt library, then builds the
# opencl-info executable from opencl-info.c and links it against that library.
LOCAL_PATH := $(call my-dir)
# Module 1: the prebuilt Mali library, taken from ../opencl/lib.
include $(CLEAR_VARS)
LOCAL_PREBUILT_LIBS := \
../opencl/lib/libGLES_mali.so
include $(BUILD_MULTI_PREBUILT)
# Module 2: the opencl-info executable.
include $(CLEAR_VARS)
# Header search path; opencl-info.c includes <opencl.h> from this directory.
LOCAL_C_INCLUDES := \
$(LOCAL_PATH)/../opencl/include
LOCAL_SRC_FILES := \
opencl-info.c
# Link against the prebuilt module declared above.
LOCAL_SHARED_LIBRARIES := \
libGLES_mali
LOCAL_MODULE := opencl-info
include $(BUILD_EXECUTABLE)

复制代码

在opencl-info.c中我们打印一些OpenCL的信息：

#include <stdio.h>
#include <stdlib.h>
#include <opencl.h>
/*
 * Print the outcome of one clGetPlatformInfo() query.
 *
 * ret  - status code returned by clGetPlatformInfo().
 * i    - index of the platform being described.
 * part - human-readable name of the queried property (e.g. "name").
 * buf  - NUL-terminated property value; only read when ret is CL_SUCCESS.
 *
 * On failure the error code is translated to a short explanation instead of
 * printing buf, whose contents are not meaningful in that case.
 */
void printPlatformInfo(cl_int ret ,cl_uint i, const char* part , char* buf)
{
    const char *reason;

    switch (ret) {
    case CL_SUCCESS:
        /* cl_uint is unsigned, so it is printed with %u rather than %d. */
        printf("\t platform index=%u %s : %s\n", i, part, buf);
        return;
    case CL_INVALID_PLATFORM:
        reason = "invalid platform.";
        break;
    case CL_INVALID_VALUE:
        reason = "invalid value.";
        break;
    case CL_OUT_OF_HOST_MEMORY:
        reason = "out of host memory.";
        break;
    default:
        reason = "i don't know why.";
        break;
    }
    printf("\t platform index=%u %s : %s\n", i, part, reason);
}
void printDevice(cl_device_id device)
{
char buf[128];
size_t size = 0 ;
cl_device_type type;
cl_int ret = CL_SUCCESS;
printf("\t ######################\n");
ret = clGetDeviceInfo(device,CL_DEVICE_NAME,128,buf,&size);
if(ret==CL_SUCCESS){
printf("\t device name=%s\n",buf);
}else{
printf("\t get device name fail !\n");
}
ret = clGetDeviceInfo(device,CL_DEVICE_TYPE,sizeof(cl_device_type),&type,&size);
if(ret==CL_SUCCESS){
printf("\t device type=%lu\n",type);
}else{
printf("\t get device type fail !\n");
}
ret = clGetDeviceInfo(device,CL_DEVICE_VENDOR,128,buf,&size);
if(ret==CL_SUCCESS){
printf("\t device vendor=%s\n",buf);
}else{
printf("\t get device vendor fail !\n");
}
ret = clGetDeviceInfo(device,CL_DRIVER_VERSION,128,buf,&size);
if(ret==CL_SUCCESS){
printf("\t device version=%s\n",buf);
}else{
printf("\t get device version fail !\n");
}
ret = clGetDeviceInfo(device,CL_DEVICE_PROFILE,128,buf,&size);
if(ret==CL_SUCCESS){
printf("\t device profile=%s\n",buf);
}else{
printf("\t get device profile fail !\n");
}
printf("\t ######################\n");
}
/*
 * List every device of the given type on a platform and print its details.
 *
 * platform    - platform to enumerate.
 * device_type - CL_DEVICE_TYPE_* selector passed to clGetDeviceIDs().
 *
 * At most MAX_DEVICES devices are described. clGetDeviceIDs() reports the
 * total number of matching devices, which may be larger than the array that
 * was passed in, so the count is clamped before the array is walked.
 */
void printDevices(cl_platform_id platform , cl_device_type device_type)
{
    enum { MAX_DEVICES = 8 };
    cl_device_id devices[MAX_DEVICES];
    cl_uint num = 0;
    cl_uint i;
    cl_int ret;

    ret = clGetDeviceIDs(platform, device_type, MAX_DEVICES, devices, &num);
    if (ret != CL_SUCCESS) {
        printf("\t getDeviceIDs fail !\n");
        return;
    }

    printf("\t getDeviceIDs success ! num=%u\n", num);
    if (num > MAX_DEVICES) {
        /* Only the first MAX_DEVICES entries of devices[] were filled in. */
        num = MAX_DEVICES;
    }
    for (i = 0; i < num; i++) {
        printDevice(devices[i]);
    }
}
void printOpenCL()
{
cl_platform_id platforms[8];
cl_uint num = 0 ;
cl_uint i=0;
cl_int err = clGetPlatformIDs(8,platforms,&num);
if(err==CL_SUCCESS){
printf("printOpenCL got %d platforms : \n",num);
for(i=0;i<num;i++){
char buf[128];
size_t size = 0 ;
cl_int ret = 0 ;
ret = clGetPlatformInfo(platforms[i],CL_PLATFORM_PROFILE,128,buf,&size);
printPlatformInfo(ret,i,"profile",buf);
ret = clGetPlatformInfo(platforms[i],CL_PLATFORM_VERSION,128,buf,&size);
printPlatformInfo(ret,i,"version",buf);
ret = clGetPlatformInfo(platforms[i],CL_PLATFORM_NAME,128,buf,&size);
printPlatformInfo(ret,i,"name",buf);
ret = clGetPlatformInfo(platforms[i],CL_PLATFORM_VENDOR,128,buf,&size);
printPlatformInfo(ret,i,"vendor",buf);
ret = clGetPlatformInfo(platforms[i],CL_PLATFORM_EXTENSIONS,128,buf,&size);
printPlatformInfo(ret,i,"extensions",buf);
printf("Device cpu:\n");
printDevices(platforms[i],CL_DEVICE_TYPE_CPU);
printf("Device gpu:\n");
printDevices(platforms[i],CL_DEVICE_TYPE_GPU);
printf("Device accelerator:\n");
printDevices(platforms[i],CL_DEVICE_TYPE_ACCELERATOR);
printf("Device default:\n");
printDevices(platforms[i],CL_DEVICE_TYPE_DEFAULT);
printf("Device all :\n");
printDevices(platforms[i],CL_DEVICE_TYPE_ALL);
}
}else if(err==CL_INVALID_VALUE){
printf("printOpenCL invalid value.\n");
}else if(err==CL_OUT_OF_HOST_MEMORY){
printf("printOpenCL out of host memory.\n");
}else{
printf("printOpenCL i don't know why.\n");
}
}
/* Program entry point: dump the OpenCL platform/device report and exit. */
int main()
{
    printOpenCL();

    return EXIT_SUCCESS;
}

复制代码

这里打印的信息请参考opencl文档 : https://www.khronos.org/registry/cl/specs/opencl-1.1.pdf
把编译好的opencl-info push到/system/bin目录下，把libGLES_mali.so push到/system/lib目录下，就可以在adb shell中运行opencl-info了。

从打印的信息可以看到，rk3288支持OpenCL 1.1，支持的设备是gpu的Mali-T764。

bunchen · 发表于 2015-10-9 10:08:44

本帖最后由 bunchen 于 2015-10-28 16:23 编辑

这里贴出一个简单的向量加法的例子，和容易出错的地方。先上代码：

#include <stdio.h>
#include <stdlib.h>
#include <opencl.h>
/*
 * Print the work-size limits of a device: number of compute units, number of
 * work-item dimensions, the per-dimension work-item limits and the maximum
 * work-group size.
 *
 * device - device handle obtained from clGetDeviceIDs().
 *
 * Each value is printed only when its query succeeds; failures are silently
 * skipped, as this is a purely informational dump.
 */
void printDeviceWorkInfo(cl_device_id device)
{
    cl_uint nMaxComputeUnits = 0;
    cl_uint nMaxWorkItemDims = 0;
    size_t nMaxWorkGroupSize = 0;
    cl_uint i;
    cl_int err;

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS,
                          sizeof nMaxComputeUnits, &nMaxComputeUnits, NULL);
    if (err == CL_SUCCESS) {
        printf("nMaxComputeUnits=%u\n", nMaxComputeUnits);
    }

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                          sizeof nMaxWorkItemDims, &nMaxWorkItemDims, NULL);
    if (err == CL_SUCCESS) {
        printf("nMaxWorkItemDims=%u\n", nMaxWorkItemDims);
        if (nMaxWorkItemDims > 0) {
            /* One size_t per dimension reported by the device. */
            size_t *sizes = malloc(sizeof *sizes * nMaxWorkItemDims);
            if (sizes != NULL) {
                err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
                                      sizeof *sizes * nMaxWorkItemDims, sizes, NULL);
                if (err == CL_SUCCESS) {
                    for (i = 0; i < nMaxWorkItemDims; i++) {
                        /* size_t must be printed with %zu, not %d. */
                        printf("nMaxWorkItemSizes[%u]=%zu\n", i, sizes[i]);
                    }
                }
                free(sizes);
            }
        }
    }

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
                          sizeof nMaxWorkGroupSize, &nMaxWorkGroupSize, NULL);
    if (err == CL_SUCCESS) {
        printf("nMaxWorkGroupSize=%zu\n", nMaxWorkGroupSize);
    }
}
/*
 * OpenCL C source of the vector_add_gpu kernel, compiled at run time.
 * Each work-item takes its index from get_global_id(0) and, when that index
 * is below num, stores src_a[idx] + src_b[idx] into res[idx]. The idx<num
 * guard lets the global work size be larger than the vector length.
 */
const char* program_src = ""
"__kernel void vector_add_gpu (__global const float* src_a,\n"
" __global const float* src_b,\n"
" __global float* res,\n"
" const int num)\n"
"{\n"
" int idx = get_global_id(0);\n"
" if(idx<num){"
" res[idx]=src_a[idx]+src_b[idx];\n"
" }\n"
"}\n"
;
/* Number of float elements in each of the three host vectors. */
static const cl_int vect_len = 10000000;
/* Host vectors: vect_a and vect_b are the inputs, vect_c receives the result. */
static float* vect_a = NULL ;
static float* vect_b = NULL ;
static float* vect_c = NULL ;
/*
 * Allocate the three host vectors (vect_len floats each), fill vect_a and
 * vect_b with pseudo-random values in [0, 1] and clear vect_c.
 *
 * Terminates the program with exit(1) when an allocation fails, matching the
 * error handling used by the rest of this demo; without this check the fill
 * loop would write through a NULL pointer.
 */
void initVects()
{
    cl_int i;

    vect_a = malloc(sizeof *vect_a * vect_len);
    vect_b = malloc(sizeof *vect_b * vect_len);
    vect_c = malloc(sizeof *vect_c * vect_len);
    if (vect_a == NULL || vect_b == NULL || vect_c == NULL) {
        printf("alloc host vectors fail !\n");
        exit(1);
    }

    for (i = 0; i < vect_len; i++) {
        vect_a[i] = (float)rand() / RAND_MAX;
        vect_b[i] = (float)rand() / RAND_MAX;
        vect_c[i] = 0.0f;
    }
}
/* Print rows first..last-1 as "index : a,b,c" using the three host vectors. */
static void printVectRows(cl_int first, cl_int last)
{
    cl_int row = first;

    while (row < last) {
        printf("%08d : %f,%f,%f\n", row, vect_a[row], vect_b[row], vect_c[row]);
        row++;
    }
}

/*
 * Print the first four and the last four elements of the host vectors,
 * framed by separator lines. Does nothing unless all three vectors are
 * allocated.
 */
void printVects()
{
    if (!vect_a || !vect_b || !vect_c) {
        return;
    }

    printf("######################\n");
    printVectRows(0, 4);
    printf(" ... \n");
    printVectRows(vect_len - 4, vect_len);
    printf("######################\n");
}
/*
 * Free the three host vectors and reset the pointers to NULL so a second
 * call is harmless. free(NULL) is a no-op, so no NULL guards are needed.
 */
void releaseVects()
{
    free(vect_a);
    vect_a = NULL;

    free(vect_b);
    vect_b = NULL;

    free(vect_c);
    vect_c = NULL;
}
/*
 * Round s up to the nearest multiple of f.
 *
 * f - group size (the multiple to round to).
 * s - value to round up.
 *
 * Returns the smallest multiple of f that is >= s. When f is 0 there is no
 * multiple to round to, so s is returned unchanged instead of dividing by
 * zero. The remainder form avoids the intermediate s+f-1, which could wrap
 * around for large s even when the rounded result itself is representable.
 */
size_t shrRoundUp(size_t f , size_t s)
{
    size_t rem;

    if (f == 0) {
        return s;
    }
    rem = s % f;
    return rem == 0 ? s : s + (f - rem);
}
void test()
{
cl_int error = 0 ;
cl_platform_id platform;
cl_context context;
cl_command_queue queue;
cl_device_id device;
cl_mem inbuf_a ;
cl_mem inbuf_b ;
cl_mem outbuf_r ;
const cl_int size = vect_len;
cl_int i ;
const size_t mem_size = sizeof(float)*size;
size_t program_len = strlen(program_src);
char build_log[1024];
size_t log_size;
size_t local_ws;
size_t global_ws;
cl_kernel vector_add_kernel;
error = clGetPlatformIDs(1,&platform,NULL);
if(error != CL_SUCCESS){
printf("get platform id fail !\n");
exit(1);
}
error = clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,1,&device,NULL);
if(error != CL_SUCCESS){
printf("get gpu device fail !\n");
exit(1);
}
printDeviceWorkInfo(device);
cl_context_properties properties[]={
CL_CONTEXT_PLATFORM,
(cl_context_properties)platform,
0
};
// 这里要配置properties
context = clCreateContext(properties,1,&device,NULL,NULL,&error);
if(error != CL_SUCCESS){
printf("create context fail !\n");
exit(1);
}
queue = clCreateCommandQueue(context,device,CL_QUEUE_PROFILING_ENABLE,&error);
if(error != CL_SUCCESS){
printf("create command queue fail !\n");
exit(1);
}
initVects();
printVects();
inbuf_a = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,mem_size,vect_a,&error);
if(error!=CL_SUCCESS){
printf("create buffer inbuf_a fail !\n");
exit(1);
}
inbuf_b = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,mem_size,vect_b,&error);
if(error!=CL_SUCCESS){
printf("create buffer inbuf_b fail !\n");
exit(1);
}
outbuf_r = clCreateBuffer(context,CL_MEM_WRITE_ONLY,mem_size,NULL,&error);
if(error!=CL_SUCCESS){
printf("create buffer outbuf_r fail !\n");
exit(1);
}
cl_program program = clCreateProgramWithSource(context,1,&program_src,&program_len,&error);
if(error!=CL_SUCCESS){
printf("create program fail !\n");
exit(1);
}
error = clBuildProgram(program,1,&device,NULL,NULL,NULL);
if(error!=CL_SUCCESS){
printf("build program fail !\n");
clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,1024,build_log,&log_size);
printf("build_log : %s\n",build_log);
exit(1);
}
vector_add_kernel = clCreateKernel(program,"vector_add_gpu",&error);
if(error!=CL_SUCCESS){
printf("create kernel fail !\n");
exit(1);
}
error = clSetKernelArg(vector_add_kernel,0,sizeof(cl_mem),&inbuf_a);
error |= clSetKernelArg(vector_add_kernel,1,sizeof(cl_mem),&inbuf_b);
error |= clSetKernelArg(vector_add_kernel,2,sizeof(cl_mem),&outbuf_r);
error |= clSetKernelArg(vector_add_kernel,3,sizeof(cl_int),&size);
if(error!=CL_SUCCESS){
printf("set kernel arg fail !\n");
exit(1);
}
local_ws = 256; //我们使用一维的clEnqueueNDRangeKernel，这里local_ws选择nMaxWorkItemSizes[0]=256
global_ws = shrRoundUp(local_ws,size); //这里是线程总数，应该是local_ws的倍数。
printf("local_ws=%d,global_ws=%d\n",local_ws,global_ws);
error = clEnqueueNDRangeKernel(queue,vector_add_kernel,1,NULL,&global_ws,&local_ws,0,NULL,NULL);
if(error!=CL_SUCCESS){
printf("enqueue kernel fail !\n");
exit(1);
}
clEnqueueReadBuffer(queue,outbuf_r,CL_TRUE,0,mem_size,vect_c,0,NULL,NULL);
printVects();
clReleaseKernel(vector_add_kernel);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
clReleaseMemObject(inbuf_a);
clReleaseMemObject(inbuf_b);
clReleaseMemObject(outbuf_r);
releaseVects();
}
/* Program entry point: run the vector-add demo once and exit. */
int main()
{
    test();

    return EXIT_SUCCESS;
}

复制代码

这里说一下容易出错的地方：
（1）clCreateContext，网上有些例子把第一个参数置成0，经试验这样不行，要设置properties。
（2）clEnqueueNDRangeKernel的global_work_size和local_work_size。我们使用一维的clEnqueueNDRangeKernel，这里local_work_size选择nMaxWorkItemSizes[0]=256，global_work_size是线程总数，应该是local_ws的倍数。
一维情况下：

二维情况下：

（3）kernel代码中
size_t get_global_id (uint dimindx)
返回这个线程的global_id,参数是维度索引，我们一维的情况下参数是0。

最后运行结果：

再加一个nv12转rgb的例子：

#include <stdio.h>
#include <stdlib.h>
#include <opencl.h>
#include <sys/time.h>
/*
 * Print the work-size limits of a device: number of compute units, number of
 * work-item dimensions, the per-dimension work-item limits and the maximum
 * work-group size.
 *
 * device - device handle obtained from clGetDeviceIDs().
 *
 * Each value is printed only when its query succeeds; failures are silently
 * skipped, as this is a purely informational dump.
 */
void printDeviceWorkInfo(cl_device_id device)
{
    cl_uint nMaxComputeUnits = 0;
    cl_uint nMaxWorkItemDims = 0;
    size_t nMaxWorkGroupSize = 0;
    cl_uint i;
    cl_int err;

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS,
                          sizeof nMaxComputeUnits, &nMaxComputeUnits, NULL);
    if (err == CL_SUCCESS) {
        printf("nMaxComputeUnits=%u\n", nMaxComputeUnits);
    }

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                          sizeof nMaxWorkItemDims, &nMaxWorkItemDims, NULL);
    if (err == CL_SUCCESS) {
        printf("nMaxWorkItemDims=%u\n", nMaxWorkItemDims);
        if (nMaxWorkItemDims > 0) {
            /* One size_t per dimension reported by the device. */
            size_t *sizes = malloc(sizeof *sizes * nMaxWorkItemDims);
            if (sizes != NULL) {
                err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
                                      sizeof *sizes * nMaxWorkItemDims, sizes, NULL);
                if (err == CL_SUCCESS) {
                    for (i = 0; i < nMaxWorkItemDims; i++) {
                        /* size_t must be printed with %zu, not %d. */
                        printf("nMaxWorkItemSizes[%u]=%zu\n", i, sizes[i]);
                    }
                }
                free(sizes);
            }
        }
    }

    err = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE,
                          sizeof nMaxWorkGroupSize, &nMaxWorkGroupSize, NULL);
    if (err == CL_SUCCESS) {
        printf("nMaxWorkGroupSize=%zu\n", nMaxWorkGroupSize);
    }
}
/*
 * OpenCL C source of the nv12_to_rgb kernel, compiled at run time.
 * One work-item handles one pixel: idi = get_global_id(0) is the column and
 * idj = get_global_id(1) the row. For pixels inside width x height it
 *   - reads y from nv12[idj*width+idi],
 *   - reads the u/v pair shared by each 2x2 pixel block from the interleaved
 *     plane that starts at offset width*height,
 *   - writes three bytes at rgb[(idj*width+idi)*3], each clamped to 0..255;
 *     the first is derived from y and v, the second from y, u and v, the
 *     third from y and u.
 * The chroma index uses width/2, so it presumably expects an even width.
 */
const char* program_src = ""
"__kernel void nv12_to_rgb (__global const unsigned char* nv12,\n"
" __global unsigned char* rgb,\n"
" const int width,\n"
" const int height)\n"
"{\n"
" int idi = get_global_id(0);\n"
" int idj = get_global_id(1);\n"
" int k = 0 ;\n"
" int y = 0 ;\n"
" int u = 0 ;\n"
" int v = 0 ;\n"
" int t = 0 ;\n"
" if(idi<width && idj<height){\n"
" y=nv12[idj*width+idi];\n"
" k=width*height+((idj/2)*(width/2)+idi/2)*2;\n"
" u=nv12[k];\n"
" v=nv12[k+1];\n"
" k=(idj*width+idi)*3;\n"
" t=(int)(y+1.370705*v-175.4502);\n"
" rgb[k]=t>255?255:t<0?0:t;\n"
" t=(int)(y-0.698001*v-0.337633*u+132.56124);\n"
" rgb[k+1]=t>255?255:t<0?0:t;\n"
" t=(int)(y+1.732446*u-221.7531);"
" rgb[k+2]=t>255?255:t<0?0:t;\n"
" }\n"
"}\n"
;
/*
 * Round s up to the nearest multiple of f.
 *
 * f - group size (the multiple to round to).
 * s - value to round up.
 *
 * Returns the smallest multiple of f that is >= s. When f is 0 there is no
 * multiple to round to, so s is returned unchanged instead of dividing by
 * zero. The remainder form avoids the intermediate s+f-1, which could wrap
 * around for large s even when the rounded result itself is representable.
 */
size_t shrRoundUp(size_t f , size_t s)
{
    size_t rem;

    if (f == 0) {
        return s;
    }
    rem = s % f;
    return rem == 0 ? s : s + (f - rem);
}
void nv12_to_rgb(uint8_t* rgb , uint8_t* nv12 , int width , int height)
{
cl_int error = 0 ;
cl_platform_id platform;
cl_context context;
cl_command_queue queue;
cl_device_id device;
cl_mem inbuf_nv12 ;
cl_mem outbuf_rgb ;
cl_int i ;
size_t program_len = strlen(program_src);
char build_log[1024];
size_t log_size;
size_t local_ws;
size_t global_ws;
cl_kernel nv12_to_rgb;
cl_int nv12_size = width*height*3/2;
cl_int rgb_size = width*height*3;
error = clGetPlatformIDs(1,&platform,NULL);
if(error != CL_SUCCESS){
printf("get platform id fail !\n");
exit(1);
}
error = clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,1,&device,NULL);
if(error != CL_SUCCESS){
printf("get gpu device fail !\n");
exit(1);
}
printDeviceWorkInfo(device);
cl_context_properties properties[]={
CL_CONTEXT_PLATFORM,
(cl_context_properties)platform,
0
};
// 这里要配置properties
context = clCreateContext(properties,1,&device,NULL,NULL,&error);
if(error != CL_SUCCESS){
printf("create context fail !\n");
exit(1);
}
queue = clCreateCommandQueue(context,device,CL_QUEUE_PROFILING_ENABLE,&error);
if(error != CL_SUCCESS){
printf("create command queue fail !\n");
exit(1);
}
cl_program program = clCreateProgramWithSource(context,1,&program_src,&program_len,&error);
if(error!=CL_SUCCESS){
printf("create program fail !\n");
exit(1);
}
error = clBuildProgram(program,1,&device,NULL,NULL,NULL);
if(error!=CL_SUCCESS){
printf("build program fail !\n");
clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,1024,build_log,&log_size);
printf("build_log : %s\n",build_log);
exit(1);
}
nv12_to_rgb = clCreateKernel(program,"nv12_to_rgb",&error);
if(error!=CL_SUCCESS){
printf("create kernel fail !\n");
exit(1);
}
struct timeval val;
gettimeofday(&val,NULL);
long t1 = val.tv_sec*1000000 + val.tv_usec;
inbuf_nv12 = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,nv12_size,nv12,&error);
if(error!=CL_SUCCESS){
printf("create buffer inbuf_a fail !\n");
exit(1);
}
outbuf_rgb = clCreateBuffer(context,CL_MEM_WRITE_ONLY,rgb_size,NULL,&error);
if(error!=CL_SUCCESS){
printf("create buffer outbuf_r fail !\n");
exit(1);
}
error = clSetKernelArg(nv12_to_rgb,0,sizeof(cl_mem),&inbuf_nv12);
error |= clSetKernelArg(nv12_to_rgb,1,sizeof(cl_mem),&outbuf_rgb);
error |= clSetKernelArg(nv12_to_rgb,2,sizeof(cl_int),&width);
error |= clSetKernelArg(nv12_to_rgb,3,sizeof(cl_int),&height);
if(error!=CL_SUCCESS){
printf("set kernel arg fail !\n");
exit(1);
}
size_t lws[2] = {16,16};
size_t gws[2];
gws[0] = shrRoundUp(lws[0],width);
gws[1] = shrRoundUp(lws[1],height);
printf("lws={%d,%d},gws={%d,%d}\n",lws[0],lws[1],gws[0],gws[1]);
error = clEnqueueNDRangeKernel(queue,nv12_to_rgb,2,NULL,gws,lws,0,NULL,NULL);
if(error!=CL_SUCCESS){
printf("enqueue kernel fail !\n");
exit(1);
}
clEnqueueReadBuffer(queue,outbuf_rgb,CL_TRUE,0,rgb_size,rgb,0,NULL,NULL);
clReleaseMemObject(inbuf_nv12);
clReleaseMemObject(outbuf_rgb);
gettimeofday(&val,NULL);
long t2 = val.tv_sec*1000000 + val.tv_usec;
printf("nv12_to_rgb spend %ld (us)\n",t2-t1);
clReleaseKernel(nv12_to_rgb);
clReleaseProgram(program);
clReleaseCommandQueue(queue);
clReleaseContext(context);
}
/* Print an n x n block of RGB pixels whose top-left corner is (x0, y0). */
static void printRgbBlock(const uint8_t *rgb, int width, int x0, int y0, int n)
{
    int i;
    int j;
    int k;

    for (j = y0; j < y0 + n; j++) {
        for (i = x0; i < x0 + n; i++) {
            k = (j * width + i) * 3;
            printf("(%02x,%02x,%02x) ", rgb[k], rgb[k + 1], rgb[k + 2]);
        }
        printf("\n");
    }
}

/*
 * Program entry point: convert one all-zero 1920x1080 NV12 frame to RGB on
 * the GPU, print the total time of the call in microseconds and dump the
 * top-left and bottom-right 8x8 pixel blocks of the result.
 *
 * Returns 0 on success and 1 when the image buffers cannot be allocated.
 */
int main(){
    const int width = 1920;
    const int height = 1080;
    const size_t pixels = (size_t)width * (size_t)height;
    struct timeval val;
    long long t1;
    long long t2;
    /* calloc zero-fills both buffers, so no separate memset is required. */
    uint8_t *nv12 = calloc(pixels * 3 / 2, 1);
    uint8_t *rgb = calloc(pixels * 3, 1);

    if (nv12 == NULL || rgb == NULL) {
        printf("alloc image buffers fail !\n");
        free(nv12);
        free(rgb);
        return 1;
    }

    /* Microsecond timestamps in long long: seconds*1000000 overflows a 32-bit long. */
    gettimeofday(&val, NULL);
    t1 = (long long)val.tv_sec * 1000000LL + (long long)val.tv_usec;
    nv12_to_rgb(rgb, nv12, width, height);
    gettimeofday(&val, NULL);
    t2 = (long long)val.tv_sec * 1000000LL + (long long)val.tv_usec;
    printf("nv12_to_rgb spend %lld (us)\n", t2 - t1);

    printRgbBlock(rgb, width, 0, 0, 8);
    printf("......\n");
    printRgbBlock(rgb, width, width - 8, height - 8, 8);

    free(nv12);
    free(rgb);
    return 0;
}

复制代码

这是一个二维的例子，local_work_item[0]*local_work_item[1]不能超过最大的workitem数，即 local_work_item[0]*local_work_item[1]<=256，所以取
local_work_item[0]=local_work_item[1]=16

duoduomu · 发表于 2015-10-9 10:15:07

bunchen 发表于 2015-10-9 10:08
二楼待用

不错，不过还可以参考http://malideveloper.arm.com/dow ... .0.0a36a7_Linux.tgz 这个SDK，里面都有，还有demo！

bunchen · 发表于 2015-10-9 10:25:31

duoduomu 发表于 2015-10-9 10:15
不错，不过还可以参考http://malideveloper.arm.com/downloads/tools/oclsdk/Mali_OpenCL_SDK_v1.1.0.0a3 ...

谢谢，之前一直想下载mali的sdk来看，奈何下载页面总是进不去，谢谢分享。

duoduomu · 发表于 2015-10-9 11:01:37

bunchen 发表于 2015-10-9 10:25
谢谢，之前一直想下载mali的sdk来看，奈何下载页面总是进不去，谢谢分享。

哈哈的确你按常理下载是不行的！

fxlsunny · 发表于 2015-10-15 14:50:24

不错正打算做opencl的事，不过想在Ubuntu下做，不知3288是否可行？

ff_20150814 · 发表于 2015-12-7 15:30:09

楼主大神你好，请问你的那个NV12转RGB耗时多少呢，GPU的频率是跑的多少M?

tk1user · 发表于 2016-3-23 09:36:41

太厉害了，顶起！

总有刁民想害朕 · 发表于 2016-4-26 09:43:15

printOpenCL got 1 platforms :
      platform index=0 profile : FULL_PROFILE
      platform index=0 version : OpenCL 1.1 v1.r6p0-02rel0.0f4218be5cc66c20a4
f31b6cc856ee46
      platform index=0 name : ARM Platform
      platform index=0 vendor : ARM
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range [1..1], but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
      platform index=0 extensions : out of host memory.
Device cpu:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range [1..1], but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
      getDeviceIDs fail !
Device gpu:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range [1..1], but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
      getDeviceIDs fail !
Device accelerator:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range [1..1], but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
      getDeviceIDs fail !
Device default:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range [1..1], but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
      getDeviceIDs fail !
Device all :
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
ERROR: The DDK is not compatible with any of the Mali GPUs on the system.
The DDK was built for 0x750 r0p0 status range [1..1], but none of the GPUs match
ed:
file /dev/mali0 is not of a compatible version (user 9.0, kernel 8.0)
      getDeviceIDs fail !

Eric.y · 发表于 2016-4-30 16:59:30

duoduomu 发表于 2015-10-9 10:15
不错，不过还可以参考http://malideveloper.arm.com/downloads/tools/oclsdk/Mali_OpenCL_SDK_v1.1.0.0a3 ...

Android-OpenCL-v1.1.zip和ARM官网的SDK一样吗，我没灯泡下载不了