Hi,
I have written the following program to compute SAD (sum of absolute differences) values for a given frame.
My kernel code is below.
The kernel works fine if Frame_width = 100, Frame_height = 100, width = 64, height = 64.
The same kernel crashes if Frame_width = 500, Frame_height = 500, width = 64, height = 64.
Does anyone have any idea why? It's a bit urgent for me.
/*
 * Computes one row of sum-of-absolute-differences (SAD) values.
 *
 * For every horizontal position col in [0, frame_width - width], the function
 * accumulates |p1[...] - p2[...]| over `height` rows and over the first
 * 16 * (width / 16) columns of each row, then stores the total in
 * out[col + (frame_width - width + 1) * id].
 *
 * Addressing used by the loops below:
 *   p1: byte (id + r) * frame_width + col + c   (row stride = frame_width)
 *   p2: byte id * frame_width + r * 16 * (width / 16) + c   (read contiguously)
 *
 * Parameters:
 *   p1, p2        source byte buffers (see addressing above)
 *   height, width block size in rows / columns
 *   frame_width   row stride of p1, in bytes
 *   frame_height  number of rows in p1; used only for the bounds guard
 *   id            index of the top row of the block in p1
 *   out           result buffer, (frame_width - width + 1) ints per id
 *
 * NOTE(review): width is processed in 16-byte vectors only; if width is not a
 * multiple of 16 the trailing width % 16 columns are ignored (unchanged from
 * the original behaviour) -- confirm this is intended.
 * NOTE(review): p2 is offset by id * frame_width as well. If p2 is a single
 * width x height block rather than a full frame, that offset reads past the
 * end of p2 for id > 0 -- confirm against the host code.
 */
void SAD_function(__global unsigned char *p1, __global unsigned char *p2, int height, int width, int frame_width, int frame_height, int id, __global int *out) {
    /*
     * Rows id .. id + height - 1 of p1 are read. Assuming p1 holds
     * frame_width * frame_height bytes, any id beyond frame_height - height
     * would read past the end of the frame, so such work-items do nothing.
     */
    if (id < 0 || id > frame_height - height) {
        return;
    }

    const int positions = frame_width - width + 1; /* results written per id */
    const int vectors_per_row = width / 16;        /* 16-byte loads per block row */
    const int base = id * frame_width;             /* byte offset applied to p1 and p2 */

    for (int col = 0; col < positions; col++) {
        int16 total16 = (int16)(0);
        int y = 0; /* running offset into p2; advances 16 bytes per load */

        /* row_start is the offset of the block's current row relative to base. */
        for (int row_start = col; row_start < height * frame_width; row_start += frame_width) {
            for (int v = 0; v < vectors_per_row; v++) {
                uchar16 vp1 = vload16(0, p1 + base + row_start + 16 * v);
                uchar16 vp2 = vload16(0, p2 + base + y);
                /* Widen to int before accumulating so the per-lane sums do not wrap. */
                total16 += convert_int16(abs_diff(vp1, vp2));
                y += 16;
            }
        }

        /* Horizontal reduction of the 16 lanes: 16 -> 8 -> 4 -> 2 -> 1. */
        int8 sum8 = total16.lo + total16.hi;
        int4 sum4 = sum8.lo + sum8.hi;
        int2 sum2 = sum4.lo + sum4.hi;

        out[col + positions * id] = sum2.x + sum2.y;
    }
}
/*
 * Kernel entry point: forwards all arguments to SAD_function, passing this
 * work-item's global id in dimension 0 as the `id` argument.
 *
 * SAD_function uses `id` both as a row offset (id * frame_width) into p1 and
 * p2 and as the row index into out, so the global work size in dimension 0
 * determines which rows are processed.
 * NOTE(review): presumably the host should launch at most
 * frame_height - height + 1 work-items so every block lies inside the frame
 * -- confirm against the host code.
 */
__kernel void SAD_value(__global unsigned char *p1, __global unsigned char *p2, int height, int width, int frame_width, int frame_height, __global int *out) {
    int id = get_global_id(0);

    SAD_function(p1, p2, height, width, frame_width, frame_height, id, out);
}