X server found. dri2 connection failed! X server found. dri2 connection failed! X server found. dri2 connection failed! X server found. dri2 connection failed! X server found. dri2 connection failed! X server found. dri2 connection failed! Build options: CL kernel source: __kernel void self_test(__global int *buf) { __local int tmp[3]; tmp[get_local_id(0)] = buf[get_local_id(0)]; barrier(CLK_LOCAL_MEM_FENCE); buf[get_global_id(0)] = tmp[2 - get_local_id(0)] + buf[get_global_id(0)];} .decl_function self_test ## 71 registers ## .decl.dword %0 local_id_0 .decl.dword %1 local_id_1 .decl.dword %2 local_id_2 .decl.dword %3 group_id_0 .decl.dword %4 group_id_1 .decl.dword %5 group_id_2 .decl.dword %6 num_groups_0 .decl.dword %7 num_groups_1 .decl.dword %8 num_groups_2 .decl.dword %9 local_size_0 .decl.dword %10 local_size_1 .decl.dword %11 local_size_2 .decl.dword %12 enqueued_local_size_0 .decl.dword %13 enqueued_local_size_1 .decl.dword %14 enqueued_local_size_2 .decl.dword %15 global_size_0 .decl.dword %16 global_size_1 .decl.dword %17 global_size_2 .decl.dword %18 global_offset_0 .decl.dword %19 global_offset_1 .decl.dword %20 global_offset_2 .decl.dword %21 stack_pointer .decl.qword %22 stack_buffer .decl.word %23 block_ip .decl.dword %24 barrier_id .decl.dword %25 thread_number .decl.dword %26 work_dimension .decl.dword %27 zero .decl.dword %28 one .decl.word %29 retVal .decl.dword %30 dwblockip .decl.qword %31 profiling_buffer_pointer .decl.dword %32 profiling_timestamps0 .decl.dword %33 profiling_timestamps1 .decl.dword %34 profiling_timestamps2 .decl.dword %35 profiling_timestamps3 .decl.dword %36 profiling_timestamps4 .decl.dword %37 threadid .decl.qword %38 constant_addrspace_start .decl.qword %39 stack_size .decl.qword %40 enqueue_buffer_pointer .decl.dword %41 .decl.dword %42 .decl.dword %43 .decl.dword %44 .decl.dword %45 .decl.dword %46 .decl.dword %47 .decl.dword %48 .decl.dword %49 .decl.dword %50 .decl.dword %51 .decl.dword %52 .decl.dword %53 .decl.dword %54 .decl.dword %55 .decl.dword %56 .decl.dword %57 .decl.dword %58 .decl.dword %59 .decl.dword %60 .decl.dword %61 .decl.dword %62 .decl.dword %63 .decl.dword %64 .decl.dword %65 .decl.dword %66 .decl.dword %67 .decl.dword %68 .decl.dword %69 .decl.dword %70 ## 1 input registers ## decl_input.global %41 buf ## 0 output register ## ## 0 pushed register ## 3 blocks ## LABEL $0 LOADI.uint32 %42 4 LABEL $1 LOADI.uint32 %59 2 SHL.int32 %43 %0 %59 ADD.int32 %44 %41 %43 SUB.uint32 %60 %44 %41 LOAD.int32.global.aligned {%45} %60 bti:2 SHL.int32 %46 %0 %59 ADD.int32 %47 %42 %46 LOADI.uint32 %62 0 STORE.int32.local.aligned %47 {%45} bti:254 SYNC.workgroup.local_read.local_write SUB.int32 %48 %59 %0 SHL.int32 %49 %48 %59 ADD.int32 %50 %42 %49 LOAD.int32.local.aligned {%51} %50 bti:254 MUL.int32 %52 %3 %12 ADD.int32 %53 %52 %0 ADD.int32 %54 %53 %18 SHL.int32 %55 %54 %59 ADD.int32 %56 %41 %55 SUB.uint32 %69 %56 %41 LOAD.int32.global.aligned {%57} %69 bti:2 ADD.int32 %58 %57 %51 SUB.uint32 %70 %56 %41 STORE.int32.global.aligned %70 {%58} bti:2 LABEL $2 RET .end_function self_test's SELECTION IR begin: WARNING: not completed yet, welcome for the FIX! [0] L0: [2] CMP.le(16) arf : %23<8,8,1>:UW 0x0:UW [4](f0.1) L4: [6] L1: [8] CMP.le(16) arf : %23<8,8,1>:UW 0x1:UW [10](f0.1) MOV(16) %23<1>:UW : 0xffff:UW [12] CMP.eq(16) arf : %23<8,8,1>:UW 0xffff:UW [14](f0.1) JMPI(1) : 0:D [16](f0.1) MOV(16) %23<1>:UW : 0x1:UW [18] SHL(16) %43<1>:D : %0<8,8,1>:D 2:D [20] ADD(16) %44<1>:D : %41<0,1,0>:D %43<8,8,1>:D [22] ADD(16) %60<1>:UD : %44<8,8,1>:UD -%41<0,1,0>:UD [24] UNTYPED_READ(16) %45<1>:UD : %60<8,8,1>:UD 0x2:UD [26] SHL(16) %46<1>:D : %0<8,8,1>:D 2:D [28] ADD(16) %47<1>:D : %46<8,8,1>:D 0x4:UD [30] UNTYPED_WRITE(16) : %47<8,8,1>:UD %45<8,8,1>:UD 0xfe:UD [32] BARRIER(16) %72<1>:F : %71<8,8,1>:UD [34] ADD(16) %48<1>:D : -%0<8,8,1>:D 2:D [36] SHL(16) %49<1>:D : %48<8,8,1>:D 2:D [38] ADD(16) %50<1>:D : %49<8,8,1>:D 0x4:UD [40] UNTYPED_READ(16) %51<1>:UD : %50<8,8,1>:UD 0xfe:UD [42] MUL(1) %52<0>:D : %3<0,1,0>:D %12<0,1,0>:UD [44] ADD(16) %53<1>:D : %52<0,1,0>:D %0<8,8,1>:D [46] ADD(16) %54<1>:D : %53<8,8,1>:D %18<0,1,0>:D [48] SHL(16) %55<1>:D : %54<8,8,1>:D 2:D [50] ADD(16) %56<1>:D : %41<0,1,0>:D %55<8,8,1>:D [52] ADD(16) %69<1>:UD : %56<8,8,1>:UD -%41<0,1,0>:UD [54] UNTYPED_READ(16) %57<1>:UD : %69<8,8,1>:UD 0x2:UD [56] ADD(16) %58<1>:D : %57<8,8,1>:D %51<8,8,1>:D [58] ADD(16) %70<1>:UD : %56<8,8,1>:UD -%41<0,1,0>:UD [60] UNTYPED_WRITE(16) : %70<8,8,1>:UD %58<8,8,1>:UD 0x2:UD [62] L2: [64] EOT(16) : self_test's SELECTION IR end. X server found. dri2 connection failed! X server found. dri2 connection failed! X server found. dri2 connection failed! X server found. dri2 connection failed! X server found. dri2 connection failed! X server found. dri2 connection failed! X server found. dri2 connection failed! X server found. dri2 connection failed! LOG:[status=0] OK: OpenVX using GPU device#0 (Intel(R) HD Graphics Skylake Desktop GT2) [OpenCL 1.2 beignet 1.4 (git-4933bf9)] [SvmCaps 0 1] .decl_function __cl_fill_region_align4 ## 54 registers ## .decl.dword %0 local_id_0 .decl.dword %1 local_id_1 .decl.dword %2 local_id_2 .decl.dword %3 group_id_0 .decl.dword %4 group_id_1 .decl.dword %5 group_id_2 .decl.dword %6 num_groups_0 .decl.dword %7 num_groups_1 .decl.dword %8 num_groups_2 .decl.dword %9 local_size_0 .decl.dword %10 local_size_1 .decl.dword %11 local_size_2 .decl.dword %12 enqueued_local_size_0 .decl.dword %13 enqueued_local_size_1 .decl.dword %14 enqueued_local_size_2 .decl.dword %15 global_size_0 .decl.dword %16 global_size_1 .decl.dword %17 global_size_2 .decl.dword %18 global_offset_0 .decl.dword %19 global_offset_1 .decl.dword %20 global_offset_2 .decl.dword %21 stack_pointer .decl.qword %22 stack_buffer .decl.word %23 block_ip .decl.dword %24 barrier_id .decl.dword %25 thread_number .decl.dword %26 work_dimension .decl.dword %27 zero .decl.dword %28 one .decl.word %29 retVal .decl.dword %30 dwblockip .decl.qword %31 profiling_buffer_pointer .decl.dword %32 profiling_timestamps0 .decl.dword %33 profiling_timestamps1 .decl.dword %34 profiling_timestamps2 .decl.dword %35 profiling_timestamps3 .decl.dword %36 profiling_timestamps4 .decl.dword %37 threadid .decl.qword %38 constant_addrspace_start .decl.qword %39 stack_size .decl.qword %40 enqueue_buffer_pointer .decl.dword %41 .decl.dword %42 .decl.dword %43 .decl.dword %44 .decl.dword %45 .decl.dword %46 .decl.dword %47 .decl.bool %48 .decl.dword %49 .decl.dword %50 .decl.dword %51 .decl.dword %52 .decl.dword %53 ## 4 input registers ## decl_input.global %41 dst decl_input.value %42 pattern decl_input.value %43 offset decl_input.value %44 size ## 0 output register ## ## 0 pushed register ## 4 blocks ## LABEL $0 MUL.int32 %45 %3 %12 ADD.int32 %46 %45 %0 ADD.int32 %47 %46 %18 GE.uint32 %48 %47 %44 IF !<%48> -> label$5 LABEL $1 ADD.int32 %49 %47 %43 LOADI.uint32 %52 2 SHL.int32 %50 %49 %52 ADD.int32 %51 %41 %50 SUB.uint32 %53 %51 %41 STORE.float.global.aligned %53 {%42} bti:2 ENDIF -> label$5 LABEL $2 LABEL $3 RET .end_function __cl_fill_region_align4's SELECTION IR begin: WARNING: not completed yet, welcome for the FIX! [0] L0: [2] CMP.le(16) arf : %23<8,8,1>:UW 0x0:UW [4](f0.1) IF(16) : 0:D [6] MUL(1) %45<0>:D : %3<0,1,0>:D %12<0,1,0>:UD [8] ADD(16) %46<1>:D : %45<0,1,0>:D %0<8,8,1>:D [10] ADD(16) %47<1>:D : %46<8,8,1>:D %18<0,1,0>:D [12] CMP.ge(16) arf : %47<8,8,1>:UD %44<0,1,0>:UD [14](f48) IF(16) : 0:D [16] L1: [18] ADD(16) %49<1>:D : %47<8,8,1>:D %43<0,1,0>:D [20] SHL(16) %50<1>:D : %49<8,8,1>:D 2:D [22] ADD(16) %51<1>:D : %41<0,1,0>:D %50<8,8,1>:D [24] ADD(16) %53<1>:UD : %51<8,8,1>:UD -%41<0,1,0>:UD [26] UNTYPED_WRITE(16) : %53<8,8,1>:UD %42<0,1,0>:UD 0x2:UD [28] L5: [30] ENDIF(16) : 0:D [32] L4: [34] ENDIF(16) : 0:D [36] L2: [38] CMP.le(16) arf : %23<8,8,1>:UW 0x2:UW [40](f0.1) L7: [42] L3: [44] EOT(16) : __cl_fill_region_align4's SELECTION IR end. Build options: -cl-std=CL1.2 CL kernel source: uint amd_pack(float4 src){ uint dst = ((uint)(clamp (src.s0,0.0f,255.0f)) ) + ((uint)(clamp (src.s1,0.0f,255.0f))<< 8 ) + ((uint)(clamp (src.s2,0.0f,255.0f))<< 16) + ((uint)(clamp (src.s3,0.0f,255.0f))<< 24); return dst; } float amd_unpack3(uint src){ float dst= (float)((src >> 24) & 0xff); return dst; } float amd_unpack2(uint src){ float dst= (float)((src >> 16) & 0xff); return dst; } float amd_unpack1(uint src){ float dst= (float)((src >> 8) & 0xff); return dst; } float amd_unpack0(uint src){ float dst= (float)((src)& 0xff); return dst; } uint amd_bitalign(uint src0,uint src1, uint src2){ uint dst = (uint)(as_ulong((uint2)(src1,src0)) >> (src2 & 31)); return dst; } uint amd_bytealign(uint src0,uint src1, uint src2){ uint dst = (uint)(as_ulong((uint2)(src1,src0)) >> (src2 & 31) * 8 ); return dst; } uint amd_lerp(uint src0, uint src1, uint src2) { uint dst = (((((src0 >> 0) & 0xff) + ((src1 >> 0) & 0xff) + ((src2 >> 0) & 1)) >> 1) << 0) + (((((src0 >> 8) & 0xff) + ((src1 >> 8) & 0xff) + ((src2 >> 8) & 1)) >> 1) << 8) + (((((src0 >> 16) & 0xff) + ((src1 >> 16) & 0xff) + ((src2 >> 16) & 1)) >> 1) << 16) + (((((src0 >> 24) & 0xff) + ((src1 >> 24) & 0xff) + ((src2 >> 24) & 1)) >> 1) << 24); return dst;} uint amd_sad(uint src0, uint src1, uint src2){ uint dst = src2 + abs(((src0 >> 0) & 0xff) - ((src1 >> 0) & 0xff)) + abs(((src0 >> 8) & 0xff) - ((src1 >> 8) & 0xff)) + abs(((src0 >> 16) & 0xff) - ((src1 >> 16) & 0xff)) + abs(((src0 >> 24) & 0xff) - ((src1 >> 24) & 0xff)); return dst; } uint amd_sadhi(uint src0, uint src1, uint src2){ uint dst = src2 + (abs(((src0 >> 0) & 0xff) - ((src1 >> 0) & 0xff)) << 16) + (abs(((src0 >> 8) & 0xff) - ((src1 >> 8) & 0xff)) << 16) + (abs(((src0 >> 16) & 0xff) - ((src1 >> 16) & 0xff)) << 16) + (abs(((src0 >> 24) & 0xff) - ((src1 >> 24) & 0xff)) << 16); return dst; } uint amd_sad4(uint4 src0, uint4 src1, uint src2) { uint dst = src2 + abs(((src0.s0 >> 0) & 0xff) - ((src1.s0 >> 0) & 0xff)) + abs(((src0.s0 >> 8) & 0xff) - ((src1.s0 >> 8) & 0xff)) + abs(((src0.s0 >> 16) & 0xff) - ((src1.s0 >> 16) & 0xff)) + abs(((src0.s0 >> 24) & 0xff) - ((src1.s0 >> 24) & 0xff)) + abs(((src0.s1 >> 0) & 0xff) - ((src1.s0 >> 0) & 0xff)) + abs(((src0.s1 >> 8) & 0xff) - ((src1.s1 >> 8) & 0xff)) + abs(((src0.s1 >> 16) & 0xff) - ((src1.s1 >> 16) & 0xff)) + abs(((src0.s1 >> 24) & 0xff) - ((src1.s1 >> 24) & 0xff)) + abs(((src0.s2 >> 0) & 0xff) - ((src1.s2 >> 0) & 0xff)) + abs(((src0.s2 >> 8) & 0xff) - ((src1.s2 >> 8) & 0xff)) + abs(((src0.s2 >> 16) & 0xff) - ((src1.s2 >> 16) & 0xff)) + abs(((src0.s2 >> 24) & 0xff) - ((src1.s2 >> 24) & 0xff)) + abs(((src0.s3 >> 0) & 0xff) - ((src1.s3 >> 0) & 0xff)) + abs(((src0.s3 >> 8) & 0xff) - ((src1.s3 >> 8) & 0xff)) + abs(((src0.s3 >> 16) & 0xff) - ((src1.s3 >> 16) & 0xff)) + abs(((src0.s3 >> 24) & 0xff) - ((src1.s3 >> 24) & 0xff)); return dst; } uint amd_msad(uint src0, uint src1, uint src2){ uchar4 src0u8 = as_uchar4(src0); uchar4 src1u8 = as_uchar4(src1); uint dst = src2 + ((src1u8.s0 == 0) ? 0 : abs(src0u8.s0 - src1u8.s0)) + ((src1u8.s1 == 0) ? 0 : abs(src0u8.s1 - src1u8.s1)) + ((src1u8.s2 == 0) ? 0 : abs(src0u8.s2 - src1u8.s2)) + ((src1u8.s3 == 0) ? 0 : abs(src0u8.s3 - src1u8.s3)); return dst; } ulong amd_qsad(ulong src0, uint src1, ulong src2) { uchar8 src0u8 = as_uchar8(src0); ushort4 src2u16 = as_ushort4(src2); ushort4 dstu16; dstu16.s0 = amd_sad(as_uint(src0u8.s0123), src1, src2u16.s0); dstu16.s1 = amd_sad(as_uint(src0u8.s1234), src1, src2u16.s1); dstu16.s2 = amd_sad(as_uint(src0u8.s2345), src1, src2u16.s2); dstu16.s3 = amd_sad(as_uint(src0u8.s3456), src1, src2u16.s3); ulong dst = as_ulong(dstu16); return dst; } ulong amd_mqsad(ulong src0, uint src1, ulong src2) { uchar8 src0u8 = as_uchar8(src0); ushort4 src2u16 = as_ushort4(src2); ushort4 dstu16; dstu16.s0 = amd_msad(as_uint(src0u8.s0123), src1, src2u16.s0); dstu16.s1 = amd_msad(as_uint(src0u8.s1234), src1, src2u16.s1); dstu16.s2 = amd_msad(as_uint(src0u8.s2345), src1, src2u16.s2); dstu16.s3 = amd_msad(as_uint(src0u8.s3456), src1, src2u16.s3); ulong dst = as_ulong(dstu16); return dst; } uint amd_sadw(uint src0, uint src1, uint src2) { ushort2 src0u16 = as_ushort2(src0); ushort2 src1u16 = as_ushort2(src1); uint dst = src2 + abs(src0u16.s0 - src1u16.s0) + abs(src0u16.s1 - src1u16.s1); return dst; } uint amd_sadd(uint src0, uint src1, uint src2) { uint dst = src2 + abs(src0 - src1); return dst; } uint amd_bfe(uint src0, uint src1, uint src2) { uint dst; uint offset = src1 & 31; uint width = src2 & 31; if ( width == 0 ) dst=0; else if((offset + width) < 32) dst = (src0 << (32 - offset - width)) >> (32 - width); else dst = src0 >> offset; return dst; } uint amd_bfm(uint src0 , uint src1){ uint dst = ((1 << (src0 & 0x1f)) - 1) << (src1 & 0x1f); return dst; } uint amd_min3(uint src0, uint src1, uint src2) { uint dst = min(src0, min(src1,src2)); return dst; } uint amd_max3(uint src0, uint src1, uint src2) { uint dst = max(src0, max(src1,src2)); return dst; } uint amd_median3(uint src0, uint src1, uint src2){ uint dst = max(min(src0,src1), min(max(src0,src1),src2)); return dst; } float4 lens_model_function(float4 th, float fr, float4 abcd, float lens_type) { float4 r; if (!lens_type){ r = tan(th) * (float4)fr; return (r * ((float4)abcd.s3 + r * ((float4)abcd.s2 + r * ((float4)abcd.s1 + r * (float4)abcd.s0)))); } else if (lens_type < 3){ r = th * (float4)fr; return (r * ((float4)abcd.s3 + r * ((float4)abcd.s2 + r * ((float4)abcd.s1 + r * (float4)abcd.s0)))); } else if (lens_type == 3) { r = tan(th) * (float4)fr; float4 r2 = r*r; return (r * ((float4)1.f + r2* ((float4)abcd.s0 + r2 * ((float4)abcd.s1 + r2 * (float4)abcd.s2)))); }else { float4 r = th * (float4)fr; float4 r2 = r*r; return (r * ((float4)1.f + r2 * ((float4)abcd.s0 + r2 * (float4)abcd.s1))); } } __kernel __attribute__((reqd_work_group_size(8, 8, 1))) void calc_lens_distortion_and_warp_map( uint ncam, int l_type, uint camWidth, uint camHeight, uint paddingPixelCount, __global uchar * cam_params, uint camera_params_offs, uint camera_params_num, uint vm_width, uint vm_height, __global uchar *valid_pix_map, uint vm_stride, uint vm_offs, uint sc_width, uint sc_height, __global uchar * camera_src_coord_map, uint sc_stride, uint camera_src_coord_map_offs, __global uchar * camera_z_value_buf, uint camera_z_value_buf_offs, uint zbuf_num) { int gx = get_global_id(0); int gy = get_global_id(1); float pibyH = 1.636246173744684e-03; cam_params += camera_params_offs; camera_src_coord_map += camera_src_coord_map_offs; gx <<= 2; if( gx < vm_width && gy < vm_height){ camera_z_value_buf += camera_z_value_buf_offs + ((gy*vm_width + gx)<<2); valid_pix_map += vm_offs + gy*vm_stride + (gx << 2); uint4 valid_pix_out = *(__global uint4 *)(valid_pix_map); for (int camId=0; camId < 4; camId++) { __global float * cam_params_cur = (__global float *)(cam_params + camId*128); float4 cam_ltrb = *(__global float4*)(cam_params_cur); float4 cam_k1k2k3k0 = *(__global float4*)(cam_params_cur+4); float2 cam_du0dv0 = *(__global float2*)(cam_params_cur+8); float r_crop = *(__global float*)(cam_params_cur+10); float F0 = *(__global float*)(cam_params_cur+11); float4 F1T0T1T2 = *(__global float4*)(cam_params_cur+12); __global float * Mcam = (__global float*)(cam_params_cur+16); float lens_type = *(__global float*)(cam_params_cur+25); float2 center = cam_du0dv0 + (float2)(1024.000000, 768.000000); uint2 size = (uint2)((uint)(camId*vm_height*vm_width), (uint)(camId*sc_stride*vm_height)); __global float *camera_z_value_ptr = (__global float *)(camera_z_value_buf + size.x*4); __global float *camera_src_coord = (__global float *)(camera_src_coord_map + size.y + gy*sc_stride); float4 x0, x1, x2, x_src, y_src; float4 y0, y1, y2, te; te = (float4)((float)gx, (float)(gx+1), (float)(gx+2), (float)(gx+3)); float pe = gy*pibyH - (float)M_PI_2; te = te*(float4)pibyH - (float4)M_PI; float sin_pe = sin(pe), cos_pe = cos(pe); x0 = sin(te)*(float4)cos_pe; x0 -= (float4) F1T0T1T2.s1; x2 = cos(te)*(float4)cos_pe; x2 -= (float4) F1T0T1T2.s3; x1 = (float4)(sin_pe - F1T0T1T2.s2); y0 = (float4)1.f / (float4)sqrt(x0*x0 + x1*x1 + x2*x2); // mul_factor x0 *= y0; x1 *= y0; x2 *= y0; // compute mat_mult output y0 = x0*(float4)Mcam[0] + x1*(float4)Mcam[1] + x2*(float4)Mcam[2]; y1 = x0*(float4)Mcam[3] + x1*(float4)Mcam[4] + x2*(float4)Mcam[5]; y2 = x0*(float4)Mcam[6] + x1*(float4)Mcam[7] + x2*(float4)Mcam[8]; // calculate src coordinates te = asin(sqrt(min(max((y0*y0 + y1*y1), (float4)0.f), (float4)1.0f))); y1 = atan2(y1, y0); x0 = lens_model_function(te, F0, cam_k1k2k3k0, lens_type); x_src = (float4)F1T0T1T2.s0*x0*cos(y1); y_src = (float4)F1T0T1T2.s0*x0*sin(y1); te = sqrt(x_src*x_src + y_src*y_src); // rr x_src += (float4) center.x; y_src += (float4) center.y; float2 rbminus1 = cam_ltrb.s23 - (float2) 1.f; int4 isValidCamMap = select((int4)0, (int4)1, ((y2 > (float4)0.0f) && (x_src >= (float4)cam_ltrb.s0) && (x_src <= (float4)rbminus1.s0))); isValidCamMap &= select((int4)0, (int4)1, ((y_src >= (float4)cam_ltrb.s1) && (y_src <= (float4)rbminus1.s1))); isValidCamMap &= select((int4)0, (int4)1, (((float4)r_crop <= (float4)0.0f) || (te <= (float4)r_crop))); valid_pix_out |= convert_uint4(isValidCamMap << camId); // update zbuffer *(__global float4 *)camera_z_value_ptr = select((float4)0.f, fabs(y2), (isValidCamMap != (int4)0) ); int4 isPaddingCamMap = (int4)0; if (paddingPixelCount){ float4 cam_padltrb; cam_padltrb.s01 = cam_ltrb.s01 - (float2)paddingPixelCount; cam_padltrb.s23 = rbminus1 + (float2)paddingPixelCount; isPaddingCamMap = select(isPaddingCamMap, (int4)1, ((y2 > (float4)0.0f) && (x_src >= (float4)cam_padltrb.s0) && (x_src <= (float4)cam_padltrb.s2))); isPaddingCamMap &= select((int4)0, (int4)1, ((y_src >= (float4)cam_padltrb.s1) && (y_src <= (float4)cam_padltrb.s3))); isPaddingCamMap &= select((int4)0, (int4)1, ((float4)r_crop <= (float4)0.0f) || (te <= (float4)(r_crop + paddingPixelCount))); } x2 = convert_float4(isValidCamMap|isPaddingCamMap); x_src = select((float4)-1.f, x_src, (x2 != (float4)0.0f) ); y_src = select((float4)-1.f, y_src, (x2 != (float4)0.0f) ); float2 rbminus2 = rbminus1 * (float2)2.f; x_src = select(x_src, (float4)cam_ltrb.s0 - x_src, (x_src < (float4)cam_ltrb.s0)); x_src = select(x_src, (float4)rbminus2.s0 - x_src, (x_src >= (float4)rbminus1.s0)); y_src = select(y_src, (float4)cam_ltrb.s1 - y_src, (y_src < (float4)cam_ltrb.s1)); y_src = select(y_src, (float4)rbminus2.s1 - y_src, (y_src >= (float4)rbminus1.s1)); camera_src_coord += (gx<<1); *(__global float4 *)camera_src_coord = (float4) (x_src.s0, y_src.s0, x_src.s1, y_src.s1); *(__global float4 *)(camera_src_coord + 4) = (float4) (x_src.s2, y_src.s2, x_src.s3, y_src.s3); } *(__global uint4 *)valid_pix_map = valid_pix_out; } } LOG:[status=-1] ERROR: clBuildProgram(0x17544a0,-cl-std=CL1.2) failed(-11) for com.amd.loomsl.calc_lens_distortionwarp_map ERROR: OpenVX call failed with status = (-1) at /home/arpu/Work/githubsources/amdovx/amdovx_git/amdovx-modules/vx_loomsl/live_stitch_api.cpp#943 ERROR: OpenVX call failed with status = (-1) at /home/arpu/Work/githubsources/amdovx/amdovx_git/amdovx-modules/vx_loomsl/live_stitch_api.cpp#995 OK: loaded CAM00.bmp OK: loaded CAM01.bmp OK: loaded CAM02.bmp OK: loaded CAM03.bmp ERROR: OpenVX call failed with status = (-5) at /home/arpu/Work/githubsources/amdovx/amdovx_git/amdovx-modules/vx_loomsl/live_stitch_api.cpp#3027 ERROR: OpenVX call failed with status = (-5) at /home/arpu/Work/githubsources/amdovx/amdovx_git/amdovx-modules/vx_loomsl/live_stitch_api.cpp#3046 ERROR: OpenVX call failed with status = (-5) at /home/arpu/Work/githubsources/amdovx/amdovx_git/amdovx-modules/vx_loomsl/live_stitch_api.cpp#3098 ERROR: OpenVX call failed with status = (-5) at /home/arpu/Work/githubsources/amdovx/amdovx_git/amdovx-modules/vx_loomsl/live_stitch_api.cpp#3136 OK: created output.bmp LOG:[status=0] OK: OpenCL buffer usage: 117965344, 1/2