//---------------------------------------------------------------------------------------------------------------//
// Constant 16-component vector that fluid_dist_eqn_next_3 dots with its per-cell random vector.
// NOTE(review): the name suggests one row of a lattice-Boltzmann transform matrix -- confirm with the author.
__constant const float16 mg_lbT = (float16)(
    1., 0., 0., 0.,
    -1./3., -1./3., -1./3., 0.,
    0., 0., 0., 0.,
    0., 0., M_SQRT2, 0.);
//---------------------------------------------------------------------------------------------------------------//
// Product of the three components of a.
int foldi3_mul(const int3 a)
{
    return a.s0 * a.s1 * a.s2;
}

// Suffix products of a: returns (a.s0*a.s1*a.s2, a.s1*a.s2, a.s2).
int3 scanli3_mul(int3 a)
{
    // After this step a == (a0*a1, a1*a2, a2).
    a = a * (int3)(a.s12, 1);
    // Fold the trailing component into the leading one: (a0*a1*a2, a1*a2, a2).
    a = a * (int3)(a.s2, 1, 1);
    return a;
}

// Integer dot product of two 2-component vectors.
int doti2(const int2 a, const int2 b)
{
    return a.lo * b.lo + a.hi * b.hi;
}

// Integer dot product of two 3-component vectors.
int doti3(const int3 a, const int3 b)
{
    return doti2(a.lo, b.lo) + a.s2 * b.s2;
}

// Thin wrappers around the built-in dot() for 2, 3 and 4 components.
float dotf2(const float2 a, const float2 b)
{
    return dot(a, b);
}

float dotf3(const float3 a, const float3 b)
{
    return dot(a, b);
}

float dotf4(const float4 a, const float4 b)
{
    return dot(a, b);
}

// 8-component dot product, assembled from dotf4 on the low and high halves.
float dotf8(const float8 a, const float8 b)
{
    return dotf4(a.lo, b.lo) + dotf4(a.hi, b.hi);
}

// 16-component dot product, assembled from dotf8 on the low and high halves.
float dotf16(const float16 a, const float16 b)
{
    return dotf8(a.lo, b.lo) + dotf8(a.hi, b.hi);
}
//---------------------------------------------------------------------------------------------------------------//
// Box-Muller transform: turns the two 32-bit words of key into a pair of floats.
//
// key     two unsigned 32-bit values; if both are uniformly distributed, the
//         returned pair is a pair of standard-normal deviates.
// returns (radius*cos(angle), radius*sin(angle)).
float2 random_normal(uint2 key)
{
    // Box-Muller needs its uniform inputs in (0, 1]. Feeding it the raw integer
    // values (as the previous code did) makes log() positive for any key.s0 > 1,
    // so the sqrt() argument is negative and the result is NaN. Rescale by 2^-32;
    // the +1 keeps the value strictly positive so log() never sees 0.
    const float2 interval = (convert_float2(key) + 1.0f) * (1.0f / 4294967296.0f);
    const float radius = sqrt(-2. * log(interval.s0));
    const float angle = 2. * M_PI * interval.s1;
    return radius * (float2)(cos(angle), sin(angle));
}
//---------------------------------------------------------------------------------------------------------------//
// For the cell addressed by this work item, writes
//     dist_eqn_3[cell] = dist_eq_3[cell] + dot(mg_lbT, random)
// with the scalar dot product broadcast to all 16 components.
//
// dist_eqn_3  output buffer; the block starting at local_size3 * ((step+1) & 1) is written.
// dist_eq_3   input buffer; the block at the same offset is read.
// local_n0    grid extent per axis; 3 cells are added on each side of every axis
//             (presumably a halo -- confirm with the caller).
// key         64-bit value reinterpreted as two 32-bit words and passed to random_normal.
// step        step counter; only the parity of step+1 is used, to select the buffer block.
__kernel void fluid_dist_eqn_next_3(__global float16 *const dist_eqn_3,
                                    const __global float16 *const dist_eq_3,
                                    const int3 local_n0,
                                    const long key,
                                    const int step)
{
    const int3 local_n3 = 3 + local_n0 + 3;

    // Decompose the flat work-item id into (ii, jj, kk), with kk varying fastest.
    const int indexval = get_global_id(0);
    const int ii = indexval / (local_n3.s1 * local_n3.s2);
    const int jj = (indexval - ii * local_n3.s1 * local_n3.s2) / local_n3.s2;
    const int kk = (indexval - ii * local_n3.s1 * local_n3.s2 - jj * local_n3.s2);
    const int3 local_x3 = (int3)(ii, jj, kk);

    // Work items whose coordinates fall outside the padded grid do nothing.
    if (all(local_x3 < local_n3)) {
        // Strides and offsets
        const int3 local_stride3 = scanli3_mul((int3)(local_n3.s12, 1));
        const int local_offset3 = doti3(local_x3, local_stride3);
        const int local_size3 = foldi3_mul(local_n3);

        // Both buffers are indexed at block 0 or 1, chosen by the parity of step+1.
        __global const float16* const dist_eq_3_new = &dist_eq_3[local_size3 * ((step + 1) & 0x01)];
        __global float16* const dist_eqn_3_new = &dist_eqn_3[local_size3 * ((step + 1) & 0x01)];

        // NOTE(review): all three random_normal calls receive the same key, and key is
        // the same for every work item, so the three pairs are identical and every
        // cell gets the same increment. Confirm whether a per-cell/per-draw key is intended.
        const float16 random = (float16)(
            random_normal(as_uint2(key)),
            0., 0., 0., 0., 0., 0.,
            random_normal(as_uint2(key)),
            random_normal(as_uint2(key)),
            0., 0., 0., 0.
        );

        dist_eqn_3_new[local_offset3] = dist_eq_3_new[local_offset3] + (float16)(dotf16(mg_lbT, random));
    }
}