//---------------------------------------------------------------------------------------------------------------// float foldf3_mul(const float3 a) { return a.s0*a.s1*a.s2; } int3 scanli3_mul(int3 a) { a = a * (int3)(a.s12, 1); a = a * (int3)(a.s2,1,1); return a; } int doti2(const int2 a, const int2 b) { return a.lo*b.lo + a.hi*b.hi; } int doti3(const int3 a, const int3 b) { return doti2(a.lo,b.lo) + a.s2*b.s2; } //---------------------------------------------------------------------------------------------------------------// __kernel void atom_force(__global float3 *const atom_force, __global const float3 *const atom_position2, __global const float3 *const atom_velocity, __global const float3 *const fluid_velocity_2, const int atom_n, const int3 fluid_n0, const int trilinear) { const int k = get_global_id(0); if ( k < atom_n) { // Strides and offsets const int3 x2 = convert_int3(floor(atom_position2[k])); const int3 n2 = 2+fluid_n0+2; const int3 stride2 = scanli3_mul((int3)(n2.s12,1)); const int offset2 = doti3(x2,stride2); // Compute interpolation weights float3 stencil_velocity = 0.; if (trilinear) { int3 stencil_offset; for (stencil_offset.s0 = 0; stencil_offset.s0 <= 1; ++stencil_offset.s0) for (stencil_offset.s1 = 0; stencil_offset.s1 <= 1; ++stencil_offset.s1) for (stencil_offset.s2 = 0; stencil_offset.s2 <= 1; ++stencil_offset.s2) { const float weight = foldf3_mul(1. - fabs(atom_position2[k] - convert_float3(x2 + stencil_offset))); stencil_velocity += fluid_velocity_2[offset2 + doti3(stencil_offset,stride2)] * weight; } } else { int3 stencil_offset; for (stencil_offset.s0 = -1; stencil_offset.s0 <= 2; ++stencil_offset.s0) for (stencil_offset.s1 = -1; stencil_offset.s1 <= 2; ++stencil_offset.s1) for (stencil_offset.s2 = -1; stencil_offset.s2 <= 2; ++stencil_offset.s2) { const float3 delta = fabs(atom_position2[k] - convert_float3(x2 + stencil_offset)); const float weight = foldf3_mul( ( delta < 1. ? // Could possibly optimize using delta % 1 3. - 2.*delta + sqrt(1.+4.*delta-4.*delta*delta) : 5. - 2.*delta - sqrt(-7.+12.*delta-4.*delta*delta) ) / 8. ); stencil_velocity += fluid_velocity_2[offset2 + doti3(stencil_offset,stride2)] * weight; } } // Compute force atom_force[k] = stencil_velocity - atom_velocity[k]; } }