[require]
GLSL >= 4.40

[compute shader]
#version 440
#extension GL_ARB_compute_shader : enable
#extension GL_ARB_shader_image_load_store : enable

// Compute pass that resamples texture0 into out_image.
//
// Each work group first copies a TILE_W x TILE_H window of source texels into
// shared memory. Every invocation then accumulates a fixed set of taps around
// its own source position, weighting each tap by a value read from `lut` at a
// coordinate derived from the tap's distance, and divides by the weight sum.
// The result is run through the post-conversion steps below and stored.

layout(std140, binding=0) uniform UBO {
    layout(offset=0) vec3 src_luma;
    layout(offset=16) vec3 dst_luma;
    layout(offset=32) vec2 texture_size0;
    layout(offset=48) mat2 texture_rot0;
    layout(offset=80) vec2 texture_off0;
    layout(offset=88) vec2 pixel_size0;
    layout(offset=96) vec2 out_scale;
    layout(offset=104) vec2 tex_scale0;
};
uniform sampler1D lut;
layout(rgba16f) uniform writeonly image2D out_image;
uniform sampler2D texture0;
layout (local_size_x = 32, local_size_y = 8) in;

// Shared tile geometry: taps reach TILE_PAD texels left of / above the base
// texel, so the tile is stored with that offset applied.
const int TILE_W   = 30;
const int TILE_H   = 14;
const int TILE_PAD = 3;

// Taps flagged as range-checked are skipped unless their distance is below this.
const float TAP_CUTOFF = 3.032708;
// Distances are divided by this value before being mapped onto the LUT.
const float LUT_DOMAIN = 3.238315;
// Number of LUT entries assumed when computing the texel-centre coordinate.
const float LUT_SIZE   = 64.0;

// One shared array per colour channel (channels 0, 1 and 2 of the fetched texel).
shared float tile_c0[TILE_W * TILE_H];
shared float tile_c1[TILE_W * TILE_H];
shared float tile_c2[TILE_W * TILE_H];

// Maps an output pixel index to a coordinate in texture0: pixel centre,
// scaled by out_scale and tex_scale0, rotated, then offset.
vec2 source_coord(uvec2 id)
{
    vec2 out_pos = out_scale * (vec2(id) + vec2(0.5));
    vec2 raw_pos = tex_scale0 * out_pos;
    return texture_rot0 * raw_pos + pixel_size0 * texture_off0;
}

// Converts a tap distance into a LUT coordinate that lands on texel centres
// (from half a texel inside the start to half a texel inside the end).
float lut_coord(float dist)
{
    float t = dist * 1.0/LUT_DOMAIN;
    return mix(0.5 / LUT_SIZE, 1.0 - 0.5 / LUT_SIZE, t);
}

// Accumulates one tap located `off` texels away from the base texel.
//   range_checked - when true the tap is dropped unless dist < TAP_CUTOFF
//   origin        - shared-tile index of this invocation's window origin
//   frac_pos      - fractional position of the sample point inside its texel
//   acc           - running weighted colour sum (channels 0..2 are updated)
//   weight_sum    - running sum of the weights applied so far
void add_tap(ivec2 off, bool range_checked, int origin, vec2 frac_pos,
             inout vec4 acc, inout float weight_sum)
{
    float dist = length(vec2(off) - frac_pos);
    if (range_checked && !(dist < TAP_CUTOFF))
        return;

    int cell = origin + TILE_W * (off.y + TILE_PAD) + (off.x + TILE_PAD);
    float weight = texture(lut, lut_coord(dist)).r;
    weight_sum += weight;
    acc[0] += weight * tile_c0[cell];
    acc[1] += weight * tile_c1[cell];
    acc[2] += weight * tile_c2[cell];
}

// Shorthand for the tap table in main(); both rely on main()'s local names.
#define TAP(dx, dy)         add_tap(ivec2(dx, dy), false, origin, frac_pos, color, weight_sum)
#define TAP_CHECKED(dx, dy) add_tap(ivec2(dx, dy), true,  origin, frac_pos, color, weight_sum)

void main()
{
    vec2 pos  = source_coord(gl_GlobalInvocationID.xy);
    vec2 size = texture_size0;
    vec2 pt   = pixel_size0;

    vec4 color = vec4(0.0);
    float weight_sum = 0.0;

    // Texel-aligned position of the work group's first invocation.
    vec2 group_pos   = source_coord(gl_WorkGroupID.xy * gl_WorkGroupSize.xy);
    vec2 tile_origin = group_pos - pt * fract(group_pos * size - vec2(0.5));

    // Texel-aligned position of this invocation, plus the remainder inside it.
    vec2 frac_pos = fract(pos * size - vec2(0.5));
    vec2 base     = pos - pt * frac_pos;

    // Offset (in whole texels) of this invocation's base texel within the tile.
    ivec2 rel = ivec2(round((base - tile_origin) * size));

    // Cooperative tile load; the strides equal the local size declared above.
    for (int row = int(gl_LocalInvocationID.y); row < TILE_H; row += 8) {
        for (int col = int(gl_LocalInvocationID.x); col < TILE_W; col += 32) {
            vec4 texel = texture(texture0,
                                 tile_origin + pt * vec2(col - TILE_PAD, row - TILE_PAD));
            int cell = TILE_W * row + col;
            tile_c0[cell] = texel[0];
            tile_c1[cell] = texel[1];
            tile_c2[cell] = texel[2];
        }
    }

    // Make every invocation's tile writes visible before anyone reads them.
    groupMemoryBarrier();
    barrier();

    // scaler samples
    // Tap table, visited row by row from dy = -3 to dy = 4. CHECKED taps are
    // tested against TAP_CUTOFF; plain taps are always accumulated.
    int origin = TILE_W * rel.y + rel.x;

    TAP_CHECKED( 0, -3); TAP_CHECKED( 1, -3);

    TAP_CHECKED(-2, -2); TAP_CHECKED(-1, -2); TAP_CHECKED( 0, -2);
    TAP_CHECKED( 1, -2); TAP_CHECKED( 2, -2); TAP_CHECKED( 3, -2);

    TAP_CHECKED(-2, -1);
    TAP(-1, -1); TAP( 0, -1); TAP( 1, -1); TAP( 2, -1);
    TAP_CHECKED( 3, -1);

    TAP_CHECKED(-3,  0); TAP_CHECKED(-2,  0);
    TAP(-1,  0); TAP( 0,  0); TAP( 1,  0); TAP( 2,  0);
    TAP_CHECKED( 3,  0); TAP_CHECKED( 4,  0);

    TAP_CHECKED(-3,  1); TAP_CHECKED(-2,  1);
    TAP(-1,  1); TAP( 0,  1); TAP( 1,  1); TAP( 2,  1);
    TAP_CHECKED( 3,  1); TAP_CHECKED( 4,  1);

    TAP_CHECKED(-2,  2);
    TAP(-1,  2); TAP( 0,  2); TAP( 1,  2); TAP( 2,  2);
    TAP_CHECKED( 3,  2);

    TAP_CHECKED(-2,  3); TAP_CHECKED(-1,  3); TAP_CHECKED( 0,  3);
    TAP_CHECKED( 1,  3); TAP_CHECKED( 2,  3); TAP_CHECKED( 3,  3);

    TAP_CHECKED( 0,  4); TAP_CHECKED( 1,  4);

    // Normalise by the total weight, then force the output opaque.
    color = color / vec4(weight_sum);
    color *= 1.000000;
    color.a = 1.000000;

    // scaler post-conversion
    // Clamp to [0, 1], evaluate a logistic curve (slope 6.5, centre 0.75),
    // then subtract an offset and divide by a scale constant.
    color.rgb = clamp(color.rgb, 0.0, 1.0);
    vec3 logistic = 1.0/(1.0 + exp(6.500000 * (0.750000 - color.rgb)));
    color.rgb = (logistic - 0.007577) * 1.0/0.827906;

    // color mapping
    // Both gains are 1.0 in this shader.
    color.rgb *= vec3(1.000000);
    color.rgb *= vec3(1.000000);

    // delinearize
    // Clamp again and apply a power curve with exponent 1/2.4.
    color.rgb = clamp(color.rgb, 0.0, 1.0);
    color.rgb *= vec3(1.000000);
    color.rgb = pow(color.rgb, vec3(1.0/2.4));

    imageStore(out_image, ivec2(gl_GlobalInvocationID), color);
}