#version 450
#extension GL_ARB_separate_shader_objects : enable

// Compute shader that turns a depth image into a packed bitmask image.
//
// Each invocation examines an 8x8 "area of interest" of the depth image
// (plus a kRadius-wide margin on every side), records for every cell whether
// it is "higher" than each of its four neighbors, smears those bits outward,
// and finally writes a 2x2 block of texels to resultImage.
//
// NOTE(review): no local_size layout qualifier is declared in this file; the
// comment on `roi` below states that there is one kernel per workgroup.

layout (binding = 0) uniform sampler2D depthImage;
layout (binding = 1, rgba8) uniform image2D resultImage;

// Must be <= 4, otherwise margins will be too small.  See below.
const int kRadius = 4;

// The area of interest is 8-wide so that we can pack the bits into a channel
// of an 8-bit image.  It is 8-high just because.
const int kEight = 8;

// Side length of the sampled neighborhood: the area of interest plus a
// kRadius-wide margin on each side.
const int kNeighborhoodSize = kEight + 2 * kRadius;

// The 'region of interest' that stores the intermediate data structure.
//
// Holds the 4-way neighbor relationships for the area of interest, plus a
// kRadius-high area above and below.  There is one entry per row; bit x of a
// channel refers to column x of that row.
//
// A 1-bit in the
//   R channel means that the cell is higher than its up-neighbor.
//   G channel means that the cell is higher than its down-neighbor.
//   B channel means that the cell is higher than its left-neighbor.
//   A channel means that the cell is higher than its right-neighbor.
// ("Higher" is encoded as: the cell's sampled depth is strictly less than
// the neighbor's sampled depth.)
//
// Although we only have 1 kernel per workgroup, making this shared allows us
// to not fall off a performance cliff that we otherwise would (at least on
// NVIDIA/Linux).
shared ivec4 roi[kNeighborhoodSize];

// Samples the kNeighborhoodSize x kNeighborhoodSize depth neighborhood for
// this invocation and fills every row of `roi` with the per-cell neighbor
// comparison bits described above.
void computeNeighborRelationships() {
  float depths[kNeighborhoodSize][kNeighborhoodSize];

  // Top-left texel of the neighborhood: the area of interest shifted by the
  // margin.
  ivec2 depth_base =
      ivec2(gl_GlobalInvocationID.xy) * kEight - ivec2(kRadius, kRadius);

  // Fill exactly the declared extent of `depths`.  (The loops previously ran
  // to kNeighborhoodSize + 2 and wrote past the end of the array; the extra
  // entries were never read.)
  //
  // NOTE(review): texture() is given integer texel coordinates (implicitly
  // converted to vec2).  This presumably relies on the bound sampler using
  // unnormalized coordinates -- confirm against the host code, or consider
  // texelFetch() with explicit edge handling.
  for (int x = 0; x < kNeighborhoodSize; ++x) {
    for (int y = 0; y < kNeighborhoodSize; ++y) {
      depths[x][y] = texture(depthImage, depth_base + ivec2(x, y)).r;
    }
  }

  // B channel: compare each cell with its left neighbor.  Column 0 has no
  // left neighbor inside the neighborhood, so its bit stays 0.
  for (int y = 0; y < kNeighborhoodSize; ++y) {
    int casts_leftward = 0;
    for (int x = 1; x < kNeighborhoodSize; ++x) {
      float diff = depths[x][y] - depths[x - 1][y];
      if (diff < 0.0) {
        casts_leftward |= (1 << x);
      }
    }
    roi[y].b = casts_leftward;
  }

  // A channel: compare each cell with its right neighbor.  The last column
  // has no right neighbor inside the neighborhood, so its bit stays 0.
  for (int y = 0; y < kNeighborhoodSize; ++y) {
    int casts_rightward = 0;
    for (int x = 0; x < kNeighborhoodSize - 1; ++x) {
      float diff = depths[x][y] - depths[x + 1][y];
      if (diff < 0.0) {
        casts_rightward |= (1 << x);
      }
    }
    roi[y].a = casts_rightward;
  }

  // R channel: compare each cell with its up neighbor.
  // Don't know about top row, don't care.
  roi[0].r = 0;
  for (int y = 1; y < kNeighborhoodSize; ++y) {
    int casts_upward = 0;
    for (int x = 0; x < kNeighborhoodSize; ++x) {
      float diff = depths[x][y] - depths[x][y - 1];
      if (diff < 0.0) {
        casts_upward |= (1 << x);
      }
    }
    roi[y].r = casts_upward;
  }

  // G channel: compare each cell with its down neighbor.
  for (int y = 0; y < kNeighborhoodSize - 1; ++y) {
    int casts_downward = 0;
    for (int x = 0; x < kNeighborhoodSize; ++x) {
      float diff = depths[x][y] - depths[x][y + 1];
      if (diff < 0.0) {
        casts_downward |= (1 << x);
      }
    }
    roi[y].g = casts_downward;
  }
  // Don't know about bottom row, don't care.
  roi[kNeighborhoodSize - 1].g = 0;
}

// Spreads the neighbor bits of the kEight rows of interest vertically (by
// OR-ing in nearby rows) and horizontally (by OR-ing in shifted copies), in
// place in `roi`.
//
// NOTE(review): every smear loop runs `rad` from 1 to kRadius - 1, so the
// reach is kRadius - 1 rows/bits rather than kRadius.  Preserved as-is;
// confirm that this is intended.
void smearNeighborRelationships() {
  // Smear 'downward' to cast shadows even further 'downward'.
  //
  // Rows are visited bottom-to-top so that the rows read (those above the
  // current one) have not yet been modified by this pass.  The counter must
  // be signed: with an unsigned counter `y >= 0` is always true, so the loop
  // would never terminate and would index far outside `roi` after wrapping.
  for (int y = kEight - 1; y >= 0; --y) {
    ivec4 smeared = roi[kRadius + y];
    for (int rad = 1; rad < kRadius; ++rad) {
      ivec4 above = roi[kRadius + y - rad];
      smeared.g |= above.g;
      smeared.b |= above.b;
      smeared.a |= above.a;
    }
    roi[kRadius + y] = smeared;
  }

  // Smear 'upward' to cast shadows even further 'upward'.
  //
  // Rows are visited top-to-bottom so that the rows read (those below the
  // current one) have not yet been modified by this pass.
  for (int y = 0; y < kEight; ++y) {
    ivec4 smeared = roi[kRadius + y];
    for (int rad = 1; rad < kRadius; ++rad) {
      ivec4 below = roi[kRadius + y + rad];
      smeared.r |= below.r;
      smeared.b |= below.b;
      smeared.a |= below.a;
    }
    roi[kRadius + y] = smeared;
  }

  // Smear 'rightward' to cast shadows even further 'rightward', and similarly
  // for leftward.
  for (int y = 0; y < kEight; ++y) {
    ivec4 smeared = roi[kRadius + y];
    for (int rad = 1; rad < kRadius; ++rad) {
      // Smear 'upward' bits to left and right.
      smeared.r |= (smeared.r >> 1);
      smeared.r |= (smeared.r << 1);
      // Smear 'downward' bits to left and right.
      smeared.g |= (smeared.g >> 1);
      smeared.g |= (smeared.g << 1);
      // Smear 'leftward' bits to left only, to avoid false positives.
      smeared.b |= (smeared.b >> 1);
      // Smear 'rightward' bits to right only, to avoid false positives.
      smeared.a |= (smeared.a << 1);
    }
    roi[kRadius + y] = smeared;
  }
}

// Entry point.  Builds and smears the neighbor bitmasks, then packs the 8x8
// area of interest into a 2x2 block of resultImage texels.
//
// Output layout: each texel covers a 4-wide, 4-high group of cells.  Channel
// i of a texel holds row i of the group; within a channel, bit (2 * c) is the
// combined up/down flag and bit (2 * c + 1) is the combined left/right flag
// of column c.
void main() {
  computeNeighborRelationships();
  smearNeighborRelationships();

  ivec2 base = ivec2(gl_GlobalInvocationID.xy) * 2;

  for (int y = 0; y < 2; ++y) {
    // Gather the four rows of this band.  Shifting right by kRadius drops
    // the left margin so that bits 0..7 are the columns of interest.
    ivec4 up_down_row = ivec4(0, 0, 0, 0);
    ivec4 left_right_row = ivec4(0, 0, 0, 0);
    for (int i = 0; i < 4; ++i) {
      ivec4 row_bits = roi[y * 4 + kRadius + i];
      up_down_row[i] = (row_bits.r | row_bits.g) >> kRadius;
      left_right_row[i] = (row_bits.b | row_bits.a) >> kRadius;
    }

    // Interleave the two flags per cell: columns 0..3 go to the left texel,
    // columns 4..7 go to the right texel.
    ivec4 left_row = ivec4(0, 0, 0, 0);
    ivec4 right_row = ivec4(0, 0, 0, 0);
    for (int xx = 0; xx < 4; ++xx) {
      int up_down_bit = 1 << (xx * 2);
      int left_right_bit = 1 << (xx * 2 + 1);
      for (int yy = 0; yy < 4; ++yy) {
        if ((up_down_row[yy] & (1 << xx)) != 0) {
          left_row[yy] |= up_down_bit;
        }
        if ((left_right_row[yy] & (1 << xx)) != 0) {
          left_row[yy] |= left_right_bit;
        }
        if ((up_down_row[yy] & (1 << (xx + 4))) != 0) {
          right_row[yy] |= up_down_bit;
        }
        if ((left_right_row[yy] & (1 << (xx + 4))) != 0) {
          right_row[yy] |= left_right_bit;
        }
      }
    }

    // Each channel holds an 8-bit value; dividing by 255 maps it onto the
    // normalized range of the rgba8 image.
    imageStore(resultImage, base + ivec2(0, y), vec4(left_row) / 255.0);
    imageStore(resultImage, base + ivec2(1, y), vec4(right_row) / 255.0);
  }
}