// Compute workgroup size: 8 x 8 x 1 invocations per workgroup.
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
void main() {
- const vec3 stimulus = imageLoad(color_layer, ivec2(gl_GlobalInvocationID.xy)).rgb;
- const vec3 transformed = tony_mc_mapface(stimulus);
- vec3 composited = srgb_oetf(transformed);
-
const uvec2 tile_coord = gl_WorkGroupID.xy / 4;
const uint tile_index = tile_coord.y * uniforms.tile_stride + tile_coord.x;
const uint tile_base = tile_index * TILE_STRIDE;
-
TilesRead tiles_read = TilesRead(uniforms.tiles);
-
const uint lo = tiles_read.values[tile_base + TILE_BITMAP_RANGE_LO_OFFSET];
const uint hi = tiles_read.values[tile_base + TILE_BITMAP_RANGE_HI_OFFSET];
+
+ const vec3 stimulus = imageLoad(color_layer, ivec2(gl_GlobalInvocationID.xy)).rgb;
+ const vec3 transformed = tony_mc_mapface(stimulus);
+ vec3 composited = srgb_oetf(transformed);
+
if (lo <= hi) {
const vec4 ui = imageLoad(ui_layer_read, ivec2(gl_GlobalInvocationID.xy)).rgba;
composited = ui.rgb + (composited * (1.0 - ui.a));
continue;
}
- ivec2 bin_start = ivec2(floor(max(min(primitives_min, uniforms.screen_resolution), 0.0) / TILE_SIZE));
- ivec2 bin_end = ivec2(floor((max(min(primitives_max, uniforms.screen_resolution), 0.0) + (TILE_SIZE - 1)) / TILE_SIZE));
+ const ivec2 tile_start = ivec2(floor(max(min(primitives_min, uniforms.screen_resolution), 0.0) / TILE_SIZE));
+ const ivec2 tile_end = ivec2(floor((max(min(primitives_max, uniforms.screen_resolution), 0.0) + (TILE_SIZE - 1)) / TILE_SIZE));
- for (int y = bin_start.y; y < bin_end.y; y++) {
- for (int x = bin_start.x; x < bin_end.x; x++) {
- const uvec2 bin_coord = uvec2(x, y);
- const uint bin_index = bin_coord.y * uniforms.tile_stride + bin_coord.x;
- const vec2 bin_min = bin_coord * TILE_SIZE;
- const vec2 bin_max = min(bin_min + TILE_SIZE, uniforms.screen_resolution);
+ for (int y = tile_start.y; y < tile_end.y; y++) {
+ for (int x = tile_start.x; x < tile_end.x; x++) {
+ const uvec2 tile_coord = uvec2(x, y);
+ const uint tile_index = tile_coord.y * uniforms.tile_stride + tile_coord.x;
+ const vec2 tile_min = tile_coord * TILE_SIZE;
+ const vec2 tile_max = min(tile_min + TILE_SIZE, uniforms.screen_resolution);
- const bool intersects = !(any(lessThan(bin_max, primitive_min)) || any(greaterThan(bin_min, primitive_max)));
+ const bool intersects = !(any(lessThan(tile_max, primitive_min)) || any(greaterThan(tile_min, primitive_max)));
const uvec4 ballot = subgroupBallot(intersects);
if (ballot.x == 0 && ballot.y == 0) {
}
if (ballot.x != 0) {
- uniforms.tiles.values[bin_index * TILE_STRIDE + TILE_BITMAP_L0_OFFSET + gl_WorkGroupID.x * 32 + word_index + 0] = ballot.x;
+ uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_L0_OFFSET + gl_WorkGroupID.x * 32 + word_index + 0] = ballot.x;
}
if (ballot.y != 0) {
- uniforms.tiles.values[bin_index * TILE_STRIDE + TILE_BITMAP_L0_OFFSET + gl_WorkGroupID.x * 32 + word_index + 1] = ballot.y;
+ uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_L0_OFFSET + gl_WorkGroupID.x * 32 + word_index + 1] = ballot.y;
}
if (subgroupElect()) {
- uniforms.tiles.values[bin_index * TILE_STRIDE + TILE_BITMAP_L1_OFFSET + gl_WorkGroupID.x] |=
+ uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_L1_OFFSET + gl_WorkGroupID.x] |=
(uint(ballot.x != 0) << (word_index + 0)) |
(uint(ballot.y != 0) << (word_index + 1));
- atomicMin(uniforms.tiles.values[bin_index * TILE_STRIDE + TILE_BITMAP_RANGE_LO_OFFSET], gl_WorkGroupID.x);
- atomicMax(uniforms.tiles.values[bin_index * TILE_STRIDE + TILE_BITMAP_RANGE_HI_OFFSET], gl_WorkGroupID.x);
+ atomicMin(uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_RANGE_LO_OFFSET], gl_WorkGroupID.x);
+ atomicMax(uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_RANGE_HI_OFFSET], gl_WorkGroupID.x);
}
}
}