From 0cd192429b0e862c86cc5b873513f28e0ce3a580 Mon Sep 17 00:00:00 2001 From: Josh Simmons Date: Sun, 21 Jul 2024 09:03:17 +0200 Subject: [PATCH] shark: Shader cleanup --- .../shaders/display_transform.comp.glsl | 11 ++++---- .../shaders/primitive_2d_bin.comp.glsl | 28 +++++++++---------- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/title/shark-shaders/shaders/display_transform.comp.glsl b/title/shark-shaders/shaders/display_transform.comp.glsl index be48b91..113d314 100644 --- a/title/shark-shaders/shaders/display_transform.comp.glsl +++ b/title/shark-shaders/shaders/display_transform.comp.glsl @@ -27,18 +27,17 @@ vec3 tony_mc_mapface(vec3 stimulus) { layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; void main() { - const vec3 stimulus = imageLoad(color_layer, ivec2(gl_GlobalInvocationID.xy)).rgb; - const vec3 transformed = tony_mc_mapface(stimulus); - vec3 composited = srgb_oetf(transformed); - const uvec2 tile_coord = gl_WorkGroupID.xy / 4; const uint tile_index = tile_coord.y * uniforms.tile_stride + tile_coord.x; const uint tile_base = tile_index * TILE_STRIDE; - TilesRead tiles_read = TilesRead(uniforms.tiles); - const uint lo = tiles_read.values[tile_base + TILE_BITMAP_RANGE_LO_OFFSET]; const uint hi = tiles_read.values[tile_base + TILE_BITMAP_RANGE_HI_OFFSET]; + + const vec3 stimulus = imageLoad(color_layer, ivec2(gl_GlobalInvocationID.xy)).rgb; + const vec3 transformed = tony_mc_mapface(stimulus); + vec3 composited = srgb_oetf(transformed); + if (lo <= hi) { const vec4 ui = imageLoad(ui_layer_read, ivec2(gl_GlobalInvocationID.xy)).rgba; composited = ui.rgb + (composited * (1.0 - ui.a)); diff --git a/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl index 0010d71..a826dae 100644 --- a/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl @@ -45,17 +45,17 @@ void main() { continue; } - ivec2 bin_start = ivec2(floor(max(min(primitives_min, uniforms.screen_resolution), 0.0) / TILE_SIZE)); - ivec2 bin_end = ivec2(floor((max(min(primitives_max, uniforms.screen_resolution), 0.0) + (TILE_SIZE - 1)) / TILE_SIZE)); + const ivec2 tile_start = ivec2(floor(max(min(primitives_min, uniforms.screen_resolution), 0.0) / TILE_SIZE)); + const ivec2 tile_end = ivec2(floor((max(min(primitives_max, uniforms.screen_resolution), 0.0) + (TILE_SIZE - 1)) / TILE_SIZE)); - for (int y = bin_start.y; y < bin_end.y; y++) { - for (int x = bin_start.x; x < bin_end.x; x++) { - const uvec2 bin_coord = uvec2(x, y); - const uint bin_index = bin_coord.y * uniforms.tile_stride + bin_coord.x; - const vec2 bin_min = bin_coord * TILE_SIZE; - const vec2 bin_max = min(bin_min + TILE_SIZE, uniforms.screen_resolution); + for (int y = tile_start.y; y < tile_end.y; y++) { + for (int x = tile_start.x; x < tile_end.x; x++) { + const uvec2 tile_coord = uvec2(x, y); + const uint tile_index = tile_coord.y * uniforms.tile_stride + tile_coord.x; + const vec2 tile_min = tile_coord * TILE_SIZE; + const vec2 tile_max = min(tile_min + TILE_SIZE, uniforms.screen_resolution); - const bool intersects = !(any(lessThan(bin_max, primitive_min)) || any(greaterThan(bin_min, primitive_max))); + const bool intersects = !(any(lessThan(tile_max, primitive_min)) || any(greaterThan(tile_min, primitive_max))); const uvec4 ballot = subgroupBallot(intersects); if (ballot.x == 0 && ballot.y == 0) { @@ -63,20 +63,20 @@ void main() { } if (ballot.x != 0) { - uniforms.tiles.values[bin_index * TILE_STRIDE + TILE_BITMAP_L0_OFFSET + gl_WorkGroupID.x * 32 + word_index + 0] = ballot.x; + uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_L0_OFFSET + gl_WorkGroupID.x * 32 + word_index + 0] = ballot.x; } if (ballot.y != 0) { - uniforms.tiles.values[bin_index * TILE_STRIDE + TILE_BITMAP_L0_OFFSET + gl_WorkGroupID.x * 32 + word_index + 1] = ballot.y; + uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_L0_OFFSET + gl_WorkGroupID.x * 32 + word_index + 1] = ballot.y; } if (subgroupElect()) { - uniforms.tiles.values[bin_index * TILE_STRIDE + TILE_BITMAP_L1_OFFSET + gl_WorkGroupID.x] |= + uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_L1_OFFSET + gl_WorkGroupID.x] |= (uint(ballot.x != 0) << (word_index + 0)) | (uint(ballot.y != 0) << (word_index + 1)); - atomicMin(uniforms.tiles.values[bin_index * TILE_STRIDE + TILE_BITMAP_RANGE_LO_OFFSET], gl_WorkGroupID.x); - atomicMax(uniforms.tiles.values[bin_index * TILE_STRIDE + TILE_BITMAP_RANGE_HI_OFFSET], gl_WorkGroupID.x); + atomicMin(uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_RANGE_LO_OFFSET], gl_WorkGroupID.x); + atomicMax(uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_RANGE_HI_OFFSET], gl_WorkGroupID.x); } } } -- 2.49.0