From: Josh Simmons Date: Fri, 24 May 2024 19:31:46 +0000 (+0200) Subject: shark: Try to unify shaders so they're more readable X-Git-Url: https://git.nega.tv//gitweb.cgi?a=commitdiff_plain;h=14bb3facdda08ddef7f5c4edea24c9a42a958afd;p=josh%2Fnarcissus shark: Try to unify shaders so they're more readable --- diff --git a/title/shark-shaders/shaders/primitive_2d.h b/title/shark-shaders/shaders/primitive_2d.h index c1d045b..ada3d4f 100644 --- a/title/shark-shaders/shaders/primitive_2d.h +++ b/title/shark-shaders/shaders/primitive_2d.h @@ -2,10 +2,13 @@ #define TILE_SIZE_COARSE 128 #define TILE_SIZE_FINE 16 #define TILE_SIZE_SHIFT 3 -#define TILE_BITMAP_WORDS_L1 (MAX_PRIMS / 32 / 32) -#define TILE_BITMAP_WORDS_L0 (MAX_PRIMS / 32) -#define TILE_STRIDE_COARSE TILE_BITMAP_WORDS_L0 -#define TILE_STRIDE_FINE (TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1) +#define TILE_BITMAP_L1_WORDS (MAX_PRIMS / 32 / 32) +#define TILE_BITMAP_L0_WORDS (MAX_PRIMS / 32) +#define TILE_STRIDE_COARSE TILE_BITMAP_L0_WORDS +#define TILE_STRIDE_FINE (TILE_BITMAP_L0_WORDS + TILE_BITMAP_L1_WORDS) +#define TILE_BITMAP_OFFSET_COARSE 0 +#define TILE_BITMAP_L1_OFFSET_FINE 0 +#define TILE_BITMAP_L0_OFFSET_FINE TILE_BITMAP_L1_WORDS struct PrimitiveUniforms { uvec2 screen_resolution; diff --git a/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl index 637a730..639dee5 100644 --- a/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl @@ -29,8 +29,8 @@ void main() { uvec4 ballot_result = subgroupBallot(intersects); if (subgroupElect()) { // managed democracy wins again const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord.x; - const uint bitmap_offset = tile_index * TILE_STRIDE_COARSE; - coarse_bitmap_wo[bitmap_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x; - coarse_bitmap_wo[bitmap_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y; + const uint tile_offset = tile_index * TILE_STRIDE_COARSE; + coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x; + coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y; } } diff --git a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl index d421615..f5e6d3c 100644 --- a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl @@ -20,34 +20,38 @@ void main() { const uvec2 tile_max = min(tile_min + tile_size, primitive_uniforms.screen_resolution); const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x; - const uint bitmap_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; + const uint index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; uint bitmap_l0 = 0; - if (bitmap_index < primitive_uniforms.num_primitives_32) { + if (index < primitive_uniforms.num_primitives_32) { const uvec2 tile_coord_coarse = tile_coord >> TILE_SIZE_SHIFT; const uint tile_index_coarse = tile_coord_coarse.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord_coarse.x; - const uint bitmap_offset_coarse = tile_index_coarse * TILE_STRIDE_COARSE + bitmap_index; + const uint tile_base_coarse = tile_index_coarse * TILE_STRIDE_COARSE; + const uint tile_bitmap_base_coarse = tile_base_coarse + TILE_BITMAP_OFFSET_COARSE; - uint bitmap_coarse = coarse_bitmap_ro[bitmap_offset_coarse]; + uint bitmap_coarse = coarse_bitmap_ro[tile_bitmap_base_coarse + index]; while (bitmap_coarse != 0) { const uint i = findLSB(bitmap_coarse); - const uint primitive_index = bitmap_index * 32 + i; bitmap_coarse ^= bitmap_coarse & -bitmap_coarse; + const uint primitive_index = index * 32 + i; if (test_glyph(primitive_index, tile_min, tile_max)) { bitmap_l0 |= 1 << i; } } } - const uint fine_bitmap_l0_offset = tile_index * TILE_STRIDE_FINE + TILE_BITMAP_WORDS_L1 + bitmap_index; - fine_bitmap_wo[fine_bitmap_l0_offset] = bitmap_l0; + const uint tile_base_fine = tile_index * TILE_STRIDE_FINE; - const bool bit_l1 = bitmap_l0 != 0; - uvec4 ballot_result = subgroupBallot(bit_l1); + // Write the L0 per-primitive bitmap. + const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE; + fine_bitmap_wo[tile_bitmap_l0_base_fine + index] = bitmap_l0; + + // Write the L1 per-bitmap-word bitmap. + uvec4 ballot_result = subgroupBallot(bitmap_l0 != 0); if (subgroupElect()) { - const uint fine_bitmap_l1_offset = tile_index * TILE_STRIDE_FINE; - fine_bitmap_wo[fine_bitmap_l1_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x; - fine_bitmap_wo[fine_bitmap_l1_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y; + const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE; + fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 0] = ballot_result.x; + fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 1] = ballot_result.y; } } diff --git a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl index 16e94ac..24dfffb 100644 --- a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl @@ -10,22 +10,35 @@ layout (local_size_x = TILE_SIZE_FINE, local_size_y = TILE_SIZE_FINE, local_size_z = 1) in; void main() { - vec4 accum = vec4(0.0); - const uvec2 tile_coord = gl_WorkGroupID.xy; const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x; - const uint bitmap_offset = tile_index * TILE_STRIDE_FINE; + const uint tile_base_fine = tile_index * TILE_STRIDE_FINE; + const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE; + const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE; + + vec4 accum = vec4(0.0); + // For each tile, iterate over all words in the L1 bitmap. + // + // TODO: Count the non-zero words in the tile with atomics, so we can early out on empty tiles? for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) { - uint bitmap_l1 = fine_bitmap_ro[bitmap_offset + index_l1]; + // For each word, iterate all set bits. + uint bitmap_l1 = fine_bitmap_ro[tile_bitmap_l1_base_fine + index_l1]; while (bitmap_l1 != 0) { - const uint index_l0 = index_l1 * 32 + findLSB(bitmap_l1); - uint bitmap_l0 = fine_bitmap_ro[bitmap_offset + TILE_BITMAP_WORDS_L1 + index_l0]; + const uint i = findLSB(bitmap_l1); bitmap_l1 ^= bitmap_l1 & -bitmap_l1; + + // For each set bit in the L1 bitmap, iterate the set bits in the + // corresponding L0 bitmap. + const uint index_l0 = index_l1 * 32 + i; + uint bitmap_l0 = fine_bitmap_ro[tile_bitmap_l0_base_fine + index_l0]; while (bitmap_l0 != 0) { - const uint primitive_index = index_l0 * 32 + findLSB(bitmap_l0); + const uint j = findLSB(bitmap_l0); bitmap_l0 ^= bitmap_l0 & -bitmap_l0; + // Set bits in the L0 bitmap indicate binned primitives for this tile. + const uint primitive_index = index_l0 * 32 + j; + const GlyphInstance gi = glyph_instances[primitive_index]; const Glyph gl = glyphs[gi.index]; const vec2 glyph_min = gi.position + gl.offset_min;