From: Josh Simmons Date: Sun, 26 May 2024 13:34:33 +0000 (+0200) Subject: shark: Simplify primitive shaders X-Git-Url: https://git.nega.tv//gitweb.cgi?a=commitdiff_plain;h=4b30d76725ae614f5677f9e5c7275ecbc50a3558;p=josh%2Fnarcissus shark: Simplify primitive shaders --- diff --git a/title/shark-shaders/shaders/display_transform.comp.glsl b/title/shark-shaders/shaders/display_transform.comp.glsl index 41cb758..aea5280 100644 --- a/title/shark-shaders/shaders/display_transform.comp.glsl +++ b/title/shark-shaders/shaders/display_transform.comp.glsl @@ -4,14 +4,14 @@ struct PrimitiveUniforms { uvec2 screen_resolution; - uvec2 tile_resolution_coarse; - uvec2 tile_resolution_fine; uvec2 atlas_resolution; uint num_primitives; uint num_primitives_32; uint num_primitives_1024; - uint pad_1; + + uint tile_stride_coarse; + uint tile_stride_fine; }; layout(std430, push_constant) uniform uniformBuffer { @@ -50,7 +50,7 @@ layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; void main() { const uvec2 tile_coord = gl_WorkGroupID.xy >> 1; - const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x; + const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_fine + tile_coord.x; const vec3 stimulus = imageLoad(layer_rt, ivec2(gl_GlobalInvocationID.xy)).rgb; const vec3 transformed = tony_mc_mapface(stimulus); diff --git a/title/shark-shaders/shaders/primitive_2d.h b/title/shark-shaders/shaders/primitive_2d.h index ada3d4f..57064a3 100644 --- a/title/shark-shaders/shaders/primitive_2d.h +++ b/title/shark-shaders/shaders/primitive_2d.h @@ -12,14 +12,14 @@ struct PrimitiveUniforms { uvec2 screen_resolution; - uvec2 tile_resolution_coarse; - uvec2 tile_resolution_fine; uvec2 atlas_resolution; uint num_primitives; uint num_primitives_32; uint num_primitives_1024; - uint pad_1; + + uint tile_stride_coarse; + uint tile_stride_fine; }; struct Glyph { diff --git a/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl index 639dee5..80e803b 100644 --- a/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl @@ -14,10 +14,9 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; void main() { - const uvec2 tile_size = uvec2(TILE_SIZE_COARSE, TILE_SIZE_COARSE); const uvec2 tile_coord = gl_GlobalInvocationID.yz; - const uvec2 tile_min = tile_coord * tile_size; - const uvec2 tile_max = min(tile_min + tile_size, primitive_uniforms.screen_resolution); + const uvec2 tile_min = tile_coord * TILE_SIZE_COARSE; + const uvec2 tile_max = min(tile_min + TILE_SIZE_COARSE, primitive_uniforms.screen_resolution); const uint primitive_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; @@ -28,7 +27,7 @@ void main() { uvec4 ballot_result = subgroupBallot(intersects); if (subgroupElect()) { // managed democracy wins again - const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord.x; + const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_coarse + tile_coord.x; const uint tile_offset = tile_index * TILE_STRIDE_COARSE; coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x; coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y; diff --git a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl index 51f642e..9efb59b 100644 --- a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl @@ -14,18 +14,17 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; void main() { - const uvec2 tile_size = uvec2(TILE_SIZE_FINE, TILE_SIZE_FINE); const uvec2 tile_coord = gl_GlobalInvocationID.yz; - const uvec2 tile_min = tile_coord * tile_size; - const uvec2 tile_max = min(tile_min + tile_size, primitive_uniforms.screen_resolution); - const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x; + const uvec2 tile_min = tile_coord * TILE_SIZE_FINE; + const uvec2 tile_max = min(tile_min + TILE_SIZE_FINE, primitive_uniforms.screen_resolution); + const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_fine + tile_coord.x; const uint index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; uint bitmap_l0 = 0; if (index < primitive_uniforms.num_primitives_32) { const uvec2 tile_coord_coarse = tile_coord >> TILE_SIZE_SHIFT; - const uint tile_index_coarse = tile_coord_coarse.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord_coarse.x; + const uint tile_index_coarse = tile_coord_coarse.y * primitive_uniforms.tile_stride_coarse + tile_coord_coarse.x; const uint tile_base_coarse = tile_index_coarse * TILE_STRIDE_COARSE; const uint tile_bitmap_base_coarse = tile_base_coarse + TILE_BITMAP_OFFSET_COARSE; diff --git a/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl index c310354..3d486b6 100644 --- a/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl @@ -10,7 +10,5 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; void main() { - if (gl_GlobalInvocationID.x < (primitive_uniforms.tile_resolution_fine.x * primitive_uniforms.tile_resolution_fine.y)) { - fine_count_wo[gl_GlobalInvocationID.x] = 0; - } + fine_count_wo[gl_GlobalInvocationID.x] = 0; } diff --git a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl index 04f83f3..0ee9f47 100644 --- a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl @@ -11,7 +11,7 @@ layout (local_size_x = TILE_SIZE_FINE, local_size_y = TILE_SIZE_FINE, local_size void main() { const uvec2 tile_coord = gl_WorkGroupID.xy; - const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x; + const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_fine + tile_coord.x; const uint tile_base_fine = tile_index * TILE_STRIDE_FINE; const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE; const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE; diff --git a/title/shark/src/main.rs b/title/shark/src/main.rs index cb753fe..b90401f 100644 --- a/title/shark/src/main.rs +++ b/title/shark/src/main.rs @@ -1035,9 +1035,10 @@ impl<'gpu> DrawState<'gpu> { * TILE_STRIDE_FINE * std::mem::size_of::() as u32; - let fine_color_buffer_size = tile_resolution_fine_x - * tile_resolution_fine_y - * std::mem::size_of::() as u32; + // align to the workgroup size to simplify shader + let fine_color_buffer_size = + ((tile_resolution_fine_x * tile_resolution_fine_y + 63) & !63) + * std::mem::size_of::() as u32; self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc { memory_location: MemoryLocation::Device, @@ -1329,31 +1330,7 @@ impl<'gpu> DrawState<'gpu> { // Render UI { - gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.coarse_bin_pipeline); - - let num_primitives = ui_state.primitive_instances.len() as u32; - let num_primitives_32 = (num_primitives + 31) / 32; - let num_primitives_1024 = (num_primitives_32 + 31) / 32; - - gpu.cmd_push_constants( - cmd_encoder, - ShaderStageFlags::COMPUTE, - 0, - &PrimitiveUniforms { - screen_resolution_x: self.width, - screen_resolution_y: self.height, - tile_resolution_coarse_x: self.tile_resolution_coarse_x, - tile_resolution_coarse_y: self.tile_resolution_coarse_y, - tile_resolution_fine_x: self.tile_resolution_fine_x, - tile_resolution_fine_y: self.tile_resolution_fine_y, - atlas_resolution_x: atlas_width, - atlas_resolution_y: atlas_height, - num_primitives, - num_primitives_32, - num_primitives_1024, - _pad0: 0, - }, - ); + gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_clear_pipeline); let glyph_buffer = gpu.request_transient_buffer_with_data( frame, @@ -1440,15 +1417,6 @@ impl<'gpu> DrawState<'gpu> { ], ); - gpu.cmd_dispatch( - cmd_encoder, - (num_primitives + 63) / 64, - self.tile_resolution_coarse_x, - self.tile_resolution_coarse_y, - ); - - gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_clear_pipeline); - gpu.cmd_dispatch( cmd_encoder, (self.tile_resolution_coarse_x * self.tile_resolution_coarse_y + 63) / 64, @@ -1456,44 +1424,86 @@ impl<'gpu> DrawState<'gpu> { 1, ); - gpu.cmd_barrier( - cmd_encoder, - Some(&GlobalBarrier { - prev_access: &[Access::ShaderWrite], - next_access: &[Access::ShaderOtherRead], - }), - &[], - ); - - gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_bin_pipeline); - - gpu.cmd_dispatch( - cmd_encoder, - (num_primitives_32 + 63) / 64, - self.tile_resolution_fine_x, - self.tile_resolution_fine_y, - ); - - gpu.cmd_barrier( - cmd_encoder, - Some(&GlobalBarrier { - prev_access: &[Access::ShaderWrite], - next_access: &[Access::ShaderOtherRead], - }), - &[], - ); - - gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.rasterize_pipeline); - - gpu.cmd_dispatch( - cmd_encoder, - self.tile_resolution_fine_x, - self.tile_resolution_fine_y, - 1, - ); + let num_primitives = ui_state.primitive_instances.len() as u32; + let num_primitives_32 = (num_primitives + 31) / 32; + let num_primitives_1024 = (num_primitives_32 + 31) / 32; - // Cleanup ui_state.primitive_instances.clear(); + + for _pass_y in 0..1 { + for _pass_x in 0..1 { + gpu.cmd_set_pipeline( + cmd_encoder, + self.primitive_2d_pipeline.coarse_bin_pipeline, + ); + + gpu.cmd_push_constants( + cmd_encoder, + ShaderStageFlags::COMPUTE, + 0, + &PrimitiveUniforms { + screen_resolution_x: self.width, + screen_resolution_y: self.height, + atlas_resolution_x: atlas_width, + atlas_resolution_y: atlas_height, + num_primitives, + num_primitives_32, + num_primitives_1024, + tile_stride_coarse: self.tile_resolution_coarse_x, + tile_stride_fine: self.tile_resolution_fine_x, + }, + ); + + gpu.cmd_dispatch( + cmd_encoder, + (num_primitives + 63) / 64, + self.tile_resolution_coarse_x, + self.tile_resolution_coarse_y, + ); + + gpu.cmd_barrier( + cmd_encoder, + Some(&GlobalBarrier { + prev_access: &[Access::ShaderWrite], + next_access: &[Access::ShaderOtherRead], + }), + &[], + ); + + gpu.cmd_set_pipeline( + cmd_encoder, + self.primitive_2d_pipeline.fine_bin_pipeline, + ); + + gpu.cmd_dispatch( + cmd_encoder, + (num_primitives_32 + 63) / 64, + self.tile_resolution_fine_x, + self.tile_resolution_fine_y, + ); + + gpu.cmd_barrier( + cmd_encoder, + Some(&GlobalBarrier { + prev_access: &[Access::ShaderWrite], + next_access: &[Access::ShaderOtherRead], + }), + &[], + ); + + gpu.cmd_set_pipeline( + cmd_encoder, + self.primitive_2d_pipeline.rasterize_pipeline, + ); + + gpu.cmd_dispatch( + cmd_encoder, + self.tile_resolution_fine_x, + self.tile_resolution_fine_y, + 1, + ); + } + } } // Display transform and composite diff --git a/title/shark/src/pipelines/primitive_2d.rs b/title/shark/src/pipelines/primitive_2d.rs index ca37a7b..c65e791 100644 --- a/title/shark/src/pipelines/primitive_2d.rs +++ b/title/shark/src/pipelines/primitive_2d.rs @@ -19,10 +19,6 @@ pub const TILE_STRIDE_FINE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1; pub struct PrimitiveUniforms { pub screen_resolution_x: u32, pub screen_resolution_y: u32, - pub tile_resolution_coarse_x: u32, - pub tile_resolution_coarse_y: u32, - pub tile_resolution_fine_x: u32, - pub tile_resolution_fine_y: u32, pub atlas_resolution_x: u32, pub atlas_resolution_y: u32, @@ -30,7 +26,8 @@ pub struct PrimitiveUniforms { pub num_primitives_32: u32, pub num_primitives_1024: u32, - pub _pad0: u32, + pub tile_stride_coarse: u32, + pub tile_stride_fine: u32, } #[allow(unused)]