shark: Simplify primitive shaders

author Josh Simmons <josh@nega.tv>
Sun, 26 May 2024 13:34:33 +0000 (15:34 +0200)

committer Josh Simmons <josh@nega.tv>
Sun, 26 May 2024 13:34:33 +0000 (15:34 +0200)

diff --git a/title/shark-shaders/shaders/display_transform.comp.glsl b/title/shark-shaders/shaders/display_transform.comp.glsl

index 41cb75805e2a32bef5a467875c2dcc327aef8962..aea5280d141e6b7db5653a23fed1ede6ac0cb9ba 100644 (file)
--- a/title/shark-shaders/shaders/display_transform.comp.glsl
+++ b/title/shark-shaders/shaders/display_transform.comp.glsl
@@ -4,14 +4,14 @@
  
  struct PrimitiveUniforms {
      uvec2 screen_resolution;
-    uvec2 tile_resolution_coarse;
-    uvec2 tile_resolution_fine;
      uvec2 atlas_resolution;
  
      uint num_primitives;
      uint num_primitives_32;
      uint num_primitives_1024;
-    uint pad_1;
+
+    uint tile_stride_coarse;
+    uint tile_stride_fine;
  };
  
  layout(std430, push_constant) uniform uniformBuffer {
@@ -50,7 +50,7 @@ layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
  
  void main() {
      const uvec2 tile_coord = gl_WorkGroupID.xy >> 1;
-    const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x;
+    const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_fine + tile_coord.x;
  
      const vec3 stimulus = imageLoad(layer_rt, ivec2(gl_GlobalInvocationID.xy)).rgb;
      const vec3 transformed = tony_mc_mapface(stimulus);
diff --git a/title/shark-shaders/shaders/primitive_2d.h b/title/shark-shaders/shaders/primitive_2d.h

index ada3d4fcdc81a5daab689e4acda49e21a004e50d..57064a34f90de669cfc1dad50b4781f0352e43f3 100644 (file)
--- a/title/shark-shaders/shaders/primitive_2d.h
+++ b/title/shark-shaders/shaders/primitive_2d.h
@@ -12,14 +12,14 @@
  
  struct PrimitiveUniforms {
      uvec2 screen_resolution;
-    uvec2 tile_resolution_coarse;
-    uvec2 tile_resolution_fine;
      uvec2 atlas_resolution;
  
      uint num_primitives;
      uint num_primitives_32;
      uint num_primitives_1024;
-    uint pad_1;
+
+    uint tile_stride_coarse;
+    uint tile_stride_fine;
  };
  
  struct Glyph {
diff --git a/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl

index 639dee535d7931fba4b7e1fe05ae016995cd61b1..80e803b47c756034ec18c8e60a12947337dd1231 100644 (file)
--- a/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl
+++ b/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl
@@ -14,10 +14,9 @@
  layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
  
  void main() {
-    const uvec2 tile_size = uvec2(TILE_SIZE_COARSE, TILE_SIZE_COARSE);
      const uvec2 tile_coord = gl_GlobalInvocationID.yz;
-    const uvec2 tile_min = tile_coord * tile_size;
-    const uvec2 tile_max = min(tile_min + tile_size, primitive_uniforms.screen_resolution);
+    const uvec2 tile_min = tile_coord * TILE_SIZE_COARSE;
+    const uvec2 tile_max = min(tile_min + TILE_SIZE_COARSE, primitive_uniforms.screen_resolution);
  
      const uint primitive_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
  
@@ -28,7 +27,7 @@ void main() {
  
      uvec4 ballot_result = subgroupBallot(intersects);
      if (subgroupElect()) { // managed democracy wins again
-        const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord.x;
+        const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_coarse + tile_coord.x;
          const uint tile_offset = tile_index * TILE_STRIDE_COARSE;
          coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
          coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
diff --git a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl

index 51f642e04e26aa5ccfec7e06f9ab7123bf0b32a5..9efb59b4f73f436b766f10aef49a86158fcb06b5 100644 (file)
--- a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl
+++ b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl
@@ -14,18 +14,17 @@
  layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
  
  void main() {
-    const uvec2 tile_size = uvec2(TILE_SIZE_FINE, TILE_SIZE_FINE);
      const uvec2 tile_coord = gl_GlobalInvocationID.yz;
-    const uvec2 tile_min = tile_coord * tile_size;
-    const uvec2 tile_max = min(tile_min + tile_size, primitive_uniforms.screen_resolution);
-    const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x;
+    const uvec2 tile_min = tile_coord * TILE_SIZE_FINE;
+    const uvec2 tile_max = min(tile_min + TILE_SIZE_FINE, primitive_uniforms.screen_resolution);
+    const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_fine + tile_coord.x;
  
      const uint index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
  
      uint bitmap_l0 = 0;
      if (index < primitive_uniforms.num_primitives_32) {
          const uvec2 tile_coord_coarse = tile_coord >> TILE_SIZE_SHIFT;
-        const uint tile_index_coarse = tile_coord_coarse.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord_coarse.x;
+        const uint tile_index_coarse = tile_coord_coarse.y * primitive_uniforms.tile_stride_coarse + tile_coord_coarse.x;
          const uint tile_base_coarse = tile_index_coarse * TILE_STRIDE_COARSE;
          const uint tile_bitmap_base_coarse = tile_base_coarse + TILE_BITMAP_OFFSET_COARSE;
  
diff --git a/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl

index c3103544afccdcc6043665e0ed011b4c9df3c0fb..3d486b6c735ca0de69cb79ec0608a55265c40a88 100644 (file)
--- a/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl
+++ b/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl
@@ -10,7 +10,5 @@
  layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
  
  void main() {
-    if (gl_GlobalInvocationID.x < (primitive_uniforms.tile_resolution_fine.x * primitive_uniforms.tile_resolution_fine.y)) {
-        fine_count_wo[gl_GlobalInvocationID.x] = 0;
-    }
+    fine_count_wo[gl_GlobalInvocationID.x] = 0;
  }
diff --git a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl

index 04f83f3ce7179786951eeaefcdffd4beeda7e217..0ee9f470a16a42a0cbe96700433f68287cbe50c6 100644 (file)
--- a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl
+++ b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl
@@ -11,7 +11,7 @@ layout (local_size_x = TILE_SIZE_FINE, local_size_y = TILE_SIZE_FINE, local_size
  
  void main() {
      const uvec2 tile_coord = gl_WorkGroupID.xy;
-    const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x;
+    const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_fine + tile_coord.x;
      const uint tile_base_fine = tile_index * TILE_STRIDE_FINE;
      const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE;
      const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE;
diff --git a/title/shark/src/main.rs b/title/shark/src/main.rs

index cb753fe7dbaf99a47c5ce6e5b279b88f5aaa979f..b90401f8e79da9f13c0ad3504e24226e8d22a431 100644 (file)
--- a/title/shark/src/main.rs
+++ b/title/shark/src/main.rs
@@ -1035,9 +1035,10 @@ impl<'gpu> DrawState<'gpu> {
                          * TILE_STRIDE_FINE
                          * std::mem::size_of::<u32>() as u32;
  
-                    let fine_color_buffer_size = tile_resolution_fine_x
-                        * tile_resolution_fine_y
-                        * std::mem::size_of::<u32>() as u32;
+                    // align to the workgroup size to simplify shader
+                    let fine_color_buffer_size =
+                        ((tile_resolution_fine_x * tile_resolution_fine_y + 63) & !63)
+                            * std::mem::size_of::<u32>() as u32;
  
                      self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
                          memory_location: MemoryLocation::Device,
@@ -1329,31 +1330,7 @@ impl<'gpu> DrawState<'gpu> {
  
              // Render UI
              {
-                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.coarse_bin_pipeline);
-
-                let num_primitives = ui_state.primitive_instances.len() as u32;
-                let num_primitives_32 = (num_primitives + 31) / 32;
-                let num_primitives_1024 = (num_primitives_32 + 31) / 32;
-
-                gpu.cmd_push_constants(
-                    cmd_encoder,
-                    ShaderStageFlags::COMPUTE,
-                    0,
-                    &PrimitiveUniforms {
-                        screen_resolution_x: self.width,
-                        screen_resolution_y: self.height,
-                        tile_resolution_coarse_x: self.tile_resolution_coarse_x,
-                        tile_resolution_coarse_y: self.tile_resolution_coarse_y,
-                        tile_resolution_fine_x: self.tile_resolution_fine_x,
-                        tile_resolution_fine_y: self.tile_resolution_fine_y,
-                        atlas_resolution_x: atlas_width,
-                        atlas_resolution_y: atlas_height,
-                        num_primitives,
-                        num_primitives_32,
-                        num_primitives_1024,
-                        _pad0: 0,
-                    },
-                );
+                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_clear_pipeline);
  
                  let glyph_buffer = gpu.request_transient_buffer_with_data(
                      frame,
@@ -1440,15 +1417,6 @@ impl<'gpu> DrawState<'gpu> {
                      ],
                  );
  
-                gpu.cmd_dispatch(
-                    cmd_encoder,
-                    (num_primitives + 63) / 64,
-                    self.tile_resolution_coarse_x,
-                    self.tile_resolution_coarse_y,
-                );
-
-                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_clear_pipeline);
-
                  gpu.cmd_dispatch(
                      cmd_encoder,
                      (self.tile_resolution_coarse_x * self.tile_resolution_coarse_y + 63) / 64,
@@ -1456,44 +1424,86 @@ impl<'gpu> DrawState<'gpu> {
                      1,
                  );
  
-                gpu.cmd_barrier(
-                    cmd_encoder,
-                    Some(&GlobalBarrier {
-                        prev_access: &[Access::ShaderWrite],
-                        next_access: &[Access::ShaderOtherRead],
-                    }),
-                    &[],
-                );
-
-                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_bin_pipeline);
-
-                gpu.cmd_dispatch(
-                    cmd_encoder,
-                    (num_primitives_32 + 63) / 64,
-                    self.tile_resolution_fine_x,
-                    self.tile_resolution_fine_y,
-                );
-
-                gpu.cmd_barrier(
-                    cmd_encoder,
-                    Some(&GlobalBarrier {
-                        prev_access: &[Access::ShaderWrite],
-                        next_access: &[Access::ShaderOtherRead],
-                    }),
-                    &[],
-                );
-
-                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.rasterize_pipeline);
-
-                gpu.cmd_dispatch(
-                    cmd_encoder,
-                    self.tile_resolution_fine_x,
-                    self.tile_resolution_fine_y,
-                    1,
-                );
+                let num_primitives = ui_state.primitive_instances.len() as u32;
+                let num_primitives_32 = (num_primitives + 31) / 32;
+                let num_primitives_1024 = (num_primitives_32 + 31) / 32;
  
-                // Cleanup
                  ui_state.primitive_instances.clear();
+
+                for _pass_y in 0..1 {
+                    for _pass_x in 0..1 {
+                        gpu.cmd_set_pipeline(
+                            cmd_encoder,
+                            self.primitive_2d_pipeline.coarse_bin_pipeline,
+                        );
+
+                        gpu.cmd_push_constants(
+                            cmd_encoder,
+                            ShaderStageFlags::COMPUTE,
+                            0,
+                            &PrimitiveUniforms {
+                                screen_resolution_x: self.width,
+                                screen_resolution_y: self.height,
+                                atlas_resolution_x: atlas_width,
+                                atlas_resolution_y: atlas_height,
+                                num_primitives,
+                                num_primitives_32,
+                                num_primitives_1024,
+                                tile_stride_coarse: self.tile_resolution_coarse_x,
+                                tile_stride_fine: self.tile_resolution_fine_x,
+                            },
+                        );
+
+                        gpu.cmd_dispatch(
+                            cmd_encoder,
+                            (num_primitives + 63) / 64,
+                            self.tile_resolution_coarse_x,
+                            self.tile_resolution_coarse_y,
+                        );
+
+                        gpu.cmd_barrier(
+                            cmd_encoder,
+                            Some(&GlobalBarrier {
+                                prev_access: &[Access::ShaderWrite],
+                                next_access: &[Access::ShaderOtherRead],
+                            }),
+                            &[],
+                        );
+
+                        gpu.cmd_set_pipeline(
+                            cmd_encoder,
+                            self.primitive_2d_pipeline.fine_bin_pipeline,
+                        );
+
+                        gpu.cmd_dispatch(
+                            cmd_encoder,
+                            (num_primitives_32 + 63) / 64,
+                            self.tile_resolution_fine_x,
+                            self.tile_resolution_fine_y,
+                        );
+
+                        gpu.cmd_barrier(
+                            cmd_encoder,
+                            Some(&GlobalBarrier {
+                                prev_access: &[Access::ShaderWrite],
+                                next_access: &[Access::ShaderOtherRead],
+                            }),
+                            &[],
+                        );
+
+                        gpu.cmd_set_pipeline(
+                            cmd_encoder,
+                            self.primitive_2d_pipeline.rasterize_pipeline,
+                        );
+
+                        gpu.cmd_dispatch(
+                            cmd_encoder,
+                            self.tile_resolution_fine_x,
+                            self.tile_resolution_fine_y,
+                            1,
+                        );
+                    }
+                }
              }
  
              // Display transform and composite
diff --git a/title/shark/src/pipelines/primitive_2d.rs b/title/shark/src/pipelines/primitive_2d.rs

index ca37a7b64b75b7999c4029a4cb6c967ec7c0729a..c65e79152ff24ee8a1570338da28fb37c2801144 100644 (file)
--- a/title/shark/src/pipelines/primitive_2d.rs
+++ b/title/shark/src/pipelines/primitive_2d.rs
@@ -19,10 +19,6 @@ pub const TILE_STRIDE_FINE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1;
  pub struct PrimitiveUniforms {
      pub screen_resolution_x: u32,
      pub screen_resolution_y: u32,
-    pub tile_resolution_coarse_x: u32,
-    pub tile_resolution_coarse_y: u32,
-    pub tile_resolution_fine_x: u32,
-    pub tile_resolution_fine_y: u32,
      pub atlas_resolution_x: u32,
      pub atlas_resolution_y: u32,
  
@@ -30,7 +26,8 @@ pub struct PrimitiveUniforms {
      pub num_primitives_32: u32,
      pub num_primitives_1024: u32,
  
-    pub _pad0: u32,
+    pub tile_stride_coarse: u32,
+    pub tile_stride_fine: u32,
  }
  
  #[allow(unused)]
author	Josh Simmons <josh@nega.tv>
	Sun, 26 May 2024 13:34:33 +0000 (15:34 +0200)
committer	Josh Simmons <josh@nega.tv>
	Sun, 26 May 2024 13:34:33 +0000 (15:34 +0200)
title/shark-shaders/shaders/display_transform.comp.glsl		patch | blob | blame | history
title/shark-shaders/shaders/primitive_2d.h		patch | blob | blame | history
title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl		patch | blob | blame | history
title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl		patch | blob | blame | history
title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl		patch | blob | blame | history
title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl		patch | blob | blame | history
title/shark/src/main.rs		patch | blob | blame | history
title/shark/src/pipelines/primitive_2d.rs		patch | blob | blame | history