shark: Try single pass for coarse culling

author Josh Simmons <josh@nega.tv>
Mon, 27 May 2024 06:43:34 +0000 (08:43 +0200)
committer Josh Simmons <josh@nega.tv>
Mon, 27 May 2024 06:43:34 +0000 (08:43 +0200)
diff --git a/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl

index 6be407cec5cc84d74ecb13d861d76e8e61763891..00fa31cfaff190e8bec395fced318d1a8d9653cb 100644 (file)
--- a/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl
+++ b/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl
@@ -29,7 +29,7 @@ void main() {
  
      uvec4 ballot_result = subgroupBallot(intersects);
      if (subgroupElect()) { // managed democracy wins again
-        const uint tile_index = tile_coord.y * TILE_DISPATCH_X + tile_coord.x;
+        const uint tile_index = tile_coord.y * (primitive_uniforms.tile_stride_fine / TILE_SIZE_MUL)  + tile_coord.x;
          const uint tile_offset = tile_index * TILE_STRIDE_COARSE;
          coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
          coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
diff --git a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl

index b4c8c33b6a2968a3f65789e62ccdfabb0da428d1..06c9a7bb73b57db3c203b0102fd175ff4375e9f2 100644 (file)
--- a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl
+++ b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl
@@ -27,8 +27,8 @@ void main() {
  
      uint bitmap_l0 = 0;
      if (index < primitive_uniforms.num_primitives_32) {
-        const uvec2 tile_coord_coarse = tile_coord / TILE_SIZE_MUL;
-        const uint tile_index_coarse = tile_coord_coarse.y * TILE_DISPATCH_X + tile_coord_coarse.x;
+        const uvec2 tile_coord_coarse = (tile_coord / TILE_SIZE_MUL) + primitive_uniforms.tile_offset_coarse;
+        const uint tile_index_coarse = tile_coord_coarse.y * (primitive_uniforms.tile_stride_fine / TILE_SIZE_MUL) + tile_coord_coarse.x;
          const uint tile_base_coarse = tile_index_coarse * TILE_STRIDE_COARSE;
          const uint tile_bitmap_base_coarse = tile_base_coarse + TILE_BITMAP_OFFSET_COARSE;
  
diff --git a/title/shark/src/main.rs b/title/shark/src/main.rs

index c5b378be98475a6d84bba52c8720cb3b6f5b03aa..e50ab41bd209f64cbf4323b9b3270d51adec477a 100644 (file)
--- a/title/shark/src/main.rs
+++ b/title/shark/src/main.rs
@@ -885,23 +885,11 @@ impl<'gpu> DrawState<'gpu> {
          let models = Models::load(gpu);
          let images = Images::load(gpu, thread_token);
  
-        let coarse_bitmap_buffer_size = TILE_DISPATCH_COARSE_X
-            * TILE_DISPATCH_COARSE_Y
-            * TILE_STRIDE_COARSE
-            * std::mem::size_of::<u32>() as u32;
-
          let fine_bitmap_buffer_size = TILE_DISPATCH_FINE_X
              * TILE_DISPATCH_FINE_Y
              * TILE_STRIDE_FINE
              * std::mem::size_of::<u32>() as u32;
  
-        let coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
-            memory_location: MemoryLocation::Device,
-            host_mapped: false,
-            usage: BufferUsageFlags::STORAGE,
-            size: coarse_bitmap_buffer_size.widen(),
-        });
-
          let fine_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
              memory_location: MemoryLocation::Device,
              host_mapped: false,
@@ -923,7 +911,7 @@ impl<'gpu> DrawState<'gpu> {
              depth_image: default(),
              rt_image: default(),
              ui_image: default(),
-            coarse_tile_bitmap_buffer,
+            coarse_tile_bitmap_buffer: default(),
              fine_tile_bitmap_buffer,
              fine_tile_color_buffer: default(),
              glyph_atlas_image: default(),
@@ -1047,6 +1035,19 @@ impl<'gpu> DrawState<'gpu> {
                      || tile_resolution_fine_y != self.tile_resolution_fine_y
                  {
                      gpu.destroy_buffer(frame, self.fine_tile_color_buffer);
+                    gpu.destroy_buffer(frame, self.coarse_tile_bitmap_buffer);
+
+                    let coarse_bitmap_buffer_size = tile_resolution_coarse_x
+                        * tile_resolution_coarse_y
+                        * TILE_STRIDE_COARSE
+                        * std::mem::size_of::<u32>() as u32;
+
+                    self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
+                        memory_location: MemoryLocation::Device,
+                        host_mapped: false,
+                        usage: BufferUsageFlags::STORAGE,
+                        size: coarse_bitmap_buffer_size.widen(),
+                    });
  
                      // align to the workgroup size to simplify shader
                      let fine_color_buffer_size =
@@ -1418,19 +1419,48 @@ impl<'gpu> DrawState<'gpu> {
  
                  ui_state.primitive_instances.clear();
  
+                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.coarse_bin_pipeline);
+
+                gpu.cmd_push_constants(
+                    cmd_encoder,
+                    ShaderStageFlags::COMPUTE,
+                    0,
+                    &PrimitiveUniforms {
+                        screen_resolution_x: self.width,
+                        screen_resolution_y: self.height,
+                        atlas_resolution_x: atlas_width,
+                        atlas_resolution_y: atlas_height,
+                        num_primitives,
+                        num_primitives_32,
+                        num_primitives_1024,
+                        tile_stride_fine: self.tile_resolution_fine_x,
+                        tile_offset_x: 0,
+                        tile_offset_y: 0,
+                    },
+                );
+
+                gpu.cmd_dispatch(
+                    cmd_encoder,
+                    (num_primitives + 63) / 64,
+                    self.tile_resolution_coarse_x,
+                    self.tile_resolution_coarse_y,
+                );
+
+                gpu.cmd_barrier(
+                    cmd_encoder,
+                    Some(&GlobalBarrier {
+                        prev_access: &[Access::ShaderWrite],
+                        next_access: &[Access::ShaderOtherRead],
+                    }),
+                    &[],
+                );
+
                  for tile_offset_y in
                      (0..self.tile_resolution_coarse_y).step_by(TILE_DISPATCH_COARSE_Y as usize)
                  {
                      for tile_offset_x in
                          (0..self.tile_resolution_coarse_x).step_by(TILE_DISPATCH_COARSE_X as usize)
                      {
-                        let coarse_dispatch_x = (tile_offset_x + TILE_DISPATCH_COARSE_X)
-                            .min(self.tile_resolution_coarse_x)
-                            - tile_offset_x;
-                        let coarse_dispatch_y = (tile_offset_y + TILE_DISPATCH_COARSE_Y)
-                            .min(self.tile_resolution_coarse_y)
-                            - tile_offset_y;
-
                          let tile_offset_fine_x =
                              tile_offset_x * (TILE_SIZE_COARSE / TILE_SIZE_FINE);
  
@@ -1445,11 +1475,6 @@ impl<'gpu> DrawState<'gpu> {
                              .min(self.tile_resolution_fine_y)
                              - tile_offset_fine_y;
  
-                        gpu.cmd_set_pipeline(
-                            cmd_encoder,
-                            self.primitive_2d_pipeline.coarse_bin_pipeline,
-                        );
-
                          gpu.cmd_push_constants(
                              cmd_encoder,
                              ShaderStageFlags::COMPUTE,
@@ -1468,22 +1493,6 @@ impl<'gpu> DrawState<'gpu> {
                              },
                          );
  
-                        gpu.cmd_dispatch(
-                            cmd_encoder,
-                            (num_primitives + 63) / 64,
-                            coarse_dispatch_x,
-                            coarse_dispatch_y,
-                        );
-
-                        gpu.cmd_barrier(
-                            cmd_encoder,
-                            Some(&GlobalBarrier {
-                                prev_access: &[Access::ShaderWrite],
-                                next_access: &[Access::ShaderOtherRead],
-                            }),
-                            &[],
-                        );
-
                          gpu.cmd_set_pipeline(
                              cmd_encoder,
                              self.primitive_2d_pipeline.fine_bin_pipeline,
author	Josh Simmons <josh@nega.tv>
	Mon, 27 May 2024 06:43:34 +0000 (08:43 +0200)
committer	Josh Simmons <josh@nega.tv>
	Mon, 27 May 2024 06:43:34 +0000 (08:43 +0200)
title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl		patch | blob | blame | history
title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl		patch | blob | blame | history
title/shark/src/main.rs		patch | blob | blame | history