shark: Track dirty tiles

author Josh Simmons <josh@nega.tv>

Sun, 26 May 2024 09:55:14 +0000 (11:55 +0200)

committer Josh Simmons <josh@nega.tv>

Sun, 26 May 2024 09:55:14 +0000 (11:55 +0200)
author Josh Simmons <josh@nega.tv>
Sun, 26 May 2024 09:55:14 +0000 (11:55 +0200)
committer Josh Simmons <josh@nega.tv>
Sun, 26 May 2024 09:55:14 +0000 (11:55 +0200)
diff --git a/title/shark-shaders/build.rs b/title/shark-shaders/build.rs

index 10d140ada5f3efcb6ab0d614ef29d7e38355fa0f..c6eb9d1f2c295a58e907acc42231d098d5cac88d 100644 (file)
--- a/title/shark-shaders/build.rs
+++ b/title/shark-shaders/build.rs
@@ -26,6 +26,10 @@ const SHADERS: &[Shader] = &[
          stage: "comp",
          name: "primitive_2d_bin_fine",
      },
+    Shader {
+        stage: "comp",
+        name: "primitive_2d_clear_fine",
+    },
      Shader {
          stage: "comp",
          name: "primitive_2d_rasterize",
diff --git a/title/shark-shaders/shaders/display_transform.comp.glsl b/title/shark-shaders/shaders/display_transform.comp.glsl

index 7b5e9a80bac0b1d73bee158b103bb3d8a7c99e53..41cb75805e2a32bef5a467875c2dcc327aef8962 100644 (file)
--- a/title/shark-shaders/shaders/display_transform.comp.glsl
+++ b/title/shark-shaders/shaders/display_transform.comp.glsl
@@ -1,5 +1,23 @@
  #version 460
  
+#extension GL_EXT_control_flow_attributes : require
+
+struct PrimitiveUniforms {
+    uvec2 screen_resolution;
+    uvec2 tile_resolution_coarse;
+    uvec2 tile_resolution_fine;
+    uvec2 atlas_resolution;
+
+    uint num_primitives;
+    uint num_primitives_32;
+    uint num_primitives_1024;
+    uint pad_1;
+};
+
+layout(std430, push_constant) uniform uniformBuffer {
+    PrimitiveUniforms primitive_uniforms; 
+};
+
  layout (set = 0, binding = 0) uniform sampler bilinear_sampler;
  
  layout (set = 0, binding = 1) uniform texture3D tony_mc_mapface_lut;
@@ -9,6 +27,10 @@ layout (set = 0, binding = 3, rgba16f) uniform readonly image2D layer_ui;
  
  layout (set = 0, binding = 4, rgba16f) uniform writeonly image2D composited_output;
  
+layout(std430, set = 0, binding = 5) readonly buffer fineTileCountBufferRead {
+    uint fine_count_ro[];
+};
+
  float srgb_oetf(float a) {
      return (.0031308f >= a) ? 12.92f * a : 1.055f * pow(a, .4166666666666667f) - .055f;
  }
@@ -27,10 +49,18 @@ vec3 tony_mc_mapface(vec3 stimulus) {
  layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
  
  void main() {
+    const uvec2 tile_coord = gl_WorkGroupID.xy >> 1;
+    const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x;
+
      const vec3 stimulus = imageLoad(layer_rt, ivec2(gl_GlobalInvocationID.xy)).rgb;
      const vec3 transformed = tony_mc_mapface(stimulus);
-    const vec3 srgb = srgb_oetf(transformed);
-    const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba;
-    const vec3 composited = ui.rgb + (srgb * (1.0 - ui.a));
+    vec3 composited = srgb_oetf(transformed);
+
+    [[branch]]
+    if (fine_count_ro[tile_index] != 0) {
+        const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba;
+        composited = ui.rgb + (composited * (1.0 - ui.a));
+    }
+
      imageStore(composited_output, ivec2(gl_GlobalInvocationID.xy), vec4(composited, 1.0));
  }
diff --git a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl

index f5e6d3c07a892c4be536e3d4f15d65263371f84d..51f642e04e26aa5ccfec7e06f9ab7123bf0b32a5 100644 (file)
--- a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl
+++ b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl
@@ -53,5 +53,10 @@ void main() {
          const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE;
          fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
          fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
+
+        const uint count = uint(ballot_result.x != 0) + uint(ballot_result.y != 0);
+        if (count != 0) {
+            atomicAdd(fine_count_wo[tile_index], count);
+        }
      }
  }
diff --git a/title/shark-shaders/shaders/primitive_2d_bindings.h b/title/shark-shaders/shaders/primitive_2d_bindings.h

index 47294b01a5a7e3255774c7c2537b4570e381b7f6..6d5931da225dfc2b30aad4350830974ccd9ac9e6 100644 (file)
--- a/title/shark-shaders/shaders/primitive_2d_bindings.h
+++ b/title/shark-shaders/shaders/primitive_2d_bindings.h
@@ -28,12 +28,20 @@ layout(std430, set = 0, binding = 5) writeonly buffer coarseTileBufferWrite {
      uint coarse_bitmap_wo[];
  };
  
-layout(std430, set = 0, binding = 6) readonly buffer fineTileBufferRead {
+layout(std430, set = 0, binding = 6) readonly buffer fineTileBitmapBufferRead {
      uint fine_bitmap_ro[];
  };
  
-layout(std430, set = 0, binding = 6) writeonly buffer fineTileBufferWrite {
+layout(std430, set = 0, binding = 6) writeonly buffer fineTileBitmapBufferWrite {
      uint fine_bitmap_wo[];
  };
  
-layout (set = 0, binding = 7, rgba16f) uniform writeonly image2D ui_image;
+layout(std430, set = 0, binding = 7) readonly buffer fineTileCountBufferRead {
+    uint fine_count_ro[];
+};
+
+layout(std430, set = 0, binding = 7) writeonly buffer fineTileCountBufferWrite {
+    uint fine_count_wo[];
+};
+
+layout (set = 0, binding = 8, rgba16f) uniform writeonly image2D ui_image;
diff --git a/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl

new file mode 100644 (file)

index 0000000..c310354
--- /dev/null
+++ b/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl
@@ -0,0 +1,16 @@
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#include "primitive_2d.h"
+
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+void main() {
+    // Zero this invocation's fine-tile counter; invocations past the last tile exit early.
+    if (gl_GlobalInvocationID.x >= primitive_uniforms.tile_resolution_fine.x * primitive_uniforms.tile_resolution_fine.y) return;
+    fine_count_wo[gl_GlobalInvocationID.x] = 0;
+}
diff --git a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl

index 24dfffbc4b1540992f4ad3c715c74a58314e3462..04f83f3ce7179786951eeaefcdffd4beeda7e217 100644 (file)
--- a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl
+++ b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl
@@ -18,12 +18,16 @@ void main() {
  
      vec4 accum = vec4(0.0);
  
-    // For each tile, iterate over all words in the L1 bitmap.
-    //
-    // TODO: Count the non-zero words in the tile with atomics, so we can early out on empty tiles? 
-    for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
+    uint word_count = fine_count_ro[tile_index];
+
+    // For each tile, iterate over all words in the L1 bitmap. 
+    for (int index_l1 = 0; word_count != 0 && index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
          // For each word, iterate all set bits.
          uint bitmap_l1 = fine_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
+
+        if (bitmap_l1 != 0)
+            word_count -= 1;
+
          while (bitmap_l1 != 0) {
              const uint i = findLSB(bitmap_l1);
              bitmap_l1 ^= bitmap_l1 & -bitmap_l1;
@@ -43,7 +47,7 @@ void main() {
                  const Glyph gl = glyphs[gi.index];
                  const vec2 glyph_min = gi.position + gl.offset_min;
                  const vec2 glyph_max = gi.position + gl.offset_max;
-                const vec2 sample_center = gl_GlobalInvocationID.xy; // half pixel offset goes here?
+                const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5);
                  if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) {
                      const vec2 glyph_size = gl.offset_max - gl.offset_min;
                      const vec4 color = unpackUnorm4x8(gi.color).bgra;
diff --git a/title/shark/src/main.rs b/title/shark/src/main.rs

index 65d8a9b1a47245acc3c1ed9514252309383f1e09..cb753fe7dbaf99a47c5ce6e5b279b88f5aaa979f 100644 (file)
--- a/title/shark/src/main.rs
+++ b/title/shark/src/main.rs
@@ -24,7 +24,8 @@ use narcissus_gpu::{
  };
  use narcissus_image as image;
  use narcissus_maths::{
-    clamp, perlin_noise3, sin_pi_f32, vec3, Affine3, Deg, HalfTurn, Mat3, Mat4, Point3, Vec3,
+    clamp, perlin_noise3, sin_cos_pi_f32, sin_pi_f32, vec3, Affine3, Deg, HalfTurn, Mat3, Mat4,
+    Point3, Vec3,
  };
  use pipelines::primitive_2d::{GlyphInstance, Primitive2dPipeline};
  use spring::simple_spring_damper_exact;
@@ -862,6 +863,7 @@ struct DrawState<'gpu> {
  
      coarse_tile_bitmap_buffer: Buffer,
      fine_tile_bitmap_buffer: Buffer,
+    fine_tile_color_buffer: Buffer,
  
      glyph_atlas_image: Image,
  
@@ -898,6 +900,7 @@ impl<'gpu> DrawState<'gpu> {
              ui_image: default(),
              coarse_tile_bitmap_buffer: default(),
              fine_tile_bitmap_buffer: default(),
+            fine_tile_color_buffer: default(),
              glyph_atlas_image: default(),
              samplers,
              models,
@@ -1020,6 +1023,7 @@ impl<'gpu> DrawState<'gpu> {
                  {
                      gpu.destroy_buffer(frame, self.coarse_tile_bitmap_buffer);
                      gpu.destroy_buffer(frame, self.fine_tile_bitmap_buffer);
+                    gpu.destroy_buffer(frame, self.fine_tile_color_buffer);
  
                      let coarse_bitmap_buffer_size = tile_resolution_coarse_x
                          * tile_resolution_coarse_y
@@ -1031,6 +1035,10 @@ impl<'gpu> DrawState<'gpu> {
                          * TILE_STRIDE_FINE
                          * std::mem::size_of::<u32>() as u32;
  
+                    let fine_color_buffer_size = tile_resolution_fine_x
+                        * tile_resolution_fine_y
+                        * std::mem::size_of::<u32>() as u32;
+
                      self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
                          memory_location: MemoryLocation::Device,
                          host_mapped: false,
@@ -1045,6 +1053,13 @@ impl<'gpu> DrawState<'gpu> {
                          size: fine_bitmap_buffer_size.widen(),
                      });
  
+                    self.fine_tile_color_buffer = gpu.create_buffer(&BufferDesc {
+                        memory_location: MemoryLocation::Device,
+                        host_mapped: false,
+                        usage: BufferUsageFlags::STORAGE,
+                        size: fine_color_buffer_size.widen(),
+                    });
+
                      self.tile_resolution_coarse_x = tile_resolution_coarse_x;
                      self.tile_resolution_coarse_y = tile_resolution_coarse_y;
                      self.tile_resolution_fine_x = tile_resolution_fine_x;
@@ -1410,6 +1425,13 @@ impl<'gpu> DrawState<'gpu> {
                          Bind {
                              binding: 7,
                              array_element: 0,
+                            typed: TypedBind::StorageBuffer(&[self
+                                .fine_tile_color_buffer
+                                .to_arg()]),
+                        },
+                        Bind {
+                            binding: 8,
+                            array_element: 0,
                              typed: TypedBind::StorageImage(&[(
                                  ImageLayout::General,
                                  self.ui_image,
@@ -1425,6 +1447,15 @@ impl<'gpu> DrawState<'gpu> {
                      self.tile_resolution_coarse_y,
                  );
  
+                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_clear_pipeline);
+
+                gpu.cmd_dispatch(
+                    cmd_encoder,
+                    (self.tile_resolution_coarse_x * self.tile_resolution_coarse_y + 63) / 64,
+                    1,
+                    1,
+                );
+
                  gpu.cmd_barrier(
                      cmd_encoder,
                      Some(&GlobalBarrier {
@@ -1537,6 +1568,13 @@ impl<'gpu> DrawState<'gpu> {
                                  swapchain_image,
                              )]),
                          },
+                        Bind {
+                            binding: 5,
+                            array_element: 0,
+                            typed: TypedBind::StorageBuffer(&[self
+                                .fine_tile_color_buffer
+                                .to_arg()]),
+                        },
                      ],
                  );
  
@@ -1712,11 +1750,15 @@ pub fn main() {
  
              let tick_duration = Instant::now() - tick_start;
  
+            let (base_x, base_y) = sin_cos_pi_f32(game_state.time);
+            let base_x = (base_x + 1.0) * 0.5;
+            let base_y = (base_y + 1.0) * 0.5;
+
              for i in 0..80 {
                  let i = i as f32;
                  ui_state.text_fmt(
-                    5.0,
-                    i * 15.0 * scale,
+                    base_x * 100.0 * scale + 5.0,
+                    base_y * 100.0 * scale + i * 15.0 * scale,
                      FontFamily::RobotoRegular,
                      40.0,
                      format_args!("tick: {:?}", tick_duration),
@@ -1730,8 +1772,8 @@ pub fn main() {
                      let x = 200.0 + j * 200.0;
                      let y = 100.0 + j * 100.0;
                      ui_state.text_fmt(
-                        x * scale,
-                        (y + i * 15.0) * scale,
+                        base_x * 100.0 * scale +x * scale,
+                        base_y * 100.0 * scale +(y + i * 15.0) * scale,
                          FontFamily::NotoSansJapanese,
                          15.0,
                          format_args!(
diff --git a/title/shark/src/pipelines/display_transform.rs b/title/shark/src/pipelines/display_transform.rs

index 0133787ac4f37cb6d0d71eefa029508b7bcc14a9..ae018645d1b45b6877417b55910752e0c3ee29b1 100644 (file)
--- a/title/shark/src/pipelines/display_transform.rs
+++ b/title/shark/src/pipelines/display_transform.rs
@@ -1,10 +1,12 @@
  use narcissus_gpu::{
      BindDesc, BindGroupLayout, BindingType, ComputePipelineDesc, Pipeline, PipelineLayout,
-    ShaderDesc, ShaderStageFlags,
+    PushConstantRange, ShaderDesc, ShaderStageFlags,
  };
  
  use crate::Gpu;
  
+use super::primitive_2d::PrimitiveUniforms;
+
  pub struct DisplayTransformPipeline {
      pub bind_group_layout: BindGroupLayout,
      pub pipeline: Pipeline,
@@ -23,11 +25,17 @@ impl DisplayTransformPipeline {
              BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
              // Composited Output
              BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
+            // Fine tile count buffer
+            BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
          ]);
  
          let layout = &PipelineLayout {
              bind_group_layouts: &[bind_group_layout],
-            push_constant_ranges: &[],
+            push_constant_ranges: &[PushConstantRange {
+                stage_flags: ShaderStageFlags::COMPUTE,
+                offset: 0,
+                size: std::mem::size_of::<PrimitiveUniforms>() as u32,
+            }],
          };
  
          let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
diff --git a/title/shark/src/pipelines/primitive_2d.rs b/title/shark/src/pipelines/primitive_2d.rs

index 125bcf3576d799a70933bb52c81a945c8559bdaf..ca37a7b64b75b7999c4029a4cb6c967ec7c0729a 100644 (file)
--- a/title/shark/src/pipelines/primitive_2d.rs
+++ b/title/shark/src/pipelines/primitive_2d.rs
@@ -46,6 +46,7 @@ pub struct Primitive2dPipeline {
      pub bind_group_layout: BindGroupLayout,
      pub coarse_bin_pipeline: Pipeline,
      pub fine_bin_pipeline: Pipeline,
+    pub fine_clear_pipeline: Pipeline,
      pub rasterize_pipeline: Pipeline,
  }
  
@@ -66,6 +67,8 @@ impl Primitive2dPipeline {
              BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
              // Fine Tiles
              BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
+            // Fine Tile Counts
+            BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
              // UI Image Output
              BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
          ]);
@@ -95,6 +98,14 @@ impl Primitive2dPipeline {
              layout,
          });
  
+        let fine_clear_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
+            shader: ShaderDesc {
+                entry: c"main",
+                code: shark_shaders::PRIMITIVE_2D_CLEAR_FINE_COMP_SPV,
+            },
+            layout,
+        });
+
          let rasterize_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
              shader: ShaderDesc {
                  entry: c"main",
@@ -107,6 +118,7 @@ impl Primitive2dPipeline {
              bind_group_layout,
              coarse_bin_pipeline,
              fine_bin_pipeline,
+            fine_clear_pipeline,
              rasterize_pipeline,
          }
      }
author	Josh Simmons <josh@nega.tv>
	Sun, 26 May 2024 09:55:14 +0000 (11:55 +0200)
committer	Josh Simmons <josh@nega.tv>
	Sun, 26 May 2024 09:55:14 +0000 (11:55 +0200)
title/shark-shaders/build.rs		patch \| blob \| blame \| history
title/shark-shaders/shaders/display_transform.comp.glsl		patch \| blob \| blame \| history
title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl		patch \| blob \| blame \| history
title/shark-shaders/shaders/primitive_2d_bindings.h		patch \| blob \| blame \| history
title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl	[new file with mode: 0644]	patch \| blob
title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl		patch \| blob \| blame \| history
title/shark/src/main.rs		patch \| blob \| blame \| history
title/shark/src/pipelines/display_transform.rs		patch \| blob \| blame \| history
title/shark/src/pipelines/primitive_2d.rs		patch \| blob \| blame \| history