]> git.nega.tv - josh/narcissus/commitdiff
shark: Track dirty tiles
author Josh Simmons <josh@nega.tv>
Sun, 26 May 2024 09:55:14 +0000 (11:55 +0200)
committer Josh Simmons <josh@nega.tv>
Sun, 26 May 2024 09:55:14 +0000 (11:55 +0200)
title/shark-shaders/build.rs
title/shark-shaders/shaders/display_transform.comp.glsl
title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl
title/shark-shaders/shaders/primitive_2d_bindings.h
title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl [new file with mode: 0644]
title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl
title/shark/src/main.rs
title/shark/src/pipelines/display_transform.rs
title/shark/src/pipelines/primitive_2d.rs

index 10d140ada5f3efcb6ab0d614ef29d7e38355fa0f..c6eb9d1f2c295a58e907acc42231d098d5cac88d 100644 (file)
@@ -26,6 +26,10 @@ const SHADERS: &[Shader] = &[
         stage: "comp",
         name: "primitive_2d_bin_fine",
     },
+    Shader {
+        stage: "comp",
+        name: "primitive_2d_clear_fine",
+    },
     Shader {
         stage: "comp",
         name: "primitive_2d_rasterize",
index 7b5e9a80bac0b1d73bee158b103bb3d8a7c99e53..41cb75805e2a32bef5a467875c2dcc327aef8962 100644 (file)
@@ -1,5 +1,23 @@
 #version 460
 
+#extension GL_EXT_control_flow_attributes : require
+
+struct PrimitiveUniforms {
+    uvec2 screen_resolution;
+    uvec2 tile_resolution_coarse;
+    uvec2 tile_resolution_fine;
+    uvec2 atlas_resolution;
+
+    uint num_primitives;
+    uint num_primitives_32;
+    uint num_primitives_1024;
+    uint pad_1;
+};
+
+layout(std430, push_constant) uniform uniformBuffer {
+    PrimitiveUniforms primitive_uniforms; 
+};
+
 layout (set = 0, binding = 0) uniform sampler bilinear_sampler;
 
 layout (set = 0, binding = 1) uniform texture3D tony_mc_mapface_lut;
@@ -9,6 +27,10 @@ layout (set = 0, binding = 3, rgba16f) uniform readonly image2D layer_ui;
 
 layout (set = 0, binding = 4, rgba16f) uniform writeonly image2D composited_output;
 
+layout(std430, set = 0, binding = 5) readonly buffer fineTileCountBufferRead {
+    uint fine_count_ro[];
+};
+
 float srgb_oetf(float a) {
     return (.0031308f >= a) ? 12.92f * a : 1.055f * pow(a, .4166666666666667f) - .055f;
 }
@@ -27,10 +49,18 @@ vec3 tony_mc_mapface(vec3 stimulus) {
 layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
 void main() {
+    const uvec2 tile_coord = gl_WorkGroupID.xy >> 1; // Workgroups are 8x8 invocations; the shift gives every 2x2 group of workgroups the same tile coordinate.
+    const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x; // Row-major index into fine_count_ro.
+
     const vec3 stimulus = imageLoad(layer_rt, ivec2(gl_GlobalInvocationID.xy)).rgb;
     const vec3 transformed = tony_mc_mapface(stimulus);
-    const vec3 srgb = srgb_oetf(transformed);
-    const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba;
-    const vec3 composited = ui.rgb + (srgb * (1.0 - ui.a));
+    vec3 composited = srgb_oetf(transformed);
+
+    [[branch]]
+    if (fine_count_ro[tile_index] != 0) { // Skip the UI load and blend when this tile's count is zero.
+        const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba;
+        composited = ui.rgb + (composited * (1.0 - ui.a));
+    }
+
     imageStore(composited_output, ivec2(gl_GlobalInvocationID.xy), vec4(composited, 1.0));
 }
index f5e6d3c07a892c4be536e3d4f15d65263371f84d..51f642e04e26aa5ccfec7e06f9ab7123bf0b32a5 100644 (file)
@@ -53,5 +53,10 @@ void main() {
         const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE;
         fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
         fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
+
+        const uint count = uint(ballot_result.x != 0) + uint(ballot_result.y != 0);
+        if (count != 0) {
+            atomicAdd(fine_count_wo[tile_index], count);
+        }
     }
 }
index 47294b01a5a7e3255774c7c2537b4570e381b7f6..6d5931da225dfc2b30aad4350830974ccd9ac9e6 100644 (file)
@@ -28,12 +28,20 @@ layout(std430, set = 0, binding = 5) writeonly buffer coarseTileBufferWrite {
     uint coarse_bitmap_wo[];
 };
 
-layout(std430, set = 0, binding = 6) readonly buffer fineTileBufferRead {
+layout(std430, set = 0, binding = 6) readonly buffer fineTileBitmapBufferRead {
     uint fine_bitmap_ro[];
 };
 
-layout(std430, set = 0, binding = 6) writeonly buffer fineTileBufferWrite {
+layout(std430, set = 0, binding = 6) writeonly buffer fineTileBitmapBufferWrite {
     uint fine_bitmap_wo[];
 };
 
-layout (set = 0, binding = 7, rgba16f) uniform writeonly image2D ui_image;
+layout(std430, set = 0, binding = 7) readonly buffer fineTileCountBufferRead {
+    uint fine_count_ro[];
+};
+
+layout(std430, set = 0, binding = 7) writeonly buffer fineTileCountBufferWrite {
+    uint fine_count_wo[];
+};
+
+layout (set = 0, binding = 8, rgba16f) uniform writeonly image2D ui_image;
diff --git a/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl
new file mode 100644 (file)
index 0000000..c310354
--- /dev/null
@@ -0,0 +1,16 @@
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#include "primitive_2d.h"
+
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+void main() { // Resets the per-tile counters in fine_count_wo to zero, one invocation per entry.
+    if (gl_GlobalInvocationID.x < (primitive_uniforms.tile_resolution_fine.x * primitive_uniforms.tile_resolution_fine.y)) { // Bounds guard: only indices below the fine tile total are written.
+        fine_count_wo[gl_GlobalInvocationID.x] = 0;
+    }
+}
index 24dfffbc4b1540992f4ad3c715c74a58314e3462..04f83f3ce7179786951eeaefcdffd4beeda7e217 100644 (file)
@@ -18,12 +18,16 @@ void main() {
 
     vec4 accum = vec4(0.0);
 
-    // For each tile, iterate over all words in the L1 bitmap.
-    //
-    // TODO: Count the non-zero words in the tile with atomics, so we can early out on empty tiles? 
-    for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
+    uint word_count = fine_count_ro[tile_index];
+
+    // For each tile, iterate over all words in the L1 bitmap. 
+    for (int index_l1 = 0; word_count != 0 && index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
         // For each word, iterate all set bits.
         uint bitmap_l1 = fine_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
+
+        if (bitmap_l1 != 0)
+            word_count -= 1;
+
         while (bitmap_l1 != 0) {
             const uint i = findLSB(bitmap_l1);
             bitmap_l1 ^= bitmap_l1 & -bitmap_l1;
@@ -43,7 +47,7 @@ void main() {
                 const Glyph gl = glyphs[gi.index];
                 const vec2 glyph_min = gi.position + gl.offset_min;
                 const vec2 glyph_max = gi.position + gl.offset_max;
-                const vec2 sample_center = gl_GlobalInvocationID.xy; // half pixel offset goes here?
+                const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5);
                 if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) {
                     const vec2 glyph_size = gl.offset_max - gl.offset_min;
                     const vec4 color = unpackUnorm4x8(gi.color).bgra;
index 65d8a9b1a47245acc3c1ed9514252309383f1e09..cb753fe7dbaf99a47c5ce6e5b279b88f5aaa979f 100644 (file)
@@ -24,7 +24,8 @@ use narcissus_gpu::{
 };
 use narcissus_image as image;
 use narcissus_maths::{
-    clamp, perlin_noise3, sin_pi_f32, vec3, Affine3, Deg, HalfTurn, Mat3, Mat4, Point3, Vec3,
+    clamp, perlin_noise3, sin_cos_pi_f32, sin_pi_f32, vec3, Affine3, Deg, HalfTurn, Mat3, Mat4,
+    Point3, Vec3,
 };
 use pipelines::primitive_2d::{GlyphInstance, Primitive2dPipeline};
 use spring::simple_spring_damper_exact;
@@ -862,6 +863,7 @@ struct DrawState<'gpu> {
 
     coarse_tile_bitmap_buffer: Buffer,
     fine_tile_bitmap_buffer: Buffer,
+    fine_tile_color_buffer: Buffer,
 
     glyph_atlas_image: Image,
 
@@ -898,6 +900,7 @@ impl<'gpu> DrawState<'gpu> {
             ui_image: default(),
             coarse_tile_bitmap_buffer: default(),
             fine_tile_bitmap_buffer: default(),
+            fine_tile_color_buffer: default(),
             glyph_atlas_image: default(),
             samplers,
             models,
@@ -1020,6 +1023,7 @@ impl<'gpu> DrawState<'gpu> {
                 {
                     gpu.destroy_buffer(frame, self.coarse_tile_bitmap_buffer);
                     gpu.destroy_buffer(frame, self.fine_tile_bitmap_buffer);
+                    gpu.destroy_buffer(frame, self.fine_tile_color_buffer);
 
                     let coarse_bitmap_buffer_size = tile_resolution_coarse_x
                         * tile_resolution_coarse_y
@@ -1031,6 +1035,10 @@ impl<'gpu> DrawState<'gpu> {
                         * TILE_STRIDE_FINE
                         * std::mem::size_of::<u32>() as u32;
 
+                    let fine_color_buffer_size = tile_resolution_fine_x
+                        * tile_resolution_fine_y
+                        * std::mem::size_of::<u32>() as u32;
+
                     self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
                         memory_location: MemoryLocation::Device,
                         host_mapped: false,
@@ -1045,6 +1053,13 @@ impl<'gpu> DrawState<'gpu> {
                         size: fine_bitmap_buffer_size.widen(),
                     });
 
+                    self.fine_tile_color_buffer = gpu.create_buffer(&BufferDesc {
+                        memory_location: MemoryLocation::Device,
+                        host_mapped: false,
+                        usage: BufferUsageFlags::STORAGE,
+                        size: fine_color_buffer_size.widen(),
+                    });
+
                     self.tile_resolution_coarse_x = tile_resolution_coarse_x;
                     self.tile_resolution_coarse_y = tile_resolution_coarse_y;
                     self.tile_resolution_fine_x = tile_resolution_fine_x;
@@ -1410,6 +1425,13 @@ impl<'gpu> DrawState<'gpu> {
                         Bind {
                             binding: 7,
                             array_element: 0,
+                            typed: TypedBind::StorageBuffer(&[self
+                                .fine_tile_color_buffer
+                                .to_arg()]),
+                        },
+                        Bind {
+                            binding: 8,
+                            array_element: 0,
                             typed: TypedBind::StorageImage(&[(
                                 ImageLayout::General,
                                 self.ui_image,
@@ -1425,6 +1447,15 @@ impl<'gpu> DrawState<'gpu> {
                     self.tile_resolution_coarse_y,
                 );
 
+                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_clear_pipeline);
+
+                gpu.cmd_dispatch(
+                    cmd_encoder,
+                    (self.tile_resolution_coarse_x * self.tile_resolution_coarse_y + 63) / 64,
+                    1,
+                    1,
+                );
+
                 gpu.cmd_barrier(
                     cmd_encoder,
                     Some(&GlobalBarrier {
@@ -1537,6 +1568,13 @@ impl<'gpu> DrawState<'gpu> {
                                 swapchain_image,
                             )]),
                         },
+                        Bind {
+                            binding: 5,
+                            array_element: 0,
+                            typed: TypedBind::StorageBuffer(&[self
+                                .fine_tile_color_buffer
+                                .to_arg()]),
+                        },
                     ],
                 );
 
@@ -1712,11 +1750,15 @@ pub fn main() {
 
             let tick_duration = Instant::now() - tick_start;
 
+            let (base_x, base_y) = sin_cos_pi_f32(game_state.time);
+            let base_x = (base_x + 1.0) * 0.5;
+            let base_y = (base_y + 1.0) * 0.5;
+
             for i in 0..80 {
                 let i = i as f32;
                 ui_state.text_fmt(
-                    5.0,
-                    i * 15.0 * scale,
+                    base_x * 100.0 * scale + 5.0,
+                    base_y * 100.0 * scale + i * 15.0 * scale,
                     FontFamily::RobotoRegular,
                     40.0,
                     format_args!("tick: {:?}", tick_duration),
@@ -1730,8 +1772,8 @@ pub fn main() {
                     let x = 200.0 + j * 200.0;
                     let y = 100.0 + j * 100.0;
                     ui_state.text_fmt(
-                        x * scale,
-                        (y + i * 15.0) * scale,
+                        base_x * 100.0 * scale +x * scale,
+                        base_y * 100.0 * scale +(y + i * 15.0) * scale,
                         FontFamily::NotoSansJapanese,
                         15.0,
                         format_args!(
index 0133787ac4f37cb6d0d71eefa029508b7bcc14a9..ae018645d1b45b6877417b55910752e0c3ee29b1 100644 (file)
@@ -1,10 +1,12 @@
 use narcissus_gpu::{
     BindDesc, BindGroupLayout, BindingType, ComputePipelineDesc, Pipeline, PipelineLayout,
-    ShaderDesc, ShaderStageFlags,
+    PushConstantRange, ShaderDesc, ShaderStageFlags,
 };
 
 use crate::Gpu;
 
+use super::primitive_2d::PrimitiveUniforms;
+
 pub struct DisplayTransformPipeline {
     pub bind_group_layout: BindGroupLayout,
     pub pipeline: Pipeline,
@@ -23,11 +25,17 @@ impl DisplayTransformPipeline {
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
             // Composited Output
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
+            // Fine tile count buffer
+            BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
         ]);
 
         let layout = &PipelineLayout {
             bind_group_layouts: &[bind_group_layout],
-            push_constant_ranges: &[],
+            push_constant_ranges: &[PushConstantRange {
+                stage_flags: ShaderStageFlags::COMPUTE,
+                offset: 0,
+                size: std::mem::size_of::<PrimitiveUniforms>() as u32,
+            }],
         };
 
         let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
index 125bcf3576d799a70933bb52c81a945c8559bdaf..ca37a7b64b75b7999c4029a4cb6c967ec7c0729a 100644 (file)
@@ -46,6 +46,7 @@ pub struct Primitive2dPipeline {
     pub bind_group_layout: BindGroupLayout,
     pub coarse_bin_pipeline: Pipeline,
     pub fine_bin_pipeline: Pipeline,
+    pub fine_clear_pipeline: Pipeline,
     pub rasterize_pipeline: Pipeline,
 }
 
@@ -66,6 +67,8 @@ impl Primitive2dPipeline {
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
             // Fine Tiles
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
+            // Fine Tile Counts
+            BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
             // UI Image Output
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
         ]);
@@ -95,6 +98,14 @@ impl Primitive2dPipeline {
             layout,
         });
 
+        let fine_clear_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
+            shader: ShaderDesc {
+                entry: c"main",
+                code: shark_shaders::PRIMITIVE_2D_CLEAR_FINE_COMP_SPV,
+            },
+            layout,
+        });
+
         let rasterize_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
             shader: ShaderDesc {
                 entry: c"main",
@@ -107,6 +118,7 @@ impl Primitive2dPipeline {
             bind_group_layout,
             coarse_bin_pipeline,
             fine_bin_pipeline,
+            fine_clear_pipeline,
             rasterize_pipeline,
         }
     }