git.nega.tv - josh/narcissus/commitdiff
shark: Simplify primitive shaders
author Josh Simmons <josh@nega.tv>
Sun, 26 May 2024 13:34:33 +0000 (15:34 +0200)
committer Josh Simmons <josh@nega.tv>
Sun, 26 May 2024 13:34:33 +0000 (15:34 +0200)
title/shark-shaders/shaders/display_transform.comp.glsl
title/shark-shaders/shaders/primitive_2d.h
title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl
title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl
title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl
title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl
title/shark/src/main.rs
title/shark/src/pipelines/primitive_2d.rs

index 41cb75805e2a32bef5a467875c2dcc327aef8962..aea5280d141e6b7db5653a23fed1ede6ac0cb9ba 100644 (file)
@@ -4,14 +4,14 @@
 
 struct PrimitiveUniforms {
     uvec2 screen_resolution;
-    uvec2 tile_resolution_coarse;
-    uvec2 tile_resolution_fine;
     uvec2 atlas_resolution;
 
     uint num_primitives;
     uint num_primitives_32;
     uint num_primitives_1024;
-    uint pad_1;
+
+    uint tile_stride_coarse;
+    uint tile_stride_fine;
 };
 
 layout(std430, push_constant) uniform uniformBuffer {
@@ -50,7 +50,7 @@ layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
 void main() {
     const uvec2 tile_coord = gl_WorkGroupID.xy >> 1;
-    const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x;
+    const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_fine + tile_coord.x;
 
     const vec3 stimulus = imageLoad(layer_rt, ivec2(gl_GlobalInvocationID.xy)).rgb;
     const vec3 transformed = tony_mc_mapface(stimulus);
index ada3d4fcdc81a5daab689e4acda49e21a004e50d..57064a34f90de669cfc1dad50b4781f0352e43f3 100644 (file)
 
 struct PrimitiveUniforms {
     uvec2 screen_resolution;
-    uvec2 tile_resolution_coarse;
-    uvec2 tile_resolution_fine;
     uvec2 atlas_resolution;
 
     uint num_primitives;
     uint num_primitives_32;
     uint num_primitives_1024;
-    uint pad_1;
+
+    uint tile_stride_coarse;
+    uint tile_stride_fine;
 };
 
 struct Glyph {
index 639dee535d7931fba4b7e1fe05ae016995cd61b1..80e803b47c756034ec18c8e60a12947337dd1231 100644 (file)
 layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 
 void main() {
-    const uvec2 tile_size = uvec2(TILE_SIZE_COARSE, TILE_SIZE_COARSE);
     const uvec2 tile_coord = gl_GlobalInvocationID.yz;
-    const uvec2 tile_min = tile_coord * tile_size;
-    const uvec2 tile_max = min(tile_min + tile_size, primitive_uniforms.screen_resolution);
+    const uvec2 tile_min = tile_coord * TILE_SIZE_COARSE;
+    const uvec2 tile_max = min(tile_min + TILE_SIZE_COARSE, primitive_uniforms.screen_resolution);
 
     const uint primitive_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
 
@@ -28,7 +27,7 @@ void main() {
 
     uvec4 ballot_result = subgroupBallot(intersects);
     if (subgroupElect()) { // managed democracy wins again
-        const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord.x;
+        const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_coarse + tile_coord.x;
         const uint tile_offset = tile_index * TILE_STRIDE_COARSE;
         coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
         coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
index 51f642e04e26aa5ccfec7e06f9ab7123bf0b32a5..9efb59b4f73f436b766f10aef49a86158fcb06b5 100644 (file)
 layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 
 void main() {
-    const uvec2 tile_size = uvec2(TILE_SIZE_FINE, TILE_SIZE_FINE);
     const uvec2 tile_coord = gl_GlobalInvocationID.yz;
-    const uvec2 tile_min = tile_coord * tile_size;
-    const uvec2 tile_max = min(tile_min + tile_size, primitive_uniforms.screen_resolution);
-    const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x;
+    const uvec2 tile_min = tile_coord * TILE_SIZE_FINE;
+    const uvec2 tile_max = min(tile_min + TILE_SIZE_FINE, primitive_uniforms.screen_resolution);
+    const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_fine + tile_coord.x;
 
     const uint index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
 
     uint bitmap_l0 = 0;
     if (index < primitive_uniforms.num_primitives_32) {
         const uvec2 tile_coord_coarse = tile_coord >> TILE_SIZE_SHIFT;
-        const uint tile_index_coarse = tile_coord_coarse.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord_coarse.x;
+        const uint tile_index_coarse = tile_coord_coarse.y * primitive_uniforms.tile_stride_coarse + tile_coord_coarse.x;
         const uint tile_base_coarse = tile_index_coarse * TILE_STRIDE_COARSE;
         const uint tile_bitmap_base_coarse = tile_base_coarse + TILE_BITMAP_OFFSET_COARSE;
 
index c3103544afccdcc6043665e0ed011b4c9df3c0fb..3d486b6c735ca0de69cb79ec0608a55265c40a88 100644 (file)
@@ -10,7 +10,5 @@
 layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
 
 void main() {
-    if (gl_GlobalInvocationID.x < (primitive_uniforms.tile_resolution_fine.x * primitive_uniforms.tile_resolution_fine.y)) {
-        fine_count_wo[gl_GlobalInvocationID.x] = 0;
-    }
+    fine_count_wo[gl_GlobalInvocationID.x] = 0;
 }
index 04f83f3ce7179786951eeaefcdffd4beeda7e217..0ee9f470a16a42a0cbe96700433f68287cbe50c6 100644 (file)
@@ -11,7 +11,7 @@ layout (local_size_x = TILE_SIZE_FINE, local_size_y = TILE_SIZE_FINE, local_size
 
 void main() {
     const uvec2 tile_coord = gl_WorkGroupID.xy;
-    const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x;
+    const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_fine + tile_coord.x;
     const uint tile_base_fine = tile_index * TILE_STRIDE_FINE;
     const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE;
     const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE;
index cb753fe7dbaf99a47c5ce6e5b279b88f5aaa979f..b90401f8e79da9f13c0ad3504e24226e8d22a431 100644 (file)
@@ -1035,9 +1035,10 @@ impl<'gpu> DrawState<'gpu> {
                         * TILE_STRIDE_FINE
                         * std::mem::size_of::<u32>() as u32;
 
-                    let fine_color_buffer_size = tile_resolution_fine_x
-                        * tile_resolution_fine_y
-                        * std::mem::size_of::<u32>() as u32;
+                    // align to the workgroup size to simplify shader
+                    let fine_color_buffer_size =
+                        ((tile_resolution_fine_x * tile_resolution_fine_y + 63) & !63)
+                            * std::mem::size_of::<u32>() as u32;
 
                     self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
                         memory_location: MemoryLocation::Device,
@@ -1329,31 +1330,7 @@ impl<'gpu> DrawState<'gpu> {
 
             // Render UI
             {
-                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.coarse_bin_pipeline);
-
-                let num_primitives = ui_state.primitive_instances.len() as u32;
-                let num_primitives_32 = (num_primitives + 31) / 32;
-                let num_primitives_1024 = (num_primitives_32 + 31) / 32;
-
-                gpu.cmd_push_constants(
-                    cmd_encoder,
-                    ShaderStageFlags::COMPUTE,
-                    0,
-                    &PrimitiveUniforms {
-                        screen_resolution_x: self.width,
-                        screen_resolution_y: self.height,
-                        tile_resolution_coarse_x: self.tile_resolution_coarse_x,
-                        tile_resolution_coarse_y: self.tile_resolution_coarse_y,
-                        tile_resolution_fine_x: self.tile_resolution_fine_x,
-                        tile_resolution_fine_y: self.tile_resolution_fine_y,
-                        atlas_resolution_x: atlas_width,
-                        atlas_resolution_y: atlas_height,
-                        num_primitives,
-                        num_primitives_32,
-                        num_primitives_1024,
-                        _pad0: 0,
-                    },
-                );
+                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_clear_pipeline);
 
                 let glyph_buffer = gpu.request_transient_buffer_with_data(
                     frame,
@@ -1440,15 +1417,6 @@ impl<'gpu> DrawState<'gpu> {
                     ],
                 );
 
-                gpu.cmd_dispatch(
-                    cmd_encoder,
-                    (num_primitives + 63) / 64,
-                    self.tile_resolution_coarse_x,
-                    self.tile_resolution_coarse_y,
-                );
-
-                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_clear_pipeline);
-
                 gpu.cmd_dispatch(
                     cmd_encoder,
                     (self.tile_resolution_coarse_x * self.tile_resolution_coarse_y + 63) / 64,
@@ -1456,44 +1424,86 @@ impl<'gpu> DrawState<'gpu> {
                     1,
                 );
 
-                gpu.cmd_barrier(
-                    cmd_encoder,
-                    Some(&GlobalBarrier {
-                        prev_access: &[Access::ShaderWrite],
-                        next_access: &[Access::ShaderOtherRead],
-                    }),
-                    &[],
-                );
-
-                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_bin_pipeline);
-
-                gpu.cmd_dispatch(
-                    cmd_encoder,
-                    (num_primitives_32 + 63) / 64,
-                    self.tile_resolution_fine_x,
-                    self.tile_resolution_fine_y,
-                );
-
-                gpu.cmd_barrier(
-                    cmd_encoder,
-                    Some(&GlobalBarrier {
-                        prev_access: &[Access::ShaderWrite],
-                        next_access: &[Access::ShaderOtherRead],
-                    }),
-                    &[],
-                );
-
-                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.rasterize_pipeline);
-
-                gpu.cmd_dispatch(
-                    cmd_encoder,
-                    self.tile_resolution_fine_x,
-                    self.tile_resolution_fine_y,
-                    1,
-                );
+                let num_primitives = ui_state.primitive_instances.len() as u32;
+                let num_primitives_32 = (num_primitives + 31) / 32;
+                let num_primitives_1024 = (num_primitives_32 + 31) / 32;
 
-                // Cleanup
                 ui_state.primitive_instances.clear();
+
+                for _pass_y in 0..1 {
+                    for _pass_x in 0..1 {
+                        gpu.cmd_set_pipeline(
+                            cmd_encoder,
+                            self.primitive_2d_pipeline.coarse_bin_pipeline,
+                        );
+
+                        gpu.cmd_push_constants(
+                            cmd_encoder,
+                            ShaderStageFlags::COMPUTE,
+                            0,
+                            &PrimitiveUniforms {
+                                screen_resolution_x: self.width,
+                                screen_resolution_y: self.height,
+                                atlas_resolution_x: atlas_width,
+                                atlas_resolution_y: atlas_height,
+                                num_primitives,
+                                num_primitives_32,
+                                num_primitives_1024,
+                                tile_stride_coarse: self.tile_resolution_coarse_x,
+                                tile_stride_fine: self.tile_resolution_fine_x,
+                            },
+                        );
+
+                        gpu.cmd_dispatch(
+                            cmd_encoder,
+                            (num_primitives + 63) / 64,
+                            self.tile_resolution_coarse_x,
+                            self.tile_resolution_coarse_y,
+                        );
+
+                        gpu.cmd_barrier(
+                            cmd_encoder,
+                            Some(&GlobalBarrier {
+                                prev_access: &[Access::ShaderWrite],
+                                next_access: &[Access::ShaderOtherRead],
+                            }),
+                            &[],
+                        );
+
+                        gpu.cmd_set_pipeline(
+                            cmd_encoder,
+                            self.primitive_2d_pipeline.fine_bin_pipeline,
+                        );
+
+                        gpu.cmd_dispatch(
+                            cmd_encoder,
+                            (num_primitives_32 + 63) / 64,
+                            self.tile_resolution_fine_x,
+                            self.tile_resolution_fine_y,
+                        );
+
+                        gpu.cmd_barrier(
+                            cmd_encoder,
+                            Some(&GlobalBarrier {
+                                prev_access: &[Access::ShaderWrite],
+                                next_access: &[Access::ShaderOtherRead],
+                            }),
+                            &[],
+                        );
+
+                        gpu.cmd_set_pipeline(
+                            cmd_encoder,
+                            self.primitive_2d_pipeline.rasterize_pipeline,
+                        );
+
+                        gpu.cmd_dispatch(
+                            cmd_encoder,
+                            self.tile_resolution_fine_x,
+                            self.tile_resolution_fine_y,
+                            1,
+                        );
+                    }
+                }
             }
 
             // Display transform and composite
index ca37a7b64b75b7999c4029a4cb6c967ec7c0729a..c65e79152ff24ee8a1570338da28fb37c2801144 100644 (file)
@@ -19,10 +19,6 @@ pub const TILE_STRIDE_FINE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1;
 pub struct PrimitiveUniforms {
     pub screen_resolution_x: u32,
     pub screen_resolution_y: u32,
-    pub tile_resolution_coarse_x: u32,
-    pub tile_resolution_coarse_y: u32,
-    pub tile_resolution_fine_x: u32,
-    pub tile_resolution_fine_y: u32,
     pub atlas_resolution_x: u32,
     pub atlas_resolution_y: u32,
 
@@ -30,7 +26,8 @@ pub struct PrimitiveUniforms {
     pub num_primitives_32: u32,
     pub num_primitives_1024: u32,
 
-    pub _pad0: u32,
+    pub tile_stride_coarse: u32,
+    pub tile_stride_fine: u32,
 }
 
 #[allow(unused)]