From: Josh Simmons Date: Mon, 27 May 2024 05:25:33 +0000 (+0200) Subject: shark: Tweak primitive shaders X-Git-Url: https://git.nega.tv//gitweb.cgi?a=commitdiff_plain;h=2e8086f22e71b856af4c7fedb73adbe26416c2ee;p=josh%2Fnarcissus shark: Tweak primitive shaders --- diff --git a/title/shark-shaders/shaders/primitive_2d.h b/title/shark-shaders/shaders/primitive_2d.h index d8f2261..16a2c18 100644 --- a/title/shark-shaders/shaders/primitive_2d.h +++ b/title/shark-shaders/shaders/primitive_2d.h @@ -1,7 +1,7 @@ #define MAX_PRIMS 0x20000u -#define TILE_SIZE_COARSE 128 +#define TILE_SIZE_COARSE 64 #define TILE_SIZE_FINE 16 -#define TILE_SIZE_SHIFT 3 +#define TILE_SIZE_MUL (TILE_SIZE_COARSE / TILE_SIZE_FINE) #define TILE_BITMAP_L1_WORDS (MAX_PRIMS / 32 / 32) #define TILE_BITMAP_L0_WORDS (MAX_PRIMS / 32) #define TILE_STRIDE_COARSE TILE_BITMAP_L0_WORDS @@ -10,7 +10,7 @@ #define TILE_BITMAP_L1_OFFSET_FINE 0 #define TILE_BITMAP_L0_OFFSET_FINE TILE_BITMAP_L1_WORDS -#define TILE_DISPATCH_X 8 +#define TILE_DISPATCH_X 15 struct PrimitiveUniforms { uvec2 screen_resolution; @@ -38,11 +38,6 @@ struct GlyphInstance { uint color; }; -struct PrimitiveInstance { - uint type; - uint index; -}; - #include "primitive_2d_bindings.h" bool test_glyph(uint index, uvec2 tile_min, uvec2 tile_max) { diff --git a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl index 80f0b23..b4c8c33 100644 --- a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl @@ -14,10 +14,10 @@ layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; void main() { - const uvec2 tile_coord = gl_GlobalInvocationID.yz; - const uint tile_index = tile_coord.y * (TILE_DISPATCH_X << TILE_SIZE_SHIFT) + tile_coord.x; + const uvec2 tile_coord = gl_WorkGroupID.yz; + const uint tile_index = tile_coord.y * TILE_DISPATCH_X * TILE_SIZE_MUL + tile_coord.x; - const uvec2 tile_coord_global = tile_coord + (primitive_uniforms.tile_offset_coarse << TILE_SIZE_SHIFT); + const uvec2 tile_coord_global = tile_coord + primitive_uniforms.tile_offset_coarse * TILE_SIZE_MUL; const uint tile_index_global = tile_coord_global.y * primitive_uniforms.tile_stride_fine + tile_coord_global.x; const uvec2 tile_min = tile_coord_global * TILE_SIZE_FINE; @@ -27,7 +27,7 @@ void main() { uint bitmap_l0 = 0; if (index < primitive_uniforms.num_primitives_32) { - const uvec2 tile_coord_coarse = tile_coord >> TILE_SIZE_SHIFT; + const uvec2 tile_coord_coarse = tile_coord / TILE_SIZE_MUL; const uint tile_index_coarse = tile_coord_coarse.y * TILE_DISPATCH_X + tile_coord_coarse.x; const uint tile_base_coarse = tile_index_coarse * TILE_STRIDE_COARSE; const uint tile_bitmap_base_coarse = tile_base_coarse + TILE_BITMAP_OFFSET_COARSE; @@ -45,14 +45,14 @@ void main() { } const uint tile_base_fine = tile_index * TILE_STRIDE_FINE; + const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE; // Write the L0 per-primitive bitmap. - const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE; fine_bitmap_wo[tile_bitmap_l0_base_fine + index] = bitmap_l0; - // Write the L1 per-bitmap-word bitmap. uvec4 ballot_result = subgroupBallot(bitmap_l0 != 0); if (subgroupElect()) { + // Write the L1 per-bitmap-word bitmap. const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE; fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 0] = ballot_result.x; fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 1] = ballot_result.y; diff --git a/title/shark-shaders/shaders/primitive_2d_bindings.h b/title/shark-shaders/shaders/primitive_2d_bindings.h index 6d5931d..e54758b 100644 --- a/title/shark-shaders/shaders/primitive_2d_bindings.h +++ b/title/shark-shaders/shaders/primitive_2d_bindings.h @@ -16,32 +16,28 @@ layout(std430, set = 0, binding = 3) readonly buffer glyphInstanceBuffer { GlyphInstance glyph_instances[]; }; -layout(std430, set = 0, binding = 4) readonly buffer primitiveInstanceBuffer { - PrimitiveInstance primitive_instances[]; -}; - -layout(std430, set = 0, binding = 5) readonly buffer coarseTileBufferRead { +layout(std430, set = 0, binding = 4) readonly buffer coarseTileBufferRead { uint coarse_bitmap_ro[]; }; -layout(std430, set = 0, binding = 5) writeonly buffer coarseTileBufferWrite { +layout(std430, set = 0, binding = 4) writeonly buffer coarseTileBufferWrite { uint coarse_bitmap_wo[]; }; -layout(std430, set = 0, binding = 6) readonly buffer fineTileBitmapBufferRead { +layout(std430, set = 0, binding = 5) readonly buffer fineTileBitmapBufferRead { uint fine_bitmap_ro[]; }; -layout(std430, set = 0, binding = 6) writeonly buffer fineTileBitmapBufferWrite { +layout(std430, set = 0, binding = 5) writeonly buffer fineTileBitmapBufferWrite { uint fine_bitmap_wo[]; }; -layout(std430, set = 0, binding = 7) readonly buffer fineTileCountBufferRead { +layout(std430, set = 0, binding = 6) readonly buffer fineTileCountBufferRead { uint fine_count_ro[]; }; -layout(std430, set = 0, binding = 7) writeonly buffer fineTileCountBufferWrite { +layout(std430, set = 0, binding = 6) writeonly buffer fineTileCountBufferWrite { uint fine_count_wo[]; }; -layout (set = 0, binding = 8, rgba16f) uniform writeonly image2D ui_image; +layout (set = 0, binding = 7, rgba16f) uniform writeonly image2D ui_image; diff --git a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl index 71e447f..813eddb 100644 --- a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl @@ -5,24 +5,74 @@ #extension GL_EXT_scalar_block_layout : require #extension GL_EXT_control_flow_attributes : require +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_ballot : require + #include "primitive_2d.h" layout (local_size_x = TILE_SIZE_FINE, local_size_y = TILE_SIZE_FINE, local_size_z = 1) in; +#define DEBUG_SHOW_TILES 0 + +#if DEBUG_SHOW_TILES != 0 + +vec3 plasma_quintic(float x) +{ + x = clamp(x, 0.0, 1.0); + vec4 x1 = vec4(1.0, x, x * x, x * x * x); // 1 x x2 x3 + vec4 x2 = x1 * x1.w * x; // x4 x5 x6 x7 + return vec3( + dot(x1.xyzw, vec4(+0.063861086, +1.992659096, -1.023901152, -0.490832805)) + dot(x2.xy, vec2(+1.308442123, -0.914547012)), + dot(x1.xyzw, vec4(+0.049718590, -0.791144343, +2.892305078, +0.811726816)) + dot(x2.xy, vec2(-4.686502417, +2.717794514)), + dot(x1.xyzw, vec4(+0.513275779, +1.580255060, -5.164414457, +4.559573646)) + dot(x2.xy, vec2(-1.916810682, +0.570638854)) + ); +} + +#endif + void main() { const uvec2 tile_coord = gl_WorkGroupID.xy; - const uint tile_index = tile_coord.y * (TILE_DISPATCH_X << TILE_SIZE_SHIFT) + tile_coord.x; + const uint tile_index = tile_coord.y * TILE_DISPATCH_X * TILE_SIZE_MUL + tile_coord.x; - const uvec2 tile_coord_global = tile_coord + (primitive_uniforms.tile_offset_coarse << TILE_SIZE_SHIFT); + const uvec2 tile_coord_global = tile_coord + primitive_uniforms.tile_offset_coarse * TILE_SIZE_MUL; const uint tile_index_global = tile_coord_global.y * primitive_uniforms.tile_stride_fine + tile_coord_global.x; const uint tile_base_fine = tile_index * TILE_STRIDE_FINE; const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE; const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE; +#if DEBUG_SHOW_TILES == 1 + + uint count = 0; + // For each tile, iterate over all words in the L1 bitmap. + for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) { + // For each word, iterate all set bits. + uint bitmap_l1 = fine_bitmap_ro[tile_bitmap_l1_base_fine + index_l1]; + + while (bitmap_l1 != 0) { + const uint i = findLSB(bitmap_l1); + bitmap_l1 ^= bitmap_l1 & -bitmap_l1; + + // For each set bit in the L1 bitmap, iterate the set bits in the + // corresponding L0 bitmap. + const uint index_l0 = index_l1 * 32 + i; + uint bitmap_l0 = fine_bitmap_ro[tile_bitmap_l0_base_fine + index_l0]; + + count += bitCount(bitmap_l0); + } + } + + const vec3 color = plasma_quintic(float(count) / 50.0); + imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy + primitive_uniforms.tile_offset_coarse * TILE_SIZE_COARSE), vec4(color, 1.0)); + +#else + vec4 accum = vec4(0.0); uint word_count = fine_count_ro[tile_index_global]; + if (word_count == 0) { + return; + } // For each tile, iterate over all words in the L1 bitmap. for (int index_l1 = 0; word_count != 0 && index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) { @@ -65,4 +115,6 @@ void main() { } imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy + primitive_uniforms.tile_offset_coarse * TILE_SIZE_COARSE), accum); + +#endif } diff --git a/title/shark/src/main.rs b/title/shark/src/main.rs index 9825556..c5b378b 100644 --- a/title/shark/src/main.rs +++ b/title/shark/src/main.rs @@ -1343,12 +1343,6 @@ impl<'gpu> DrawState<'gpu> { BufferUsageFlags::STORAGE, ui_state.primitive_instances.as_slice(), ); - let primitive_instance_buffer = gpu.request_transient_buffer_with_data( - frame, - thread_token, - BufferUsageFlags::STORAGE, - &[0u32], - ); gpu.cmd_set_bind_group( frame, @@ -1382,31 +1376,26 @@ impl<'gpu> DrawState<'gpu> { Bind { binding: 4, array_element: 0, - typed: TypedBind::StorageBuffer(&[primitive_instance_buffer.to_arg()]), - }, - Bind { - binding: 5, - array_element: 0, typed: TypedBind::StorageBuffer(&[self .coarse_tile_bitmap_buffer .to_arg()]), }, Bind { - binding: 6, + binding: 5, array_element: 0, typed: TypedBind::StorageBuffer(&[self .fine_tile_bitmap_buffer .to_arg()]), }, Bind { - binding: 7, + binding: 6, array_element: 0, typed: TypedBind::StorageBuffer(&[self .fine_tile_color_buffer .to_arg()]), }, Bind { - binding: 8, + binding: 7, array_element: 0, typed: TypedBind::StorageImage(&[( ImageLayout::General, @@ -1790,27 +1779,22 @@ pub fn main() { base_x * 100.0 * scale + 5.0, base_y * 100.0 * scale + i * 15.0 * scale, FontFamily::RobotoRegular, - 40.0, + 20.0, format_args!("tick: {:?}", tick_duration), ); } - for i in 0..80 { - for j in 0..11 { - let i = i as f32; - let j = j as f32; - let x = 200.0 + j * 200.0; - let y = 100.0 + j * 100.0; - ui_state.text_fmt( - base_x * 100.0 * scale +x * scale, - base_y * 100.0 * scale +(y + i * 15.0) * scale, + for i in 0..120 { + let i = i as f32; + ui_state.text_fmt( + 5.0, + (15.0 + i * 15.0) * scale, FontFamily::NotoSansJapanese, - 15.0, + 8.0, format_args!( - "お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████" + "お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████" ), ); - } } draw_state.draw( diff --git a/title/shark/src/pipelines/primitive_2d.rs b/title/shark/src/pipelines/primitive_2d.rs index 56d9538..c9e85e3 100644 --- a/title/shark/src/pipelines/primitive_2d.rs +++ b/title/shark/src/pipelines/primitive_2d.rs @@ -7,15 +7,15 @@ use narcissus_gpu::{ use crate::Gpu; pub const MAX_PRIMS: u32 = 0x20000; -pub const TILE_SIZE_COARSE: u32 = 128; +pub const TILE_SIZE_COARSE: u32 = 64; pub const TILE_SIZE_FINE: u32 = 16; pub const TILE_BITMAP_WORDS_L1: u32 = MAX_PRIMS / 32 / 32; pub const TILE_BITMAP_WORDS_L0: u32 = MAX_PRIMS / 32; pub const TILE_STRIDE_COARSE: u32 = TILE_BITMAP_WORDS_L0; pub const TILE_STRIDE_FINE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1; -pub const TILE_DISPATCH_COARSE_X: u32 = 8; -pub const TILE_DISPATCH_COARSE_Y: u32 = 5; +pub const TILE_DISPATCH_COARSE_X: u32 = 15; +pub const TILE_DISPATCH_COARSE_Y: u32 = 15; pub const TILE_DISPATCH_FINE_X: u32 = TILE_DISPATCH_COARSE_X * (TILE_SIZE_COARSE / TILE_SIZE_FINE); pub const TILE_DISPATCH_FINE_Y: u32 = TILE_DISPATCH_COARSE_Y * (TILE_SIZE_COARSE / TILE_SIZE_FINE); @@ -65,8 +65,6 @@ impl Primitive2dPipeline { BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), // Glyph Instances BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), - // Primitive Instances - BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), // Coarse Tiles BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), // Fine Tiles