From bcacb5e932fbaa301f3e710bbf5dc6fdde5259ad Mon Sep 17 00:00:00 2001 From: Josh Simmons Date: Mon, 20 May 2024 23:09:04 +0200 Subject: [PATCH] shark: Bin primitives into tiles --- engine/narcissus-font/src/cache.rs | 48 ++++---- title/shark-shaders/build.rs | 6 +- .../shaders/primitive_2d_bin.comp.glsl | 68 ++++++++++++ .../shaders/primitive_2d_constants.h | 6 + .../shaders/primitive_2d_rasterize.comp.glsl | 73 ++++++++++++ .../shaders/primitive_2d_tiled.comp.glsl | 65 ----------- .../shaders/primitive_2d_types.h | 20 +--- title/shark/src/main.rs | 104 ++++++++++++------ title/shark/src/pipelines/primitive_2d.rs | 18 ++- 9 files changed, 266 insertions(+), 142 deletions(-) create mode 100644 title/shark-shaders/shaders/primitive_2d_bin.comp.glsl create mode 100644 title/shark-shaders/shaders/primitive_2d_constants.h create mode 100644 title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl delete mode 100644 title/shark-shaders/shaders/primitive_2d_tiled.comp.glsl diff --git a/engine/narcissus-font/src/cache.rs b/engine/narcissus-font/src/cache.rs index 8a027eb..e96b60d 100644 --- a/engine/narcissus-font/src/cache.rs +++ b/engine/narcissus-font/src/cache.rs @@ -24,16 +24,16 @@ pub struct TouchedGlyphIndex(u32); #[repr(C)] pub struct TouchedGlyph { // Bitmap coordinates in texture atlas. - pub x0: i32, - pub x1: i32, - pub y0: i32, - pub y1: i32, + pub atlas_min_x: i32, + pub atlas_min_y: i32, + pub atlas_max_x: i32, + pub atlas_max_y: i32, // Glyph bounding box relative to glyph origin. - pub offset_x0: f32, - pub offset_x1: f32, - pub offset_y0: f32, - pub offset_y1: f32, + pub offset_min_x: f32, + pub offset_min_y: f32, + pub offset_max_x: f32, + pub offset_max_y: f32, } struct CachedGlyph { @@ -169,15 +169,15 @@ where let touched_glyph = &mut self.touched_glyphs[touched_glyph_index.0.widen()]; - touched_glyph.x0 = rect.x; - touched_glyph.x1 = rect.x + rect.w; - touched_glyph.y0 = rect.y; - touched_glyph.y1 = rect.y + rect.h; + touched_glyph.atlas_min_x = rect.x; + touched_glyph.atlas_min_y = rect.y; + touched_glyph.atlas_max_x = rect.x + rect.w; + touched_glyph.atlas_max_y = rect.y + rect.h; - touched_glyph.offset_x0 = cached_glyph.offset_x0; - touched_glyph.offset_y0 = cached_glyph.offset_y0; - touched_glyph.offset_x1 = cached_glyph.offset_x1; - touched_glyph.offset_y1 = cached_glyph.offset_y1; + touched_glyph.offset_min_x = cached_glyph.offset_x0; + touched_glyph.offset_min_y = cached_glyph.offset_y0; + touched_glyph.offset_max_x = cached_glyph.offset_x1; + touched_glyph.offset_max_y = cached_glyph.offset_y1; } // This glyph isn't cached, so we must prepare to pack and // render it. @@ -291,15 +291,15 @@ where let touched_glyph = &mut self.touched_glyphs[rect.id as usize]; - touched_glyph.x0 = rect.x; - touched_glyph.x1 = rect.x + rect.w; - touched_glyph.y0 = rect.y; - touched_glyph.y1 = rect.y + rect.h; + touched_glyph.atlas_min_x = rect.x; + touched_glyph.atlas_min_y = rect.y; + touched_glyph.atlas_max_x = rect.x + rect.w; + touched_glyph.atlas_max_y = rect.y + rect.h; - touched_glyph.offset_x0 = offset_x0; - touched_glyph.offset_y0 = offset_y0; - touched_glyph.offset_x1 = offset_x1; - touched_glyph.offset_y1 = offset_y1; + touched_glyph.offset_min_x = offset_x0; + touched_glyph.offset_min_y = offset_y0; + touched_glyph.offset_max_x = offset_x1; + touched_glyph.offset_max_y = offset_y1; } // The `cached_glyphs` and `rects` arrays need to be sorted for the diff --git a/title/shark-shaders/build.rs b/title/shark-shaders/build.rs index 8ed767f..5990ab9 100644 --- a/title/shark-shaders/build.rs +++ b/title/shark-shaders/build.rs @@ -20,7 +20,11 @@ const SHADERS: &[Shader] = &[ }, Shader { stage: "comp", - name: "primitive_2d_tiled", + name: "primitive_2d_bin", + }, + Shader { + stage: "comp", + name: "primitive_2d_rasterize", }, Shader { stage: "comp", diff --git a/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl new file mode 100644 index 0000000..d618644 --- /dev/null +++ b/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl @@ -0,0 +1,68 @@ +#version 460 + +#extension GL_GOOGLE_include_directive : require + +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_control_flow_attributes : require + +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_ballot : require + +#include "primitive_2d_types.h" +#include "primitive_2d_constants.h" + +layout(std430, set = 0, binding = 0) uniform uniformBuffer { + ivec2 screen_size; + ivec2 atlas_size; + uint num_primitives; +}; + +layout(std430, set = 0, binding = 3) readonly buffer glyphBuffer { + Glyph glyphs[]; +}; + +layout(std430, set = 0, binding = 4) readonly buffer glyphInstanceBuffer { + GlyphInstance glyph_instances[]; +}; + +layout(std430, set = 0, binding = 5) readonly buffer primitiveInstanceBuffer { + PrimitiveInstance primitive_instances[]; +}; + +bool test_glyph(uint index, ivec2 tile_min, ivec2 tile_max) { + const GlyphInstance gi = glyph_instances[index]; + const Glyph gl = glyphs[gi.index]; + const vec2 glyph_min = gi.position + gl.offset_min; + const vec2 glyph_max = gi.position + gl.offset_max; + return !(any(lessThan(tile_max, glyph_min)) || any(greaterThan(tile_min, glyph_max))); +} + +layout(std430, set = 0, binding = 6) writeonly buffer tileBuffer { + uint tile_bitmap[]; +}; + +// TODO: Spec constant support for different subgroup sizes. +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +void main() { + const ivec2 tile_size = ivec2(TILE_SIZE, TILE_SIZE); + const ivec2 tile_coord = ivec2(gl_GlobalInvocationID.yz); + const ivec2 tile_min = ivec2(tile_coord * tile_size); + const ivec2 tile_max = min(tile_min + tile_size, screen_size); + + const uint local_index = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; + const uint primitive_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + local_index; + + bool intersects = false; + if (primitive_index < num_primitives) { + intersects = test_glyph(primitive_index, tile_min, tile_max); + } + + uvec4 ballot_result = subgroupBallot(intersects); + if (subgroupElect()) { // managed democracy wins again + const int tile_index = tile_coord.y * MAX_TILES_X + tile_coord.x; + const uint bitmap_base_offset = uint(tile_index * TILE_STRIDE); + tile_bitmap[bitmap_base_offset + 2u * gl_WorkGroupID.x + 0u] = ballot_result.x; + tile_bitmap[bitmap_base_offset + 2u * gl_WorkGroupID.x + 1u] = ballot_result.y; + } +} diff --git a/title/shark-shaders/shaders/primitive_2d_constants.h b/title/shark-shaders/shaders/primitive_2d_constants.h new file mode 100644 index 0000000..a4fb13f --- /dev/null +++ b/title/shark-shaders/shaders/primitive_2d_constants.h @@ -0,0 +1,6 @@ +#define MAX_PRIMS 0x8000u +#define TILE_SIZE 16 +#define TILE_STRIDE (MAX_PRIMS / 32u) +#define MAX_TILES_X 180 +#define MAX_TILES_Y 113 +#define NUM_TILES (MAX_TILES_X * MAX_TILES_Y) \ No newline at end of file diff --git a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl new file mode 100644 index 0000000..127dd44 --- /dev/null +++ b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl @@ -0,0 +1,73 @@ +#version 460 + +#extension GL_GOOGLE_include_directive : require + +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_control_flow_attributes : require + +#include "primitive_2d_constants.h" +#include "primitive_2d_types.h" + +layout(std430, set = 0, binding = 0) uniform uniformBuffer { + uint screen_width; + uint screen_height; + uint atlas_width; + uint atlas_height; + uint num_primitives; +}; + +layout (set = 0, binding = 1) uniform sampler bilinear_sampler; +layout (set = 0, binding = 2) uniform texture2D glyph_atlas; + +layout(std430, set = 0, binding = 3) readonly buffer glyphBuffer { + Glyph glyphs[]; +}; + +layout(std430, set = 0, binding = 4) readonly buffer glyphInstanceBuffer { + GlyphInstance glyph_instances[]; +}; + +layout(std430, set = 0, binding = 5) readonly buffer primitiveInstanceBuffer { + PrimitiveInstance primitive_instances[]; +}; + +layout(std430, set = 0, binding = 6) readonly buffer tileBuffer { + uint tile_bitmap[]; +}; + +layout (set = 0, binding = 7, rgba16f) uniform writeonly image2D ui_image; + +layout (local_size_x = 16, local_size_y = 16, local_size_z = 1) in; + +void main() { + vec4 accum = vec4(0.0); + + const ivec2 tile_coord = ivec2(gl_WorkGroupID.xy); + const int tile_index = tile_coord.y * MAX_TILES_X + tile_coord.x; + const uint bitmap_base_offset = uint(tile_index * TILE_STRIDE); + + for (int i = 0; i < num_primitives / 32; i++) { + uint bitmap = tile_bitmap[bitmap_base_offset + i]; + while (bitmap != 0) { + const uint t = bitmap & -bitmap; + const int index = i * 32 + findLSB(bitmap); + bitmap ^= t; + + const GlyphInstance gi = glyph_instances[index]; + const Glyph gl = glyphs[gi.index]; + const vec4 color = unpackUnorm4x8(gi.color).bgra; + const vec2 glyph_min = gi.position + gl.offset_min; + const vec2 glyph_max = gi.position + gl.offset_max; + const vec2 glyph_size = gl.offset_max - gl.offset_min; + const vec2 sample_center = gl_GlobalInvocationID.xy; // half pixel offset goes here? + if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) { + const vec2 uv = mix(vec2(gl.atlas_min), vec2(gl.atlas_max), (sample_center - glyph_min) / glyph_size) / vec2(atlas_width, atlas_height); + const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r * color.a; + accum.rgb = (coverage * color.rgb) + accum.rgb * (1.0 - coverage); + accum.a = coverage + accum.a * (1.0 - coverage); + } + } + } + + imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy), accum); +} diff --git a/title/shark-shaders/shaders/primitive_2d_tiled.comp.glsl b/title/shark-shaders/shaders/primitive_2d_tiled.comp.glsl deleted file mode 100644 index 4903ea5..0000000 --- a/title/shark-shaders/shaders/primitive_2d_tiled.comp.glsl +++ /dev/null @@ -1,65 +0,0 @@ -#version 460 - -#extension GL_GOOGLE_include_directive : require - -#extension GL_EXT_scalar_block_layout : require -#extension GL_EXT_control_flow_attributes : require - -#include "primitive_2d_types.h" - -layout(std430, set = 0, binding = 0) uniform uniformBuffer { - uint screen_width; - uint screen_height; - uint atlas_width; - uint atlas_height; - uint num_primitives; -}; - -layout (set = 0, binding = 1) uniform sampler bilinear_sampler; -layout (set = 0, binding = 2) uniform texture2D glyph_atlas; - -layout(std430, set = 0, binding = 3) readonly buffer glyphBuffer { - Glyph glyphs[]; -}; - -layout(std430, set = 0, binding = 4) readonly buffer glyphInstanceBuffer { - GlyphInstance glyph_instances[]; -}; - -layout(std430, set = 0, binding = 5) readonly buffer primitiveInstanceBuffer { - PrimitiveInstance primitive_instances[]; -}; - -layout(std430, set = 0, binding = 6) readonly buffer tileBuffer { - Tile tiles[]; -}; - -layout (set = 0, binding = 7, rgba16f) uniform writeonly image2D ui_image; - -layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -void main() { - vec4 accum = vec4(0.0); - - for (int i = 0; i < num_primitives; i++) { - const GlyphInstance gi = glyph_instances[i]; - const Glyph gl = glyphs[gi.index]; - const vec4 color = unpackUnorm4x8(gi.color).bgra; - const vec2 glyph_top_left = vec2(gi.x + gl.offset_x0, gi.y + gl.offset_y0); - const vec2 glyph_bottom_right = vec2(gi.x + gl.offset_x1, gi.y + gl.offset_y1); - const vec2 glyph_size = vec2(gl.offset_x1 - gl.offset_x0, gl.offset_y1 - gl.offset_y0); - const vec2 sample_center = gl_GlobalInvocationID.xy; // half pixel offset goes here? - if (sample_center.x >= glyph_top_left.x && - sample_center.x <= glyph_bottom_right.x && - sample_center.y >= glyph_top_left.y && - sample_center.y <= glyph_bottom_right.y) { - const vec2 uv = mix(vec2(gl.x0, gl.y0), vec2(gl.x1, gl.y1), (sample_center - glyph_top_left) / glyph_size) / vec2(atlas_width, atlas_height); - const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r; - accum = coverage * color; - accum.a = coverage; - break; - } - } - - imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy), accum); -} diff --git a/title/shark-shaders/shaders/primitive_2d_types.h b/title/shark-shaders/shaders/primitive_2d_types.h index fd661a1..6b531a8 100644 --- a/title/shark-shaders/shaders/primitive_2d_types.h +++ b/title/shark-shaders/shaders/primitive_2d_types.h @@ -1,19 +1,14 @@ struct Glyph { - uint x0; - uint x1; - uint y0; - uint y1; + ivec2 atlas_min; + ivec2 atlas_max; - float offset_x0; - float offset_x1; - float offset_y0; - float offset_y1; + vec2 offset_min; + vec2 offset_max; }; struct GlyphInstance { - float x; - float y; + vec2 position; uint index; uint color; }; @@ -22,8 +17,3 @@ struct PrimitiveInstance { uint type; uint index; }; - -struct Tile { - uint index; - uint count; -}; diff --git a/title/shark/src/main.rs b/title/shark/src/main.rs index 341fa2c..a519092 100644 --- a/title/shark/src/main.rs +++ b/title/shark/src/main.rs @@ -12,13 +12,13 @@ use narcissus_app::{create_app, Event, Key, PressedState, WindowDesc}; use narcissus_core::{box_assume_init, default, rand::Pcg64, zeroed_box, BitIter}; use narcissus_font::{FontCollection, GlyphCache, HorizontalMetrics}; use narcissus_gpu::{ - create_device, Access, Bind, BufferImageCopy, BufferUsageFlags, ClearValue, CmdEncoder, - ColorSpace, Device, DeviceExt, Extent2d, Extent3d, Frame, Image, ImageAspectFlags, - ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout, ImageSubresourceRange, - ImageTiling, ImageUsageFlags, IndexType, LoadOp, MemoryLocation, Offset2d, PersistentBuffer, - PresentMode, RenderingAttachment, RenderingDesc, Sampler, SamplerAddressMode, SamplerDesc, - SamplerFilter, Scissor, StoreOp, SwapchainConfigurator, SwapchainImage, ThreadToken, TypedBind, - Viewport, + create_device, Access, Bind, BufferDesc, BufferImageCopy, BufferUsageFlags, ClearValue, + CmdEncoder, ColorSpace, Device, DeviceExt, Extent2d, Extent3d, Frame, GlobalBarrier, Image, + ImageAspectFlags, ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout, + ImageSubresourceRange, ImageTiling, ImageUsageFlags, IndexType, LoadOp, MemoryLocation, + Offset2d, PersistentBuffer, PresentMode, RenderingAttachment, RenderingDesc, Sampler, + SamplerAddressMode, SamplerDesc, SamplerFilter, Scissor, StoreOp, SwapchainConfigurator, + SwapchainImage, ThreadToken, TypedBind, Viewport, }; use narcissus_image as image; use narcissus_maths::{ @@ -510,7 +510,7 @@ impl<'a> UiState<'a> { x, y, touched_glyph_index, - color: 0xff000000, + color: 0x880000ff, }); x += advance_width * scale; @@ -852,6 +852,8 @@ struct DrawState<'gpu> { rt_image: Image, ui_image: Image, + tile_bitmap_buffer: PersistentBuffer<'gpu>, + glyph_atlas_image: Image, samplers: Samplers, @@ -871,6 +873,20 @@ impl<'gpu> DrawState<'gpu> { let models = Models::load(gpu); let images = Images::load(gpu, thread_token); + const MAX_PRIMS: usize = 0x8000; + const TILE_STRIDE: usize = MAX_PRIMS / 32; + const MAX_TILES_X: usize = 180; + const MAX_TILES_Y: usize = 113; + + const BITMAP_SIZE: usize = (MAX_TILES_X * MAX_TILES_Y * TILE_STRIDE) * 4; + + let tile_bitmap_buffer = gpu.create_persistent_buffer(&BufferDesc { + memory_location: MemoryLocation::Device, + host_mapped: true, + usage: BufferUsageFlags::STORAGE, + size: BITMAP_SIZE, + }); + Self { gpu, basic_pipeline, @@ -881,6 +897,7 @@ impl<'gpu> DrawState<'gpu> { depth_image: default(), rt_image: default(), ui_image: default(), + tile_bitmap_buffer, glyph_atlas_image: default(), samplers, models, @@ -1254,8 +1271,6 @@ impl<'gpu> DrawState<'gpu> { // Render UI { - gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.pipeline); - let uniforms_buffer = gpu.request_transient_buffer_with_data( frame, thread_token, @@ -1286,14 +1301,8 @@ impl<'gpu> DrawState<'gpu> { BufferUsageFlags::STORAGE, &[0u32], ); - let tile_buffer = gpu.request_transient_buffer_with_data( - frame, - thread_token, - BufferUsageFlags::STORAGE, - &[0u32], - ); - ui_state.primitive_instances.clear(); + gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.bin_pipeline); gpu.cmd_set_bind_group( frame, @@ -1337,7 +1346,7 @@ impl<'gpu> DrawState<'gpu> { Bind { binding: 6, array_element: 0, - typed: TypedBind::StorageBuffer(&[tile_buffer.to_arg()]), + typed: TypedBind::StorageBuffer(&[self.tile_bitmap_buffer.to_arg()]), }, Bind { binding: 7, @@ -1350,7 +1359,33 @@ impl<'gpu> DrawState<'gpu> { ], ); - gpu.cmd_dispatch(cmd_encoder, (self.width + 7) / 8, (self.height + 7) / 8, 1); + gpu.cmd_dispatch( + cmd_encoder, + (ui_state.primitive_instances.len() as u32 + 63) / 64, + (self.width + 15) / 16, + (self.height + 15) / 16, + ); + + gpu.cmd_barrier( + cmd_encoder, + Some(&GlobalBarrier { + prev_access: &[Access::ShaderWrite], + next_access: &[Access::ShaderOtherRead], + }), + &[], + ); + + gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.rasterize_pipeline); + + gpu.cmd_dispatch( + cmd_encoder, + (self.width + 15) / 16, + (self.height + 15) / 16, + 1, + ); + + // Cleanup + ui_state.primitive_instances.clear(); } // Display transform and composite @@ -1600,21 +1635,24 @@ pub fn main() { let tick_duration = Instant::now() - tick_start; - ui_state.text_fmt( - 5.0, - 40.0, - FontFamily::RobotoRegular, - 30.0, - format_args!("tick: {:?}", tick_duration), - ); + for i in 0..80 { + let i = i as f32; + ui_state.text_fmt( + 5.0, + i * 20.0, + FontFamily::RobotoRegular, + 40.0, + format_args!("tick: {:?}", tick_duration), + ); - ui_state.text_fmt( - 5.0, - 90.0, - FontFamily::NotoSansJapanese, - 30.0, - format_args!("お握り The Quick Brown Fox Jumped Over The Lazy Dog"), - ); + ui_state.text_fmt( + 200.0, + i * 20.0, + FontFamily::NotoSansJapanese, + 40.0, + format_args!("お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████"), + ); + } draw_state.draw( thread_token, diff --git a/title/shark/src/pipelines/primitive_2d.rs b/title/shark/src/pipelines/primitive_2d.rs index 51f9841..8e0fa64 100644 --- a/title/shark/src/pipelines/primitive_2d.rs +++ b/title/shark/src/pipelines/primitive_2d.rs @@ -27,7 +27,8 @@ pub struct GlyphInstance { pub struct Primitive2dPipeline { pub bind_group_layout: BindGroupLayout, - pub pipeline: Pipeline, + pub bin_pipeline: Pipeline, + pub rasterize_pipeline: Pipeline, } impl Primitive2dPipeline { @@ -51,17 +52,26 @@ impl Primitive2dPipeline { BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage), ]); - let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { + let bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { shader: ShaderDesc { entry: c"main", - code: shark_shaders::PRIMITIVE_2D_TILED_COMP_SPV, + code: shark_shaders::PRIMITIVE_2D_BIN_COMP_SPV, + }, + bind_group_layouts: &[bind_group_layout], + }); + + let rasterize_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { + shader: ShaderDesc { + entry: c"main", + code: shark_shaders::PRIMITIVE_2D_RASTERIZE_COMP_SPV, }, bind_group_layouts: &[bind_group_layout], }); Self { bind_group_layout, - pipeline, + bin_pipeline, + rasterize_pipeline, } } } -- 2.49.0