From a787f9e761399ddb48fbd4702a90d71faf364f90 Mon Sep 17 00:00:00 2001 From: Josh Simmons Date: Sun, 26 May 2024 11:55:14 +0200 Subject: [PATCH] shark: Track dirty tiles --- title/shark-shaders/build.rs | 4 ++ .../shaders/display_transform.comp.glsl | 36 +++++++++++-- .../shaders/primitive_2d_bin_fine.comp.glsl | 5 ++ .../shaders/primitive_2d_bindings.h | 14 +++-- .../shaders/primitive_2d_clear_fine.comp.glsl | 16 ++++++ .../shaders/primitive_2d_rasterize.comp.glsl | 14 +++-- title/shark/src/main.rs | 52 +++++++++++++++++-- .../shark/src/pipelines/display_transform.rs | 12 ++++- title/shark/src/pipelines/primitive_2d.rs | 12 +++++ 9 files changed, 147 insertions(+), 18 deletions(-) create mode 100644 title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl diff --git a/title/shark-shaders/build.rs b/title/shark-shaders/build.rs index 10d140a..c6eb9d1 100644 --- a/title/shark-shaders/build.rs +++ b/title/shark-shaders/build.rs @@ -26,6 +26,10 @@ const SHADERS: &[Shader] = &[ stage: "comp", name: "primitive_2d_bin_fine", }, + Shader { + stage: "comp", + name: "primitive_2d_clear_fine", + }, Shader { stage: "comp", name: "primitive_2d_rasterize", diff --git a/title/shark-shaders/shaders/display_transform.comp.glsl b/title/shark-shaders/shaders/display_transform.comp.glsl index 7b5e9a8..41cb758 100644 --- a/title/shark-shaders/shaders/display_transform.comp.glsl +++ b/title/shark-shaders/shaders/display_transform.comp.glsl @@ -1,5 +1,23 @@ #version 460 +#extension GL_EXT_control_flow_attributes : require + +struct PrimitiveUniforms { + uvec2 screen_resolution; + uvec2 tile_resolution_coarse; + uvec2 tile_resolution_fine; + uvec2 atlas_resolution; + + uint num_primitives; + uint num_primitives_32; + uint num_primitives_1024; + uint pad_1; +}; + +layout(std430, push_constant) uniform uniformBuffer { + PrimitiveUniforms primitive_uniforms; +}; + layout (set = 0, binding = 0) uniform sampler bilinear_sampler; layout (set = 0, binding = 1) uniform texture3D 
tony_mc_mapface_lut; @@ -9,6 +27,10 @@ layout (set = 0, binding = 3, rgba16f) uniform readonly image2D layer_ui; layout (set = 0, binding = 4, rgba16f) uniform writeonly image2D composited_output; +layout(std430, set = 0, binding = 5) readonly buffer fineTileCountBufferRead { + uint fine_count_ro[]; +}; + float srgb_oetf(float a) { return (.0031308f >= a) ? 12.92f * a : 1.055f * pow(a, .4166666666666667f) - .055f; } @@ -27,10 +49,18 @@ vec3 tony_mc_mapface(vec3 stimulus) { layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; void main() { + const uvec2 tile_coord = gl_WorkGroupID.xy >> 1; + const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x; + const vec3 stimulus = imageLoad(layer_rt, ivec2(gl_GlobalInvocationID.xy)).rgb; const vec3 transformed = tony_mc_mapface(stimulus); - const vec3 srgb = srgb_oetf(transformed); - const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba; - const vec3 composited = ui.rgb + (srgb * (1.0 - ui.a)); + vec3 composited = srgb_oetf(transformed); + + [[branch]] + if (fine_count_ro[tile_index] != 0) { + const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba; + composited = ui.rgb + (composited * (1.0 - ui.a)); + } + imageStore(composited_output, ivec2(gl_GlobalInvocationID.xy), vec4(composited, 1.0)); } diff --git a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl index f5e6d3c..51f642e 100644 --- a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl @@ -53,5 +53,10 @@ void main() { const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE; fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 0] = ballot_result.x; fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 1] = ballot_result.y; + + const uint count = uint(ballot_result.x != 0) + 
uint(ballot_result.y != 0); + if (count != 0) { + atomicAdd(fine_count_wo[tile_index], count); + } } } diff --git a/title/shark-shaders/shaders/primitive_2d_bindings.h b/title/shark-shaders/shaders/primitive_2d_bindings.h index 47294b0..6d5931d 100644 --- a/title/shark-shaders/shaders/primitive_2d_bindings.h +++ b/title/shark-shaders/shaders/primitive_2d_bindings.h @@ -28,12 +28,20 @@ layout(std430, set = 0, binding = 5) writeonly buffer coarseTileBufferWrite { uint coarse_bitmap_wo[]; }; -layout(std430, set = 0, binding = 6) readonly buffer fineTileBufferRead { +layout(std430, set = 0, binding = 6) readonly buffer fineTileBitmapBufferRead { uint fine_bitmap_ro[]; }; -layout(std430, set = 0, binding = 6) writeonly buffer fineTileBufferWrite { +layout(std430, set = 0, binding = 6) writeonly buffer fineTileBitmapBufferWrite { uint fine_bitmap_wo[]; }; -layout (set = 0, binding = 7, rgba16f) uniform writeonly image2D ui_image; +layout(std430, set = 0, binding = 7) readonly buffer fineTileCountBufferRead { + uint fine_count_ro[]; +}; + +layout(std430, set = 0, binding = 7) writeonly buffer fineTileCountBufferWrite { + uint fine_count_wo[]; +}; + +layout (set = 0, binding = 8, rgba16f) uniform writeonly image2D ui_image; diff --git a/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl new file mode 100644 index 0000000..c310354 --- /dev/null +++ b/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl @@ -0,0 +1,16 @@ +#version 460 + +#extension GL_GOOGLE_include_directive : require + +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_control_flow_attributes : require + +#include "primitive_2d.h" + +layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +void main() { + if (gl_GlobalInvocationID.x < (primitive_uniforms.tile_resolution_fine.x * primitive_uniforms.tile_resolution_fine.y)) { + fine_count_wo[gl_GlobalInvocationID.x] = 0; + } +} diff --git 
a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl index 24dfffb..04f83f3 100644 --- a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl @@ -18,12 +18,16 @@ void main() { vec4 accum = vec4(0.0); - // For each tile, iterate over all words in the L1 bitmap. - // - // TODO: Count the non-zero words in the tile with atomics, so we can early out on empty tiles? - for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) { + uint word_count = fine_count_ro[tile_index]; + + // For each tile, iterate over all words in the L1 bitmap. + for (int index_l1 = 0; word_count != 0 && index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) { // For each word, iterate all set bits. uint bitmap_l1 = fine_bitmap_ro[tile_bitmap_l1_base_fine + index_l1]; + + if (bitmap_l1 != 0) + word_count -= 1; + while (bitmap_l1 != 0) { const uint i = findLSB(bitmap_l1); bitmap_l1 ^= bitmap_l1 & -bitmap_l1; @@ -43,7 +47,7 @@ void main() { const Glyph gl = glyphs[gi.index]; const vec2 glyph_min = gi.position + gl.offset_min; const vec2 glyph_max = gi.position + gl.offset_max; - const vec2 sample_center = gl_GlobalInvocationID.xy; // half pixel offset goes here? 
+ const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5); if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) { const vec2 glyph_size = gl.offset_max - gl.offset_min; const vec4 color = unpackUnorm4x8(gi.color).bgra; diff --git a/title/shark/src/main.rs b/title/shark/src/main.rs index 65d8a9b..cb753fe 100644 --- a/title/shark/src/main.rs +++ b/title/shark/src/main.rs @@ -24,7 +24,8 @@ use narcissus_gpu::{ }; use narcissus_image as image; use narcissus_maths::{ - clamp, perlin_noise3, sin_pi_f32, vec3, Affine3, Deg, HalfTurn, Mat3, Mat4, Point3, Vec3, + clamp, perlin_noise3, sin_cos_pi_f32, sin_pi_f32, vec3, Affine3, Deg, HalfTurn, Mat3, Mat4, + Point3, Vec3, }; use pipelines::primitive_2d::{GlyphInstance, Primitive2dPipeline}; use spring::simple_spring_damper_exact; @@ -862,6 +863,7 @@ struct DrawState<'gpu> { coarse_tile_bitmap_buffer: Buffer, fine_tile_bitmap_buffer: Buffer, + fine_tile_color_buffer: Buffer, glyph_atlas_image: Image, @@ -898,6 +900,7 @@ impl<'gpu> DrawState<'gpu> { ui_image: default(), coarse_tile_bitmap_buffer: default(), fine_tile_bitmap_buffer: default(), + fine_tile_color_buffer: default(), glyph_atlas_image: default(), samplers, models, @@ -1020,6 +1023,7 @@ impl<'gpu> DrawState<'gpu> { { gpu.destroy_buffer(frame, self.coarse_tile_bitmap_buffer); gpu.destroy_buffer(frame, self.fine_tile_bitmap_buffer); + gpu.destroy_buffer(frame, self.fine_tile_color_buffer); let coarse_bitmap_buffer_size = tile_resolution_coarse_x * tile_resolution_coarse_y @@ -1031,6 +1035,10 @@ impl<'gpu> DrawState<'gpu> { * TILE_STRIDE_FINE * std::mem::size_of::<u32>() as u32; + let fine_color_buffer_size = tile_resolution_fine_x + * tile_resolution_fine_y + * std::mem::size_of::<u32>() as u32; + self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc { memory_location: MemoryLocation::Device, host_mapped: false, @@ -1045,6 +1053,13 @@ impl<'gpu> DrawState<'gpu> { size: fine_bitmap_buffer_size.widen(), }); +
self.fine_tile_color_buffer = gpu.create_buffer(&BufferDesc { + memory_location: MemoryLocation::Device, + host_mapped: false, + usage: BufferUsageFlags::STORAGE, + size: fine_color_buffer_size.widen(), + }); + self.tile_resolution_coarse_x = tile_resolution_coarse_x; self.tile_resolution_coarse_y = tile_resolution_coarse_y; self.tile_resolution_fine_x = tile_resolution_fine_x; @@ -1410,6 +1425,13 @@ impl<'gpu> DrawState<'gpu> { Bind { binding: 7, array_element: 0, + typed: TypedBind::StorageBuffer(&[self + .fine_tile_color_buffer + .to_arg()]), + }, + Bind { + binding: 8, + array_element: 0, typed: TypedBind::StorageImage(&[( ImageLayout::General, self.ui_image, @@ -1425,6 +1447,15 @@ impl<'gpu> DrawState<'gpu> { self.tile_resolution_coarse_y, ); + gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_clear_pipeline); + + gpu.cmd_dispatch( + cmd_encoder, + (self.tile_resolution_fine_x * self.tile_resolution_fine_y + 63) / 64, /* one invocation per fine tile, 64 invocations per workgroup */ + 1, + 1, + ); + gpu.cmd_barrier( cmd_encoder, Some(&GlobalBarrier { @@ -1537,6 +1568,13 @@ impl<'gpu> DrawState<'gpu> { swapchain_image, )]), }, + Bind { + binding: 5, + array_element: 0, + typed: TypedBind::StorageBuffer(&[self + .fine_tile_color_buffer + .to_arg()]), + }, ], ); @@ -1712,11 +1750,15 @@ pub fn main() { let tick_duration = Instant::now() - tick_start; + let (base_x, base_y) = sin_cos_pi_f32(game_state.time); + let base_x = (base_x + 1.0) * 0.5; + let base_y = (base_y + 1.0) * 0.5; + for i in 0..80 { let i = i as f32; ui_state.text_fmt( - 5.0, - i * 15.0 * scale, + base_x * 100.0 * scale + 5.0, + base_y * 100.0 * scale + i * 15.0 * scale, FontFamily::RobotoRegular, 40.0, format_args!("tick: {:?}", tick_duration), @@ -1730,8 +1772,8 @@ pub fn main() { let x = 200.0 + j * 200.0; let y = 100.0 + j * 100.0; ui_state.text_fmt( - x * scale, - (y + i * 15.0) * scale, + base_x * 100.0 * scale +x * scale, + base_y * 100.0 * scale +(y + i * 15.0) * scale, FontFamily::NotoSansJapanese, 15.0, format_args!( diff --git
a/title/shark/src/pipelines/display_transform.rs b/title/shark/src/pipelines/display_transform.rs index 0133787..ae01864 100644 --- a/title/shark/src/pipelines/display_transform.rs +++ b/title/shark/src/pipelines/display_transform.rs @@ -1,10 +1,12 @@ use narcissus_gpu::{ BindDesc, BindGroupLayout, BindingType, ComputePipelineDesc, Pipeline, PipelineLayout, - ShaderDesc, ShaderStageFlags, + PushConstantRange, ShaderDesc, ShaderStageFlags, }; use crate::Gpu; +use super::primitive_2d::PrimitiveUniforms; + pub struct DisplayTransformPipeline { pub bind_group_layout: BindGroupLayout, pub pipeline: Pipeline, @@ -23,11 +25,17 @@ impl DisplayTransformPipeline { BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage), // Composited Output BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage), + // Tile color buffer + BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), ]); let layout = &PipelineLayout { bind_group_layouts: &[bind_group_layout], - push_constant_ranges: &[], + push_constant_ranges: &[PushConstantRange { + stage_flags: ShaderStageFlags::COMPUTE, + offset: 0, + size: std::mem::size_of::<PrimitiveUniforms>() as u32, + }], }; let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { diff --git a/title/shark/src/pipelines/primitive_2d.rs b/title/shark/src/pipelines/primitive_2d.rs index 125bcf3..ca37a7b 100644 --- a/title/shark/src/pipelines/primitive_2d.rs +++ b/title/shark/src/pipelines/primitive_2d.rs @@ -46,6 +46,7 @@ pub struct Primitive2dPipeline { pub bind_group_layout: BindGroupLayout, pub coarse_bin_pipeline: Pipeline, pub fine_bin_pipeline: Pipeline, + pub fine_clear_pipeline: Pipeline, pub rasterize_pipeline: Pipeline, } @@ -66,6 +67,8 @@ impl Primitive2dPipeline { BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), // Fine Tiles BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), + // Fine Color + BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), // UI Image Output
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage), ]); @@ -95,6 +98,14 @@ impl Primitive2dPipeline { layout, }); + let fine_clear_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { + shader: ShaderDesc { + entry: c"main", + code: shark_shaders::PRIMITIVE_2D_CLEAR_FINE_COMP_SPV, + }, + layout, + }); + let rasterize_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { shader: ShaderDesc { entry: c"main", @@ -107,6 +118,7 @@ impl Primitive2dPipeline { bind_group_layout, coarse_bin_pipeline, fine_bin_pipeline, + fine_clear_pipeline, rasterize_pipeline, } } -- 2.49.0