From: Josh Simmons Date: Fri, 24 May 2024 15:28:59 +0000 (+0200) Subject: shark: Two level binning for big performance X-Git-Url: https://git.nega.tv//gitweb.cgi?a=commitdiff_plain;h=a2f73579cd48f1fec26503a170f02e50719ee7d2;p=josh%2Fnarcissus shark: Two level binning for big performance --- diff --git a/title/shark-shaders/build.rs b/title/shark-shaders/build.rs index 5990ab9..10d140a 100644 --- a/title/shark-shaders/build.rs +++ b/title/shark-shaders/build.rs @@ -20,7 +20,11 @@ const SHADERS: &[Shader] = &[ }, Shader { stage: "comp", - name: "primitive_2d_bin", + name: "primitive_2d_bin_coarse", + }, + Shader { + stage: "comp", + name: "primitive_2d_bin_fine", }, Shader { stage: "comp", @@ -78,7 +82,7 @@ fn main() { assert!(status.success()); } - for Shader { stage, name } in SHADERS { + for &Shader { stage, name } in SHADERS { let depfile = std::fs::read_to_string(format!("{out_dir}/{name}.{stage}.d")).unwrap(); struct Lexer<'a> { diff --git a/title/shark-shaders/shaders/primitive_2d.h b/title/shark-shaders/shaders/primitive_2d.h new file mode 100644 index 0000000..c1d045b --- /dev/null +++ b/title/shark-shaders/shaders/primitive_2d.h @@ -0,0 +1,49 @@ +#define MAX_PRIMS 0x20000u +#define TILE_SIZE_COARSE 128 +#define TILE_SIZE_FINE 16 +#define TILE_SIZE_SHIFT 3 +#define TILE_BITMAP_WORDS_L1 (MAX_PRIMS / 32 / 32) +#define TILE_BITMAP_WORDS_L0 (MAX_PRIMS / 32) +#define TILE_STRIDE_COARSE TILE_BITMAP_WORDS_L0 +#define TILE_STRIDE_FINE (TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1) + +struct PrimitiveUniforms { + uvec2 screen_resolution; + uvec2 tile_resolution_coarse; + uvec2 tile_resolution_fine; + uvec2 atlas_resolution; + + uint num_primitives; + uint num_primitives_32; + uint num_primitives_1024; + uint pad_1; +}; + +struct Glyph { + ivec2 atlas_min; + ivec2 atlas_max; + + vec2 offset_min; + vec2 offset_max; +}; + +struct GlyphInstance { + vec2 position; + uint index; + uint color; +}; + +struct PrimitiveInstance { + uint type; + uint index; +}; + +#include 
"primitive_2d_bindings.h" + +bool test_glyph(uint index, uvec2 tile_min, uvec2 tile_max) { + const GlyphInstance gi = glyph_instances[index]; + const Glyph gl = glyphs[gi.index]; + const vec2 glyph_min = gi.position + gl.offset_min; + const vec2 glyph_max = gi.position + gl.offset_max; + return !(any(lessThan(tile_max, glyph_min)) || any(greaterThan(tile_min, glyph_max))); +} diff --git a/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl deleted file mode 100644 index d618644..0000000 --- a/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl +++ /dev/null @@ -1,68 +0,0 @@ -#version 460 - -#extension GL_GOOGLE_include_directive : require - -#extension GL_EXT_scalar_block_layout : require -#extension GL_EXT_control_flow_attributes : require - -#extension GL_KHR_shader_subgroup_vote : require -#extension GL_KHR_shader_subgroup_ballot : require - -#include "primitive_2d_types.h" -#include "primitive_2d_constants.h" - -layout(std430, set = 0, binding = 0) uniform uniformBuffer { - ivec2 screen_size; - ivec2 atlas_size; - uint num_primitives; -}; - -layout(std430, set = 0, binding = 3) readonly buffer glyphBuffer { - Glyph glyphs[]; -}; - -layout(std430, set = 0, binding = 4) readonly buffer glyphInstanceBuffer { - GlyphInstance glyph_instances[]; -}; - -layout(std430, set = 0, binding = 5) readonly buffer primitiveInstanceBuffer { - PrimitiveInstance primitive_instances[]; -}; - -bool test_glyph(uint index, ivec2 tile_min, ivec2 tile_max) { - const GlyphInstance gi = glyph_instances[index]; - const Glyph gl = glyphs[gi.index]; - const vec2 glyph_min = gi.position + gl.offset_min; - const vec2 glyph_max = gi.position + gl.offset_max; - return !(any(lessThan(tile_max, glyph_min)) || any(greaterThan(tile_min, glyph_max))); -} - -layout(std430, set = 0, binding = 6) writeonly buffer tileBuffer { - uint tile_bitmap[]; -}; - -// TODO: Spec constant support for different subgroup sizes. 
-layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; - -void main() { - const ivec2 tile_size = ivec2(TILE_SIZE, TILE_SIZE); - const ivec2 tile_coord = ivec2(gl_GlobalInvocationID.yz); - const ivec2 tile_min = ivec2(tile_coord * tile_size); - const ivec2 tile_max = min(tile_min + tile_size, screen_size); - - const uint local_index = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; - const uint primitive_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + local_index; - - bool intersects = false; - if (primitive_index < num_primitives) { - intersects = test_glyph(primitive_index, tile_min, tile_max); - } - - uvec4 ballot_result = subgroupBallot(intersects); - if (subgroupElect()) { // managed democracy wins again - const int tile_index = tile_coord.y * MAX_TILES_X + tile_coord.x; - const uint bitmap_base_offset = uint(tile_index * TILE_STRIDE); - tile_bitmap[bitmap_base_offset + 2u * gl_WorkGroupID.x + 0u] = ballot_result.x; - tile_bitmap[bitmap_base_offset + 2u * gl_WorkGroupID.x + 1u] = ballot_result.y; - } -} diff --git a/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl new file mode 100644 index 0000000..637a730 --- /dev/null +++ b/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl @@ -0,0 +1,36 @@ +#version 460 + +#extension GL_GOOGLE_include_directive : require + +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_control_flow_attributes : require + +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_ballot : require + +#include "primitive_2d.h" + +// TODO: Spec constant support for different subgroup sizes. 
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +void main() { + const uvec2 tile_size = uvec2(TILE_SIZE_COARSE, TILE_SIZE_COARSE); + const uvec2 tile_coord = gl_GlobalInvocationID.yz; + const uvec2 tile_min = tile_coord * tile_size; + const uvec2 tile_max = min(tile_min + tile_size, primitive_uniforms.screen_resolution); + + const uint primitive_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; + + bool intersects = false; + if (primitive_index < primitive_uniforms.num_primitives) { + intersects = test_glyph(primitive_index, tile_min, tile_max); + } + + uvec4 ballot_result = subgroupBallot(intersects); + if (subgroupElect()) { // managed democracy wins again + const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord.x; + const uint bitmap_offset = tile_index * TILE_STRIDE_COARSE; + coarse_bitmap_wo[bitmap_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x; + coarse_bitmap_wo[bitmap_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y; + } +} diff --git a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl new file mode 100644 index 0000000..d421615 --- /dev/null +++ b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl @@ -0,0 +1,53 @@ +#version 460 + +#extension GL_GOOGLE_include_directive : require + +#extension GL_EXT_scalar_block_layout : require +#extension GL_EXT_control_flow_attributes : require + +#extension GL_KHR_shader_subgroup_vote : require +#extension GL_KHR_shader_subgroup_ballot : require + +#include "primitive_2d.h" + +// TODO: Spec constant support for different subgroup sizes. 
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; + +void main() { + const uvec2 tile_size = uvec2(TILE_SIZE_FINE, TILE_SIZE_FINE); + const uvec2 tile_coord = gl_GlobalInvocationID.yz; + const uvec2 tile_min = tile_coord * tile_size; + const uvec2 tile_max = min(tile_min + tile_size, primitive_uniforms.screen_resolution); + const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x; + + const uint bitmap_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; + + uint bitmap_l0 = 0; + if (bitmap_index < primitive_uniforms.num_primitives_32) { + const uvec2 tile_coord_coarse = tile_coord >> TILE_SIZE_SHIFT; + const uint tile_index_coarse = tile_coord_coarse.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord_coarse.x; + const uint bitmap_offset_coarse = tile_index_coarse * TILE_STRIDE_COARSE + bitmap_index; + + uint bitmap_coarse = coarse_bitmap_ro[bitmap_offset_coarse]; + while (bitmap_coarse != 0) { + const uint i = findLSB(bitmap_coarse); + const uint primitive_index = bitmap_index * 32 + i; + bitmap_coarse ^= bitmap_coarse & -bitmap_coarse; + + if (test_glyph(primitive_index, tile_min, tile_max)) { + bitmap_l0 |= 1 << i; + } + } + } + + const uint fine_bitmap_l0_offset = tile_index * TILE_STRIDE_FINE + TILE_BITMAP_WORDS_L1 + bitmap_index; + fine_bitmap_wo[fine_bitmap_l0_offset] = bitmap_l0; + + const bool bit_l1 = bitmap_l0 != 0; + uvec4 ballot_result = subgroupBallot(bit_l1); + if (subgroupElect()) { + const uint fine_bitmap_l1_offset = tile_index * TILE_STRIDE_FINE; + fine_bitmap_wo[fine_bitmap_l1_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x; + fine_bitmap_wo[fine_bitmap_l1_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y; + } +} diff --git a/title/shark-shaders/shaders/primitive_2d_bindings.h b/title/shark-shaders/shaders/primitive_2d_bindings.h new file mode 100644 index 0000000..5962fef --- /dev/null +++ 
b/title/shark-shaders/shaders/primitive_2d_bindings.h @@ -0,0 +1,39 @@ + + +layout(std430, set = 0, binding = 0) uniform uniformBuffer { + PrimitiveUniforms primitive_uniforms; +}; + +layout (set = 0, binding = 1) uniform sampler bilinear_sampler; + +layout (set = 0, binding = 2) uniform texture2D glyph_atlas; + +layout(std430, set = 0, binding = 3) readonly buffer glyphBuffer { + Glyph glyphs[]; +}; + +layout(std430, set = 0, binding = 4) readonly buffer glyphInstanceBuffer { + GlyphInstance glyph_instances[]; +}; + +layout(std430, set = 0, binding = 5) readonly buffer primitiveInstanceBuffer { + PrimitiveInstance primitive_instances[]; +}; + +layout(std430, set = 0, binding = 6) readonly buffer coarseTileBufferRead { + uint coarse_bitmap_ro[]; +}; + +layout(std430, set = 0, binding = 6) writeonly buffer coarseTileBufferWrite { + uint coarse_bitmap_wo[]; +}; + +layout(std430, set = 0, binding = 7) readonly buffer fineTileBufferRead { + uint fine_bitmap_ro[]; +}; + +layout(std430, set = 0, binding = 7) writeonly buffer fineTileBufferWrite { + uint fine_bitmap_wo[]; +}; + +layout (set = 0, binding = 8, rgba16f) uniform writeonly image2D ui_image; diff --git a/title/shark-shaders/shaders/primitive_2d_constants.h b/title/shark-shaders/shaders/primitive_2d_constants.h deleted file mode 100644 index 5830e6f..0000000 --- a/title/shark-shaders/shaders/primitive_2d_constants.h +++ /dev/null @@ -1,6 +0,0 @@ -#define MAX_PRIMS 0x10000u -#define TILE_SIZE 16 -#define TILE_STRIDE (MAX_PRIMS / 32u) -#define MAX_TILES_X 180 -#define MAX_TILES_Y 113 -#define NUM_TILES (MAX_TILES_X * MAX_TILES_Y) \ No newline at end of file diff --git a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl index 127dd44..16e94ac 100644 --- a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl @@ -5,66 +5,40 @@ #extension GL_EXT_scalar_block_layout : 
require #extension GL_EXT_control_flow_attributes : require -#include "primitive_2d_constants.h" -#include "primitive_2d_types.h" +#include "primitive_2d.h" -layout(std430, set = 0, binding = 0) uniform uniformBuffer { - uint screen_width; - uint screen_height; - uint atlas_width; - uint atlas_height; - uint num_primitives; -}; - -layout (set = 0, binding = 1) uniform sampler bilinear_sampler; -layout (set = 0, binding = 2) uniform texture2D glyph_atlas; - -layout(std430, set = 0, binding = 3) readonly buffer glyphBuffer { - Glyph glyphs[]; -}; - -layout(std430, set = 0, binding = 4) readonly buffer glyphInstanceBuffer { - GlyphInstance glyph_instances[]; -}; - -layout(std430, set = 0, binding = 5) readonly buffer primitiveInstanceBuffer { - PrimitiveInstance primitive_instances[]; -}; - -layout(std430, set = 0, binding = 6) readonly buffer tileBuffer { - uint tile_bitmap[]; -}; - -layout (set = 0, binding = 7, rgba16f) uniform writeonly image2D ui_image; - -layout (local_size_x = 16, local_size_y = 16, local_size_z = 1) in; +layout (local_size_x = TILE_SIZE_FINE, local_size_y = TILE_SIZE_FINE, local_size_z = 1) in; void main() { vec4 accum = vec4(0.0); - const ivec2 tile_coord = ivec2(gl_WorkGroupID.xy); - const int tile_index = tile_coord.y * MAX_TILES_X + tile_coord.x; - const uint bitmap_base_offset = uint(tile_index * TILE_STRIDE); - - for (int i = 0; i < num_primitives / 32; i++) { - uint bitmap = tile_bitmap[bitmap_base_offset + i]; - while (bitmap != 0) { - const uint t = bitmap & -bitmap; - const int index = i * 32 + findLSB(bitmap); - bitmap ^= t; - - const GlyphInstance gi = glyph_instances[index]; - const Glyph gl = glyphs[gi.index]; - const vec4 color = unpackUnorm4x8(gi.color).bgra; - const vec2 glyph_min = gi.position + gl.offset_min; - const vec2 glyph_max = gi.position + gl.offset_max; - const vec2 glyph_size = gl.offset_max - gl.offset_min; - const vec2 sample_center = gl_GlobalInvocationID.xy; // half pixel offset goes here? 
- if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) { - const vec2 uv = mix(vec2(gl.atlas_min), vec2(gl.atlas_max), (sample_center - glyph_min) / glyph_size) / vec2(atlas_width, atlas_height); - const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r * color.a; - accum.rgb = (coverage * color.rgb) + accum.rgb * (1.0 - coverage); - accum.a = coverage + accum.a * (1.0 - coverage); + const uvec2 tile_coord = gl_WorkGroupID.xy; + const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x; + const uint bitmap_offset = tile_index * TILE_STRIDE_FINE; + + for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) { + uint bitmap_l1 = fine_bitmap_ro[bitmap_offset + index_l1]; + while (bitmap_l1 != 0) { + const uint index_l0 = index_l1 * 32 + findLSB(bitmap_l1); + uint bitmap_l0 = fine_bitmap_ro[bitmap_offset + TILE_BITMAP_WORDS_L1 + index_l0]; + bitmap_l1 ^= bitmap_l1 & -bitmap_l1; + while (bitmap_l0 != 0) { + const uint primitive_index = index_l0 * 32 + findLSB(bitmap_l0); + bitmap_l0 ^= bitmap_l0 & -bitmap_l0; + + const GlyphInstance gi = glyph_instances[primitive_index]; + const Glyph gl = glyphs[gi.index]; + const vec2 glyph_min = gi.position + gl.offset_min; + const vec2 glyph_max = gi.position + gl.offset_max; + const vec2 sample_center = gl_GlobalInvocationID.xy; // half pixel offset goes here? 
+ if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) { + const vec2 glyph_size = gl.offset_max - gl.offset_min; + const vec4 color = unpackUnorm4x8(gi.color).bgra; + const vec2 uv = mix(gl.atlas_min, gl.atlas_max, (sample_center - glyph_min) / glyph_size) / primitive_uniforms.atlas_resolution; + const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r * color.a; + accum.rgb = (coverage * color.rgb) + accum.rgb * (1.0 - coverage); + accum.a = coverage + accum.a * (1.0 - coverage); + } } } } diff --git a/title/shark-shaders/shaders/primitive_2d_types.h b/title/shark-shaders/shaders/primitive_2d_types.h deleted file mode 100644 index 6b531a8..0000000 --- a/title/shark-shaders/shaders/primitive_2d_types.h +++ /dev/null @@ -1,19 +0,0 @@ - -struct Glyph { - ivec2 atlas_min; - ivec2 atlas_max; - - vec2 offset_min; - vec2 offset_max; -}; - -struct GlyphInstance { - vec2 position; - uint index; - uint color; -}; - -struct PrimitiveInstance { - uint type; - uint index; -}; diff --git a/title/shark/src/helpers.rs b/title/shark/src/helpers.rs index e45e0ba..d211c6f 100644 --- a/title/shark/src/helpers.rs +++ b/title/shark/src/helpers.rs @@ -3,7 +3,7 @@ use std::path::Path; use narcissus_core::{obj, Widen}; use narcissus_maths::{vec2, vec3, vec4, Vec2, Vec3}; -use crate::pipelines::Vertex; +use crate::pipelines::basic::Vertex; pub fn load_obj>(path: P) -> (Vec, Vec) { #[derive(Default)] diff --git a/title/shark/src/main.rs b/title/shark/src/main.rs index a3f68fd..15f83c3 100644 --- a/title/shark/src/main.rs +++ b/title/shark/src/main.rs @@ -3,7 +3,9 @@ use std::ops::Index; use std::path::Path; use std::time::{Duration, Instant}; -use narcissus_core::dds; +use narcissus_core::{dds, Widen as _}; +use pipelines::basic::BasicPipeline; +use pipelines::display_transform::DisplayTransformPipeline; use renderdoc_sys as rdoc; use fonts::{FontFamily, Fonts}; @@ -12,7 +14,7 @@ use narcissus_app::{create_app, 
Event, Key, PressedState, WindowDesc}; use narcissus_core::{box_assume_init, default, rand::Pcg64, zeroed_box, BitIter}; use narcissus_font::{FontCollection, GlyphCache, HorizontalMetrics}; use narcissus_gpu::{ - create_device, Access, Bind, BufferDesc, BufferImageCopy, BufferUsageFlags, ClearValue, + create_device, Access, Bind, Buffer, BufferDesc, BufferImageCopy, BufferUsageFlags, ClearValue, CmdEncoder, ColorSpace, Device, DeviceExt, Extent2d, Extent3d, Frame, GlobalBarrier, Image, ImageAspectFlags, ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout, ImageSubresourceRange, ImageTiling, ImageUsageFlags, IndexType, LoadOp, MemoryLocation, @@ -24,12 +26,13 @@ use narcissus_image as image; use narcissus_maths::{ clamp, perlin_noise3, sin_pi_f32, vec3, Affine3, Deg, HalfTurn, Mat3, Mat4, Point3, Vec3, }; -use pipelines::{ - BasicPipeline, BasicUniforms, DisplayTransformPipeline, GlyphInstance, Primitive2dPipeline, -}; +use pipelines::primitive_2d::{GlyphInstance, Primitive2dPipeline}; use spring::simple_spring_damper_exact; -use crate::pipelines::PrimitiveUniforms; +use crate::pipelines::basic::BasicUniforms; +use crate::pipelines::primitive_2d::{ + PrimitiveUniforms, TILE_SIZE_COARSE, TILE_SIZE_FINE, TILE_STRIDE_COARSE, TILE_STRIDE_FINE, +}; mod fonts; mod helpers; @@ -848,11 +851,17 @@ struct DrawState<'gpu> { width: u32, height: u32, + tile_resolution_coarse_x: u32, + tile_resolution_coarse_y: u32, + tile_resolution_fine_x: u32, + tile_resolution_fine_y: u32, + depth_image: Image, rt_image: Image, ui_image: Image, - tile_bitmap_buffer: PersistentBuffer<'gpu>, + coarse_tile_bitmap_buffer: Buffer, + fine_tile_bitmap_buffer: Buffer, glyph_atlas_image: Image, @@ -873,20 +882,6 @@ impl<'gpu> DrawState<'gpu> { let models = Models::load(gpu); let images = Images::load(gpu, thread_token); - const MAX_PRIMS: usize = 0x10000; - const TILE_STRIDE: usize = MAX_PRIMS / 32; - const MAX_TILES_X: usize = 180; - const MAX_TILES_Y: usize = 113; - - const 
BITMAP_SIZE: usize = (MAX_TILES_X * MAX_TILES_Y * TILE_STRIDE) * 4; - - let tile_bitmap_buffer = gpu.create_persistent_buffer(&BufferDesc { - memory_location: MemoryLocation::Device, - host_mapped: true, - usage: BufferUsageFlags::STORAGE, - size: BITMAP_SIZE, - }); - Self { gpu, basic_pipeline, @@ -894,10 +889,15 @@ impl<'gpu> DrawState<'gpu> { display_transform_pipeline, width: 0, height: 0, + tile_resolution_coarse_x: 0, + tile_resolution_coarse_y: 0, + tile_resolution_fine_x: 0, + tile_resolution_fine_y: 0, depth_image: default(), rt_image: default(), ui_image: default(), - tile_bitmap_buffer, + coarse_tile_bitmap_buffer: default(), + fine_tile_bitmap_buffer: default(), glyph_atlas_image: default(), samplers, models, @@ -1008,6 +1008,49 @@ impl<'gpu> DrawState<'gpu> { gpu.destroy_image(frame, self.rt_image); gpu.destroy_image(frame, self.ui_image); + let tile_resolution_coarse_x = (width + (TILE_SIZE_COARSE - 1)) / TILE_SIZE_COARSE; + let tile_resolution_coarse_y = (height + (TILE_SIZE_COARSE - 1)) / TILE_SIZE_COARSE; + let tile_resolution_fine_x = (width + (TILE_SIZE_FINE - 1)) / TILE_SIZE_FINE; + let tile_resolution_fine_y = (height + (TILE_SIZE_FINE - 1)) / TILE_SIZE_FINE; + + if tile_resolution_coarse_x != self.tile_resolution_coarse_x + || tile_resolution_coarse_y != self.tile_resolution_coarse_y + || tile_resolution_fine_x != self.tile_resolution_fine_x + || tile_resolution_fine_y != self.tile_resolution_fine_y + { + gpu.destroy_buffer(frame, self.coarse_tile_bitmap_buffer); + gpu.destroy_buffer(frame, self.fine_tile_bitmap_buffer); + + let coarse_bitmap_buffer_size = tile_resolution_coarse_x + * tile_resolution_coarse_y + * TILE_STRIDE_COARSE + * std::mem::size_of::<u32>() as u32; + + let fine_bitmap_buffer_size = tile_resolution_fine_x + * tile_resolution_fine_y + * TILE_STRIDE_FINE + * std::mem::size_of::<u32>() as u32; + + self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc { + memory_location: MemoryLocation::Device, + host_mapped: false, + usage: 
BufferUsageFlags::STORAGE, + size: coarse_bitmap_buffer_size.widen(), + }); + + self.fine_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc { + memory_location: MemoryLocation::Device, + host_mapped: false, + usage: BufferUsageFlags::STORAGE, + size: fine_bitmap_buffer_size.widen(), + }); + + self.tile_resolution_coarse_x = tile_resolution_coarse_x; + self.tile_resolution_coarse_y = tile_resolution_coarse_y; + self.tile_resolution_fine_x = tile_resolution_fine_x; + self.tile_resolution_fine_y = tile_resolution_fine_y; + } + self.depth_image = gpu.create_image(&ImageDesc { memory_location: MemoryLocation::Device, host_mapped: false, @@ -1271,16 +1314,26 @@ impl<'gpu> DrawState<'gpu> { // Render UI { + let num_primitives = ui_state.primitive_instances.len() as u32; + let num_primitives_32 = (num_primitives + 31) / 32; + let num_primitives_1024 = (num_primitives_32 + 31) / 32; let uniforms_buffer = gpu.request_transient_buffer_with_data( frame, thread_token, BufferUsageFlags::UNIFORM, &PrimitiveUniforms { - screen_width: width, - screen_height: height, - atlas_width, - atlas_height, - num_primitives: ui_state.primitive_instances.len() as u32, + screen_resolution_x: self.width, + screen_resolution_y: self.height, + tile_resolution_coarse_x: self.tile_resolution_coarse_x, + tile_resolution_coarse_y: self.tile_resolution_coarse_y, + tile_resolution_fine_x: self.tile_resolution_fine_x, + tile_resolution_fine_y: self.tile_resolution_fine_y, + atlas_resolution_x: atlas_width, + atlas_resolution_y: atlas_height, + num_primitives, + num_primitives_32, + num_primitives_1024, + _pad0: 0, }, ); let glyph_buffer = gpu.request_transient_buffer_with_data( @@ -1302,7 +1355,7 @@ impl<'gpu> DrawState<'gpu> { &[0u32], ); - gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.bin_pipeline); + gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.coarse_bin_pipeline); gpu.cmd_set_bind_group( frame, @@ -1346,11 +1399,20 @@ impl<'gpu> DrawState<'gpu> { Bind { binding: 6, 
array_element: 0, - typed: TypedBind::StorageBuffer(&[self.tile_bitmap_buffer.to_arg()]), + typed: TypedBind::StorageBuffer(&[self + .coarse_tile_bitmap_buffer + .to_arg()]), }, Bind { binding: 7, array_element: 0, + typed: TypedBind::StorageBuffer(&[self + .fine_tile_bitmap_buffer + .to_arg()]), + }, + Bind { + binding: 8, + array_element: 0, typed: TypedBind::StorageImage(&[( ImageLayout::General, self.ui_image, @@ -1361,9 +1423,27 @@ impl<'gpu> DrawState<'gpu> { gpu.cmd_dispatch( cmd_encoder, - (ui_state.primitive_instances.len() as u32 + 63) / 64, - (self.width + 15) / 16, - (self.height + 15) / 16, + (num_primitives + 63) / 64, + self.tile_resolution_coarse_x, + self.tile_resolution_coarse_y, + ); + + gpu.cmd_barrier( + cmd_encoder, + Some(&GlobalBarrier { + prev_access: &[Access::ShaderWrite], + next_access: &[Access::ShaderOtherRead], + }), + &[], + ); + + gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_bin_pipeline); + + gpu.cmd_dispatch( + cmd_encoder, + (num_primitives_32 + 63) / 64, + self.tile_resolution_fine_x, + self.tile_resolution_fine_y, ); gpu.cmd_barrier( @@ -1379,8 +1459,8 @@ impl<'gpu> DrawState<'gpu> { gpu.cmd_dispatch( cmd_encoder, - (self.width + 15) / 16, - (self.height + 15) / 16, + self.tile_resolution_fine_x, + self.tile_resolution_fine_y, 1, ); diff --git a/title/shark/src/pipelines/mod.rs b/title/shark/src/pipelines/mod.rs index d6b57ec..2c861f8 100644 --- a/title/shark/src/pipelines/mod.rs +++ b/title/shark/src/pipelines/mod.rs @@ -1,7 +1,3 @@ -mod basic; -mod display_transform; -mod primitive_2d; - -pub use basic::{BasicPipeline, BasicUniforms, Vertex}; -pub use display_transform::DisplayTransformPipeline; -pub use primitive_2d::{GlyphInstance, Primitive2dPipeline, PrimitiveUniforms}; +pub mod basic; +pub mod display_transform; +pub mod primitive_2d; diff --git a/title/shark/src/pipelines/primitive_2d.rs b/title/shark/src/pipelines/primitive_2d.rs index 8e0fa64..c91b00d 100644 --- 
a/title/shark/src/pipelines/primitive_2d.rs +++ b/title/shark/src/pipelines/primitive_2d.rs @@ -6,14 +6,31 @@ use narcissus_gpu::{ use crate::Gpu; +pub const MAX_PRIMS: u32 = 0x20000; +pub const TILE_SIZE_COARSE: u32 = 128; +pub const TILE_SIZE_FINE: u32 = 16; +pub const TILE_BITMAP_WORDS_L1: u32 = MAX_PRIMS / 32 / 32; +pub const TILE_BITMAP_WORDS_L0: u32 = MAX_PRIMS / 32; +pub const TILE_STRIDE_COARSE: u32 = TILE_BITMAP_WORDS_L0; +pub const TILE_STRIDE_FINE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1; + #[allow(unused)] #[repr(C)] pub struct PrimitiveUniforms { - pub screen_width: u32, - pub screen_height: u32, - pub atlas_width: u32, - pub atlas_height: u32, + pub screen_resolution_x: u32, + pub screen_resolution_y: u32, + pub tile_resolution_coarse_x: u32, + pub tile_resolution_coarse_y: u32, + pub tile_resolution_fine_x: u32, + pub tile_resolution_fine_y: u32, + pub atlas_resolution_x: u32, + pub atlas_resolution_y: u32, + pub num_primitives: u32, + pub num_primitives_32: u32, + pub num_primitives_1024: u32, + + pub _pad0: u32, } #[allow(unused)] @@ -27,7 +44,8 @@ pub struct GlyphInstance { pub struct Primitive2dPipeline { pub bind_group_layout: BindGroupLayout, - pub bin_pipeline: Pipeline, + pub coarse_bin_pipeline: Pipeline, + pub fine_bin_pipeline: Pipeline, pub rasterize_pipeline: Pipeline, } @@ -46,16 +64,26 @@ impl Primitive2dPipeline { BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), // Primitive Instances BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), - // Tiles + // Coarse Tiles BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), - // UI + // Fine Tiles + BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), + // UI Image Output BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage), ]); - let bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { + let coarse_bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { + shader: 
ShaderDesc { + entry: c"main", + code: shark_shaders::PRIMITIVE_2D_BIN_COARSE_COMP_SPV, + }, + bind_group_layouts: &[bind_group_layout], + }); + + let fine_bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { shader: ShaderDesc { entry: c"main", - code: shark_shaders::PRIMITIVE_2D_BIN_COMP_SPV, + code: shark_shaders::PRIMITIVE_2D_BIN_FINE_COMP_SPV, }, bind_group_layouts: &[bind_group_layout], }); @@ -70,7 +98,8 @@ impl Primitive2dPipeline { Self { bind_group_layout, - bin_pipeline, + coarse_bin_pipeline, + fine_bin_pipeline, rasterize_pipeline, } }