From 178533d157df50a171c1c93a6353c228d727b8ad Mon Sep 17 00:00:00 2001 From: Josh Simmons Date: Sun, 9 Jun 2024 23:05:13 +0200 Subject: [PATCH] shark: Use BDA instead of compute bindings --- .../shark-shaders/shaders/compute_bindings.h | 60 ++++++ .../shaders/display_transform.comp.glsl | 50 ++--- title/shark-shaders/shaders/primitive_2d.h | 30 +-- .../shaders/primitive_2d_bin.comp.glsl | 23 ++- .../shaders/primitive_2d_bin_clear.comp.glsl | 7 +- .../shaders/primitive_2d_bindings.h | 27 --- .../shaders/primitive_2d_rasterize.comp.glsl | 29 +-- title/shark/src/main.rs | 195 ++++++++++-------- .../shark/src/pipelines/display_transform.rs | 54 ----- title/shark/src/pipelines/mod.rs | 38 +++- title/shark/src/pipelines/primitive_2d.rs | 103 --------- 11 files changed, 257 insertions(+), 359 deletions(-) create mode 100644 title/shark-shaders/shaders/compute_bindings.h delete mode 100644 title/shark-shaders/shaders/primitive_2d_bindings.h delete mode 100644 title/shark/src/pipelines/display_transform.rs delete mode 100644 title/shark/src/pipelines/primitive_2d.rs diff --git a/title/shark-shaders/shaders/compute_bindings.h b/title/shark-shaders/shaders/compute_bindings.h new file mode 100644 index 0000000..f8cb41e --- /dev/null +++ b/title/shark-shaders/shaders/compute_bindings.h @@ -0,0 +1,60 @@ + +struct Glyph { + ivec2 atlas_min; + ivec2 atlas_max; + + vec2 offset_min; + vec2 offset_max; +}; + +struct GlyphInstance { + vec2 position; + uint index; + uint color; +}; + +layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer Glyphs +{ + Glyph values[]; +}; + +layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer GlyphInstances +{ + GlyphInstance values[]; +}; + +layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer TilesRead +{ + uint values[]; +}; + +layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer TilesWrite +{ + uint values[]; +}; + +struct ComputeUniforms { + uvec2 
screen_resolution; + uvec2 atlas_resolution; + + uint num_primitives; + uint num_primitives_32; + uint num_primitives_1024; + uint tile_stride; + + Glyphs glyphs; + GlyphInstances glyph_instances; + TilesWrite tiles; +}; + +layout(std430, push_constant) uniform UniformBuffer { + ComputeUniforms uniforms; +}; + +layout (set = 0, binding = 0) uniform sampler bilinear_sampler; +layout (set = 0, binding = 1) uniform texture3D tony_mc_mapface_lut; +layout (set = 0, binding = 2) uniform texture2D glyph_atlas; +layout (set = 0, binding = 3, rgba16f) uniform writeonly image2D ui_layer_write; +layout (set = 0, binding = 3, rgba16f) uniform readonly image2D ui_layer_read; +layout (set = 0, binding = 4, rgba16f) uniform readonly image2D color_layer; +layout (set = 0, binding = 5, rgba16f) uniform writeonly image2D composited_output; diff --git a/title/shark-shaders/shaders/display_transform.comp.glsl b/title/shark-shaders/shaders/display_transform.comp.glsl index 5f1bf32..be5cb3e 100644 --- a/title/shark-shaders/shaders/display_transform.comp.glsl +++ b/title/shark-shaders/shaders/display_transform.comp.glsl @@ -1,39 +1,13 @@ #version 460 -#extension GL_EXT_control_flow_attributes : require +#extension GL_GOOGLE_include_directive : require -const uint MAX_PRIMS = 1 << 18; -const uint TILE_BITMAP_L1_WORDS = (MAX_PRIMS / 32 / 32); -const uint TILE_BITMAP_L0_WORDS = (MAX_PRIMS / 32); -const uint TILE_STRIDE = (TILE_BITMAP_L0_WORDS + TILE_BITMAP_L1_WORDS + 2); -const uint TILE_BITMAP_RANGE_OFFSET = 0; +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference2 : require +#extension GL_EXT_scalar_block_layout : require -struct PrimitiveUniforms { - uvec2 screen_resolution; - uvec2 atlas_resolution; - - uint num_primitives; - uint num_primitives_32; - uint num_primitives_1024; - uint tile_stride; -}; - -layout(std430, push_constant) uniform uniformBuffer { - PrimitiveUniforms primitive_uniforms; -}; - -layout (set = 0, binding = 0) uniform sampler 
bilinear_sampler; - -layout (set = 0, binding = 1) uniform texture3D tony_mc_mapface_lut; - -layout(std430, set = 0, binding = 2) readonly buffer tileBufferRead { - uint tile_bitmap_ro[]; -}; - -layout (set = 0, binding = 3, rgba16f) uniform readonly image2D layer_rt; -layout (set = 0, binding = 4, rgba16f) uniform readonly image2D layer_ui; - -layout (set = 0, binding = 5, rgba16f) uniform writeonly image2D composited_output; +#include "compute_bindings.h" +#include "primitive_2d.h" float srgb_oetf(float a) { return (.0031308f >= a) ? 12.92f * a : 1.055f * pow(a, .4166666666666667f) - .055f; @@ -53,18 +27,20 @@ vec3 tony_mc_mapface(vec3 stimulus) { layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; void main() { - const vec3 stimulus = imageLoad(layer_rt, ivec2(gl_GlobalInvocationID.xy)).rgb; + const vec3 stimulus = imageLoad(color_layer, ivec2(gl_GlobalInvocationID.xy)).rgb; const vec3 transformed = tony_mc_mapface(stimulus); vec3 composited = srgb_oetf(transformed); const uvec2 tile_coord = gl_WorkGroupID.xy / 4; - const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride + tile_coord.x; + const uint tile_index = tile_coord.y * uniforms.tile_stride + tile_coord.x; const uint tile_base = tile_index * TILE_STRIDE; - const uint first = tile_bitmap_ro[tile_base + TILE_BITMAP_RANGE_OFFSET + 0]; - const uint last = tile_bitmap_ro[tile_base + TILE_BITMAP_RANGE_OFFSET + 1]; + TilesRead tiles_read = TilesRead(uniforms.tiles); + + const uint first = tiles_read.values[tile_base + TILE_BITMAP_RANGE_OFFSET + 0]; + const uint last = tiles_read.values[tile_base + TILE_BITMAP_RANGE_OFFSET + 1]; if (first <= last) { - const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba; + const vec4 ui = imageLoad(ui_layer_read, ivec2(gl_GlobalInvocationID.xy)).rgba; composited = ui.rgb + (composited * (1.0 - ui.a)); } diff --git a/title/shark-shaders/shaders/primitive_2d.h b/title/shark-shaders/shaders/primitive_2d.h index 231cc6f..37ebb1f 100644 
--- a/title/shark-shaders/shaders/primitive_2d.h +++ b/title/shark-shaders/shaders/primitive_2d.h @@ -8,35 +8,9 @@ const uint TILE_BITMAP_RANGE_OFFSET = 0; const uint TILE_BITMAP_L1_OFFSET = 2; const uint TILE_BITMAP_L0_OFFSET = TILE_BITMAP_L1_OFFSET + TILE_BITMAP_L1_WORDS; -struct PrimitiveUniforms { - uvec2 screen_resolution; - uvec2 atlas_resolution; - - uint num_primitives; - uint num_primitives_32; - uint num_primitives_1024; - uint tile_stride; -}; - -struct Glyph { - ivec2 atlas_min; - ivec2 atlas_max; - - vec2 offset_min; - vec2 offset_max; -}; - -struct GlyphInstance { - vec2 position; - uint index; - uint color; -}; - -#include "primitive_2d_bindings.h" - bool test_glyph(uint index, uvec2 tile_min, uvec2 tile_max) { - const GlyphInstance gi = glyph_instances[index]; - const Glyph gl = glyphs[gi.index]; + const GlyphInstance gi = uniforms.glyph_instances.values[index]; + const Glyph gl = uniforms.glyphs.values[gi.index]; const vec2 glyph_min = gi.position + gl.offset_min; const vec2 glyph_max = gi.position + gl.offset_max; return !(any(lessThan(tile_max, glyph_min)) || any(greaterThan(tile_min, glyph_max))); diff --git a/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl index b2e3e9f..298f040 100644 --- a/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl @@ -2,12 +2,15 @@ #extension GL_GOOGLE_include_directive : require +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference2 : require #extension GL_EXT_scalar_block_layout : require #extension GL_EXT_control_flow_attributes : require #extension GL_KHR_shader_subgroup_vote : require #extension GL_KHR_shader_subgroup_ballot : require +#include "compute_bindings.h" #include "primitive_2d.h" const uint SUBGROUP_SIZE = 64; @@ -21,14 +24,14 @@ shared uint bitmap_0[SUBGROUP_SIZE]; void main() { const uvec2 bin_coord = gl_GlobalInvocationID.yz; const uvec2 
bin_min = bin_coord * TILE_SIZE * 8; - const uvec2 bin_max = min(bin_min + TILE_SIZE * 8, primitive_uniforms.screen_resolution); + const uvec2 bin_max = min(bin_min + TILE_SIZE * 8, uniforms.screen_resolution); for (uint i = 0; i < NUM_PRIMS_WG; i += gl_SubgroupSize.x) { const uint prim_index = gl_WorkGroupID.x * NUM_PRIMS_WG + i + gl_SubgroupInvocationID; bool intersects = false; - if (prim_index < primitive_uniforms.num_primitives) { - const GlyphInstance gi = glyph_instances[prim_index]; - const Glyph gl = glyphs[gi.index]; + if (prim_index < uniforms.num_primitives) { + const GlyphInstance gi = uniforms.glyph_instances.values[prim_index]; + const Glyph gl = uniforms.glyphs.values[gi.index]; const vec2 glyph_min = gi.position + gl.offset_min; const vec2 glyph_max = gi.position + gl.offset_max; intersects = !(any(lessThan(bin_max, glyph_min)) || any(greaterThan(bin_min, glyph_max))); @@ -44,10 +47,10 @@ void main() { const uint y = gl_SubgroupInvocationID.x >> 3; const uvec2 tile_coord = gl_GlobalInvocationID.yz * 8 + uvec2(x, y); const uvec2 tile_min = tile_coord * TILE_SIZE; - const uvec2 tile_max = min(tile_min + TILE_SIZE, primitive_uniforms.screen_resolution); + const uvec2 tile_max = min(tile_min + TILE_SIZE, uniforms.screen_resolution); if (all(lessThan(tile_min, tile_max))) { - const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride + tile_coord.x; + const uint tile_index = tile_coord.y * uniforms.tile_stride + tile_coord.x; for (uint i = 0; i < 2; i++) { uint out_1 = 0; @@ -68,15 +71,15 @@ void main() { if (out_0 != 0) { out_1 |= 1 << j; - tile_bitmap_wo[tile_index * TILE_STRIDE + TILE_BITMAP_L0_OFFSET + gl_WorkGroupID.x * 64 + index_0] = out_0; + uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_L0_OFFSET + gl_WorkGroupID.x * 64 + index_0] = out_0; } } - tile_bitmap_wo[tile_index * TILE_STRIDE + TILE_BITMAP_L1_OFFSET + gl_WorkGroupID.x * 2 + i] = out_1; + uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_L1_OFFSET + 
gl_WorkGroupID.x * 2 + i] = out_1; if (out_1 != 0) { - atomicMin(tile_bitmap_wo[tile_index * TILE_STRIDE + TILE_BITMAP_RANGE_OFFSET + 0], gl_WorkGroupID.x * 2 + i); - atomicMax(tile_bitmap_wo[tile_index * TILE_STRIDE + TILE_BITMAP_RANGE_OFFSET + 1], gl_WorkGroupID.x * 2 + i); + atomicMin(uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_RANGE_OFFSET + 0], gl_WorkGroupID.x * 2 + i); + atomicMax(uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_RANGE_OFFSET + 1], gl_WorkGroupID.x * 2 + i); } } } diff --git a/title/shark-shaders/shaders/primitive_2d_bin_clear.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_clear.comp.glsl index 9913ec0..d9e138b 100644 --- a/title/shark-shaders/shaders/primitive_2d_bin_clear.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_bin_clear.comp.glsl @@ -2,18 +2,21 @@ #extension GL_GOOGLE_include_directive : require +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference2 : require #extension GL_EXT_scalar_block_layout : require #extension GL_EXT_control_flow_attributes : require #extension GL_KHR_shader_subgroup_vote : require #extension GL_KHR_shader_subgroup_ballot : require +#include "compute_bindings.h" #include "primitive_2d.h" // TODO: Spec constant support for different subgroup sizes. 
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; void main() { - tile_bitmap_wo[gl_GlobalInvocationID.x * TILE_STRIDE + TILE_BITMAP_RANGE_OFFSET + 0] = 0xffffffff; - tile_bitmap_wo[gl_GlobalInvocationID.x * TILE_STRIDE + TILE_BITMAP_RANGE_OFFSET + 1] = 0; + uniforms.tiles.values[gl_GlobalInvocationID.x * TILE_STRIDE + TILE_BITMAP_RANGE_OFFSET + 0] = 0xffffffff; + uniforms.tiles.values[gl_GlobalInvocationID.x * TILE_STRIDE + TILE_BITMAP_RANGE_OFFSET + 1] = 0; } diff --git a/title/shark-shaders/shaders/primitive_2d_bindings.h b/title/shark-shaders/shaders/primitive_2d_bindings.h deleted file mode 100644 index c08cece..0000000 --- a/title/shark-shaders/shaders/primitive_2d_bindings.h +++ /dev/null @@ -1,27 +0,0 @@ - - -layout(std430, push_constant) uniform uniformBuffer { - PrimitiveUniforms primitive_uniforms; -}; - -layout (set = 0, binding = 0) uniform sampler bilinear_sampler; - -layout (set = 0, binding = 1) uniform texture2D glyph_atlas; - -layout(std430, set = 0, binding = 2) readonly buffer glyphBuffer { - Glyph glyphs[]; -}; - -layout(std430, set = 0, binding = 3) readonly buffer glyphInstanceBuffer { - GlyphInstance glyph_instances[]; -}; - -layout(std430, set = 0, binding = 4) readonly buffer tileBufferRead { - uint tile_bitmap_ro[]; -}; - -layout(std430, set = 0, binding = 4) writeonly buffer tileBufferWrite { - uint tile_bitmap_wo[]; -}; - -layout (set = 0, binding = 5, rgba16f) uniform writeonly image2D ui_image; diff --git a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl index 2bd3cd5..2692ed6 100644 --- a/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl @@ -2,12 +2,15 @@ #extension GL_GOOGLE_include_directive : require +#extension GL_EXT_buffer_reference : require +#extension GL_EXT_buffer_reference2 : require #extension GL_EXT_scalar_block_layout : require #extension 
GL_EXT_control_flow_attributes : require #extension GL_KHR_shader_subgroup_vote : require #extension GL_KHR_shader_subgroup_ballot : require +#include "compute_bindings.h" #include "primitive_2d.h" layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; @@ -32,11 +35,13 @@ vec3 plasma_quintic(float x) void main() { const uvec2 tile_coord = gl_WorkGroupID.xy / 4; - const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride + tile_coord.x; + const uint tile_index = tile_coord.y * uniforms.tile_stride + tile_coord.x; const uint tile_base = tile_index * TILE_STRIDE; - const uint first = tile_bitmap_ro[tile_base + TILE_BITMAP_RANGE_OFFSET + 0]; - const uint last = tile_bitmap_ro[tile_base + TILE_BITMAP_RANGE_OFFSET + 1]; + TilesRead tiles_read = TilesRead(uniforms.tiles); + + const uint first = tiles_read.values[tile_base + TILE_BITMAP_RANGE_OFFSET + 0]; + const uint last = tiles_read.values[tile_base + TILE_BITMAP_RANGE_OFFSET + 1]; [[branch]] if (last < first) { @@ -49,7 +54,7 @@ void main() { // For each tile, iterate over all words in the L1 bitmap. for (uint index_l1 = first; index_l1 <= last; index_l1++) { // For each word, iterate all set bits. - uint bitmap_l1 = tile_bitmap_ro[tile_base + TILE_BITMAP_L1_OFFSET + index_l1]; + uint bitmap_l1 = tiles_read.values[tile_base + TILE_BITMAP_L1_OFFSET + index_l1]; while (bitmap_l1 != 0) { const uint i = findLSB(bitmap_l1); @@ -58,14 +63,14 @@ void main() { // For each set bit in the L1 bitmap, iterate the set bits in the // corresponding L0 bitmap. 
const uint index_l0 = index_l1 * 32 + i; - uint bitmap_l0 = tile_bitmap_ro[tile_base + TILE_BITMAP_L0_OFFSET + index_l0]; + uint bitmap_l0 = tiles_read.values[tile_base + TILE_BITMAP_L0_OFFSET + index_l0]; count += bitCount(bitmap_l0); } } const vec3 color = plasma_quintic(float(count) / 100.0); - imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0)); + imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0)); #else @@ -74,7 +79,7 @@ void main() { // For each tile, iterate over all words in the L1 bitmap. for (uint index_l1 = first; index_l1 <= last; index_l1++) { // For each word, iterate all set bits. - uint bitmap_l1 = tile_bitmap_ro[tile_base + TILE_BITMAP_L1_OFFSET + index_l1]; + uint bitmap_l1 = tiles_read.values[tile_base + TILE_BITMAP_L1_OFFSET + index_l1]; while (bitmap_l1 != 0) { const uint i = findLSB(bitmap_l1); @@ -83,7 +88,7 @@ void main() { // For each set bit in the L1 bitmap, iterate the set bits in the // corresponding L0 bitmap. const uint index_l0 = index_l1 * 32 + i; - uint bitmap_l0 = tile_bitmap_ro[tile_base + TILE_BITMAP_L0_OFFSET + index_l0]; + uint bitmap_l0 = tiles_read.values[tile_base + TILE_BITMAP_L0_OFFSET + index_l0]; while (bitmap_l0 != 0) { const uint j = findLSB(bitmap_l0); bitmap_l0 ^= bitmap_l0 & -bitmap_l0; @@ -91,8 +96,8 @@ void main() { // Set bits in the L0 bitmap indicate binned primitives for this tile. 
const uint primitive_index = index_l0 * 32 + j; - const GlyphInstance gi = glyph_instances[primitive_index]; - const Glyph gl = glyphs[gi.index]; + const GlyphInstance gi = uniforms.glyph_instances.values[primitive_index]; + const Glyph gl = uniforms.glyphs.values[gi.index]; const vec2 glyph_min = gi.position + gl.offset_min; const vec2 glyph_max = gi.position + gl.offset_max; const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5); @@ -100,7 +105,7 @@ void main() { if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) { const vec2 glyph_size = gl.offset_max - gl.offset_min; const vec4 color = unpackUnorm4x8(gi.color).bgra; - const vec2 uv = mix(gl.atlas_min, gl.atlas_max, (sample_center - glyph_min) / glyph_size) / primitive_uniforms.atlas_resolution; + const vec2 uv = mix(gl.atlas_min, gl.atlas_max, (sample_center - glyph_min) / glyph_size) / uniforms.atlas_resolution; const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r * color.a; accum.rgb = (coverage * color.rgb) + accum.rgb * (1.0 - coverage); accum.a = coverage + accum.a * (1.0 - coverage); @@ -109,7 +114,7 @@ void main() { } } - imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy), accum); + imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), accum); #endif } diff --git a/title/shark/src/main.rs b/title/shark/src/main.rs index c79ccab..6e8aeb6 100644 --- a/title/shark/src/main.rs +++ b/title/shark/src/main.rs @@ -5,7 +5,7 @@ use std::time::{Duration, Instant}; use narcissus_core::{dds, Widen as _}; use pipelines::basic::BasicPipeline; -use pipelines::display_transform::DisplayTransformPipeline; +use pipelines::{GlyphInstance, PrimitiveUniforms, TILE_SIZE, TILE_STRIDE}; use renderdoc_sys as rdoc; use fonts::{FontFamily, Fonts}; @@ -14,24 +14,23 @@ use narcissus_app::{create_app, Event, Key, PressedState, WindowDesc}; use narcissus_core::{box_assume_init, default, rand::Pcg64, zeroed_box, BitIter}; use 
narcissus_font::{FontCollection, GlyphCache, HorizontalMetrics}; use narcissus_gpu::{ - create_device, Access, Bind, Buffer, BufferDesc, BufferImageCopy, BufferUsageFlags, ClearValue, - CmdEncoder, ColorSpace, Device, DeviceExt, Extent2d, Extent3d, Frame, GlobalBarrier, Image, - ImageAspectFlags, ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout, - ImageSubresourceRange, ImageTiling, ImageUsageFlags, IndexType, LoadOp, MemoryLocation, - Offset2d, PersistentBuffer, PresentMode, RenderingAttachment, RenderingDesc, Sampler, - SamplerAddressMode, SamplerDesc, SamplerFilter, Scissor, ShaderStageFlags, StoreOp, - SwapchainConfigurator, SwapchainImage, ThreadToken, TypedBind, Viewport, + create_device, Access, Bind, BindDesc, BindGroupLayout, BindingType, Buffer, BufferDesc, + BufferImageCopy, BufferUsageFlags, ClearValue, CmdEncoder, ColorSpace, ComputePipelineDesc, + Device, DeviceExt, Extent2d, Extent3d, Frame, GlobalBarrier, Image, ImageAspectFlags, + ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout, ImageSubresourceRange, + ImageTiling, ImageUsageFlags, IndexType, LoadOp, MemoryLocation, Offset2d, PersistentBuffer, + Pipeline, PipelineLayout, PresentMode, PushConstantRange, RenderingAttachment, RenderingDesc, + Sampler, SamplerAddressMode, SamplerDesc, SamplerFilter, Scissor, ShaderDesc, ShaderStageFlags, + StoreOp, SwapchainConfigurator, SwapchainImage, ThreadToken, TypedBind, Viewport, }; use narcissus_image as image; use narcissus_maths::{ clamp, perlin_noise3, sin_cos_pi_f32, sin_pi_f32, vec3, Affine3, Deg, HalfTurn, Mat3, Mat4, Point3, Vec3, }; -use pipelines::primitive_2d::{GlyphInstance, Primitive2dPipeline}; use spring::simple_spring_damper_exact; use crate::pipelines::basic::BasicUniforms; -use crate::pipelines::primitive_2d::{PrimitiveUniforms, TILE_SIZE, TILE_STRIDE}; mod fonts; mod helpers; @@ -860,8 +859,12 @@ struct DrawState<'gpu> { gpu: &'gpu Gpu, basic_pipeline: BasicPipeline, - primitive_2d_pipeline: 
Primitive2dPipeline, - display_transform_pipeline: DisplayTransformPipeline, + + compute_bind_group_layout: BindGroupLayout, + bin_clear_pipeline: Pipeline, + bin_pipeline: Pipeline, + rasterize_pipeline: Pipeline, + display_transform_pipeline: Pipeline, width: u32, height: u32, @@ -870,10 +873,10 @@ struct DrawState<'gpu> { tile_resolution_y: u32, depth_image: Image, - rt_image: Image, + color_image: Image, ui_image: Image, - tile_bitmap_buffer: Buffer, + tiles_buffer: Buffer, glyph_atlas_image: Image, @@ -889,9 +892,63 @@ impl<'gpu> DrawState<'gpu> { let samplers = Samplers::load(gpu); let immutable_samplers = &[samplers[SamplerRes::Bilinear]]; + let compute_bind_group_layout = gpu.create_bind_group_layout(&[ + // Samplers + BindDesc::with_immutable_samplers(ShaderStageFlags::COMPUTE, immutable_samplers), + // Tony mc mapface LUT + BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::SampledImage), + // Glyph Atlas + BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::SampledImage), + // UI Render Target + BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage), + // Color Render Target + BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage), + // Composited output + BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage), + ]); + + let compute_pipeline_layout = PipelineLayout { + bind_group_layouts: &[compute_bind_group_layout], + push_constant_ranges: &[PushConstantRange { + stage_flags: ShaderStageFlags::COMPUTE, + offset: 0, + size: std::mem::size_of::<PrimitiveUniforms>() as u32, + }], + }; + + let bin_clear_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { + shader: ShaderDesc { + entry: c"main", + code: shark_shaders::PRIMITIVE_2D_BIN_CLEAR_COMP_SPV, + }, + layout: &compute_pipeline_layout, + }); + + let bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { + shader: ShaderDesc { + entry: c"main", + code: shark_shaders::PRIMITIVE_2D_BIN_COMP_SPV, + }, + layout: &compute_pipeline_layout, + }); + + let 
rasterize_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { + shader: ShaderDesc { + entry: c"main", + code: shark_shaders::PRIMITIVE_2D_RASTERIZE_COMP_SPV, + }, + layout: &compute_pipeline_layout, + }); + + let display_transform_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { + shader: ShaderDesc { + entry: c"main", + code: shark_shaders::DISPLAY_TRANSFORM_COMP_SPV, + }, + layout: &compute_pipeline_layout, + }); + let basic_pipeline = BasicPipeline::new(gpu, immutable_samplers); - let primitive_2d_pipeline = Primitive2dPipeline::new(gpu, immutable_samplers); - let display_transform_pipeline = DisplayTransformPipeline::new(gpu, immutable_samplers); let models = Models::load(gpu); let images = Images::load(gpu, thread_token); @@ -899,16 +956,19 @@ impl<'gpu> DrawState<'gpu> { Self { gpu, basic_pipeline, - primitive_2d_pipeline, + compute_bind_group_layout, + bin_clear_pipeline, + bin_pipeline, + rasterize_pipeline, display_transform_pipeline, width: 0, height: 0, tile_resolution_x: 0, tile_resolution_y: 0, depth_image: default(), - rt_image: default(), + color_image: default(), ui_image: default(), - tile_bitmap_buffer: default(), + tiles_buffer: default(), glyph_atlas_image: default(), _samplers: samplers, models, @@ -1018,7 +1078,7 @@ impl<'gpu> DrawState<'gpu> { if width != self.width || height != self.height { gpu.destroy_image(frame, self.depth_image); - gpu.destroy_image(frame, self.rt_image); + gpu.destroy_image(frame, self.color_image); gpu.destroy_image(frame, self.ui_image); let tile_resolution_x = (width + (TILE_SIZE - 1)) / TILE_SIZE; @@ -1027,21 +1087,21 @@ impl<'gpu> DrawState<'gpu> { if tile_resolution_x != self.tile_resolution_x || tile_resolution_y != self.tile_resolution_y { - gpu.destroy_buffer(frame, self.tile_bitmap_buffer); + gpu.destroy_buffer(frame, self.tiles_buffer); let bitmap_buffer_size = tile_resolution_x * tile_resolution_y * TILE_STRIDE * std::mem::size_of::<u32>() as u32; - self.tile_bitmap_buffer = 
gpu.create_buffer(&BufferDesc { + self.tiles_buffer = gpu.create_buffer(&BufferDesc { memory_location: MemoryLocation::Device, host_mapped: false, usage: BufferUsageFlags::STORAGE, size: bitmap_buffer_size.widen(), }); - gpu.debug_name_buffer(self.tile_bitmap_buffer.to_arg(), "tile bitmap"); + gpu.debug_name_buffer(self.tiles_buffer.to_arg(), "tile bitmap"); println!("tile_resolution: ({tile_resolution_x},{tile_resolution_y})"); @@ -1065,7 +1125,7 @@ impl<'gpu> DrawState<'gpu> { gpu.debug_name_image(self.depth_image, "depth"); - self.rt_image = gpu.create_image(&ImageDesc { + self.color_image = gpu.create_image(&ImageDesc { memory_location: MemoryLocation::Device, host_mapped: false, usage: ImageUsageFlags::COLOR_ATTACHMENT | ImageUsageFlags::STORAGE, @@ -1079,7 +1139,7 @@ impl<'gpu> DrawState<'gpu> { mip_levels: 1, }); - gpu.debug_name_image(self.rt_image, "render target"); + gpu.debug_name_image(self.color_image, "render target"); self.ui_image = gpu.create_image(&ImageDesc { memory_location: MemoryLocation::Device, @@ -1176,7 +1236,7 @@ impl<'gpu> DrawState<'gpu> { ImageBarrier::layout_optimal( &[Access::None], &[Access::ColorAttachmentWrite], - self.rt_image, + self.color_image, ImageAspectFlags::COLOR, ), ImageBarrier { @@ -1200,7 +1260,7 @@ impl<'gpu> DrawState<'gpu> { width, height, color_attachments: &[RenderingAttachment { - image: self.rt_image, + image: self.color_image, load_op: LoadOp::Clear(ClearValue::ColorF32([1.0, 1.0, 1.0, 1.0])), store_op: StoreOp::Store, }], @@ -1348,12 +1408,12 @@ impl<'gpu> DrawState<'gpu> { ui_state.primitive_instances.clear(); - gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.bin_clear_pipeline); + gpu.cmd_set_pipeline(cmd_encoder, self.bin_clear_pipeline); gpu.cmd_set_bind_group( frame, cmd_encoder, - self.primitive_2d_pipeline.bind_group_layout, + self.compute_bind_group_layout, 0, &[ Bind { @@ -1361,30 +1421,39 @@ impl<'gpu> DrawState<'gpu> { array_element: 0, typed: TypedBind::SampledImage(&[( 
ImageLayout::Optimal, - self.glyph_atlas_image, + self.images[ImageRes::TonyMcMapfaceLut], )]), }, Bind { binding: 2, array_element: 0, - typed: TypedBind::StorageBuffer(&[glyph_buffer.to_arg()]), + typed: TypedBind::SampledImage(&[( + ImageLayout::Optimal, + self.glyph_atlas_image, + )]), }, Bind { binding: 3, array_element: 0, - typed: TypedBind::StorageBuffer(&[glyph_instance_buffer.to_arg()]), + typed: TypedBind::StorageImage(&[( + ImageLayout::General, + self.ui_image, + )]), }, Bind { binding: 4, array_element: 0, - typed: TypedBind::StorageBuffer(&[self.tile_bitmap_buffer.to_arg()]), + typed: TypedBind::StorageImage(&[( + ImageLayout::General, + self.color_image, + )]), }, Bind { binding: 5, array_element: 0, typed: TypedBind::StorageImage(&[( ImageLayout::General, - self.ui_image, + swapchain_image, )]), }, ], @@ -1403,6 +1472,10 @@ impl<'gpu> DrawState<'gpu> { num_primitives_32, num_primitives_1024, tile_stride: self.tile_resolution_x, + glyphs_buffer: gpu.get_buffer_address(glyph_buffer.to_arg()), + glyph_instances_buffer: gpu + .get_buffer_address(glyph_instance_buffer.to_arg()), + tiles_buffer: gpu.get_buffer_address(self.tiles_buffer.to_arg()), }, ); @@ -1422,7 +1495,7 @@ impl<'gpu> DrawState<'gpu> { &[], ); - gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.bin_pipeline); + gpu.cmd_set_pipeline(cmd_encoder, self.bin_pipeline); gpu.cmd_dispatch( cmd_encoder, @@ -1440,7 +1513,7 @@ impl<'gpu> DrawState<'gpu> { &[], ); - gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.rasterize_pipeline); + gpu.cmd_set_pipeline(cmd_encoder, self.rasterize_pipeline); gpu.cmd_dispatch(cmd_encoder, (self.width + 7) / 8, (self.height + 7) / 8, 1); @@ -1464,7 +1537,7 @@ impl<'gpu> DrawState<'gpu> { prev_layout: ImageLayout::Optimal, next_access: &[Access::ShaderOtherRead], next_layout: ImageLayout::General, - image: self.rt_image, + image: self.color_image, subresource_range: ImageSubresourceRange::default(), }, ImageBarrier { @@ -1480,53 +1553,7 @@ 
impl<'gpu> DrawState<'gpu> { gpu.cmd_compute_touch_swapchain(cmd_encoder, swapchain_image); - gpu.cmd_set_pipeline(cmd_encoder, self.display_transform_pipeline.pipeline); - - gpu.cmd_set_bind_group( - frame, - cmd_encoder, - self.display_transform_pipeline.bind_group_layout, - 0, - &[ - Bind { - binding: 1, - array_element: 0, - typed: TypedBind::SampledImage(&[( - ImageLayout::Optimal, - self.images[ImageRes::TonyMcMapfaceLut], - )]), - }, - Bind { - binding: 2, - array_element: 0, - typed: TypedBind::StorageBuffer(&[self.tile_bitmap_buffer.to_arg()]), - }, - Bind { - binding: 3, - array_element: 0, - typed: TypedBind::StorageImage(&[( - ImageLayout::General, - self.rt_image, - )]), - }, - Bind { - binding: 4, - array_element: 0, - typed: TypedBind::StorageImage(&[( - ImageLayout::General, - self.ui_image, - )]), - }, - Bind { - binding: 5, - array_element: 0, - typed: TypedBind::StorageImage(&[( - ImageLayout::General, - swapchain_image, - )]), - }, - ], - ); + gpu.cmd_set_pipeline(cmd_encoder, self.display_transform_pipeline); gpu.cmd_dispatch(cmd_encoder, (self.width + 7) / 8, (self.height + 7) / 8, 1); diff --git a/title/shark/src/pipelines/display_transform.rs b/title/shark/src/pipelines/display_transform.rs deleted file mode 100644 index 3b96a93..0000000 --- a/title/shark/src/pipelines/display_transform.rs +++ /dev/null @@ -1,54 +0,0 @@ -use narcissus_gpu::{ - BindDesc, BindGroupLayout, BindingType, ComputePipelineDesc, Pipeline, PipelineLayout, - PushConstantRange, Sampler, ShaderDesc, ShaderStageFlags, -}; - -use crate::Gpu; - -use super::primitive_2d::PrimitiveUniforms; - -pub struct DisplayTransformPipeline { - pub bind_group_layout: BindGroupLayout, - pub pipeline: Pipeline, -} - -impl DisplayTransformPipeline { - pub fn new(gpu: &Gpu, immutable_samplers: &[Sampler]) -> Self { - let bind_group_layout = gpu.create_bind_group_layout(&[ - // Samplers - BindDesc::with_immutable_samplers(ShaderStageFlags::COMPUTE, immutable_samplers), - // Tony Mc'mapface 
LUT - BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::SampledImage), - // Tiles - BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), - // Layer RT - BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage), - // Layer UI - BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage), - // Composited Output - BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage), - ]); - - let layout = &PipelineLayout { - bind_group_layouts: &[bind_group_layout], - push_constant_ranges: &[PushConstantRange { - stage_flags: ShaderStageFlags::COMPUTE, - offset: 0, - size: std::mem::size_of::<PrimitiveUniforms>() as u32, - }], - }; - - let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { - shader: ShaderDesc { - entry: c"main", - code: shark_shaders::DISPLAY_TRANSFORM_COMP_SPV, - }, - layout, - }); - - Self { - bind_group_layout, - pipeline, - } - } -} diff --git a/title/shark/src/pipelines/mod.rs b/title/shark/src/pipelines/mod.rs index 2c861f8..3f38de7 100644 --- a/title/shark/src/pipelines/mod.rs +++ b/title/shark/src/pipelines/mod.rs @@ -1,3 +1,37 @@ +use narcissus_font::TouchedGlyphIndex; + pub mod basic; -pub mod display_transform; -pub mod primitive_2d; + +pub const TILE_SIZE: u32 = 32; +pub const MAX_PRIMS: u32 = 1 << 18; +pub const TILE_BITMAP_WORDS_L1: u32 = MAX_PRIMS / 32 / 32; +pub const TILE_BITMAP_WORDS_L0: u32 = MAX_PRIMS / 32; +pub const TILE_STRIDE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1 + 2; + +#[allow(unused)] +#[repr(C)] +pub struct PrimitiveUniforms { + pub screen_resolution_x: u32, + pub screen_resolution_y: u32, + pub atlas_resolution_x: u32, + pub atlas_resolution_y: u32, + + pub num_primitives: u32, + pub num_primitives_32: u32, + pub num_primitives_1024: u32, + + pub tile_stride: u32, + + pub glyphs_buffer: u64, + pub glyph_instances_buffer: u64, + pub tiles_buffer: u64, +} + +#[allow(unused)] +#[repr(C)] +pub struct GlyphInstance { + pub x: f32, + pub y: f32, + pub touched_glyph_index: 
TouchedGlyphIndex, + pub color: u32, +} diff --git a/title/shark/src/pipelines/primitive_2d.rs b/title/shark/src/pipelines/primitive_2d.rs deleted file mode 100644 index 246b419..0000000 --- a/title/shark/src/pipelines/primitive_2d.rs +++ /dev/null @@ -1,103 +0,0 @@ -use narcissus_font::TouchedGlyphIndex; -use narcissus_gpu::{ - BindDesc, BindGroupLayout, BindingType, ComputePipelineDesc, Pipeline, PipelineLayout, - PushConstantRange, Sampler, ShaderDesc, ShaderStageFlags, -}; - -use crate::Gpu; - -pub const TILE_SIZE: u32 = 32; -pub const MAX_PRIMS: u32 = 1 << 18; -pub const TILE_BITMAP_WORDS_L1: u32 = MAX_PRIMS / 32 / 32; -pub const TILE_BITMAP_WORDS_L0: u32 = MAX_PRIMS / 32; -pub const TILE_STRIDE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1 + 2; - -#[allow(unused)] -#[repr(C)] -pub struct PrimitiveUniforms { - pub screen_resolution_x: u32, - pub screen_resolution_y: u32, - pub atlas_resolution_x: u32, - pub atlas_resolution_y: u32, - - pub num_primitives: u32, - pub num_primitives_32: u32, - pub num_primitives_1024: u32, - - pub tile_stride: u32, -} - -#[allow(unused)] -#[repr(C)] -pub struct GlyphInstance { - pub x: f32, - pub y: f32, - pub touched_glyph_index: TouchedGlyphIndex, - pub color: u32, -} - -pub struct Primitive2dPipeline { - pub bind_group_layout: BindGroupLayout, - pub bin_clear_pipeline: Pipeline, - pub bin_pipeline: Pipeline, - pub rasterize_pipeline: Pipeline, -} - -impl Primitive2dPipeline { - pub fn new(gpu: &Gpu, immutable_samplers: &[Sampler]) -> Self { - let bind_group_layout = gpu.create_bind_group_layout(&[ - // Sampler - BindDesc::with_immutable_samplers(ShaderStageFlags::COMPUTE, immutable_samplers), - // Glyph Atlas - BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::SampledImage), - // Glyphs - BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), - // Glyph Instances - BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer), - // Tiles - BindDesc::new(ShaderStageFlags::COMPUTE, 
BindingType::StorageBuffer), - // UI Image Output - BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage), - ]); - - let layout = &PipelineLayout { - bind_group_layouts: &[bind_group_layout], - push_constant_ranges: &[PushConstantRange { - stage_flags: ShaderStageFlags::COMPUTE, - offset: 0, - size: std::mem::size_of::<PrimitiveUniforms>() as u32, - }], - }; - - let bin_clear_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { - shader: ShaderDesc { - entry: c"main", - code: shark_shaders::PRIMITIVE_2D_BIN_CLEAR_COMP_SPV, - }, - layout, - }); - - let bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { - shader: ShaderDesc { - entry: c"main", - code: shark_shaders::PRIMITIVE_2D_BIN_COMP_SPV, - }, - layout, - }); - - let rasterize_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { - shader: ShaderDesc { - entry: c"main", - code: shark_shaders::PRIMITIVE_2D_RASTERIZE_COMP_SPV, - }, - layout, - }); - - Self { - bind_group_layout, - bin_clear_pipeline, - bin_pipeline, - rasterize_pipeline, - } - } -} -- 2.49.0