From: Joshua Simmons Date: Sun, 12 Oct 2025 09:39:09 +0000 (+0200) Subject: shark-shaders: Migrate draw_2d pipeline to slang X-Git-Url: https://git.nega.tv//gitweb.cgi?a=commitdiff_plain;h=844cbf74936dde0721e472d04effc125e708b4d2;p=josh%2Fnarcissus shark-shaders: Migrate draw_2d pipeline to slang --- diff --git a/title/shark-shaders/build.rs b/title/shark-shaders/build.rs index 5c0b65c..a626761 100644 --- a/title/shark-shaders/build.rs +++ b/title/shark-shaders/build.rs @@ -76,29 +76,12 @@ struct SlangShader { name: &'static str, } -const SLANG_SHADERS: &[SlangShader] = &[SlangShader { name: "basic" }]; +const SLANG_SHADERS: &[SlangShader] = &[ + SlangShader { name: "basic" }, + SlangShader { name: "draw_2d" }, +]; const SHADERS: &[Shader] = &[ - Shader { - stage: "comp", - name: "draw_2d_bin_0_clear", - }, - Shader { - stage: "comp", - name: "draw_2d_bin_1_scatter", - }, - Shader { - stage: "comp", - name: "draw_2d_bin_2_sort", - }, - Shader { - stage: "comp", - name: "draw_2d_bin_3_resolve", - }, - Shader { - stage: "comp", - name: "draw_2d_rasterize", - }, Shader { stage: "comp", name: "radix_sort_0_upsweep", @@ -131,6 +114,7 @@ fn main() { .arg("-fvk-use-scalar-layout") .arg("-fvk-use-entrypoint-name") .arg("-matrix-layout-row-major") + .arg("-O2") .arg(format!("-g{debug}")) .args(["-depfile", &format!("{out_dir}/{name}.d")]) .args(["-o", &format!("{out_dir}/{name}.spv")]) diff --git a/title/shark-shaders/shaders/bindings_compute.slang b/title/shark-shaders/shaders/bindings_compute.slang index 05c3ef7..58837c7 100644 --- a/title/shark-shaders/shaders/bindings_compute.slang +++ b/title/shark-shaders/shaders/bindings_compute.slang @@ -5,10 +5,8 @@ public Texture3D tony_mc_mapface_lut; [[vk::binding(2, 0)]] public Texture2D glyph_atlas; [[vk::binding(3, 0)]] -public RWTexture2D ui_layer_write; +public RWTexture2D ui_layer; [[vk::binding(4, 0)]] -public Texture2D ui_layer_read; +public RWTexture2D color_layer; [[vk::binding(5, 0)]] -public Texture2D color_layer; -[[vk::binding(6, 0)]] public RWTexture2D composited_output; diff --git a/title/shark-shaders/shaders/composite.comp b/title/shark-shaders/shaders/composite.comp index e5319a2..a9d019d 100644 --- a/title/shark-shaders/shaders/composite.comp +++ b/title/shark-shaders/shaders/composite.comp @@ -8,7 +8,18 @@ #extension GL_EXT_shader_image_load_formatted : require #include "bindings_compute.h" -#include "draw_2d.h" + +// TODO: Remove this +const uint TILE_SIZE = 32; + +struct Tile { + uint index_min; + uint index_max; +}; + +layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer TileReadRef { + Tile values[]; +}; float srgb_oetf(float a) { return (.0031308f >= a) ? 12.92f * a : 1.055f * pow(a, .4166666666666667f) - .055f; @@ -27,7 +38,7 @@ vec3 tony_mc_mapface(vec3 stimulus) { struct CompositeConstants { uvec2 tile_resolution; - TileRef tile_buffer; + TileReadRef tile_buffer; }; layout(std430, push_constant) uniform CompositeConstantsBlock { diff --git a/title/shark-shaders/shaders/draw_2d.h b/title/shark-shaders/shaders/draw_2d.h deleted file mode 100644 index cb5e70a..0000000 --- a/title/shark-shaders/shaders/draw_2d.h +++ /dev/null @@ -1,104 +0,0 @@ -#ifndef DRAW_2D_H -#define DRAW_2D_H - -const uint TILE_SIZE = 32; - -const uint DRAW_2D_CMD_RECT = 0; -const uint DRAW_2D_CMD_GLYPH = 1; - -struct Tile { - uint index_min; - uint index_max; -}; - -struct Glyph { - ivec2 atlas_min; - ivec2 atlas_max; - - vec2 offset_min; - vec2 offset_max; -}; - -struct Scissor { - vec2 offset_min; - vec2 offset_max; -}; - -struct Cmd { - uint packed_type; - uint words[7]; -}; - -struct CmdRect { - vec2 position; - vec2 bound; - - uint border_radii; - uint border_color; - - uint background_color; -}; - -struct CmdGlyph { - uint index; - vec2 position; - uint color; -}; - -CmdRect decode_rect(Cmd cmd) { - CmdRect rect = { - { uintBitsToFloat(cmd.words[0]), uintBitsToFloat(cmd.words[1]) }, // position - { uintBitsToFloat(cmd.words[2]), uintBitsToFloat(cmd.words[3]) }, // bound - cmd.words[4], // border_radii - cmd.words[5], // border_color - cmd.words[6], // background_color - }; - return rect; -} - -CmdGlyph decode_glyph(Cmd cmd) { - CmdGlyph glyph = { - cmd.words[0], // index - { uintBitsToFloat(cmd.words[1]), uintBitsToFloat(cmd.words[2]) }, // position - cmd.words[3], // color - }; - return glyph; -} - -layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer CommandRef { - Cmd values[]; -}; - -layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer ScissorRef { - Scissor values[]; -}; - -layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer GlyphRef { - Glyph values[]; -}; - -layout(buffer_reference, std430, buffer_reference_align = 4) buffer CoarseRef { - uint values[]; -}; - -layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer CoarseReadRef { - uint values[]; -}; - -layout(buffer_reference, std430, buffer_reference_align = 4) buffer FineRef { - uint values[]; -}; - -layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer FineReadRef { - uint values[]; -}; - -layout(buffer_reference, std430, buffer_reference_align = 4) buffer TileRef { - Tile values[]; -}; - -layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer TileReadRef { - Tile values[]; -}; - -#endif \ No newline at end of file diff --git a/title/shark-shaders/shaders/draw_2d.slang b/title/shark-shaders/shaders/draw_2d.slang new file mode 100644 index 0000000..a9fe81c --- /dev/null +++ b/title/shark-shaders/shaders/draw_2d.slang @@ -0,0 +1,567 @@ + +import bindings_samplers; +import bindings_compute; + +import sdf; + +namespace Draw2d { +public static const uint TILE_SIZE = 32; + +public struct Tile { + public uint index_min; + public uint index_max; +} +} + +static const uint MAX_TILES = 256; +static const uint BITMAP_STRIDE = MAX_TILES / 32; +static const uint BITMAP_SIZE = MAX_TILES * BITMAP_STRIDE; + +struct Glyph { + int2 atlas_min; + int2 atlas_max; + + float2 offset_min; + float2 offset_max; +} + +struct Scissor { + float2 offset_min; + float2 offset_max; +} + +enum CmdType { + Rect = 0, + Glyph = 1, + Line = 2 +} + +struct Cmd { + uint packed_type; + uint words[7]; +} + +struct CmdRect { + uint packed_type; + + float2 position; + float2 bound; + + uint border_radii; + uint border_color; + + uint background_color; +}; + +struct CmdGlyph { + uint packed_type; + + uint index; + float2 position; + uint color; + uint padding[3]; +}; + +struct ClearConstants { + uint *finished_buffer; + uint *coarse_buffer; +} + +[shader("compute")] +[numthreads(1, 1, 1)] +void clear(uniform ClearConstants constants) { + constants.finished_buffer[0] = 0; + constants.coarse_buffer[0] = 0; +} + +struct ScatterConstants { + uint2 tile_resolution; + + uint draw_buffer_len; + uint coarse_buffer_len; + + Cmd *draw_buffer; + Scissor *scissor_buffer; + Glyph *glyph_buffer; + uint *coarse_buffer; +}; + +[vk::specialization_constant] +const int WGP_SIZE = 64; + +groupshared uint scatter_intersected_tiles[BITMAP_SIZE]; + +[shader("compute")] +[require(spvGroupNonUniformBallot, spvGroupNonUniformArithmetic, spvGroupNonUniformVote)] +[numthreads(WGP_SIZE, 1, 1)] +void scatter(uniform ScatterConstants constants, uint3 thread_id: SV_DispatchThreadID, uint3 group_id: SV_GroupID) { + let in_bounds = thread_id.x < constants.draw_buffer_len; + + uint cmd_scissor = 0; + var cmd_min = float2(99999.9); + var cmd_max = float2(-99999.9); + if (in_bounds) { + let packed_type = constants.draw_buffer[thread_id.x].packed_type; + let cmd_type = packed_type >> 24; + cmd_scissor = packed_type & 0xffff; + + for (;;) { + let scalar_type = WaveReadLaneFirst(cmd_type); + [[branch]] + if (scalar_type == cmd_type) { + switch (CmdType(scalar_type)) { + case CmdType::Rect: + let cmd_rect = reinterpret(constants.draw_buffer[thread_id.x]); + cmd_min = cmd_rect.position; + cmd_max = cmd_rect.position + cmd_rect.bound; + break; + case CmdType::Glyph: + let cmd_glyph = reinterpret(constants.draw_buffer[thread_id.x]); + let glyph = constants.glyph_buffer[cmd_glyph.index]; + cmd_min = cmd_glyph.position + glyph.offset_min; + cmd_max = cmd_glyph.position + glyph.offset_max; + break; + } + break; + } + } + } + + let scissor = constants.scissor_buffer[cmd_scissor]; + let out_of_bounds = any(cmd_min >= cmd_max) || any(cmd_min > scissor.offset_max) || any(cmd_max < scissor.offset_min); + + // Are all draws off-screen? + if (WaveActiveAllTrue(out_of_bounds)) { + return; + } + + // Clip command bounds to our scissor rect. + cmd_min = max(cmd_min, scissor.offset_min); + cmd_max = min(cmd_max, scissor.offset_max); + + // Make sure off-screen commands don't contribute to the bounds. + let cmds_tile_min = uint2(clamp(WaveActiveMin(out_of_bounds ? int2(999999) : int2(floor(cmd_min / Draw2d::TILE_SIZE))), int2(0), constants.tile_resolution)); + let cmds_tile_max = uint2(clamp(WaveActiveMax(out_of_bounds ? int2(-999999) : int2(floor(cmd_max / Draw2d::TILE_SIZE))), int2(0), constants.tile_resolution)); + let cmd_tile_min = uint2(clamp(int2(floor(cmd_min / Draw2d::TILE_SIZE)), int2(0), constants.tile_resolution)); + let cmd_tile_max = uint2(clamp(int2(floor(cmd_max / Draw2d::TILE_SIZE)), int2(0), constants.tile_resolution)); + + // If any single rect in our wave covers the entire touched area, that + // means we are required to always write to every tile in its bounds. + // + // When that's the case, we want to avoid spending effort on fine-grained + // calculation of the exact intersected tiles. + let cmd_dominates_bounds = all(cmd_tile_min == cmds_tile_min) && all(cmd_tile_max == cmds_tile_max); + let use_combined_bounds = WaveActiveAnyTrue(cmd_dominates_bounds); + + if (use_combined_bounds) { + let tile_count = cmds_tile_max - cmds_tile_min + int2(1); + + uint offset; + if (WaveIsFirstLane()) { + InterlockedAdd(constants.coarse_buffer[0], tile_count.x * tile_count.y, offset); + } + offset = WaveReadLaneFirst(offset + 1); + + for (uint i = 0; i < tile_count.y; i++) { + for (uint j = 0; j < tile_count.x; j += WaveGetLaneCount()) { + let j = j + WaveGetLaneIndex(); + let y = cmds_tile_min.y + i; + let x = cmds_tile_min.x + j; + if (j < tile_count.x) { + let packed = ((y & 0xff) << 24) | ((x & 0xff) << 16) | (group_id.x & 0xffff); + let index = offset + i * tile_count.x + j; + if (index < constants.coarse_buffer_len) { + constants.coarse_buffer[index] = packed; + } + } + } + } + } else { + let start = cmds_tile_min.y * BITMAP_STRIDE + cmds_tile_min.x / 32; + let end = cmds_tile_max.y * BITMAP_STRIDE + cmds_tile_max.x / 32; + + for (uint i = start; i <= end; i += WaveGetLaneCount()) { + let i = i + WaveGetLaneIndex(); + if (i < BITMAP_SIZE) { + scatter_intersected_tiles[i] = 0; + } + } + + GroupMemoryBarrierWithWaveSync(); + + if (!out_of_bounds) { + let min_word = cmd_tile_min.x / 32; + let max_word = cmd_tile_max.x / 32; + let min_bit = cmd_tile_min.x & 31; + let max_bit = cmd_tile_max.x & 31; + let lsb = ~((1 << min_bit) - 1); + let msb = ((1 << max_bit) - 1) | 1 << max_bit; + if (min_word == max_word) { + for (uint y = cmd_tile_min.y; y <= cmd_tile_max.y; y++) { + InterlockedOr(scatter_intersected_tiles[y * BITMAP_STRIDE + min_word], lsb & msb); + } + } else { + for (uint y = cmd_tile_min.y; y <= cmd_tile_max.y; y++) { + InterlockedOr(scatter_intersected_tiles[y * BITMAP_STRIDE + min_word], lsb); + for (uint i = min_word + 1; i <= (max_word - 1); i++) { + scatter_intersected_tiles[y * BITMAP_STRIDE + i] = 0xffffffff; + } + InterlockedOr(scatter_intersected_tiles[y * BITMAP_STRIDE + max_word], msb); + } + } + } + + GroupMemoryBarrierWithWaveSync(); + + uint count = 0; + for (uint i = start; i <= end; i += WaveGetLaneCount()) { + let i = i + WaveGetLaneIndex(); + count += WaveActiveSum(i < BITMAP_SIZE ? countbits(scatter_intersected_tiles[i]) : 0); + } + + if (count == 0) { + return; + } + + uint offset; + if (WaveIsFirstLane()) { + InterlockedAdd(constants.coarse_buffer[0], count, offset); + } + offset = WaveReadLaneFirst(offset + 1); + + for (uint i = start; i <= end; i += WaveGetLaneCount()) { + let i = i + WaveGetLaneIndex(); + if (i >= BITMAP_SIZE) { + continue; + } + + var bitmap = scatter_intersected_tiles[i]; + let count = countbits(bitmap); + var scan = WavePrefixSum(count); + + while (bitmap != 0) { + let index = firstbitlow(bitmap); + bitmap ^= bitmap & -bitmap; + let y = (i * 32 + index) / MAX_TILES; + let x = (i * 32 + index) & (MAX_TILES - 1); + let packed = (y << 24) | (x << 16) | (group_id.x & 0xffff); + if (offset + scan < constants.coarse_buffer_len) { + constants.coarse_buffer[offset + scan] = packed; + } + scan++; + } + + offset += WaveActiveSum(count); + } + } +} + +struct VkDispatchIndirectCommand { + uint x; + uint y; + uint z; +}; + +struct SortConstants { + uint coarse_buffer_len; + uint _pad; + VkDispatchIndirectCommand *indirect_dispatch_buffer; + uint *coarse_buffer; +}; + +// TODO: Remove! +static const uint RADIX_WGP_SIZE = 256; +static const uint RADIX_ITEMS_PER_INVOCATION = 16; +static const uint RADIX_ITEMS_PER_WGP = RADIX_WGP_SIZE * RADIX_ITEMS_PER_INVOCATION; + +[shader("compute")] +[numthreads(1, 1, 1)] +void sort(uniform SortConstants constants) { + // We can't overflow the coarse buffer in the scatter phase, but we can + // still end up with a count that's larger than the buffer size since we + // unconditionally atomicAdd. So we need to clamp to the actual size now + // before dispatching sort work. + let count = min(constants.coarse_buffer_len, constants.coarse_buffer[0]); + constants.coarse_buffer[0] = count; + + constants.indirect_dispatch_buffer.x = (count + (RADIX_ITEMS_PER_WGP - 1)) / RADIX_ITEMS_PER_WGP; + constants.indirect_dispatch_buffer.y = 1; + constants.indirect_dispatch_buffer.z = 1; +} + +struct ResolveConstants { + uint tile_stride; + uint draw_buffer_len; + + Cmd *draw_buffer; + Scissor *scissor_buffer; + Glyph *glyph_buffer; + uint *coarse_buffer; + uint *fine_buffer; + Draw2d::Tile *tile_buffer; +}; + +[shader("compute")] +[require(spvGroupNonUniformBallot, spvGroupNonUniformVote)] +[numthreads(WGP_SIZE, 1, 1)] +void resolve(uniform ResolveConstants constants, uint3 thread_id: SV_DispatchThreadID) { + let x = thread_id.y; + let y = thread_id.z; + let tile_offset = constants.tile_stride * y + x; + let search = ((y & 0xff) << 24) | ((x & 0xff) << 16); + let count = constants.coarse_buffer[0]; + + if (count == 0) { + constants.tile_buffer[tile_offset].index_min = 0; + constants.tile_buffer[tile_offset].index_max = 0; + return; + } + + // Binary search for the upper bound of the tile. + uint base = 0; + { + uint n = count; + uint mid; + uint max_iters = 32; + while (max_iters-- > 0 && (mid = n / 2) > 0) { + let value = constants.coarse_buffer[1 + base + mid] & 0xffff0000; + base = value > search ? base : base + mid; + n -= mid; + } + } + + let tile_min = uint2(x, y) * Draw2d::TILE_SIZE; + let tile_max = tile_min + Draw2d::TILE_SIZE; + + bool hit_opaque = false; + uint lo = base + 1; + let hi = base + 1; + for (; !hit_opaque && lo > 0; lo--) { + let i = lo; + let packed = constants.coarse_buffer[i]; + + if ((packed & 0xffff0000) != (search & 0xffff0000)) { + break; + } + + let draw_offset = packed & 0xffff; + let draw_index = draw_offset * WGP_SIZE + WaveGetLaneIndex(); + + bool intersects = false; + bool opaque_tile = false; + + if (draw_index < constants.draw_buffer_len) { + var cmd_min = float2(99999.9); + var cmd_max = float2(-99999.9); + + let packed_type = constants.draw_buffer[draw_index].packed_type; + let cmd_type = packed_type >> 24; + let cmd_scissor = packed_type & 0xffff; + + let scissor = constants.scissor_buffer[cmd_scissor]; + + // If the tile doesn't intersect the scissor region it doesn't need to do work here. + if (any(scissor.offset_max < tile_min) || any(scissor.offset_min > tile_max)) { + intersects = false; + } else { + for (;;) { + let scalar_type = WaveReadLaneFirst(cmd_type); + [[branch]] + if (scalar_type == cmd_type) { + switch (CmdType(scalar_type)) { + case CmdType::Rect: + let cmd_rect = reinterpret(constants.draw_buffer[draw_index]); + cmd_min = cmd_rect.position; + cmd_max = cmd_rect.position + cmd_rect.bound; + + const bool background_opaque = (cmd_rect.background_color & 0xff000000) == 0xff000000; + if (background_opaque) { + let border_width = float((packed_type >> 16) & 0xff); + let border_opaque = (cmd_rect.border_color & 0xff000000) == 0xff000000; + let border_radii = unpackUnorm4x8ToFloat(cmd_rect.border_radii); + let max_border_radius = max(border_radii.x, max(border_radii.y, max(border_radii.z, border_radii.w))) * 255.0; + let shrink = ((2.0 - sqrt(2.0)) * max_border_radius) + (border_opaque ? 0.0 : border_width); + + let cmd_shrunk_min = max(scissor.offset_min, cmd_min + shrink); + let cmd_shrunk_max = min(scissor.offset_max, cmd_max - shrink); + opaque_tile = all(cmd_shrunk_max > cmd_shrunk_min) && all(tile_min > cmd_shrunk_min) && all(tile_max < cmd_shrunk_max); + } + break; + case CmdType::Glyph: + let cmd_glyph = reinterpret(constants.draw_buffer[draw_index]); + const Glyph glyph = constants.glyph_buffer[cmd_glyph.index]; + cmd_min = cmd_glyph.position + glyph.offset_min; + cmd_max = cmd_glyph.position + glyph.offset_max; + break; + } + break; + } + } + + cmd_min = max(cmd_min, scissor.offset_min); + cmd_max = min(cmd_max, scissor.offset_max); + intersects = !(any(tile_max < cmd_min) || any(tile_min > cmd_max)); + } + } + + var intersects_mask = WaveActiveBallot(intersects).x; + + if (WaveActiveAnyTrue(opaque_tile)) { + let opaque_tile_ballot = WaveActiveBallot(opaque_tile); + // TODO: Needs to check all live words of the ballot... + let first_opaque_tile = firstbithigh(opaque_tile_ballot).x; + let opaque_mask = ~((1 << first_opaque_tile) - 1); + intersects_mask &= opaque_mask; + constants.fine_buffer[i] = intersects_mask; + hit_opaque = true; + } else { + constants.fine_buffer[i] = intersects_mask; + } + } + + constants.tile_buffer[tile_offset].index_min = lo + 1; + constants.tile_buffer[tile_offset].index_max = hi + 1; +} + +struct RasterizeConstants { + uint tile_stride; + uint _pad; + + Cmd *draw_buffer; + Scissor *scissor_buffer; + Glyph *glyph_buffer; + uint *coarse_buffer; + uint *fine_buffer; + Draw2d::Tile *tile_buffer; +}; + +/// x = (((index >> 2) & 0x0007) & 0xFFFE) | index & 0x0001 +/// y = ((index >> 1) & 0x0003) | (((index >> 3) & 0x0007) & 0xFFFC) + +#define DEBUG_SHOW_TILES 0 + +float3 plasma_quintic(float x) { + let x = clamp(x, 0.0, 1.0); + let x1 = float4(1.0, x, x * x, x * x * x); // 1 x x2 x3 + let x2 = x1 * x1.w * x; // x4 x5 x6 x7 + return float3( + dot(x1.xyzw, float4(+0.063861086, +1.992659096, -1.023901152, -0.490832805)) + dot(x2.xy, float2(+1.308442123, -0.914547012)), + dot(x1.xyzw, float4(+0.049718590, -0.791144343, +2.892305078, +0.811726816)) + dot(x2.xy, float2(-4.686502417, +2.717794514)), + dot(x1.xyzw, float4(+0.513275779, +1.580255060, -5.164414457, +4.559573646)) + dot(x2.xy, float2(-1.916810682, +0.570638854))); +} + +[shader("compute")] +[numthreads(8, 8, 1)] +void rasterize(uniform RasterizeConstants constants, uint3 thread_id: SV_DispatchThreadID, uint3 group_id: SV_GroupID) { + let tile_coord = group_id.xy * WorkgroupSize().xy / Draw2d::TILE_SIZE; + let tile_index = tile_coord.y * constants.tile_stride + tile_coord.x; + + let lo = constants.tile_buffer[tile_index].index_min; + let hi = constants.tile_buffer[tile_index].index_max; + + if (lo == hi) { + return; + } + +#if DEBUG_SHOW_TILES == 1 + + let color = plasma_quintic(float(hi - lo) / 50.0); + ui_layer_write.Store(thread_id.xy, float4(color, 1.0)); + +#elif DEBUG_SHOW_TILES == 2 + + uint count = 0; + for (uint i = lo; i < hi; i++) { + count += countbits(constants.fine_buffer[i]); + } + let color = plasma_quintic(float(count) / 600.0); + ui_layer_write.Store(thread_id.xy, float4(color, 1.0)); + +#else + + let sample_center = thread_id.xy + float2(0.5); + var accum = float4(0.0); + + for (uint i = lo; i < hi; i++) { + var bitmap = constants.fine_buffer[i]; + + while (bitmap != 0) { + let index = firstbitlow(bitmap); + bitmap ^= bitmap & -bitmap; + + let base_index = (constants.coarse_buffer[i] & 0xffff) * 32; + let cmd = constants.draw_buffer[base_index + index]; + let cmd_type = cmd.packed_type >> 24; + let cmd_scissor = cmd.packed_type & 0xffff; + + let scissor = constants.scissor_buffer[cmd_scissor]; + + var primitive_color = float4(0.0); + + switch (CmdType(cmd_type)) { + case CmdType::Rect: { + let cmd_rect = reinterpret(cmd); + + let cmd_min = cmd_rect.position; + let cmd_max = cmd_rect.position + cmd_rect.bound; + + if (all(sample_center >= cmd_min) && all(sample_center <= cmd_max)) { + let border_width = float((cmd.packed_type >> 16) & 0xff); + let border_radii = unpackUnorm4x8ToFloat(cmd_rect.border_radii) * 255.0; + let max_border_radius = max(border_radii.x, max(border_radii.y, max(border_radii.z, border_radii.w))); + let shrink = (2.0 - sqrt(2.0)) * max_border_radius; + let background_color = unpackUnorm4x8ToFloat(cmd_rect.background_color).bgra; + + let cmd_min_clipped = max(scissor.offset_min, cmd_min + border_width + shrink); + let cmd_max_clipped = min(scissor.offset_max, cmd_max - border_width - shrink); + + if (all(sample_center >= cmd_min_clipped) && all(sample_center <= cmd_max_clipped)) { + primitive_color = background_color; + } else { + let b = cmd_rect.bound / 2.0; + let p = cmd_rect.position + b - sample_center; + + float d; + if (all(border_radii == float4(0.0))) { + d = sdf::box(p, b); + } else { + d = sdf::rounded_box(p, b, border_radii); + } + + let border_color = unpackUnorm4x8ToFloat(cmd_rect.border_color).bgra; + primitive_color = lerp(background_color, border_color, smoothstep(1.0, 0.0, 1.0 - d - border_width)); + primitive_color = lerp(primitive_color, float4(0.0), smoothstep(1.0, 0.0, 1.0 - d)); + + let clip_b = (scissor.offset_max - scissor.offset_min) / 2.0; + let clip_p = scissor.offset_min + clip_b - sample_center; + d = max(d, sdf::box(clip_p, clip_b)); + primitive_color = d < 0.0 ? primitive_color : float4(0.0); + } + } + break; + } + case CmdType::Glyph: { + let cmd_glyph = reinterpret(cmd); + let glyph = constants.glyph_buffer[cmd_glyph.index]; + let cmd_min = cmd_glyph.position + glyph.offset_min; + let cmd_max = cmd_glyph.position + glyph.offset_max; + if (all(sample_center >= max(scissor.offset_min, cmd_min)) && all(sample_center <= min(scissor.offset_max, cmd_max))) { + let glyph_size = glyph.offset_max - glyph.offset_min; + let uv = lerp(glyph.atlas_min, glyph.atlas_max, (sample_center - cmd_min) / glyph_size); + let color = unpackUnorm4x8ToFloat(cmd_glyph.color).bgra; + let coverage = glyph_atlas.SampleLevel(samplers[Sampler::BilinearUnnormalized], uv, 0.0).r * color.a; + primitive_color = color * coverage; + } + break; + } + } + + // does it blend? + accum.rgba = primitive_color.rgba + accum.rgba * (1.0 - primitive_color.a); + } + } + + ui_layer.Store(thread_id.xy, accum); + +#endif +} diff --git a/title/shark-shaders/shaders/draw_2d_bin_0_clear.comp b/title/shark-shaders/shaders/draw_2d_bin_0_clear.comp deleted file mode 100644 index f5fb9b9..0000000 --- a/title/shark-shaders/shaders/draw_2d_bin_0_clear.comp +++ /dev/null @@ -1,30 +0,0 @@ -#version 460 - -#extension GL_GOOGLE_include_directive : require - -#extension GL_EXT_buffer_reference : require -#extension GL_EXT_buffer_reference2 : require -#extension GL_EXT_scalar_block_layout : require -#extension GL_EXT_control_flow_attributes : require - -#extension GL_KHR_shader_subgroup_vote : require -#extension GL_KHR_shader_subgroup_ballot : require - -#include "draw_2d.h" -#include "radix_sort.h" - -struct ClearConstants { - FinishedRef finished_buffer; - CoarseRef coarse_buffer; -}; - -layout(std430, push_constant) uniform ClearConstantsBlock { - ClearConstants constants; -}; - -layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -void main() { - constants.finished_buffer.value = 0; - constants.coarse_buffer.values[0] = 0; -} diff --git a/title/shark-shaders/shaders/draw_2d_bin_1_scatter.comp b/title/shark-shaders/shaders/draw_2d_bin_1_scatter.comp deleted file mode 100644 index f3cbef0..0000000 --- a/title/shark-shaders/shaders/draw_2d_bin_1_scatter.comp +++ /dev/null @@ -1,198 +0,0 @@ -#version 460 - -#extension GL_GOOGLE_include_directive : require - -#extension GL_EXT_buffer_reference : require -#extension GL_EXT_buffer_reference2 : require -#extension GL_EXT_scalar_block_layout : require -#extension GL_EXT_control_flow_attributes : require - -#extension GL_KHR_shader_subgroup_arithmetic : require -#extension GL_KHR_shader_subgroup_ballot : require -#extension GL_KHR_shader_subgroup_vote : require - -#include "draw_2d.h" - -struct ScatterConstants { - uvec2 tile_resolution; - - uint draw_buffer_len; - uint coarse_buffer_len; - - CommandRef draw_buffer; - ScissorRef scissor_buffer; - GlyphRef glyph_buffer; - CoarseRef coarse_buffer; -}; - -layout(std430, push_constant) uniform ScatterConstantsBlock { - ScatterConstants constants; -}; - -const uint MAX_TILES = 256; -const uint BITMAP_STRIDE = MAX_TILES / 32; -const uint BITMAP_SIZE = MAX_TILES * BITMAP_STRIDE; - -shared uint intersected_tiles[BITMAP_SIZE]; - -layout (local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; - -void main() { - const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; - const uint draw_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + local_id; - - const bool in_bounds = draw_index < constants.draw_buffer_len; - - uint cmd_scissor = 0; - vec2 cmd_min = vec2(99999.9); - vec2 cmd_max = vec2(-99999.9); - if (in_bounds) { - const uint packed_type = constants.draw_buffer.values[draw_index].packed_type; - const uint cmd_type = packed_type >> 24; - cmd_scissor = packed_type & 0xffff; - - for (;;) { - const uint scalar_type = subgroupBroadcastFirst(cmd_type); - [[branch]] - if (scalar_type == cmd_type) { - switch (scalar_type) { - case DRAW_2D_CMD_RECT: - const CmdRect cmd_rect = decode_rect(constants.draw_buffer.values[draw_index]); - cmd_min = cmd_rect.position; - cmd_max = cmd_rect.position + cmd_rect.bound; - break; - case DRAW_2D_CMD_GLYPH: - const CmdGlyph cmd_glyph = decode_glyph(constants.draw_buffer.values[draw_index]); - const Glyph glyph = constants.glyph_buffer.values[cmd_glyph.index]; - cmd_min = cmd_glyph.position + glyph.offset_min; - cmd_max = cmd_glyph.position + glyph.offset_max; - break; - } - break; - } - } - } - - const Scissor scissor = constants.scissor_buffer.values[cmd_scissor]; - - const bool out_of_bounds = any(greaterThanEqual(cmd_min, cmd_max)) || any(greaterThan(cmd_min, scissor.offset_max)) || any(lessThan(cmd_max, scissor.offset_min)); - - // Are all draws off-screen? - if (subgroupAll(out_of_bounds)) { - return; - } - - cmd_min = max(cmd_min, scissor.offset_min); - cmd_max = min(cmd_max, scissor.offset_max); - - // Make sure off-screen commands don't contribute to the bounds. - const uvec2 cmds_tile_min = uvec2(clamp(subgroupMin(out_of_bounds ? ivec2(999999) : ivec2(floor(cmd_min / TILE_SIZE))), ivec2(0), constants.tile_resolution)); - const uvec2 cmds_tile_max = uvec2(clamp(subgroupMax(out_of_bounds ? ivec2(-999999) : ivec2(floor(cmd_max / TILE_SIZE))), ivec2(0), constants.tile_resolution)); - const uvec2 cmd_tile_min = uvec2(clamp(ivec2(floor(cmd_min / TILE_SIZE)), ivec2(0), constants.tile_resolution)); - const uvec2 cmd_tile_max = uvec2(clamp(ivec2(floor(cmd_max / TILE_SIZE)), ivec2(0), constants.tile_resolution)); - - const bool cmd_dominates_bounds = all(equal(cmd_tile_min, cmds_tile_min)) && all(equal(cmd_tile_max, cmds_tile_max)); - const bool use_combined_bounds = subgroupAny(cmd_dominates_bounds); - - if (use_combined_bounds) { - const uvec2 tile_count = cmds_tile_max - cmds_tile_min + ivec2(1); - - uint offset; - if (subgroupElect()) { - offset = atomicAdd(constants.coarse_buffer.values[0], tile_count.x * tile_count.y) + 1; - } - offset = subgroupBroadcastFirst(offset); - - for (uint i = 0; i < tile_count.y; i++) { - for (uint j = 0; j < tile_count.x; j += gl_SubgroupSize) { - const uint jj = j + gl_SubgroupInvocationID; - const uint y = cmds_tile_min.y + i; - const uint x = cmds_tile_min.x + jj; - if (jj < tile_count.x) { - const uint packed = ((y & 0xff) << 24) | ((x & 0xff) << 16) | (gl_WorkGroupID.x & 0xffff); - const uint index = offset + i * tile_count.x + jj; - if (index < constants.coarse_buffer_len) { - constants.coarse_buffer.values[index] = packed; - } - } - } - } - } else { - const uint start = cmds_tile_min.y * BITMAP_STRIDE + cmds_tile_min.x / 32; - const uint end = cmds_tile_max.y * BITMAP_STRIDE + cmds_tile_max.x / 32; - - for (uint i = start; i <= end; i += gl_SubgroupSize) { - const uint ii = i + gl_SubgroupInvocationID; - if (ii < BITMAP_SIZE) { - intersected_tiles[ii] = 0; - } - } - - subgroupBarrier(); - - if (!out_of_bounds) { - const uint min_word = cmd_tile_min.x / 32; - const uint max_word = cmd_tile_max.x / 32; - const uint min_bit = cmd_tile_min.x & 31; - const uint max_bit = cmd_tile_max.x & 31; - const uint lsb = ~((1 << min_bit) - 1); - const uint msb = ((1 << max_bit) - 1) | 1 << max_bit; - if (min_word == max_word) { - for (uint y = cmd_tile_min.y; y <= cmd_tile_max.y; y++) { - atomicOr(intersected_tiles[y * BITMAP_STRIDE + min_word], lsb & msb); - } - } else { - for (uint y = cmd_tile_min.y; y <= cmd_tile_max.y; y++) { - atomicOr(intersected_tiles[y * BITMAP_STRIDE + min_word], lsb); - for (uint i = min_word + 1; i <= (max_word - 1); i++) { - intersected_tiles[y * BITMAP_STRIDE + i] = 0xffffffff; - } - atomicOr(intersected_tiles[y * BITMAP_STRIDE + max_word], msb); - } - } - } - - subgroupBarrier(); - - uint count = 0; - for (uint i = start; i <= end; i += gl_SubgroupSize) { - const uint ii = i + gl_SubgroupInvocationID; - count += subgroupAdd(ii < BITMAP_SIZE ? bitCount(intersected_tiles[ii]) : 0); - } - - if (count == 0) { - return; - } - - uint offset; - if (subgroupElect()) { - offset = atomicAdd(constants.coarse_buffer.values[0], count) + 1; - } - offset = subgroupBroadcastFirst(offset); - - for (uint i = start; i <= end; i += gl_SubgroupSize) { - const uint ii = i + gl_SubgroupInvocationID; - if (ii >= BITMAP_SIZE) { - continue; - } - - uint bitmap = intersected_tiles[ii]; - const uint count = bitCount(bitmap); - uint scan = subgroupExclusiveAdd(count); - - while (bitmap != 0) { - const uint index = findLSB(bitmap); - bitmap ^= bitmap & -bitmap; - const uint y = (ii * 32 + index) / MAX_TILES; - const uint x = (ii * 32 + index) & (MAX_TILES - 1); - const uint packed = (y << 24) | (x << 16) | (gl_WorkGroupID.x & 0xffff); - if (offset + scan < constants.coarse_buffer_len) { - constants.coarse_buffer.values[offset + scan] = packed; - } - scan++; - } - - offset += subgroupAdd(count); - } - } -} diff --git a/title/shark-shaders/shaders/draw_2d_bin_2_sort.comp b/title/shark-shaders/shaders/draw_2d_bin_2_sort.comp deleted file mode 100644 index 3df9805..0000000 --- a/title/shark-shaders/shaders/draw_2d_bin_2_sort.comp +++ /dev/null @@ -1,47 +0,0 @@ -#version 460 - -#extension GL_GOOGLE_include_directive : require - -#extension GL_EXT_buffer_reference : require -#extension GL_EXT_buffer_reference2 : require -#extension GL_EXT_scalar_block_layout : require -#extension GL_EXT_control_flow_attributes : require - -#extension GL_KHR_shader_subgroup_arithmetic : require -#extension GL_KHR_shader_subgroup_ballot : require -#extension GL_KHR_shader_subgroup_shuffle_relative: enable -#extension GL_KHR_shader_subgroup_vote : require - -#include "draw_2d.h" -#include "indirect.h" -#include "radix_sort.h" - -layout(buffer_reference, std430, buffer_reference_align = 4) buffer VkDispatchIndirectCommandRef { - VkDispatchIndirectCommand dimensions; -}; - -struct SortConstants { - uint coarse_buffer_len; - uint _pad; - VkDispatchIndirectCommandRef indirect_dispatch_buffer; - CoarseRef coarse_buffer; -}; - -layout(std430, push_constant) uniform SortConstantsBlock { - SortConstants constants; -}; - -layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in; - -void main() { - // We shouldn't overflow the coarse buffer in the scatter phase, but we can - // still end up with a count that's larger than the buffer size since we - // unconditionally atomicAdd. So we need to clamp to the actual size now - // before dispatching sort work. - const uint count = min(constants.coarse_buffer_len, constants.coarse_buffer.values[0]); - constants.coarse_buffer.values[0] = count; - - constants.indirect_dispatch_buffer.dimensions.x = (count + (RADIX_ITEMS_PER_WGP - 1)) / RADIX_ITEMS_PER_WGP; - constants.indirect_dispatch_buffer.dimensions.y = 1; - constants.indirect_dispatch_buffer.dimensions.z = 1; -} diff --git a/title/shark-shaders/shaders/draw_2d_bin_3_resolve.comp b/title/shark-shaders/shaders/draw_2d_bin_3_resolve.comp deleted file mode 100644 index 5a8f389..0000000 --- a/title/shark-shaders/shaders/draw_2d_bin_3_resolve.comp +++ /dev/null @@ -1,151 +0,0 @@ -#version 460 - -#extension GL_GOOGLE_include_directive : require - -#extension GL_EXT_buffer_reference : require -#extension GL_EXT_buffer_reference2 : require -#extension GL_EXT_scalar_block_layout : require -#extension GL_EXT_control_flow_attributes : require - -#extension GL_KHR_shader_subgroup_arithmetic : require -#extension GL_KHR_shader_subgroup_ballot : require -#extension GL_KHR_shader_subgroup_vote : require - -#include "draw_2d.h" - -struct ResolveConstants { - uint tile_stride; - uint draw_buffer_len; - - CommandRef draw_buffer; - ScissorRef scissor_buffer; - GlyphRef glyph_buffer; - CoarseReadRef coarse_buffer; - FineRef fine_buffer; - TileRef tile_buffer; -}; - -layout(std430, push_constant) uniform ResolveConstantsBlock { - ResolveConstants constants; -}; - -layout (local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; - -void main() { - const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; - const uint x = gl_GlobalInvocationID.y; - const uint y = gl_GlobalInvocationID.z; - const uint tile_offset = constants.tile_stride * y + x; - const uint search = ((y & 0xff) << 24) | ((x & 0xff) << 16); - const uint count = constants.coarse_buffer.values[0]; - - if (count == 0) { - constants.tile_buffer.values[tile_offset].index_min = 0; - constants.tile_buffer.values[tile_offset].index_max = 0; - return; - } - - // Binary search for the upper bound of the tile. - uint base = 0; - { - uint n = count; - uint mid; - uint max_iters = 32; - while (max_iters --> 0 && (mid = n / 2) > 0) { - const uint value = constants.coarse_buffer.values[1 + base + mid] & 0xffff0000; - base = value > search ? base : base + mid; - n -= mid; - } - } - - const vec2 tile_min = uvec2(x, y) * TILE_SIZE; - const vec2 tile_max = tile_min + TILE_SIZE; - - bool hit_opaque = false; - uint lo = base + 1; - const uint hi = base + 1; - for(; !hit_opaque && lo > 0; lo--) { - const uint i = lo; - const uint packed = constants.coarse_buffer.values[i]; - - if ((packed & 0xffff0000) != (search & 0xffff0000)) { - break; - } - - const uint draw_offset = packed & 0xffff; - const uint draw_index = draw_offset * gl_WorkGroupSize.x + local_id; - - bool intersects = false; - bool opaque_tile = false; - - if (draw_index < constants.draw_buffer_len) { - vec2 cmd_min = vec2(99999.9); - vec2 cmd_max = vec2(-99999.9); - - const uint packed_type = constants.draw_buffer.values[draw_index].packed_type; - const uint cmd_type = packed_type >> 24; - const uint cmd_scissor = packed_type & 0xffff; - - const Scissor scissor = constants.scissor_buffer.values[cmd_scissor]; - - // If the tile doesn't intersect the scissor region it doesn't need to do work here. - if (any(lessThan(scissor.offset_max, tile_min)) || any(greaterThan(scissor.offset_min, tile_max))) { - intersects = false; - } else { - for (;;) { - const uint scalar_type = subgroupBroadcastFirst(cmd_type); - [[branch]] - if (scalar_type == cmd_type) { - switch (scalar_type) { - case DRAW_2D_CMD_RECT: - const CmdRect cmd_rect = decode_rect(constants.draw_buffer.values[draw_index]); - cmd_min = cmd_rect.position; - cmd_max = cmd_rect.position + cmd_rect.bound; - - const bool background_opaque = (cmd_rect.background_color & 0xff000000) == 0xff000000; - if (background_opaque) { - const float border_width = float((packed_type >> 16) & 0xff); - const bool border_opaque = (cmd_rect.border_color & 0xff000000) == 0xff000000; - const vec4 border_radii = unpackUnorm4x8(cmd_rect.border_radii); - const float max_border_radius = max(border_radii.x, max(border_radii.y, max(border_radii.z, border_radii.w))) * 255.0; - const float shrink = ((2.0 - sqrt(2.0)) * max_border_radius) + (border_opaque ? 0.0 : border_width); - - const vec2 cmd_shrunk_min = max(scissor.offset_min, cmd_min + shrink); - const vec2 cmd_shrunk_max = min(scissor.offset_max, cmd_max - shrink); - opaque_tile = all(greaterThan(cmd_shrunk_max, cmd_shrunk_min)) && all(greaterThan(tile_min, cmd_shrunk_min)) && all(lessThan(tile_max, cmd_shrunk_max)); - } - break; - case DRAW_2D_CMD_GLYPH: - const CmdGlyph cmd_glyph = decode_glyph(constants.draw_buffer.values[draw_index]); - const Glyph glyph = constants.glyph_buffer.values[cmd_glyph.index]; - cmd_min = cmd_glyph.position + glyph.offset_min; - cmd_max = cmd_glyph.position + glyph.offset_max; - break; - } - break; - } - } - - cmd_min = max(cmd_min, scissor.offset_min); - cmd_max = min(cmd_max, scissor.offset_max); - intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max))); - } - } - - uint intersects_mask = subgroupBallot(intersects).x; - - if (subgroupAny(opaque_tile)) { - const uvec4 opaque_tile_ballot = subgroupBallot(opaque_tile); - const uint first_opaque_tile = subgroupBallotFindMSB(opaque_tile_ballot); - const uint opaque_mask = ~((1 << first_opaque_tile) - 1); - intersects_mask &= opaque_mask; - constants.fine_buffer.values[i] = intersects_mask; - hit_opaque = true; - } else { - constants.fine_buffer.values[i] = intersects_mask; - } - } - - constants.tile_buffer.values[tile_offset].index_min = lo + 1; - constants.tile_buffer.values[tile_offset].index_max = hi + 1; -} diff --git a/title/shark-shaders/shaders/draw_2d_rasterize.comp b/title/shark-shaders/shaders/draw_2d_rasterize.comp deleted file mode 100644 index bdc761e..0000000 --- a/title/shark-shaders/shaders/draw_2d_rasterize.comp +++ /dev/null @@ -1,169 +0,0 @@ -#version 460 - -#extension GL_GOOGLE_include_directive : require - -#extension GL_EXT_control_flow_attributes : require -#extension GL_EXT_buffer_reference : require -#extension GL_EXT_buffer_reference2 : require -#extension GL_EXT_scalar_block_layout : require -#extension GL_EXT_shader_image_load_formatted : require - -#extension GL_KHR_shader_subgroup_vote : require -#extension GL_KHR_shader_subgroup_ballot : require - -#include "bindings_compute.h" -#include "draw_2d.h" -#include "sdf.h" - -struct RasterizeConstants { - uint tile_stride; - uint _pad; - - CommandRef draw_buffer; - ScissorRef scissor_buffer; - GlyphRef glyph_buffer; - CoarseReadRef coarse_buffer; - FineReadRef fine_buffer; - TileReadRef tile_buffer; -}; - -layout(std430, push_constant) uniform RasterizeConstantsBlock { - RasterizeConstants constants; -}; - -/// x = (((index >> 2) & 0x0007) & 0xFFFE) | index & 0x0001 -/// y = ((index >> 1) & 0x0003) | (((index >> 3) & 0x0007) & 0xFFFC) - -#define DEBUG_SHOW_TILES 0 - -vec3 plasma_quintic(in float x) -{ - x = clamp(x, 0.0, 1.0); - vec4 x1 = vec4(1.0, x, x * x, x * x * x); // 1 x x2 x3 - vec4 x2 = x1 * x1.w * x; // x4 x5 x6 x7 - return vec3( - dot(x1.xyzw, vec4(+0.063861086, +1.992659096, -1.023901152, -0.490832805)) + dot(x2.xy, vec2(+1.308442123, -0.914547012)), - dot(x1.xyzw, vec4(+0.049718590, -0.791144343, +2.892305078, +0.811726816)) + dot(x2.xy, vec2(-4.686502417, +2.717794514)), - dot(x1.xyzw, vec4(+0.513275779, +1.580255060, -5.164414457, +4.559573646)) + dot(x2.xy, vec2(-1.916810682, +0.570638854)) - ); -} - -layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; - -void main() { - const uvec2 tile_coord = gl_WorkGroupID.xy / (TILE_SIZE / gl_WorkGroupSize.xy); - const uint tile_index = tile_coord.y * constants.tile_stride + tile_coord.x; - - const uint lo = constants.tile_buffer.values[tile_index].index_min; - const uint hi = constants.tile_buffer.values[tile_index].index_max; - - if (lo == hi) { - return; - } - -#if DEBUG_SHOW_TILES == 1 - - uint count = hi - lo; - const vec3 color = plasma_quintic(float(count) / 50.0); - imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0)); - -#elif DEBUG_SHOW_TILES == 2 - - uint count = 0; - for (uint i = lo; i < hi; i++) { - count += bitCount(constants.fine_buffer.values[i]); - } - const vec3 color = plasma_quintic(float(count) / 600.0); - imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0)); - -#else - - const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5); - - vec4 accum = vec4(0.0); - - for (uint i = lo; i < hi; i++) { - uint bitmap = constants.fine_buffer.values[i]; - - while (bitmap != 0) { - const uint index = findLSB(bitmap); - bitmap ^= bitmap & -bitmap; - - const uint base_index = (constants.coarse_buffer.values[i] & 0xffff) * 32; - const Cmd cmd = constants.draw_buffer.values[base_index + index]; - const uint cmd_type = cmd.packed_type >> 24; - const uint cmd_scissor = cmd.packed_type & 0xffff; - - const Scissor scissor = constants.scissor_buffer.values[cmd_scissor]; - - vec4 primitive_color = vec4(0.0); - switch (cmd_type) { - case DRAW_2D_CMD_RECT: { - const CmdRect cmd_rect = decode_rect(cmd); - - const vec2 cmd_min = cmd_rect.position; - const vec2 cmd_max = cmd_rect.position + cmd_rect.bound; - - if (any(lessThan(sample_center, cmd_min)) && any(greaterThan(sample_center, cmd_max))) { - continue; - } - - const float border_width = float((cmd.packed_type >> 16) & 0xff); - const vec4 border_radii = unpackUnorm4x8(cmd_rect.border_radii) * 255.0; - const float max_border_radius = max(border_radii.x, max(border_radii.y, max(border_radii.z, border_radii.w))); - const float shrink = (2.0 - sqrt(2.0)) * max_border_radius; - const vec4 background_color = unpackUnorm4x8(cmd_rect.background_color).bgra; - - const vec2 cmd_min_clipped = max(scissor.offset_min, cmd_min + border_width + shrink); - const vec2 cmd_max_clipped = min(scissor.offset_max, cmd_max - border_width - shrink); - - if (all(greaterThan(sample_center, cmd_min_clipped)) && all(lessThan(sample_center, cmd_max_clipped))) { - primitive_color = background_color; - } else { - const vec2 b = cmd_rect.bound / 2.0; - const vec2 p = cmd_rect.position + b - sample_center; - - float d; - if (all(equal(border_radii, vec4(0.0)))) { - d = sdf_box(p, b); - } else { - d = sdf_rounded_box(p, b, border_radii); - } - - const vec4 border_color = unpackUnorm4x8(cmd_rect.border_color).bgra; - primitive_color = mix(background_color, border_color, smoothstep(1.0, 0.0, 1.0 - d - border_width)); - primitive_color = mix(primitive_color, vec4(0), smoothstep(1.0, 0.0, 1.0 - d)); - - const vec2 clip_b = (scissor.offset_max - scissor.offset_min) / 2.0; - const vec2 clip_p = scissor.offset_min + clip_b - sample_center; - d = max(d, sdf_box(clip_p, clip_b)); - primitive_color = d < 0.0 ? primitive_color : vec4(0); - } - break; - } - case DRAW_2D_CMD_GLYPH: { - const CmdGlyph cmd_glyph = decode_glyph(cmd); - const Glyph glyph = constants.glyph_buffer.values[cmd_glyph.index]; - const vec2 cmd_min = cmd_glyph.position + glyph.offset_min; - const vec2 cmd_max = cmd_glyph.position + glyph.offset_max; - if (all(greaterThanEqual(sample_center, max(scissor.offset_min, cmd_min))) && all(lessThanEqual(sample_center, min(scissor.offset_max, cmd_max)))) { - const vec2 glyph_size = glyph.offset_max - glyph.offset_min; - const vec2 uv = mix(glyph.atlas_min, glyph.atlas_max, (sample_center - cmd_min) / glyph_size); - const vec4 color = unpackUnorm4x8(cmd_glyph.color).bgra; - const float coverage = textureLod(sampler2D(glyph_atlas, samplers[SAMPLER_BILINEAR_UNNORMALIZED]), uv, 0.0).r * color.a; - primitive_color = color * coverage; - } - break; - } - } - - // does it blend? - accum.rgba = primitive_color.rgba + accum.rgba * (1.0 - primitive_color.a); - } - } - - imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), accum); - -#endif - -} diff --git a/title/shark-shaders/shaders/radix_sort_0_upsweep.comp b/title/shark-shaders/shaders/radix_sort_0_upsweep.comp index 4aaa8a7..1e9e571 100644 --- a/title/shark-shaders/shaders/radix_sort_0_upsweep.comp +++ b/title/shark-shaders/shaders/radix_sort_0_upsweep.comp @@ -14,7 +14,6 @@ #include "radix_sort.h" -#include "draw_2d.h" #include "indirect.h" layout (constant_id = 0) const uint SUBGROUP_SIZE = 64; diff --git a/title/shark-shaders/shaders/radix_sort_1_downsweep.comp b/title/shark-shaders/shaders/radix_sort_1_downsweep.comp index 415ab83..23ff1c3 100644 --- a/title/shark-shaders/shaders/radix_sort_1_downsweep.comp +++ b/title/shark-shaders/shaders/radix_sort_1_downsweep.comp @@ -14,7 +14,6 @@ #include "radix_sort.h" -#include "draw_2d.h" #include "indirect.h" layout (constant_id = 0) const uint SUBGROUP_SIZE = 64; diff --git a/title/shark-shaders/shaders/sdf.h b/title/shark-shaders/shaders/sdf.h deleted file mode 100644 index a7b71c6..0000000 --- a/title/shark-shaders/shaders/sdf.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef SDF_H -#define SDF_H - -// https://iquilezles.org/articles/distfunctions2d/ - -float sdf_box(in vec2 p, in vec2 b) -{ - const vec2 d = abs(p) - b; - return length(max(d, 0.0)) + min(max(d.x, d.y), 0.0); -} - -float sdf_rounded_box( in vec2 p, in vec2 b, in vec4 r ) -{ - r.xy = (p.x > 0.0) ? r.xy : r.zw; - r.x = (p.y > 0.0) ? r.x : r.y; - const vec2 q = abs(p) - b + r.x; - return min(max(q.x, q.y), 0.0) + length(max(q, 0.0)) - r.x; -} - -#endif \ No newline at end of file diff --git a/title/shark-shaders/shaders/sdf.slang b/title/shark-shaders/shaders/sdf.slang new file mode 100644 index 0000000..28f6eb5 --- /dev/null +++ b/title/shark-shaders/shaders/sdf.slang @@ -0,0 +1,19 @@ +module sdf; + +// https://iquilezles.org/articles/distfunctions2d/ + +namespace sdf { + +public float box(float2 p, float2 b) { + let d = abs(p) - b; + return length(max(d, 0.0)) + min(max(d.x, d.y), 0.0); +} + +public float rounded_box(float2 p, float2 b, float4 r) { + r.xy = (p.x > 0.0) ? r.xy : r.zw; + r.x = (p.y > 0.0) ? r.x : r.y; + let q = abs(p) - b + r.x; + return min(max(q.x, q.y), 0.0) + length(max(q, 0.0)) - r.x; +} + +} diff --git a/title/shark-shaders/src/pipelines.rs b/title/shark-shaders/src/pipelines.rs index 48dfcc0..9c3734d 100644 --- a/title/shark-shaders/src/pipelines.rs +++ b/title/shark-shaders/src/pipelines.rs @@ -360,27 +360,34 @@ impl Pipelines { gpu.debug_name_pipeline(basic_pipeline, "basic"); - let create_compute_pipeline = - |code, name, workgroup_size, require_full_subgroups, push_constant_size| { + let create_compute_pipeline_with_entry = + |code, entry, name, workgroup_size, require_full_subgroups, push_constant_size| { let push_constant_range = PushConstantRange { stage_flags: ShaderStageFlags::COMPUTE, offset: 0, size: push_constant_size as u32, }; + let workgroup_size_spec_constant = [SpecConstant::U32 { + id: 0, + value: workgroup_size, + }]; + let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { shader: ShaderDesc { code, + entry, require_full_subgroups, required_subgroup_size: if workgroup_size != 0 { Some(workgroup_size) } else { None }, - spec_constants: &[SpecConstant::U32 { - id: 0, - value: workgroup_size, - }], + spec_constants: if workgroup_size != 0 { + &workgroup_size_spec_constant + } else { + &[] + }, ..default() }, layout: PipelineLayout { @@ -399,42 +406,59 @@ impl Pipelines { pipeline }; - let draw_2d_bin_0_clear_pipeline = create_compute_pipeline( - crate::DRAW_2D_BIN_0_CLEAR_COMP_SPV, - "draw2d_bin_clear", + let create_compute_pipeline = + |code, name, workgroup_size, require_full_subgroups, push_constant_size| { + create_compute_pipeline_with_entry( + code, + c"main", + name, + workgroup_size, + require_full_subgroups, + push_constant_size, + ) + }; + + let draw_2d_bin_0_clear_pipeline = create_compute_pipeline_with_entry( + crate::DRAW_2D_SPV, + c"clear", + "draw2d clear", 0, false, std::mem::size_of::(), ); let draw_2d_bin_1_scatter_pipeline_workgroup_size = 32; - let draw_2d_bin_1_scatter_pipeline = create_compute_pipeline( - crate::DRAW_2D_BIN_1_SCATTER_COMP_SPV, - "draw2d_bin_scatter", + let draw_2d_bin_1_scatter_pipeline = create_compute_pipeline_with_entry( + crate::DRAW_2D_SPV, + c"scatter", + "draw2d scatter", draw_2d_bin_1_scatter_pipeline_workgroup_size, true, std::mem::size_of::(), ); - let draw_2d_bin_2_sort_pipeline = create_compute_pipeline( - crate::DRAW_2D_BIN_2_SORT_COMP_SPV, - "draw2d_bin_sort", + let draw_2d_bin_2_sort_pipeline = create_compute_pipeline_with_entry( + crate::DRAW_2D_SPV, + c"sort", + "draw2d sort", 0, false, std::mem::size_of::(), ); - let draw_2d_bin_3_resolve_pipeline = create_compute_pipeline( - crate::DRAW_2D_BIN_3_RESOLVE_COMP_SPV, - "draw2d_bin_resolve", + let draw_2d_bin_3_resolve_pipeline = create_compute_pipeline_with_entry( + crate::DRAW_2D_SPV, + c"resolve", + "draw2d resolve", 32, true, std::mem::size_of::(), ); - let draw_2d_rasterize_pipeline = create_compute_pipeline( - crate::DRAW_2D_RASTERIZE_COMP_SPV, - "draw2d_rasterize", + let draw_2d_rasterize_pipeline = create_compute_pipeline_with_entry( + crate::DRAW_2D_SPV, + c"rasterize", + "draw2d rasterize", 0, false, std::mem::size_of::(),