]> git.nega.tv - josh/narcissus/commitdiff
shark-shaders: Migrate draw_2d pipeline to slang
authorJoshua Simmons <josh@nega.tv>
Sun, 12 Oct 2025 09:39:09 +0000 (11:39 +0200)
committerJoshua Simmons <josh@nega.tv>
Sun, 12 Oct 2025 10:32:27 +0000 (12:32 +0200)
15 files changed:
title/shark-shaders/build.rs
title/shark-shaders/shaders/bindings_compute.slang
title/shark-shaders/shaders/composite.comp
title/shark-shaders/shaders/draw_2d.h [deleted file]
title/shark-shaders/shaders/draw_2d.slang [new file with mode: 0644]
title/shark-shaders/shaders/draw_2d_bin_0_clear.comp [deleted file]
title/shark-shaders/shaders/draw_2d_bin_1_scatter.comp [deleted file]
title/shark-shaders/shaders/draw_2d_bin_2_sort.comp [deleted file]
title/shark-shaders/shaders/draw_2d_bin_3_resolve.comp [deleted file]
title/shark-shaders/shaders/draw_2d_rasterize.comp [deleted file]
title/shark-shaders/shaders/radix_sort_0_upsweep.comp
title/shark-shaders/shaders/radix_sort_1_downsweep.comp
title/shark-shaders/shaders/sdf.h [deleted file]
title/shark-shaders/shaders/sdf.slang [new file with mode: 0644]
title/shark-shaders/src/pipelines.rs

index 5c0b65c06dec1f7215f83a5403be5c1aa460c5ff..a62676132ec0f9921f48772ddcba2d022e418dbe 100644 (file)
@@ -76,29 +76,12 @@ struct SlangShader {
     name: &'static str,
 }
 
-const SLANG_SHADERS: &[SlangShader] = &[SlangShader { name: "basic" }];
+const SLANG_SHADERS: &[SlangShader] = &[
+    SlangShader { name: "basic" },
+    SlangShader { name: "draw_2d" },
+];
 
 const SHADERS: &[Shader] = &[
-    Shader {
-        stage: "comp",
-        name: "draw_2d_bin_0_clear",
-    },
-    Shader {
-        stage: "comp",
-        name: "draw_2d_bin_1_scatter",
-    },
-    Shader {
-        stage: "comp",
-        name: "draw_2d_bin_2_sort",
-    },
-    Shader {
-        stage: "comp",
-        name: "draw_2d_bin_3_resolve",
-    },
-    Shader {
-        stage: "comp",
-        name: "draw_2d_rasterize",
-    },
     Shader {
         stage: "comp",
         name: "radix_sort_0_upsweep",
@@ -131,6 +114,7 @@ fn main() {
                 .arg("-fvk-use-scalar-layout")
                 .arg("-fvk-use-entrypoint-name")
                 .arg("-matrix-layout-row-major")
+                .arg("-O2")
                 .arg(format!("-g{debug}"))
                 .args(["-depfile", &format!("{out_dir}/{name}.d")])
                 .args(["-o", &format!("{out_dir}/{name}.spv")])
index 05c3ef7617b7607d177551a98273a0dc17922633..58837c750477dd7f215d829776a886cdcf0bde30 100644 (file)
@@ -5,10 +5,8 @@ public Texture3D tony_mc_mapface_lut;
 [[vk::binding(2, 0)]]
 public Texture2D glyph_atlas;
 [[vk::binding(3, 0)]]
-public RWTexture2D ui_layer_write;
+public RWTexture2D ui_layer;
 [[vk::binding(4, 0)]]
-public Texture2D ui_layer_read;
+public RWTexture2D color_layer;
 [[vk::binding(5, 0)]]
-public Texture2D color_layer;
-[[vk::binding(6, 0)]]
 public RWTexture2D composited_output;
index e5319a2236acac8a1f672784f44f09e4dd2ff31a..a9d019d10d11f4f177f8c62b25a255ce5a512cfa 100644 (file)
@@ -8,7 +8,18 @@
 #extension GL_EXT_shader_image_load_formatted : require
 
 #include "bindings_compute.h"
-#include "draw_2d.h"
+
+// TODO: Remove this
+const uint TILE_SIZE = 32;
+
+struct Tile {
+    uint index_min;
+    uint index_max;
+};
+
+layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer TileReadRef {
+    Tile values[];
+};
 
 float srgb_oetf(float a) {
     return (.0031308f >= a) ? 12.92f * a : 1.055f * pow(a, .4166666666666667f) - .055f;
@@ -27,7 +38,7 @@ vec3 tony_mc_mapface(vec3 stimulus) {
 
 struct CompositeConstants {
     uvec2 tile_resolution;
-    TileRef tile_buffer;
+    TileReadRef tile_buffer;
 };
 
 layout(std430, push_constant) uniform CompositeConstantsBlock {
diff --git a/title/shark-shaders/shaders/draw_2d.h b/title/shark-shaders/shaders/draw_2d.h
deleted file mode 100644 (file)
index cb5e70a..0000000
+++ /dev/null
@@ -1,104 +0,0 @@
-#ifndef DRAW_2D_H
-#define DRAW_2D_H
-
-const uint TILE_SIZE = 32;
-
-const uint DRAW_2D_CMD_RECT = 0;
-const uint DRAW_2D_CMD_GLYPH = 1;
-
-struct Tile {
-    uint index_min;
-    uint index_max;
-};
-
-struct Glyph {
-    ivec2 atlas_min;
-    ivec2 atlas_max;
-
-    vec2 offset_min;
-    vec2 offset_max;
-};
-
-struct Scissor {
-    vec2 offset_min;
-    vec2 offset_max;
-};
-
-struct Cmd {
-    uint packed_type;
-    uint words[7];
-};
-
-struct CmdRect {
-    vec2 position;
-    vec2 bound;
-
-    uint border_radii;
-    uint border_color;
-
-    uint background_color;
-};
-
-struct CmdGlyph {
-    uint index;
-    vec2 position;
-    uint color;
-};
-
-CmdRect decode_rect(Cmd cmd) {
-    CmdRect rect = {
-        { uintBitsToFloat(cmd.words[0]), uintBitsToFloat(cmd.words[1]) }, // position
-        { uintBitsToFloat(cmd.words[2]), uintBitsToFloat(cmd.words[3]) }, // bound
-        cmd.words[4], // border_radii
-        cmd.words[5], // border_color
-        cmd.words[6], // background_color
-    };
-    return rect;
-}
-
-CmdGlyph decode_glyph(Cmd cmd) {
-    CmdGlyph glyph = {
-        cmd.words[0], // index
-        { uintBitsToFloat(cmd.words[1]), uintBitsToFloat(cmd.words[2]) }, // position
-        cmd.words[3], // color
-    };
-    return glyph;
-}
-
-layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer CommandRef {
-    Cmd values[];
-};
-
-layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer ScissorRef {
-    Scissor values[];
-};
-
-layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer GlyphRef {
-    Glyph values[];
-};
-
-layout(buffer_reference, std430, buffer_reference_align = 4) buffer CoarseRef {
-    uint values[];
-};
-
-layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer CoarseReadRef {
-    uint values[];
-};
-
-layout(buffer_reference, std430, buffer_reference_align = 4) buffer FineRef {
-    uint values[];
-};
-
-layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer FineReadRef {
-    uint values[];
-};
-
-layout(buffer_reference, std430, buffer_reference_align = 4) buffer TileRef {
-    Tile values[];
-};
-
-layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer TileReadRef {
-    Tile values[];
-};
-
-#endif
\ No newline at end of file
diff --git a/title/shark-shaders/shaders/draw_2d.slang b/title/shark-shaders/shaders/draw_2d.slang
new file mode 100644 (file)
index 0000000..a9fe81c
--- /dev/null
@@ -0,0 +1,567 @@
+
+import bindings_samplers;
+import bindings_compute;
+
+import sdf;
+
+namespace Draw2d {
+public static const uint TILE_SIZE = 32;
+
+public struct Tile {
+    public uint index_min;
+    public uint index_max;
+}
+}
+
+static const uint MAX_TILES = 256;
+static const uint BITMAP_STRIDE = MAX_TILES / 32;
+static const uint BITMAP_SIZE = MAX_TILES * BITMAP_STRIDE;
+
+struct Glyph {
+    int2 atlas_min;
+    int2 atlas_max;
+
+    float2 offset_min;
+    float2 offset_max;
+}
+
+struct Scissor {
+    float2 offset_min;
+    float2 offset_max;
+}
+
+enum CmdType {
+    Rect = 0,
+    Glyph = 1,
+    Line = 2
+}
+
+struct Cmd {
+    uint packed_type;
+    uint words[7];
+}
+
+struct CmdRect {
+    uint packed_type;
+
+    float2 position;
+    float2 bound;
+
+    uint border_radii;
+    uint border_color;
+
+    uint background_color;
+};
+
+struct CmdGlyph {
+    uint packed_type;
+
+    uint index;
+    float2 position;
+    uint color;
+    uint padding[3];
+};
+
+struct ClearConstants {
+    uint *finished_buffer;
+    uint *coarse_buffer;
+}
+
+[shader("compute")]
+[numthreads(1, 1, 1)]
+void clear(uniform ClearConstants constants) {
+    constants.finished_buffer[0] = 0;
+    constants.coarse_buffer[0] = 0;
+}
+
+struct ScatterConstants {
+    uint2 tile_resolution;
+
+    uint draw_buffer_len;
+    uint coarse_buffer_len;
+
+    Cmd *draw_buffer;
+    Scissor *scissor_buffer;
+    Glyph *glyph_buffer;
+    uint *coarse_buffer;
+};
+
+[vk::specialization_constant]
+const int WGP_SIZE = 64;
+
+groupshared uint scatter_intersected_tiles[BITMAP_SIZE];
+
+[shader("compute")]
+[require(spvGroupNonUniformBallot, spvGroupNonUniformArithmetic, spvGroupNonUniformVote)]
+[numthreads(WGP_SIZE, 1, 1)]
+void scatter(uniform ScatterConstants constants, uint3 thread_id: SV_DispatchThreadID, uint3 group_id: SV_GroupID) {
+    let in_bounds = thread_id.x < constants.draw_buffer_len;
+
+    uint cmd_scissor = 0;
+    var cmd_min = float2(99999.9);
+    var cmd_max = float2(-99999.9);
+    if (in_bounds) {
+        let packed_type = constants.draw_buffer[thread_id.x].packed_type;
+        let cmd_type = packed_type >> 24;
+        cmd_scissor = packed_type & 0xffff;
+
+        for (;;) {
+            let scalar_type = WaveReadLaneFirst(cmd_type);
+            [[branch]]
+            if (scalar_type == cmd_type) {
+                switch (CmdType(scalar_type)) {
+                case CmdType::Rect:
+                    let cmd_rect = reinterpret<CmdRect>(constants.draw_buffer[thread_id.x]);
+                    cmd_min = cmd_rect.position;
+                    cmd_max = cmd_rect.position + cmd_rect.bound;
+                    break;
+                case CmdType::Glyph:
+                    let cmd_glyph = reinterpret<CmdGlyph>(constants.draw_buffer[thread_id.x]);
+                    let glyph = constants.glyph_buffer[cmd_glyph.index];
+                    cmd_min = cmd_glyph.position + glyph.offset_min;
+                    cmd_max = cmd_glyph.position + glyph.offset_max;
+                    break;
+                }
+                break;
+            }
+        }
+    }
+
+    let scissor = constants.scissor_buffer[cmd_scissor];
+    let out_of_bounds = any(cmd_min >= cmd_max) || any(cmd_min > scissor.offset_max) || any(cmd_max < scissor.offset_min);
+
+    // Are all draws off-screen?
+    if (WaveActiveAllTrue(out_of_bounds)) {
+        return;
+    }
+
+    // Clip command bounds to our scissor rect.
+    cmd_min = max(cmd_min, scissor.offset_min);
+    cmd_max = min(cmd_max, scissor.offset_max);
+
+    // Make sure off-screen commands don't contribute to the bounds.
+    let cmds_tile_min = uint2(clamp(WaveActiveMin(out_of_bounds ? int2(999999) : int2(floor(cmd_min / Draw2d::TILE_SIZE))), int2(0), constants.tile_resolution));
+    let cmds_tile_max = uint2(clamp(WaveActiveMax(out_of_bounds ? int2(-999999) : int2(floor(cmd_max / Draw2d::TILE_SIZE))), int2(0), constants.tile_resolution));
+    let cmd_tile_min = uint2(clamp(int2(floor(cmd_min / Draw2d::TILE_SIZE)), int2(0), constants.tile_resolution));
+    let cmd_tile_max = uint2(clamp(int2(floor(cmd_max / Draw2d::TILE_SIZE)), int2(0), constants.tile_resolution));
+
+    // If any single rect in our wave covers the entire touched area, that
+    // means we are required to always write to every tile in its bounds.
+    //
+    // When that's the case, we want to avoid spending effort on fine-grained
+    // calculation of the exact intersected tiles.
+    let cmd_dominates_bounds = all(cmd_tile_min == cmds_tile_min) && all(cmd_tile_max == cmds_tile_max);
+    let use_combined_bounds = WaveActiveAnyTrue(cmd_dominates_bounds);
+
+    if (use_combined_bounds) {
+        let tile_count = cmds_tile_max - cmds_tile_min + int2(1);
+
+        uint offset;
+        if (WaveIsFirstLane()) {
+            InterlockedAdd(constants.coarse_buffer[0], tile_count.x * tile_count.y, offset);
+        }
+        offset = WaveReadLaneFirst(offset + 1);
+
+        for (uint i = 0; i < tile_count.y; i++) {
+            for (uint j = 0; j < tile_count.x; j += WaveGetLaneCount()) {
+                let j = j + WaveGetLaneIndex();
+                let y = cmds_tile_min.y + i;
+                let x = cmds_tile_min.x + j;
+                if (j < tile_count.x) {
+                    let packed = ((y & 0xff) << 24) | ((x & 0xff) << 16) | (group_id.x & 0xffff);
+                    let index = offset + i * tile_count.x + j;
+                    if (index < constants.coarse_buffer_len) {
+                        constants.coarse_buffer[index] = packed;
+                    }
+                }
+            }
+        }
+    } else {
+        let start = cmds_tile_min.y * BITMAP_STRIDE + cmds_tile_min.x / 32;
+        let end = cmds_tile_max.y * BITMAP_STRIDE + cmds_tile_max.x / 32;
+
+        for (uint i = start; i <= end; i += WaveGetLaneCount()) {
+            let i = i + WaveGetLaneIndex();
+            if (i < BITMAP_SIZE) {
+                scatter_intersected_tiles[i] = 0;
+            }
+        }
+
+        GroupMemoryBarrierWithWaveSync();
+
+        if (!out_of_bounds) {
+            let min_word = cmd_tile_min.x / 32;
+            let max_word = cmd_tile_max.x / 32;
+            let min_bit = cmd_tile_min.x & 31;
+            let max_bit = cmd_tile_max.x & 31;
+            let lsb = ~((1 << min_bit) - 1);
+            let msb = ((1 << max_bit) - 1) | 1 << max_bit;
+            if (min_word == max_word) {
+                for (uint y = cmd_tile_min.y; y <= cmd_tile_max.y; y++) {
+                    InterlockedOr(scatter_intersected_tiles[y * BITMAP_STRIDE + min_word], lsb & msb);
+                }
+            } else {
+                for (uint y = cmd_tile_min.y; y <= cmd_tile_max.y; y++) {
+                    InterlockedOr(scatter_intersected_tiles[y * BITMAP_STRIDE + min_word], lsb);
+                    for (uint i = min_word + 1; i <= (max_word - 1); i++) {
+                        scatter_intersected_tiles[y * BITMAP_STRIDE + i] = 0xffffffff;
+                    }
+                    InterlockedOr(scatter_intersected_tiles[y * BITMAP_STRIDE + max_word], msb);
+                }
+            }
+        }
+
+        GroupMemoryBarrierWithWaveSync();
+
+        uint count = 0;
+        for (uint i = start; i <= end; i += WaveGetLaneCount()) {
+            let i = i + WaveGetLaneIndex();
+            count += WaveActiveSum(i < BITMAP_SIZE ? countbits(scatter_intersected_tiles[i]) : 0);
+        }
+
+        if (count == 0) {
+            return;
+        }
+
+        uint offset;
+        if (WaveIsFirstLane()) {
+            InterlockedAdd(constants.coarse_buffer[0], count, offset);
+        }
+        offset = WaveReadLaneFirst(offset + 1);
+
+        for (uint i = start; i <= end; i += WaveGetLaneCount()) {
+            let i = i + WaveGetLaneIndex();
+            if (i >= BITMAP_SIZE) {
+                continue;
+            }
+
+            var bitmap = scatter_intersected_tiles[i];
+            let count = countbits(bitmap);
+            var scan = WavePrefixSum(count);
+
+            while (bitmap != 0) {
+                let index = firstbitlow(bitmap);
+                bitmap ^= bitmap & -bitmap;
+                let y = (i * 32 + index) / MAX_TILES;
+                let x = (i * 32 + index) & (MAX_TILES - 1);
+                let packed = (y << 24) | (x << 16) | (group_id.x & 0xffff);
+                if (offset + scan < constants.coarse_buffer_len) {
+                    constants.coarse_buffer[offset + scan] = packed;
+                }
+                scan++;
+            }
+
+            offset += WaveActiveSum(count);
+        }
+    }
+}
+
+struct VkDispatchIndirectCommand {
+    uint x;
+    uint y;
+    uint z;
+};
+
+struct SortConstants {
+    uint coarse_buffer_len;
+    uint _pad;
+    VkDispatchIndirectCommand *indirect_dispatch_buffer;
+    uint *coarse_buffer;
+};
+
+// TODO: Remove!
+static const uint RADIX_WGP_SIZE = 256;
+static const uint RADIX_ITEMS_PER_INVOCATION = 16;
+static const uint RADIX_ITEMS_PER_WGP = RADIX_WGP_SIZE * RADIX_ITEMS_PER_INVOCATION;
+
+[shader("compute")]
+[numthreads(1, 1, 1)]
+void sort(uniform SortConstants constants) {
+    // We can't overflow the coarse buffer in the scatter phase, but we can
+    // still end up with a count that's larger than the buffer size since we
+    // unconditionally atomicAdd. So we need to clamp to the actual size now
+    // before dispatching sort work.
+    let count = min(constants.coarse_buffer_len, constants.coarse_buffer[0]);
+    constants.coarse_buffer[0] = count;
+
+    constants.indirect_dispatch_buffer.x = (count + (RADIX_ITEMS_PER_WGP - 1)) / RADIX_ITEMS_PER_WGP;
+    constants.indirect_dispatch_buffer.y = 1;
+    constants.indirect_dispatch_buffer.z = 1;
+}
+
+struct ResolveConstants {
+    uint tile_stride;
+    uint draw_buffer_len;
+
+    Cmd *draw_buffer;
+    Scissor *scissor_buffer;
+    Glyph *glyph_buffer;
+    uint *coarse_buffer;
+    uint *fine_buffer;
+    Draw2d::Tile *tile_buffer;
+};
+
+[shader("compute")]
+[require(spvGroupNonUniformBallot, spvGroupNonUniformVote)]
+[numthreads(WGP_SIZE, 1, 1)]
+void resolve(uniform ResolveConstants constants, uint3 thread_id: SV_DispatchThreadID) {
+    let x = thread_id.y;
+    let y = thread_id.z;
+    let tile_offset = constants.tile_stride * y + x;
+    let search = ((y & 0xff) << 24) | ((x & 0xff) << 16);
+    let count = constants.coarse_buffer[0];
+
+    if (count == 0) {
+        constants.tile_buffer[tile_offset].index_min = 0;
+        constants.tile_buffer[tile_offset].index_max = 0;
+        return;
+    }
+
+    // Binary search for the upper bound of the tile.
+    uint base = 0;
+    {
+        uint n = count;
+        uint mid;
+        uint max_iters = 32;
+        while (max_iters-- > 0 && (mid = n / 2) > 0) {
+            let value = constants.coarse_buffer[1 + base + mid] & 0xffff0000;
+            base = value > search ? base : base + mid;
+            n -= mid;
+        }
+    }
+
+    let tile_min = uint2(x, y) * Draw2d::TILE_SIZE;
+    let tile_max = tile_min + Draw2d::TILE_SIZE;
+
+    bool hit_opaque = false;
+    uint lo = base + 1;
+    let hi = base + 1;
+    for (; !hit_opaque && lo > 0; lo--) {
+        let i = lo;
+        let packed = constants.coarse_buffer[i];
+
+        if ((packed & 0xffff0000) != (search & 0xffff0000)) {
+            break;
+        }
+
+        let draw_offset = packed & 0xffff;
+        let draw_index = draw_offset * WGP_SIZE + WaveGetLaneIndex();
+
+        bool intersects = false;
+        bool opaque_tile = false;
+
+        if (draw_index < constants.draw_buffer_len) {
+            var cmd_min = float2(99999.9);
+            var cmd_max = float2(-99999.9);
+
+            let packed_type = constants.draw_buffer[draw_index].packed_type;
+            let cmd_type = packed_type >> 24;
+            let cmd_scissor = packed_type & 0xffff;
+
+            let scissor = constants.scissor_buffer[cmd_scissor];
+
+            // If the tile doesn't intersect the scissor region it doesn't need to do work here.
+            if (any(scissor.offset_max < tile_min) || any(scissor.offset_min > tile_max)) {
+                intersects = false;
+            } else {
+                for (;;) {
+                    let scalar_type = WaveReadLaneFirst(cmd_type);
+                    [[branch]]
+                    if (scalar_type == cmd_type) {
+                        switch (CmdType(scalar_type)) {
+                        case CmdType::Rect:
+                            let cmd_rect = reinterpret<CmdRect>(constants.draw_buffer[draw_index]);
+                            cmd_min = cmd_rect.position;
+                            cmd_max = cmd_rect.position + cmd_rect.bound;
+
+                            const bool background_opaque = (cmd_rect.background_color & 0xff000000) == 0xff000000;
+                            if (background_opaque) {
+                                let border_width = float((packed_type >> 16) & 0xff);
+                                let border_opaque = (cmd_rect.border_color & 0xff000000) == 0xff000000;
+                                let border_radii = unpackUnorm4x8ToFloat(cmd_rect.border_radii);
+                                let max_border_radius = max(border_radii.x, max(border_radii.y, max(border_radii.z, border_radii.w))) * 255.0;
+                                let shrink = ((2.0 - sqrt(2.0)) * max_border_radius) + (border_opaque ? 0.0 : border_width);
+
+                                let cmd_shrunk_min = max(scissor.offset_min, cmd_min + shrink);
+                                let cmd_shrunk_max = min(scissor.offset_max, cmd_max - shrink);
+                                opaque_tile = all(cmd_shrunk_max > cmd_shrunk_min) && all(tile_min > cmd_shrunk_min) && all(tile_max < cmd_shrunk_max);
+                            }
+                            break;
+                        case CmdType::Glyph:
+                            let cmd_glyph = reinterpret<CmdGlyph>(constants.draw_buffer[draw_index]);
+                            const Glyph glyph = constants.glyph_buffer[cmd_glyph.index];
+                            cmd_min = cmd_glyph.position + glyph.offset_min;
+                            cmd_max = cmd_glyph.position + glyph.offset_max;
+                            break;
+                        }
+                        break;
+                    }
+                }
+
+                cmd_min = max(cmd_min, scissor.offset_min);
+                cmd_max = min(cmd_max, scissor.offset_max);
+                intersects = !(any(tile_max < cmd_min) || any(tile_min > cmd_max));
+            }
+        }
+
+        var intersects_mask = WaveActiveBallot(intersects).x;
+
+        if (WaveActiveAnyTrue(opaque_tile)) {
+            let opaque_tile_ballot = WaveActiveBallot(opaque_tile);
+            // TODO: Needs to check all live words of the ballot...
+            let first_opaque_tile = firstbithigh(opaque_tile_ballot).x;
+            let opaque_mask = ~((1 << first_opaque_tile) - 1);
+            intersects_mask &= opaque_mask;
+            constants.fine_buffer[i] = intersects_mask;
+            hit_opaque = true;
+        } else {
+            constants.fine_buffer[i] = intersects_mask;
+        }
+    }
+
+    constants.tile_buffer[tile_offset].index_min = lo + 1;
+    constants.tile_buffer[tile_offset].index_max = hi + 1;
+}
+
+struct RasterizeConstants {
+    uint tile_stride;
+    uint _pad;
+
+    Cmd *draw_buffer;
+    Scissor *scissor_buffer;
+    Glyph *glyph_buffer;
+    uint *coarse_buffer;
+    uint *fine_buffer;
+    Draw2d::Tile *tile_buffer;
+};
+
+/// x = (((index >> 2) & 0x0007) & 0xFFFE) | index & 0x0001
+/// y = ((index >> 1) & 0x0003) | (((index >> 3) & 0x0007) & 0xFFFC)
+
+#define DEBUG_SHOW_TILES 0
+
+float3 plasma_quintic(float x) {
+    let x = clamp(x, 0.0, 1.0);
+    let x1 = float4(1.0, x, x * x, x * x * x); // 1 x x2 x3
+    let x2 = x1 * x1.w * x;                    // x4 x5 x6 x7
+    return float3(
+        dot(x1.xyzw, float4(+0.063861086, +1.992659096, -1.023901152, -0.490832805)) + dot(x2.xy, float2(+1.308442123, -0.914547012)),
+        dot(x1.xyzw, float4(+0.049718590, -0.791144343, +2.892305078, +0.811726816)) + dot(x2.xy, float2(-4.686502417, +2.717794514)),
+        dot(x1.xyzw, float4(+0.513275779, +1.580255060, -5.164414457, +4.559573646)) + dot(x2.xy, float2(-1.916810682, +0.570638854)));
+}
+
+[shader("compute")]
+[numthreads(8, 8, 1)]
+void rasterize(uniform RasterizeConstants constants, uint3 thread_id: SV_DispatchThreadID, uint3 group_id: SV_GroupID) {
+    let tile_coord = group_id.xy * WorkgroupSize().xy / Draw2d::TILE_SIZE;
+    let tile_index = tile_coord.y * constants.tile_stride + tile_coord.x;
+
+    let lo = constants.tile_buffer[tile_index].index_min;
+    let hi = constants.tile_buffer[tile_index].index_max;
+
+    if (lo == hi) {
+        return;
+    }
+
+#if DEBUG_SHOW_TILES == 1
+
+    let color = plasma_quintic(float(hi - lo) / 50.0);
+    ui_layer_write.Store(thread_id.xy, float4(color, 1.0));
+
+#elif DEBUG_SHOW_TILES == 2
+
+    uint count = 0;
+    for (uint i = lo; i < hi; i++) {
+        count += countbits(constants.fine_buffer[i]);
+    }
+    let color = plasma_quintic(float(count) / 600.0);
+    ui_layer_write.Store(thread_id.xy, float4(color, 1.0));
+
+#else
+
+    let sample_center = thread_id.xy + float2(0.5);
+    var accum = float4(0.0);
+
+    for (uint i = lo; i < hi; i++) {
+        var bitmap = constants.fine_buffer[i];
+
+        while (bitmap != 0) {
+            let index = firstbitlow(bitmap);
+            bitmap ^= bitmap & -bitmap;
+
+            let base_index = (constants.coarse_buffer[i] & 0xffff) * 32;
+            let cmd = constants.draw_buffer[base_index + index];
+            let cmd_type = cmd.packed_type >> 24;
+            let cmd_scissor = cmd.packed_type & 0xffff;
+
+            let scissor = constants.scissor_buffer[cmd_scissor];
+
+            var primitive_color = float4(0.0);
+
+            switch (CmdType(cmd_type)) {
+            case CmdType::Rect: {
+                let cmd_rect = reinterpret<CmdRect>(cmd);
+
+                let cmd_min = cmd_rect.position;
+                let cmd_max = cmd_rect.position + cmd_rect.bound;
+
+                if (all(sample_center >= cmd_min) && all(sample_center <= cmd_max)) {
+                    let border_width = float((cmd.packed_type >> 16) & 0xff);
+                    let border_radii = unpackUnorm4x8ToFloat(cmd_rect.border_radii) * 255.0;
+                    let max_border_radius = max(border_radii.x, max(border_radii.y, max(border_radii.z, border_radii.w)));
+                    let shrink = (2.0 - sqrt(2.0)) * max_border_radius;
+                    let background_color = unpackUnorm4x8ToFloat(cmd_rect.background_color).bgra;
+
+                    let cmd_min_clipped = max(scissor.offset_min, cmd_min + border_width + shrink);
+                    let cmd_max_clipped = min(scissor.offset_max, cmd_max - border_width - shrink);
+
+                    if (all(sample_center >= cmd_min_clipped) && all(sample_center <= cmd_max_clipped)) {
+                        primitive_color = background_color;
+                    } else {
+                        let b = cmd_rect.bound / 2.0;
+                        let p = cmd_rect.position + b - sample_center;
+
+                        float d;
+                        if (all(border_radii == float4(0.0))) {
+                            d = sdf::box(p, b);
+                        } else {
+                            d = sdf::rounded_box(p, b, border_radii);
+                        }
+
+                        let border_color = unpackUnorm4x8ToFloat(cmd_rect.border_color).bgra;
+                        primitive_color = lerp(background_color, border_color, smoothstep(1.0, 0.0, 1.0 - d - border_width));
+                        primitive_color = lerp(primitive_color, float4(0.0), smoothstep(1.0, 0.0, 1.0 - d));
+
+                        let clip_b = (scissor.offset_max - scissor.offset_min) / 2.0;
+                        let clip_p = scissor.offset_min + clip_b - sample_center;
+                        d = max(d, sdf::box(clip_p, clip_b));
+                        primitive_color = d < 0.0 ? primitive_color : float4(0.0);
+                    }
+                }
+                break;
+            }
+            case CmdType::Glyph: {
+                let cmd_glyph = reinterpret<CmdGlyph>(cmd);
+                let glyph = constants.glyph_buffer[cmd_glyph.index];
+                let cmd_min = cmd_glyph.position + glyph.offset_min;
+                let cmd_max = cmd_glyph.position + glyph.offset_max;
+                if (all(sample_center >= max(scissor.offset_min, cmd_min)) && all(sample_center <= min(scissor.offset_max, cmd_max))) {
+                    let glyph_size = glyph.offset_max - glyph.offset_min;
+                    let uv = lerp(glyph.atlas_min, glyph.atlas_max, (sample_center - cmd_min) / glyph_size);
+                    let color = unpackUnorm4x8ToFloat(cmd_glyph.color).bgra;
+                    let coverage = glyph_atlas.SampleLevel(samplers[Sampler::BilinearUnnormalized], uv, 0.0).r * color.a;
+                    primitive_color = color * coverage;
+                }
+                break;
+            }
+            }
+
+            // does it blend?
+            accum.rgba = primitive_color.rgba + accum.rgba * (1.0 - primitive_color.a);
+        }
+    }
+
+    ui_layer.Store(thread_id.xy, accum);
+
+#endif
+}
diff --git a/title/shark-shaders/shaders/draw_2d_bin_0_clear.comp b/title/shark-shaders/shaders/draw_2d_bin_0_clear.comp
deleted file mode 100644 (file)
index f5fb9b9..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_buffer_reference : require
-#extension GL_EXT_buffer_reference2 : require
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_control_flow_attributes : require
-
-#extension GL_KHR_shader_subgroup_vote : require
-#extension GL_KHR_shader_subgroup_ballot : require
-
-#include "draw_2d.h"
-#include "radix_sort.h"
-
-struct ClearConstants {
-    FinishedRef finished_buffer;
-    CoarseRef coarse_buffer;
-};
-
-layout(std430, push_constant) uniform ClearConstantsBlock {
-    ClearConstants constants;
-};
-
-layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-
-void main() {
-    constants.finished_buffer.value = 0;
-    constants.coarse_buffer.values[0] = 0;
-}
diff --git a/title/shark-shaders/shaders/draw_2d_bin_1_scatter.comp b/title/shark-shaders/shaders/draw_2d_bin_1_scatter.comp
deleted file mode 100644 (file)
index f3cbef0..0000000
+++ /dev/null
@@ -1,198 +0,0 @@
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_buffer_reference : require
-#extension GL_EXT_buffer_reference2 : require
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_control_flow_attributes : require
-
-#extension GL_KHR_shader_subgroup_arithmetic : require
-#extension GL_KHR_shader_subgroup_ballot : require
-#extension GL_KHR_shader_subgroup_vote : require
-
-#include "draw_2d.h"
-
-struct ScatterConstants {
-    uvec2 tile_resolution;
-
-    uint draw_buffer_len;
-    uint coarse_buffer_len;
-
-    CommandRef draw_buffer;
-    ScissorRef scissor_buffer;
-    GlyphRef glyph_buffer;
-    CoarseRef coarse_buffer;
-};
-
-layout(std430, push_constant) uniform ScatterConstantsBlock {
-    ScatterConstants constants;
-};
-
-const uint MAX_TILES = 256;
-const uint BITMAP_STRIDE = MAX_TILES / 32;
-const uint BITMAP_SIZE = MAX_TILES * BITMAP_STRIDE;
-
-shared uint intersected_tiles[BITMAP_SIZE];
-
-layout (local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
-
-void main() {
-    const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
-    const uint draw_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + local_id;
-
-    const bool in_bounds = draw_index < constants.draw_buffer_len;
-
-    uint cmd_scissor = 0;
-    vec2 cmd_min = vec2(99999.9);
-    vec2 cmd_max = vec2(-99999.9);
-    if (in_bounds) {
-        const uint packed_type = constants.draw_buffer.values[draw_index].packed_type;
-        const uint cmd_type = packed_type >> 24;
-        cmd_scissor = packed_type & 0xffff;
-
-        for (;;) {
-            const uint scalar_type = subgroupBroadcastFirst(cmd_type);
-            [[branch]]
-            if (scalar_type == cmd_type) {
-                switch (scalar_type) {
-                    case DRAW_2D_CMD_RECT:
-                        const CmdRect cmd_rect = decode_rect(constants.draw_buffer.values[draw_index]);
-                        cmd_min = cmd_rect.position;
-                        cmd_max = cmd_rect.position + cmd_rect.bound;
-                        break;
-                    case DRAW_2D_CMD_GLYPH:
-                        const CmdGlyph cmd_glyph = decode_glyph(constants.draw_buffer.values[draw_index]);
-                        const Glyph glyph = constants.glyph_buffer.values[cmd_glyph.index];
-                        cmd_min = cmd_glyph.position + glyph.offset_min;
-                        cmd_max = cmd_glyph.position + glyph.offset_max;
-                        break;
-                }
-                break;
-            }
-        }
-    }
-
-    const Scissor scissor = constants.scissor_buffer.values[cmd_scissor];
-
-    const bool out_of_bounds = any(greaterThanEqual(cmd_min, cmd_max)) || any(greaterThan(cmd_min, scissor.offset_max)) || any(lessThan(cmd_max, scissor.offset_min));
-
-    // Are all draws off-screen?
-    if (subgroupAll(out_of_bounds)) {
-        return;
-    }
-
-    cmd_min = max(cmd_min, scissor.offset_min);
-    cmd_max = min(cmd_max, scissor.offset_max);
-
-    // Make sure off-screen commands don't contribute to the bounds.
-    const uvec2 cmds_tile_min = uvec2(clamp(subgroupMin(out_of_bounds ? ivec2(999999) : ivec2(floor(cmd_min / TILE_SIZE))), ivec2(0), constants.tile_resolution));
-    const uvec2 cmds_tile_max = uvec2(clamp(subgroupMax(out_of_bounds ? ivec2(-999999) : ivec2(floor(cmd_max / TILE_SIZE))), ivec2(0), constants.tile_resolution));
-    const uvec2 cmd_tile_min = uvec2(clamp(ivec2(floor(cmd_min / TILE_SIZE)), ivec2(0), constants.tile_resolution));
-    const uvec2 cmd_tile_max = uvec2(clamp(ivec2(floor(cmd_max / TILE_SIZE)), ivec2(0), constants.tile_resolution));
-
-    const bool cmd_dominates_bounds = all(equal(cmd_tile_min, cmds_tile_min)) && all(equal(cmd_tile_max, cmds_tile_max));
-    const bool use_combined_bounds = subgroupAny(cmd_dominates_bounds);
-
-    if (use_combined_bounds) {
-        const uvec2 tile_count = cmds_tile_max - cmds_tile_min + ivec2(1);
-
-        uint offset;
-        if (subgroupElect()) {
-            offset = atomicAdd(constants.coarse_buffer.values[0], tile_count.x * tile_count.y) + 1;
-        }
-        offset = subgroupBroadcastFirst(offset);
-
-        for (uint i = 0; i < tile_count.y; i++) {
-            for (uint j = 0; j < tile_count.x; j += gl_SubgroupSize) {
-                const uint jj = j + gl_SubgroupInvocationID;
-                const uint y = cmds_tile_min.y + i;
-                const uint x = cmds_tile_min.x + jj;
-                if (jj < tile_count.x) {
-                    const uint packed = ((y & 0xff) << 24) | ((x & 0xff) << 16) | (gl_WorkGroupID.x & 0xffff);
-                    const uint index = offset + i * tile_count.x + jj;
-                    if (index < constants.coarse_buffer_len) {
-                        constants.coarse_buffer.values[index] = packed;
-                    }
-                }
-            }
-        }
-    } else {
-        const uint start = cmds_tile_min.y * BITMAP_STRIDE + cmds_tile_min.x / 32;
-        const uint end = cmds_tile_max.y * BITMAP_STRIDE + cmds_tile_max.x / 32;
-
-        for (uint i = start; i <= end; i += gl_SubgroupSize) {
-            const uint ii = i + gl_SubgroupInvocationID;
-            if (ii < BITMAP_SIZE) {
-                intersected_tiles[ii] = 0;
-            }
-        }
-
-        subgroupBarrier();
-
-        if (!out_of_bounds) {
-            const uint min_word = cmd_tile_min.x / 32;
-            const uint max_word = cmd_tile_max.x / 32;
-            const uint min_bit = cmd_tile_min.x & 31;
-            const uint max_bit = cmd_tile_max.x & 31;
-            const uint lsb = ~((1 << min_bit) - 1);
-            const uint msb = ((1 << max_bit) - 1) | 1 << max_bit;
-            if (min_word == max_word) {
-                for (uint y = cmd_tile_min.y; y <= cmd_tile_max.y; y++) {
-                    atomicOr(intersected_tiles[y * BITMAP_STRIDE + min_word], lsb & msb);
-                }
-            } else {
-                for (uint y = cmd_tile_min.y; y <= cmd_tile_max.y; y++) {
-                    atomicOr(intersected_tiles[y * BITMAP_STRIDE + min_word], lsb);
-                    for (uint i = min_word + 1; i <= (max_word - 1); i++) {
-                        intersected_tiles[y * BITMAP_STRIDE + i] = 0xffffffff;
-                    }
-                    atomicOr(intersected_tiles[y * BITMAP_STRIDE + max_word], msb);
-                }
-            }
-        }
-
-        subgroupBarrier();
-
-        uint count = 0;
-        for (uint i = start; i <= end; i += gl_SubgroupSize) {
-            const uint ii = i + gl_SubgroupInvocationID;
-            count += subgroupAdd(ii < BITMAP_SIZE ? bitCount(intersected_tiles[ii]) : 0);
-        }
-
-        if (count == 0) {
-            return;
-        }
-
-        uint offset;
-        if (subgroupElect()) {
-            offset = atomicAdd(constants.coarse_buffer.values[0], count) + 1;
-        }
-        offset = subgroupBroadcastFirst(offset);
-
-        for (uint i = start; i <= end; i += gl_SubgroupSize) {
-            const uint ii = i + gl_SubgroupInvocationID;
-            if (ii >= BITMAP_SIZE) {
-                continue;
-            }
-
-            uint bitmap = intersected_tiles[ii];
-            const uint count = bitCount(bitmap);
-            uint scan = subgroupExclusiveAdd(count);
-
-            while (bitmap != 0) {
-                const uint index = findLSB(bitmap);
-                bitmap ^= bitmap & -bitmap;
-                const uint y = (ii * 32 + index) / MAX_TILES;
-                const uint x = (ii * 32 + index) & (MAX_TILES - 1);
-                const uint packed = (y << 24) | (x << 16) | (gl_WorkGroupID.x & 0xffff);
-                if (offset + scan < constants.coarse_buffer_len) {
-                    constants.coarse_buffer.values[offset + scan] = packed;
-                }
-                scan++;
-            }
-
-            offset += subgroupAdd(count);
-        }
-    }
-}
diff --git a/title/shark-shaders/shaders/draw_2d_bin_2_sort.comp b/title/shark-shaders/shaders/draw_2d_bin_2_sort.comp
deleted file mode 100644 (file)
index 3df9805..0000000
+++ /dev/null
@@ -1,47 +0,0 @@
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_buffer_reference : require
-#extension GL_EXT_buffer_reference2 : require
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_control_flow_attributes : require
-
-#extension GL_KHR_shader_subgroup_arithmetic : require
-#extension GL_KHR_shader_subgroup_ballot : require
-#extension GL_KHR_shader_subgroup_shuffle_relative: enable
-#extension GL_KHR_shader_subgroup_vote : require
-
-#include "draw_2d.h"
-#include "indirect.h"
-#include "radix_sort.h"
-
-layout(buffer_reference, std430, buffer_reference_align = 4) buffer VkDispatchIndirectCommandRef {
-    VkDispatchIndirectCommand dimensions;
-};
-
-struct SortConstants {
-    uint coarse_buffer_len;
-    uint _pad;
-    VkDispatchIndirectCommandRef indirect_dispatch_buffer;
-    CoarseRef coarse_buffer;
-};
-
-layout(std430, push_constant) uniform SortConstantsBlock {
-    SortConstants constants;
-};
-
-layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
-
-void main() {
-    // We shouldn't overflow the coarse buffer in the scatter phase, but we can
-    // still end up with a count that's larger than the buffer size since we
-    // unconditionally atomicAdd. So we need to clamp to the actual size now
-    // before dispatching sort work.
-    const uint count = min(constants.coarse_buffer_len, constants.coarse_buffer.values[0]);
-    constants.coarse_buffer.values[0] = count;
-
-    constants.indirect_dispatch_buffer.dimensions.x = (count + (RADIX_ITEMS_PER_WGP - 1)) / RADIX_ITEMS_PER_WGP;
-    constants.indirect_dispatch_buffer.dimensions.y = 1;
-    constants.indirect_dispatch_buffer.dimensions.z = 1;
-}
diff --git a/title/shark-shaders/shaders/draw_2d_bin_3_resolve.comp b/title/shark-shaders/shaders/draw_2d_bin_3_resolve.comp
deleted file mode 100644 (file)
index 5a8f389..0000000
+++ /dev/null
@@ -1,151 +0,0 @@
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_buffer_reference : require
-#extension GL_EXT_buffer_reference2 : require
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_control_flow_attributes : require
-
-#extension GL_KHR_shader_subgroup_arithmetic : require
-#extension GL_KHR_shader_subgroup_ballot : require
-#extension GL_KHR_shader_subgroup_vote : require
-
-#include "draw_2d.h"
-
-struct ResolveConstants {
-    uint tile_stride;
-    uint draw_buffer_len;
-
-    CommandRef draw_buffer;
-    ScissorRef scissor_buffer;
-    GlyphRef glyph_buffer;
-    CoarseReadRef coarse_buffer;
-    FineRef fine_buffer;
-    TileRef tile_buffer;
-};
-
-layout(std430, push_constant) uniform ResolveConstantsBlock {
-    ResolveConstants constants;
-};
-
-layout (local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
-
-void main() {
-    const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
-    const uint x = gl_GlobalInvocationID.y;
-    const uint y = gl_GlobalInvocationID.z;
-    const uint tile_offset = constants.tile_stride * y + x;
-    const uint search = ((y & 0xff) << 24) | ((x & 0xff) << 16);
-    const uint count = constants.coarse_buffer.values[0];
-
-    if (count == 0) {
-        constants.tile_buffer.values[tile_offset].index_min = 0;
-        constants.tile_buffer.values[tile_offset].index_max = 0;
-        return;
-    }
-
-    // Binary search for the upper bound of the tile.
-    uint base = 0;
-    {
-        uint n = count;
-        uint mid;
-        uint max_iters = 32;
-        while (max_iters --> 0 && (mid = n / 2) > 0) {
-            const uint value = constants.coarse_buffer.values[1 + base + mid] & 0xffff0000;
-            base = value > search ? base : base + mid;
-            n -= mid;
-        }
-    }
-
-    const vec2 tile_min = uvec2(x, y) * TILE_SIZE;
-    const vec2 tile_max = tile_min + TILE_SIZE;
-
-    bool hit_opaque = false;
-    uint lo = base + 1;
-    const uint hi = base + 1;
-    for(; !hit_opaque && lo > 0; lo--) {
-        const uint i = lo;
-        const uint packed = constants.coarse_buffer.values[i];
-
-        if ((packed & 0xffff0000) != (search & 0xffff0000)) {
-            break;
-        }
-
-        const uint draw_offset = packed & 0xffff;
-        const uint draw_index = draw_offset * gl_WorkGroupSize.x + local_id;
-
-        bool intersects = false;
-        bool opaque_tile = false;
-
-        if (draw_index < constants.draw_buffer_len) {
-            vec2 cmd_min = vec2(99999.9);
-            vec2 cmd_max = vec2(-99999.9);
-
-            const uint packed_type = constants.draw_buffer.values[draw_index].packed_type;
-            const uint cmd_type = packed_type >> 24;
-            const uint cmd_scissor = packed_type & 0xffff;
-
-            const Scissor scissor = constants.scissor_buffer.values[cmd_scissor];
-
-            // If the tile doesn't intersect the scissor region it doesn't need to do work here.
-            if (any(lessThan(scissor.offset_max, tile_min)) || any(greaterThan(scissor.offset_min, tile_max))) {
-               intersects = false;
-            } else {
-                for (;;) {
-                    const uint scalar_type = subgroupBroadcastFirst(cmd_type);
-                    [[branch]]
-                    if (scalar_type == cmd_type) {
-                        switch (scalar_type) {
-                            case DRAW_2D_CMD_RECT:
-                                const CmdRect cmd_rect = decode_rect(constants.draw_buffer.values[draw_index]);
-                                cmd_min = cmd_rect.position;
-                                cmd_max = cmd_rect.position + cmd_rect.bound;
-
-                                const bool background_opaque = (cmd_rect.background_color & 0xff000000) == 0xff000000;
-                                if (background_opaque) {
-                                    const float border_width = float((packed_type >> 16) & 0xff);
-                                    const bool border_opaque = (cmd_rect.border_color & 0xff000000) == 0xff000000;
-                                    const vec4 border_radii = unpackUnorm4x8(cmd_rect.border_radii);
-                                    const float max_border_radius = max(border_radii.x, max(border_radii.y, max(border_radii.z, border_radii.w))) * 255.0;
-                                    const float shrink = ((2.0 - sqrt(2.0)) * max_border_radius) + (border_opaque ? 0.0 : border_width);
-
-                                    const vec2 cmd_shrunk_min = max(scissor.offset_min, cmd_min + shrink);
-                                    const vec2 cmd_shrunk_max = min(scissor.offset_max, cmd_max - shrink);
-                                    opaque_tile = all(greaterThan(cmd_shrunk_max, cmd_shrunk_min)) && all(greaterThan(tile_min, cmd_shrunk_min)) && all(lessThan(tile_max, cmd_shrunk_max));
-                                }
-                                break;
-                            case DRAW_2D_CMD_GLYPH:
-                                const CmdGlyph cmd_glyph = decode_glyph(constants.draw_buffer.values[draw_index]);
-                                const Glyph glyph = constants.glyph_buffer.values[cmd_glyph.index];
-                                cmd_min = cmd_glyph.position + glyph.offset_min;
-                                cmd_max = cmd_glyph.position + glyph.offset_max;
-                                break;
-                        }
-                        break;
-                    }
-                }
-
-                cmd_min = max(cmd_min, scissor.offset_min);
-                cmd_max = min(cmd_max, scissor.offset_max);
-                intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max)));
-            }
-        }
-
-        uint intersects_mask = subgroupBallot(intersects).x;
-
-        if (subgroupAny(opaque_tile)) {
-            const uvec4 opaque_tile_ballot = subgroupBallot(opaque_tile);
-            const uint first_opaque_tile = subgroupBallotFindMSB(opaque_tile_ballot);
-            const uint opaque_mask = ~((1 << first_opaque_tile) - 1);
-            intersects_mask &= opaque_mask;
-            constants.fine_buffer.values[i] = intersects_mask;
-            hit_opaque = true;
-        } else {
-            constants.fine_buffer.values[i] = intersects_mask;
-        }
-    }
-
-    constants.tile_buffer.values[tile_offset].index_min = lo + 1;
-    constants.tile_buffer.values[tile_offset].index_max = hi + 1;
-}
diff --git a/title/shark-shaders/shaders/draw_2d_rasterize.comp b/title/shark-shaders/shaders/draw_2d_rasterize.comp
deleted file mode 100644 (file)
index bdc761e..0000000
+++ /dev/null
@@ -1,169 +0,0 @@
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_control_flow_attributes : require
-#extension GL_EXT_buffer_reference : require
-#extension GL_EXT_buffer_reference2 : require
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_shader_image_load_formatted : require
-
-#extension GL_KHR_shader_subgroup_vote : require
-#extension GL_KHR_shader_subgroup_ballot : require
-
-#include "bindings_compute.h"
-#include "draw_2d.h"
-#include "sdf.h"
-
-struct RasterizeConstants {
-    uint tile_stride;
-    uint _pad;
-
-    CommandRef draw_buffer;
-    ScissorRef scissor_buffer;
-    GlyphRef glyph_buffer;
-    CoarseReadRef coarse_buffer;
-    FineReadRef fine_buffer;
-    TileReadRef tile_buffer;
-};
-
-layout(std430, push_constant) uniform RasterizeConstantsBlock {
-    RasterizeConstants constants;
-};
-
-/// x = (((index >> 2) & 0x0007) & 0xFFFE) | index & 0x0001
-/// y = ((index >> 1) & 0x0003) | (((index >> 3) & 0x0007) & 0xFFFC)
-
-#define DEBUG_SHOW_TILES 0
-
-vec3 plasma_quintic(in float x)
-{
-       x = clamp(x, 0.0, 1.0);
-       vec4 x1 = vec4(1.0, x, x * x, x * x * x); // 1 x x2 x3
-       vec4 x2 = x1 * x1.w * x; // x4 x5 x6 x7
-       return vec3(
-               dot(x1.xyzw, vec4(+0.063861086, +1.992659096, -1.023901152, -0.490832805)) + dot(x2.xy, vec2(+1.308442123, -0.914547012)),
-               dot(x1.xyzw, vec4(+0.049718590, -0.791144343, +2.892305078, +0.811726816)) + dot(x2.xy, vec2(-4.686502417, +2.717794514)),
-               dot(x1.xyzw, vec4(+0.513275779, +1.580255060, -5.164414457, +4.559573646)) + dot(x2.xy, vec2(-1.916810682, +0.570638854))
-    );
-}
-
-layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
-
-void main() {
-    const uvec2 tile_coord = gl_WorkGroupID.xy / (TILE_SIZE / gl_WorkGroupSize.xy);
-    const uint tile_index = tile_coord.y * constants.tile_stride + tile_coord.x;
-
-    const uint lo = constants.tile_buffer.values[tile_index].index_min;
-    const uint hi = constants.tile_buffer.values[tile_index].index_max;
-
-    if (lo == hi) {
-        return;
-    }
-
-#if DEBUG_SHOW_TILES == 1
-
-    uint count = hi - lo;
-    const vec3 color = plasma_quintic(float(count) / 50.0);
-    imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
-
-#elif DEBUG_SHOW_TILES == 2
-
-    uint count = 0;
-    for (uint i = lo; i < hi; i++) {
-        count += bitCount(constants.fine_buffer.values[i]);
-    }
-    const vec3 color = plasma_quintic(float(count) / 600.0);
-    imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
-
-#else
-
-    const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5);
-
-    vec4 accum = vec4(0.0);
-
-    for (uint i = lo; i < hi; i++) {
-        uint bitmap = constants.fine_buffer.values[i];
-
-        while (bitmap != 0) {
-            const uint index = findLSB(bitmap);
-            bitmap ^= bitmap & -bitmap;
-
-            const uint base_index = (constants.coarse_buffer.values[i] & 0xffff) * 32;
-            const Cmd cmd = constants.draw_buffer.values[base_index + index];
-            const uint cmd_type = cmd.packed_type >> 24;
-            const uint cmd_scissor = cmd.packed_type & 0xffff;
-
-            const Scissor scissor = constants.scissor_buffer.values[cmd_scissor];
-
-            vec4 primitive_color = vec4(0.0);
-            switch (cmd_type) {
-                case DRAW_2D_CMD_RECT: {
-                    const CmdRect cmd_rect = decode_rect(cmd);
-
-                    const vec2 cmd_min = cmd_rect.position;
-                    const vec2 cmd_max = cmd_rect.position + cmd_rect.bound;
-
-                    if (any(lessThan(sample_center, cmd_min)) && any(greaterThan(sample_center, cmd_max))) {
-                        continue;
-                    }
-
-                    const float border_width = float((cmd.packed_type >> 16) & 0xff);
-                    const vec4 border_radii = unpackUnorm4x8(cmd_rect.border_radii) * 255.0;
-                    const float max_border_radius = max(border_radii.x, max(border_radii.y, max(border_radii.z, border_radii.w)));
-                    const float shrink = (2.0 - sqrt(2.0)) * max_border_radius;
-                    const vec4 background_color = unpackUnorm4x8(cmd_rect.background_color).bgra;
-
-                    const vec2 cmd_min_clipped = max(scissor.offset_min, cmd_min + border_width + shrink);
-                    const vec2 cmd_max_clipped = min(scissor.offset_max, cmd_max - border_width - shrink);
-
-                    if (all(greaterThan(sample_center, cmd_min_clipped)) && all(lessThan(sample_center, cmd_max_clipped))) {
-                        primitive_color = background_color;
-                    } else {
-                        const vec2 b = cmd_rect.bound / 2.0;
-                        const vec2 p = cmd_rect.position + b - sample_center;
-
-                        float d;
-                        if (all(equal(border_radii, vec4(0.0)))) {
-                            d = sdf_box(p, b);
-                        } else {
-                            d = sdf_rounded_box(p, b, border_radii);
-                        }
-
-                        const vec4 border_color = unpackUnorm4x8(cmd_rect.border_color).bgra;
-                        primitive_color = mix(background_color, border_color, smoothstep(1.0, 0.0, 1.0 - d - border_width));
-                        primitive_color = mix(primitive_color, vec4(0), smoothstep(1.0, 0.0, 1.0 - d));
-
-                        const vec2 clip_b = (scissor.offset_max - scissor.offset_min) / 2.0;
-                        const vec2 clip_p = scissor.offset_min + clip_b - sample_center;
-                        d = max(d, sdf_box(clip_p, clip_b));
-                        primitive_color = d < 0.0 ? primitive_color : vec4(0);
-                    }
-                    break;
-                }
-                case DRAW_2D_CMD_GLYPH: {
-                    const CmdGlyph cmd_glyph = decode_glyph(cmd);
-                    const Glyph glyph = constants.glyph_buffer.values[cmd_glyph.index];
-                    const vec2 cmd_min = cmd_glyph.position + glyph.offset_min;
-                    const vec2 cmd_max = cmd_glyph.position + glyph.offset_max;
-                    if (all(greaterThanEqual(sample_center, max(scissor.offset_min, cmd_min))) && all(lessThanEqual(sample_center, min(scissor.offset_max, cmd_max)))) {
-                        const vec2 glyph_size = glyph.offset_max - glyph.offset_min;
-                        const vec2 uv = mix(glyph.atlas_min, glyph.atlas_max, (sample_center - cmd_min) / glyph_size);
-                        const vec4 color = unpackUnorm4x8(cmd_glyph.color).bgra;
-                        const float coverage = textureLod(sampler2D(glyph_atlas, samplers[SAMPLER_BILINEAR_UNNORMALIZED]), uv, 0.0).r * color.a;
-                        primitive_color = color * coverage;
-                    }
-                    break;
-                }
-            }
-
-            // does it blend?
-            accum.rgba = primitive_color.rgba + accum.rgba * (1.0 - primitive_color.a);
-        }
-    }
-
-    imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), accum);
-
-#endif
-
-}
index 4aaa8a7b4b4b0d14b516ec76af7f0e0ebbd4025a..1e9e57164c38b5e9adf97e6082c5dd11f96486c4 100644 (file)
@@ -14,7 +14,6 @@
 
 #include "radix_sort.h"
 
-#include "draw_2d.h"
 #include "indirect.h"
 
 layout (constant_id = 0) const uint SUBGROUP_SIZE = 64;
index 415ab8321fff34ffd40570197af175e255639b9a..23ff1c37bbacc1ce1e11b6709456adb8641b4442 100644 (file)
@@ -14,7 +14,6 @@
 
 #include "radix_sort.h"
 
-#include "draw_2d.h"
 #include "indirect.h"
 
 layout (constant_id = 0) const uint SUBGROUP_SIZE = 64;
diff --git a/title/shark-shaders/shaders/sdf.h b/title/shark-shaders/shaders/sdf.h
deleted file mode 100644 (file)
index a7b71c6..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef SDF_H
-#define SDF_H
-
-// https://iquilezles.org/articles/distfunctions2d/
-
-float sdf_box(in vec2 p, in vec2 b)
-{
-    const vec2 d = abs(p) - b;
-    return length(max(d, 0.0)) + min(max(d.x, d.y), 0.0);
-}
-
-float sdf_rounded_box( in vec2 p, in vec2 b, in vec4 r )
-{
-    r.xy = (p.x > 0.0) ? r.xy : r.zw;
-    r.x  = (p.y > 0.0) ? r.x : r.y;
-    const vec2 q = abs(p) - b + r.x;
-    return min(max(q.x, q.y), 0.0) + length(max(q, 0.0)) - r.x;
-}
-
-#endif
\ No newline at end of file
diff --git a/title/shark-shaders/shaders/sdf.slang b/title/shark-shaders/shaders/sdf.slang
new file mode 100644 (file)
index 0000000..28f6eb5
--- /dev/null
@@ -0,0 +1,19 @@
+module sdf;
+
+// https://iquilezles.org/articles/distfunctions2d/
+
+namespace sdf {
+
+public float box(float2 p, float2 b) {
+    let d = abs(p) - b;
+    return length(max(d, 0.0)) + min(max(d.x, d.y), 0.0);
+}
+
+public float rounded_box(float2 p, float2 b, float4 r) {
+    r.xy = (p.x > 0.0) ? r.xy : r.zw;
+    r.x = (p.y > 0.0) ? r.x : r.y;
+    let q = abs(p) - b + r.x;
+    return min(max(q.x, q.y), 0.0) + length(max(q, 0.0)) - r.x;
+}
+
+}
index 48dfcc0f0d73ca02aeb78b9f285d53a8e7fe13af..9c3734dd550a0b188fa1741bf98a95c53a7966e0 100644 (file)
@@ -360,27 +360,34 @@ impl Pipelines {
 
         gpu.debug_name_pipeline(basic_pipeline, "basic");
 
-        let create_compute_pipeline =
-            |code, name, workgroup_size, require_full_subgroups, push_constant_size| {
+        let create_compute_pipeline_with_entry =
+            |code, entry, name, workgroup_size, require_full_subgroups, push_constant_size| {
                 let push_constant_range = PushConstantRange {
                     stage_flags: ShaderStageFlags::COMPUTE,
                     offset: 0,
                     size: push_constant_size as u32,
                 };
 
+                let workgroup_size_spec_constant = [SpecConstant::U32 {
+                    id: 0,
+                    value: workgroup_size,
+                }];
+
                 let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
                     shader: ShaderDesc {
                         code,
+                        entry,
                         require_full_subgroups,
                         required_subgroup_size: if workgroup_size != 0 {
                             Some(workgroup_size)
                         } else {
                             None
                         },
-                        spec_constants: &[SpecConstant::U32 {
-                            id: 0,
-                            value: workgroup_size,
-                        }],
+                        spec_constants: if workgroup_size != 0 {
+                            &workgroup_size_spec_constant
+                        } else {
+                            &[]
+                        },
                         ..default()
                     },
                     layout: PipelineLayout {
@@ -399,42 +406,59 @@ impl Pipelines {
                 pipeline
             };
 
-        let draw_2d_bin_0_clear_pipeline = create_compute_pipeline(
-            crate::DRAW_2D_BIN_0_CLEAR_COMP_SPV,
-            "draw2d_bin_clear",
+        let create_compute_pipeline =
+            |code, name, workgroup_size, require_full_subgroups, push_constant_size| {
+                create_compute_pipeline_with_entry(
+                    code,
+                    c"main",
+                    name,
+                    workgroup_size,
+                    require_full_subgroups,
+                    push_constant_size,
+                )
+            };
+
+        let draw_2d_bin_0_clear_pipeline = create_compute_pipeline_with_entry(
+            crate::DRAW_2D_SPV,
+            c"clear",
+            "draw2d clear",
             0,
             false,
             std::mem::size_of::<Draw2dClearConstants>(),
         );
 
         let draw_2d_bin_1_scatter_pipeline_workgroup_size = 32;
-        let draw_2d_bin_1_scatter_pipeline = create_compute_pipeline(
-            crate::DRAW_2D_BIN_1_SCATTER_COMP_SPV,
-            "draw2d_bin_scatter",
+        let draw_2d_bin_1_scatter_pipeline = create_compute_pipeline_with_entry(
+            crate::DRAW_2D_SPV,
+            c"scatter",
+            "draw2d scatter",
             draw_2d_bin_1_scatter_pipeline_workgroup_size,
             true,
             std::mem::size_of::<Draw2dScatterConstants>(),
         );
 
-        let draw_2d_bin_2_sort_pipeline = create_compute_pipeline(
-            crate::DRAW_2D_BIN_2_SORT_COMP_SPV,
-            "draw2d_bin_sort",
+        let draw_2d_bin_2_sort_pipeline = create_compute_pipeline_with_entry(
+            crate::DRAW_2D_SPV,
+            c"sort",
+            "draw2d sort",
             0,
             false,
             std::mem::size_of::<Draw2dSortConstants>(),
         );
 
-        let draw_2d_bin_3_resolve_pipeline = create_compute_pipeline(
-            crate::DRAW_2D_BIN_3_RESOLVE_COMP_SPV,
-            "draw2d_bin_resolve",
+        let draw_2d_bin_3_resolve_pipeline = create_compute_pipeline_with_entry(
+            crate::DRAW_2D_SPV,
+            c"resolve",
+            "draw2d resolve",
             32,
             true,
             std::mem::size_of::<Draw2dResolveConstants>(),
         );
 
-        let draw_2d_rasterize_pipeline = create_compute_pipeline(
-            crate::DRAW_2D_RASTERIZE_COMP_SPV,
-            "draw2d_rasterize",
+        let draw_2d_rasterize_pipeline = create_compute_pipeline_with_entry(
+            crate::DRAW_2D_SPV,
+            c"rasterize",
+            "draw2d rasterize",
             0,
             false,
             std::mem::size_of::<Draw2dRasterizeConstants>(),