]> git.nega.tv - josh/narcissus/commitdiff
shark: Restore 2d drawing with new binning
authorJosh Simmons <josh@nega.tv>
Sat, 9 Nov 2024 12:38:33 +0000 (13:38 +0100)
committerJosh Simmons <josh@nega.tv>
Sat, 9 Nov 2024 12:38:33 +0000 (13:38 +0100)
title/shark-shaders/shaders/composite.comp
title/shark-shaders/shaders/draw_2d.h
title/shark-shaders/shaders/draw_2d_bin_1_scatter.comp
title/shark-shaders/shaders/draw_2d_bin_3_resolve.comp
title/shark-shaders/shaders/draw_2d_rasterize.comp
title/shark-shaders/shaders/radix_sort_1_spine.comp
title/shark-shaders/src/pipelines.rs
title/shark/src/main.rs

index 0e6211d16063f7db03395424be44ab2338dd8389..26be8387f66457e4091537a9b539328fb6adf033 100644 (file)
@@ -7,6 +7,7 @@
 #extension GL_EXT_scalar_block_layout : require
 
 #include "compute_bindings.h"
+#include "draw_2d.h"
 
 float srgb_oetf(float a) {
     return (.0031308f >= a) ? 12.92f * a : 1.055f * pow(a, .4166666666666667f) - .055f;
@@ -23,15 +24,23 @@ vec3 tony_mc_mapface(vec3 stimulus) {
     return textureLod(sampler3D(tony_mc_mapface_lut, bilinear_sampler), uv, 0.0).rgb;
 }
 
+struct CompositeConstants {
+    uvec2 tile_resolution;
+    TileRef tile_buffer;
+};
+
+layout(std430, push_constant) uniform CompositeConstantsBlock {
+    CompositeConstants constants;
+};
+
 layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
 void main() {
-    // const uvec2 tile_coord = gl_WorkGroupID.xy / 4;
-    // const uint tile_index = tile_coord.y * uniforms.tile_resolution.x + tile_coord.x;
-    
-    // TilesRead tiles_read = TilesRead(uniforms.tiles);
-    // const uint lo = tiles_read.values[tile_base + TILE_BITMAP_RANGE_LO_OFFSET];
-    // const uint hi = tiles_read.values[tile_base + TILE_BITMAP_RANGE_HI_OFFSET];
+    const uvec2 tile_coord = gl_WorkGroupID.xy / (TILE_SIZE / gl_WorkGroupSize.xy);
+    const uint tile_index = tile_coord.y * constants.tile_resolution.x + tile_coord.x;
+
+    const uint lo = constants.tile_buffer.values[tile_index].min_index;
+    const uint hi = constants.tile_buffer.values[tile_index].max_index;
 
     // Display transform
     const vec3 stimulus = imageLoad(color_layer, ivec2(gl_GlobalInvocationID.xy)).rgb;
@@ -39,10 +48,10 @@ void main() {
     vec3 composited = srgb_oetf(transformed);
 
     // UI Composite
-    // if (lo <= hi) {
-    //     const vec4 ui = imageLoad(ui_layer_read, ivec2(gl_GlobalInvocationID.xy)).rgba;
-    //     composited = ui.rgb + (composited * (1.0 - ui.a));
-    // }
+    if (lo != hi) {
+        const vec4 ui = imageLoad(ui_layer_read, ivec2(gl_GlobalInvocationID.xy)).rgba;
+        composited = ui.rgb + (composited * (1.0 - ui.a));
+    }
 
     imageStore(composited_output, ivec2(gl_GlobalInvocationID.xy), vec4(composited, 1.0));
 }
index 43181ebb8aa8703c1b6e92072c88a0d45e9ca5fc..b39368eadb6431dced0c4ee07d0b035f7df671a0 100644 (file)
@@ -6,6 +6,11 @@ const uint TILE_SIZE = 32;
 const uint DRAW_2D_CMD_RECT = 0;
 const uint DRAW_2D_CMD_GLYPH = 1;
 
+struct Tile {
+    uint min_index;
+    uint max_index;
+};
+
 struct Glyph {
     ivec2 atlas_min;
     ivec2 atlas_max;
@@ -20,7 +25,7 @@ struct Draw2dCmd {
 };
 
 struct Draw2dCmdRect {
-    uint border_width;
+    float border_width;
     vec2 position;
     vec2 half_extent;
     uint background_color;
@@ -35,7 +40,7 @@ struct Draw2dCmdGlyph {
 
 Draw2dCmdRect decode_rect(Draw2dCmd cmd) {
     return Draw2dCmdRect(
-        cmd.words[0],
+        uintBitsToFloat(cmd.words[0]),
         vec2(uintBitsToFloat(cmd.words[1]), uintBitsToFloat(cmd.words[2])),
         vec2(uintBitsToFloat(cmd.words[3]), uintBitsToFloat(cmd.words[4])),
         cmd.words[5],
@@ -66,4 +71,12 @@ layout(buffer_reference, std430, buffer_reference_align = 4) buffer CoarseRef
     uint values[];
 };
 
+layout(buffer_reference, std430, buffer_reference_align = 4) buffer FineRef {
+    uint values[];
+};
+
+layout(buffer_reference, std430, buffer_reference_align = 4) buffer TileRef {
+    Tile values[];
+};
+
 #endif
\ No newline at end of file
index d991eb142eae94567d5ce79c7deca57a864b707a..df750ed2737aa1a3e54454b6e2d8b7d2bb364af4 100644 (file)
@@ -33,7 +33,7 @@ layout (local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
 
 void main() {
     const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
-    const uint draw_index = gl_WorkGroupID.x * gl_WorkGroupSize.x +  local_id;
+    const uint draw_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + local_id;
 
     // Bounds for this command, any tiles which intersect this AABB will be written.
     vec2 cmd_min = vec2(99999.9);
@@ -82,8 +82,47 @@ void main() {
     const bool cmd_absolute_max = cmd_max_tile == cmds_max_tile;
     const bool use_individual_bounds = !any(notEqual(subgroupBallot(cmd_absolute_min) & subgroupBallot(cmd_absolute_max), uvec4(0)));
 
-    if (false && use_individual_bounds) {
+    if (use_individual_bounds) {
+        uint count = 0;
 
+        for (uint y = cmds_min_tile.y; y <= cmds_max_tile.y; y++) {
+            for (uint x = cmds_min_tile.x; x <= cmds_max_tile.x; x++) {
+                const vec2 tile_min = uvec2(x, y) * TILE_SIZE;
+                const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution);
+
+                const bool intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max)));
+                const uvec4 ballot = subgroupBallot(intersects);
+
+                if (subgroupElect()) {
+                    count += uint(ballot.x != 0);
+                }
+            }
+        }
+
+        uint offset;
+        if (subgroupElect()) {
+            offset = atomicAdd(constants.coarse_buffer.values[0], count) + 1;
+        }
+        offset = subgroupBroadcastFirst(offset);
+
+        if (offset >= constants.coarse_buffer_len) {
+            return;
+        }
+
+        for (uint y = cmds_min_tile.y; y <= cmds_max_tile.y; y++) {
+            for (uint x = cmds_min_tile.x; x <= cmds_max_tile.x; x++) {
+                const vec2 tile_min = uvec2(x, y) * TILE_SIZE;
+                const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution);
+
+                const bool intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max)));
+                const uvec4 ballot = subgroupBallot(intersects);
+
+                if (subgroupElect() && ballot.x != 0 && offset < constants.coarse_buffer_len) {
+                    const uint packed = ((y & 0xff) << 24) | ((x & 0xff) << 16) | (gl_WorkGroupID.x & 0xffff);
+                    constants.coarse_buffer.values[offset++] = packed;
+                }
+            }
+        }
     } else {
         const uvec2 tile_count = cmds_max_tile - cmds_min_tile + uvec2(1);
         const uint count = tile_count.x * tile_count.y;
@@ -112,78 +151,4 @@ void main() {
             offset += tile_count.x;
         }
     }
-
-    // if (gl_SubgroupSize == 32 && fullscreen_ballot.x != 0) {
-    //     uint offset;
-    //     if (subgroupElect()) {
-    //         const uint count = constants.tile_resolution.x * constants.tile_resolution.y;
-    //         offset = atomicAdd(constants.coarse_buffer.values[0], count) + 1;
-    //     }
-    //     offset = subgroupBroadcastFirst(offset);
-
-    //     if (offset >= constants.coarse_buffer_len) {
-    //         return;
-    //     }
-
-    //     const uint word_index = gl_WorkGroupID.x;
-
-    //     for (uint y = 0; y < constants.tile_resolution.y; y++) {
-    //         for (uint x = 0; x < constants.tile_resolution.x; x++) {
-    //             const uint tile_index = y * constants.tile_resolution.x + x;
-    //             const uint packed = (tile_index << 16) | word_index;
-    //             if (subgroupElect() && offset + tile_index < constants.coarse_buffer_len) {
-    //                 constants.coarse_buffer.values[offset + tile_index] = packed;
-    //             }
-    //         }
-    //     }
-
-    //     return;
-    // }
-
-    // uint count = 0;
-
-    // for (uint y = cmds_min_tile.y; y <= cmds_max_tile.y; y++) {
-    //     for (uint x = cmds_min_tile.x; x <= cmds_max_tile.x; x++) {
-    //         const vec2 tile_min = uvec2(x, y) * TILE_SIZE;
-    //         const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution);
-
-    //         const bool intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max)));
-    //         const uvec4 ballot = subgroupBallot(intersects);
-
-    //         if (subgroupElect()) {
-    //             count += uint(ballot.x != 0);
-    //         }
-    //     }
-    // }
-
-    // if (count == 0) {
-    //     return;
-    // }
-
-    // uint offset;
-    // if (subgroupElect()) {
-    //     offset = atomicAdd(constants.coarse_buffer.values[0], count) + 1;
-    // }
-    // offset = subgroupBroadcastFirst(offset);
-
-    // if (offset >= constants.coarse_buffer_len) {
-    //     return;
-    // }
-
-    // for (uint y = cmds_min_tile.y; y <= cmds_max_tile.y; y++) {
-    //     for (uint x = cmds_min_tile.x; x <= cmds_max_tile.x; x++) {
-    //         const vec2 tile_min = uvec2(x, y) * TILE_SIZE;
-    //         const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution);
-    //         const uint tile_index = y * constants.tile_resolution.x + x;
-
-    //         const bool intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max)));
-    //         const uvec4 ballot = subgroupBallot(intersects);
-
-    //         if (subgroupElect() && ballot.x != 0 && offset < constants.coarse_buffer_len) {
-    //             const uint word_index = gl_WorkGroupID.x;
-    //             const uint packed = (tile_index << 16) | word_index;
-    //             constants.coarse_buffer.values[offset++] = packed;
-    //         }
-    //     }
-    // }
 }
index 718563d3177e0c8566d8172b2aac57f52653f6b0..aeb6f33e98edeecd4867f0f01fcda928aff2984d 100644 (file)
 #extension GL_EXT_buffer_reference2 : require
 #extension GL_EXT_scalar_block_layout : require
 #extension GL_EXT_control_flow_attributes : require
+#extension GL_EXT_control_flow_attributes2 : require
 
 #extension GL_KHR_shader_subgroup_arithmetic : require
 #extension GL_KHR_shader_subgroup_ballot : require
 #extension GL_KHR_shader_subgroup_vote : require
 
-#include "compute_bindings.h"
+#include "draw_2d.h"
 
-const uint SUBGROUP_SIZE = 64;
+struct Draw2dResolveConstants {
+    uvec2 screen_resolution;
+    uvec2 tile_resolution;
 
-// TODO: Spec constant support for different subgroup sizes.
-layout (local_size_x = SUBGROUP_SIZE, local_size_y = 1, local_size_z = 1) in;
+    uint draw_buffer_len;
+    uint _pad;
+
+    Draw2dCommandRef draw_buffer;
+    GlyphRef glyph_buffer;
+    CoarseRef coarse_buffer;
+    FineRef fine_buffer;
+    TileRef tile_buffer;
+};
+
+layout(std430, push_constant) uniform Draw2dResolveConstantsBlock {
+    Draw2dResolveConstants constants;
+};
+
+layout (local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
 
 void main() {
+    const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
+    const uint x = gl_GlobalInvocationID.y;
+    const uint y = gl_GlobalInvocationID.z;
+    const uint tile_offset = (constants.tile_resolution.x * y + x);
+    const uint search = ((y & 0xff) << 24) | ((x & 0xff) << 16);
+    const uint count = constants.coarse_buffer.values[0];
+
+    if (count == 0) {
+        constants.tile_buffer.values[tile_offset].min_index = 0;
+        constants.tile_buffer.values[tile_offset].max_index = 0;
+        return;
+    }
+
+    // Binary search for the upper bound of the tile.
+    uint base = 0;
+    {
+        uint n = count;
+        uint mid;
+        uint max_iters = 32;
+        while (max_iters --> 0 && (mid = n / 2) > 0) {
+            const uint value = constants.coarse_buffer.values[1 + base + mid] & 0xffff0000;
+            base = value > search ? base : base + mid;
+            n -= mid;
+        }
+    }
+
+    const vec2 tile_min = uvec2(x, y) * TILE_SIZE;
+    const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution);
+
+    bool hit_opaque = false;
+    uint lo = base + 1;
+    const uint hi = base + 1;
+    for(; !hit_opaque && lo > 0; lo--) {
+        const uint i = lo;
+        const uint packed = constants.coarse_buffer.values[i];
+
+        if ((packed & 0xffff0000) != (search & 0xffff0000)) {
+            break;
+        }
+
+        const uint draw_offset = packed & 0xffff;
+        const uint draw_index = draw_offset * gl_WorkGroupSize.x + local_id;
+
+        bool intersects = false;
+        bool opaque_tile = false;
+
+        if (draw_index < constants.draw_buffer_len) {
+            vec2 cmd_min = vec2(99999.9);
+            vec2 cmd_max = vec2(-99999.9);
+
+            const Draw2dCmd cmd = constants.draw_buffer.values[draw_index];
+            for (;;) {
+                const uint scalar_type = subgroupBroadcastFirst(cmd.type);
+                [[branch]]
+                if (scalar_type == cmd.type) {
+                    switch (scalar_type) {
+                        case DRAW_2D_CMD_RECT:
+                            const Draw2dCmdRect cmd_rect = decode_rect(cmd);
+                            cmd_min = cmd_rect.position - cmd_rect.half_extent - cmd_rect.border_width;
+                            cmd_max = cmd_rect.position + cmd_rect.half_extent + cmd_rect.border_width;
+                            opaque_tile = all(greaterThanEqual(tile_min, cmd_min)) && all(lessThanEqual(tile_max, cmd_max));
+                            opaque_tile = opaque_tile && ((cmd_rect.background_color & 0xff000000) == 0xff000000);
+                            break;
+                        case DRAW_2D_CMD_GLYPH:
+                            const Draw2dCmdGlyph cmd_glyph = decode_glyph(cmd);
+                            const Glyph glyph = constants.glyph_buffer.values[cmd_glyph.index];
+                            cmd_min = cmd_glyph.position + glyph.offset_min;
+                            cmd_max = cmd_glyph.position + glyph.offset_max;
+                            break;
+                    }
+                    break;
+                }
+            }
+
+            intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max)));
+        }
+
+        uint intersects_mask = subgroupBallot(intersects).x;
+
+        if (subgroupAny(opaque_tile)) {
+            const uvec4 opaque_tile_ballot = subgroupBallot(opaque_tile);
+            const uint first_opaque_tile = subgroupBallotFindMSB(opaque_tile_ballot);
+            const uint opaque_mask = ~((1 << first_opaque_tile) - 1);
+            intersects_mask &= opaque_mask;
+            constants.fine_buffer.values[i] = intersects_mask;
+            hit_opaque = true;
+        } else {
+            constants.fine_buffer.values[i] = intersects_mask;
+        }
+    }
+
+    constants.tile_buffer.values[tile_offset].min_index = lo + 1;
+    constants.tile_buffer.values[tile_offset].max_index = hi + 1;
 }
index dfa9b7da0650404981ed721dfa984fdef4f610f6..bdcc6f1ba2471f01313ead5aa1a1fb9139201dc0 100644 (file)
 #extension GL_KHR_shader_subgroup_ballot : require
 
 #include "compute_bindings.h"
+#include "draw_2d.h"
 
-layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+struct Draw2dRasterizeConstants {
+    uvec2 screen_resolution;
+    uvec2 tile_resolution;
+    uvec2 atlas_resolution;
+
+    Draw2dCommandRef draw_buffer;
+    GlyphRef glyph_buffer;
+    CoarseRef coarse_buffer;
+    FineRef fine_buffer;
+    TileRef tile_buffer;
+};
+
+layout(std430, push_constant) uniform Draw2dRasterizeConstantsBlock {
+    Draw2dRasterizeConstants constants;
+};
 
 /// x = (((index >> 2) & 0x0007) & 0xFFFE) | index & 0x0001
 /// y = ((index >> 1) & 0x0003) | (((index >> 3) & 0x0007) & 0xFFFC)
 
 #define DEBUG_SHOW_TILES 0
 
-#if DEBUG_SHOW_TILES != 0
-
-vec3 plasma_quintic(float x)
+vec3 plasma_quintic(in float x)
 {
        x = clamp(x, 0.0, 1.0);
        vec4 x1 = vec4(1.0, x, x * x, x * x * x); // 1 x x2 x3
@@ -33,113 +46,81 @@ vec3 plasma_quintic(float x)
     );
 }
 
-#endif
+layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
 void main() {
-    // const uvec2 tile_coord = gl_WorkGroupID.xy / (TILE_SIZE / gl_WorkGroupSize.xy);
-    // const uint tile_index = tile_coord.y * uniforms.tile_resolution.x + tile_coord.x;
-
-    // TilesRead tiles_read = TilesRead(uniforms.tiles);
-
-    // const uint lo = tiles_read.values[tile_base + TILE_BITMAP_RANGE_LO_OFFSET];
-    // const uint hi = tiles_read.values[tile_base + TILE_BITMAP_RANGE_HI_OFFSET];
-
-    // if (hi < lo) {
-    //     return;
-    // }
-
-// #if DEBUG_SHOW_TILES == 1
-
-//     uint count = 0;
-//     // For each tile, iterate over all words in the L1 bitmap.
-//     for (uint index_l1 = lo; index_l1 <= hi; index_l1++) {
-//         // For each word, iterate all set bits.
-//         uint bitmap_l1 = tiles_read.values[tile_base + TILE_BITMAP_L1_OFFSET + index_l1];
-
-//         while (bitmap_l1 != 0) {
-//             const uint i = findLSB(bitmap_l1);
-//             bitmap_l1 ^= bitmap_l1 & -bitmap_l1;
-
-//             // For each set bit in the L1 bitmap, iterate the set bits in the
-//             // corresponding L0 bitmap.
-//             const uint index_l0 = index_l1 * 32 + i;
-//             uint bitmap_l0 = tiles_read.values[tile_base + TILE_BITMAP_L0_OFFSET + index_l0];
-
-//             count += bitCount(bitmap_l0);
-//         }
-//     }
-
-//     const vec3 color = plasma_quintic(float(count) / 100.0);
-//     imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
-
-// #elif DEBUG_SHOW_TILES == 2
-
-//     uint count = hi - lo;
-//     const vec3 color = plasma_quintic(float(count) / 100.0);
-//     imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
-
-// #else
-
-//     const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5);
-
-//     vec4 accum = vec4(0.0);
-
-//     // For each tile, iterate over all words in the L1 bitmap.
-//     for (uint index_l1 = lo; index_l1 <= hi; index_l1++) {
-//         // For each word, iterate all set bits.
-//         uint bitmap_l1 = tiles_read.values[tile_base + TILE_BITMAP_L1_OFFSET + index_l1];
-
-//         while (bitmap_l1 != 0) {
-//             const uint i = findLSB(bitmap_l1);
-//             bitmap_l1 ^= bitmap_l1 & -bitmap_l1;
-
-//             // For each set bit in the L1 bitmap, iterate the set bits in the
-//             // corresponding L0 bitmap.
-//             const uint index_l0 = index_l1 * 32 + i;
-//             uint bitmap_l0 = tiles_read.values[tile_base + TILE_BITMAP_L0_OFFSET + index_l0];
-//             while (bitmap_l0 != 0) {
-//                 const uint j = findLSB(bitmap_l0);
-//                 bitmap_l0 ^= bitmap_l0 & -bitmap_l0;
-
-//                 // Set bits in the L0 bitmap indicate binned primitives for this tile.
-//                 const uint primitive_index = index_l0 * 32 + j;
-//                 const Primitive2d primitive = uniforms.primitives.values[primitive_index];
-//                 const uint type = primitive.type;
-
-//                 vec4 primitive_color = vec4(0.0);
-
-//                 switch (type) {
-//                     case PRIMITIVE_TYPE_RECT: {
-//                         // const Rect rect = uniforms.rects.values[offset];
-//                         // const vec2 rect_min = primitive_instance.position - rect.half_extent - rect.border_width;
-//                         // const vec2 rect_max = primitive_instance.position + rect.half_extent + rect.border_width;
-//                         if (all(greaterThanEqual(sample_center, rect_min)) && all(lessThanEqual(sample_center, rect_max))) {
-//                             primitive_color = unpackUnorm4x8(primitive_instance.color).bgra;
-//                         }
-//                         break;
-//                     }
-//                     case PRIMITIVE_TYPE_GLYPH: {
-//                         const Glyph glyph = uniforms.glyphs.values[offset];
-//                         const vec2 glyph_min = primitive_instance.position + glyph.offset_min;
-//                         const vec2 glyph_max = primitive_instance.position + glyph.offset_max;
-//                         if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) {
-//                             const vec2 glyph_size = glyph.offset_max - glyph.offset_min;
-//                             const vec2 uv = mix(glyph.atlas_min, glyph.atlas_max, (sample_center - glyph_min) / glyph_size) / uniforms.atlas_resolution;
-//                             const vec4 color = unpackUnorm4x8(primitive_instance.color).bgra;
-//                             const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r * color.a;
-//                             primitive_color = color * coverage;
-//                         }
-//                         break;
-//                     }
-//                 }
-
-//                 // does it blend?
-//                 accum.rgba = primitive_color.rgba + accum.rgba * (1.0 - primitive_color.a);
-//             }
-//         }
-//     }
-
-//     imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), accum);
-
-// #endif
+    const uvec2 tile_coord = gl_WorkGroupID.xy / (TILE_SIZE / gl_WorkGroupSize.xy);
+    const uint tile_index = tile_coord.y * constants.tile_resolution.x + tile_coord.x;
+
+    const uint lo = constants.tile_buffer.values[tile_index].min_index;
+    const uint hi = constants.tile_buffer.values[tile_index].max_index;
+
+    if (lo == hi) {
+        return;
+    }
+
+#if DEBUG_SHOW_TILES == 1
+
+    uint count = hi - lo;
+    const vec3 color = plasma_quintic(float(count) / 50.0);
+    imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
+
+#elif DEBUG_SHOW_TILES == 2
+
+    uint count = 0;
+    for (uint i = lo; i < hi; i++) {
+        count += bitCount(constants.fine_buffer.values[i]);
+    }
+    const vec3 color = plasma_quintic(float(count) / 600.0);
+    imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
+
+#else
+
+    const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5);
+
+    vec4 accum = vec4(0.0);
+
+    for (uint i = lo; i < hi; i++) {
+        uint bitmap = constants.fine_buffer.values[i];
+        while (bitmap != 0) {
+            const uint index = findLSB(bitmap);
+            bitmap ^= bitmap & -bitmap;
+
+            const uint base_index = (constants.coarse_buffer.values[i] & 0xffff) * 32;
+            const Draw2dCmd cmd = constants.draw_buffer.values[base_index + index];
+
+            vec4 primitive_color = vec4(0.0);
+            switch (cmd.type) {
+                case DRAW_2D_CMD_RECT:
+                    const Draw2dCmdRect cmd_rect = decode_rect(cmd);
+                    const vec2 rect_min = cmd_rect.position - cmd_rect.half_extent - cmd_rect.border_width;
+                    const vec2 rect_max = cmd_rect.position + cmd_rect.half_extent + cmd_rect.border_width;
+                    if (all(greaterThanEqual(sample_center, rect_min)) && all(lessThanEqual(sample_center, rect_max))) {
+                        primitive_color = unpackUnorm4x8(cmd_rect.background_color).bgra;
+                    }
+                    break;
+                case DRAW_2D_CMD_GLYPH:
+                    const Draw2dCmdGlyph cmd_glyph = decode_glyph(cmd);
+                    const Glyph glyph = constants.glyph_buffer.values[cmd_glyph.index];
+                    const vec2 glyph_min = cmd_glyph.position + glyph.offset_min;
+                    const vec2 glyph_max = cmd_glyph.position + glyph.offset_max;
+                    if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) {
+                        const vec2 glyph_size = glyph.offset_max - glyph.offset_min;
+                        const vec2 uv = mix(glyph.atlas_min, glyph.atlas_max, (sample_center - glyph_min) / glyph_size) / constants.atlas_resolution;
+                        const vec4 color = unpackUnorm4x8(cmd_glyph.color).bgra;
+                        const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r * color.a;
+                        primitive_color = color * coverage;
+                    }
+                    break;
+            }
+
+            // does it blend?
+            accum.rgba = primitive_color.rgba + accum.rgba * (1.0 - primitive_color.a);
+        }
+    }
+
+    imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), accum);
+
+#endif
+
 }
index c9459bd1e2397199bdef73699915c4ac886ef3e4..a02949e97956735634f60a95d0e8e1bda37ffda5 100644 (file)
@@ -12,8 +12,6 @@
 #extension GL_KHR_shader_subgroup_shuffle_relative: enable
 #extension GL_KHR_shader_subgroup_vote : require
 
-//#extension GL_EXT_debug_printf : enable
-
 #include "compute_bindings.h"
 
 #include "radix_sort.h"
index 9bc69e3d2a1fb88361b00d7b204ff1b3e2661304..9eb81551dd4ef8f92fbb5ad6e96dca67d5e3ebfc 100644 (file)
@@ -169,6 +169,46 @@ pub struct Draw2dSortConstants<'a> {
     pub coarse_buffer_address: BufferAddress<'a>,
 }
 
+#[repr(C)]
+pub struct Draw2dResolveConstants<'a> {
+    pub screen_resolution_x: u32,
+    pub screen_resolution_y: u32,
+    pub tile_resolution_x: u32,
+    pub tile_resolution_y: u32,
+
+    pub draw_buffer_len: u32,
+    pub _pad: u32,
+
+    pub draw_buffer_address: BufferAddress<'a>,
+    pub glyph_buffer_address: BufferAddress<'a>,
+    pub coarse_buffer_address: BufferAddress<'a>,
+    pub fine_buffer_address: BufferAddress<'a>,
+    pub tile_buffer_address: BufferAddress<'a>,
+}
+
+#[repr(C)]
+pub struct Draw2dRasterizeConstants<'a> {
+    pub screen_resolution_x: u32,
+    pub screen_resolution_y: u32,
+    pub tile_resolution_x: u32,
+    pub tile_resolution_y: u32,
+    pub atlas_resolution_x: u32,
+    pub atlas_resolution_y: u32,
+
+    pub draw_buffer_address: BufferAddress<'a>,
+    pub glyph_buffer_address: BufferAddress<'a>,
+    pub coarse_buffer_address: BufferAddress<'a>,
+    pub fine_buffer_address: BufferAddress<'a>,
+    pub tile_buffer_address: BufferAddress<'a>,
+}
+
+#[repr(C)]
+pub struct CompositeConstants<'a> {
+    pub tile_resolution_x: u32,
+    pub tile_resolution_y: u32,
+    pub tile_buffer_address: BufferAddress<'a>,
+}
+
 #[repr(C)]
 pub struct RadixSortUpsweepConstants<'a> {
     pub shift: u32,
@@ -298,48 +338,50 @@ impl Pipelines {
 
         gpu.debug_name_pipeline(basic_pipeline, "basic");
 
-        let create_compute_pipeline = |code, name, workgroup_size, push_constant_size| {
-            let push_constant_range = PushConstantRange {
-                stage_flags: ShaderStageFlags::COMPUTE,
-                offset: 0,
-                size: push_constant_size as u32,
-            };
-
-            let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
-                shader: ShaderDesc {
-                    code,
-                    require_full_subgroups: workgroup_size != 0,
-                    required_subgroup_size: if workgroup_size != 0 {
-                        Some(workgroup_size)
-                    } else {
-                        None
+        let create_compute_pipeline =
+            |code, name, workgroup_size, require_full_subgroups, push_constant_size| {
+                let push_constant_range = PushConstantRange {
+                    stage_flags: ShaderStageFlags::COMPUTE,
+                    offset: 0,
+                    size: push_constant_size as u32,
+                };
+
+                let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
+                    shader: ShaderDesc {
+                        code,
+                        require_full_subgroups,
+                        required_subgroup_size: if workgroup_size != 0 {
+                            Some(workgroup_size)
+                        } else {
+                            None
+                        },
+                        spec_constants: &[SpecConstant::U32 {
+                            id: 0,
+                            value: workgroup_size,
+                        }],
+                        ..default()
                     },
-                    spec_constants: &[SpecConstant::U32 {
-                        id: 0,
-                        value: workgroup_size,
-                    }],
-                    ..default()
-                },
-                layout: PipelineLayout {
-                    bind_group_layouts: &[compute_bind_group_layout],
-                    // Validation cries about push constant ranges with zero size.
-                    push_constant_ranges: if push_constant_range.size != 0 {
-                        std::slice::from_ref(&push_constant_range)
-                    } else {
-                        &[]
+                    layout: PipelineLayout {
+                        bind_group_layouts: &[compute_bind_group_layout],
+                        // Validation cries about push constant ranges with zero size.
+                        push_constant_ranges: if push_constant_range.size != 0 {
+                            std::slice::from_ref(&push_constant_range)
+                        } else {
+                            &[]
+                        },
                     },
-                },
-            });
+                });
 
-            gpu.debug_name_pipeline(pipeline, name);
+                gpu.debug_name_pipeline(pipeline, name);
 
-            pipeline
-        };
+                pipeline
+            };
 
         let draw_2d_bin_0_clear_pipeline = create_compute_pipeline(
             crate::DRAW_2D_BIN_0_CLEAR_COMP_SPV,
             "draw2d_bin_clear",
             0,
+            false,
             std::mem::size_of::<Draw2dClearConstants>(),
         );
 
@@ -348,6 +390,7 @@ impl Pipelines {
             crate::DRAW_2D_BIN_1_SCATTER_COMP_SPV,
             "draw2d_bin_scatter",
             draw_2d_bin_1_scatter_pipeline_workgroup_size,
+            true,
             std::mem::size_of::<Draw2dScatterConstants>(),
         );
 
@@ -355,23 +398,31 @@ impl Pipelines {
             crate::DRAW_2D_BIN_2_SORT_COMP_SPV,
             "draw2d_bin_sort",
             0,
+            false,
             std::mem::size_of::<Draw2dSortConstants>(),
         );
 
         let draw_2d_bin_3_resolve_pipeline = create_compute_pipeline(
             crate::DRAW_2D_BIN_3_RESOLVE_COMP_SPV,
             "draw2d_bin_resolve",
-            0,
-            0,
+            32,
+            true,
+            std::mem::size_of::<Draw2dResolveConstants>(),
         );
 
-        let draw_2d_rasterize_pipeline =
-            create_compute_pipeline(crate::DRAW_2D_RASTERIZE_COMP_SPV, "draw2d_rasterize", 0, 0);
+        let draw_2d_rasterize_pipeline = create_compute_pipeline(
+            crate::DRAW_2D_RASTERIZE_COMP_SPV,
+            "draw2d_rasterize",
+            0,
+            false,
+            std::mem::size_of::<Draw2dRasterizeConstants>(),
+        );
 
         let radix_sort_0_upsweep_pipeline = create_compute_pipeline(
             crate::RADIX_SORT_0_UPSWEEP_COMP_SPV,
             "radix_sort_upsweep",
             32,
+            true,
             std::mem::size_of::<RadixSortUpsweepConstants>(),
         );
 
@@ -379,6 +430,7 @@ impl Pipelines {
             crate::RADIX_SORT_1_SPINE_COMP_SPV,
             "radix_sort_spine",
             32,
+            true,
             std::mem::size_of::<RadixSortSpineConstants>(),
         );
 
@@ -386,11 +438,17 @@ impl Pipelines {
             crate::RADIX_SORT_2_DOWNSWEEP_COMP_SPV,
             "radix_sort_downsweep",
             32,
+            true,
             std::mem::size_of::<RadixSortDownsweepConstants>(),
         );
 
-        let composite_pipeline =
-            create_compute_pipeline(crate::COMPOSITE_COMP_SPV, "composite", 0, 0);
+        let composite_pipeline = create_compute_pipeline(
+            crate::COMPOSITE_COMP_SPV,
+            "composite",
+            0,
+            false,
+            std::mem::size_of::<CompositeConstants>(),
+        );
 
         Self {
             _samplers: samplers,
index f1a368fb34bcc6ddb976bf3b36cc8e1b80fa1249..6ffb3cb18090455eec963f4b00a2d3fd3958a723 100644 (file)
@@ -6,10 +6,10 @@ use std::time::{Duration, Instant};
 use narcissus_core::dds;
 
 use shark_shaders::pipelines::{
-    calculate_spine_size, BasicConstants, ComputeBinds, Draw2dClearConstants, Draw2dCmd,
-    Draw2dScatterConstants, Draw2dSortConstants, GraphicsBinds, Pipelines,
-    RadixSortDownsweepConstants, RadixSortSpineConstants, RadixSortUpsweepConstants,
-    DRAW_2D_TILE_SIZE,
+    calculate_spine_size, BasicConstants, CompositeConstants, ComputeBinds, Draw2dClearConstants,
+    Draw2dCmd, Draw2dRasterizeConstants, Draw2dResolveConstants, Draw2dScatterConstants,
+    Draw2dSortConstants, GraphicsBinds, Pipelines, RadixSortDownsweepConstants,
+    RadixSortSpineConstants, RadixSortUpsweepConstants, DRAW_2D_TILE_SIZE,
 };
 
 use renderdoc_sys as rdoc;
@@ -469,7 +469,7 @@ impl<'a> UiState<'a> {
         }
     }
 
-    fn rect(&mut self, x: f32, y: f32, width: f32, height: f32) {
+    fn rect(&mut self, x: f32, y: f32, width: f32, height: f32, background_color: u32) {
         let half_extent_x = width / 2.0;
         let half_extent_y = height / 2.0;
         let center_x = x + half_extent_x;
@@ -480,8 +480,8 @@ impl<'a> UiState<'a> {
             center_y,
             half_extent_x,
             half_extent_y,
-            1.0,
-            0x4400ff00,
+            5.0,
+            background_color,
             0xffff0000,
         ))
     }
@@ -527,7 +527,7 @@ impl<'a> UiState<'a> {
             x += advance * scale;
 
             self.draw_cmds
-                .push(Draw2dCmd::glyph(touched_glyph_index, 0x880000ff, x, y));
+                .push(Draw2dCmd::glyph(touched_glyph_index, 0xff0000ff, x, y));
 
             x += advance_width * scale;
         }
@@ -1269,6 +1269,17 @@ impl<'gpu> DrawState<'gpu> {
                 ],
             );
 
+            let tile_buffer = gpu.request_transient_buffer(
+                frame,
+                thread_token,
+                BufferUsageFlags::STORAGE,
+                self.tile_resolution_x as usize
+                    * self.tile_resolution_y as usize
+                    * std::mem::size_of::<u32>()
+                    * 2,
+            );
+            let tile_buffer_address = gpu.get_buffer_address(tile_buffer.to_arg());
+
             // Render UI
             {
                 gpu.cmd_begin_debug_marker(
@@ -1515,11 +1526,29 @@ impl<'gpu> DrawState<'gpu> {
 
                 gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.draw_2d_bin_3_resolve_pipeline);
                 gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
+                gpu.cmd_push_constants(
+                    cmd_encoder,
+                    ShaderStageFlags::COMPUTE,
+                    0,
+                    &Draw2dResolveConstants {
+                        screen_resolution_x: self.width,
+                        screen_resolution_y: self.height,
+                        tile_resolution_x: self.tile_resolution_x,
+                        tile_resolution_y: self.tile_resolution_y,
+                        draw_buffer_len,
+                        _pad: 0,
+                        draw_buffer_address,
+                        glyph_buffer_address,
+                        coarse_buffer_address,
+                        fine_buffer_address: tmp_buffer_address,
+                        tile_buffer_address,
+                    },
+                );
                 gpu.cmd_dispatch(
                     cmd_encoder,
+                    1,
                     self.tile_resolution_x,
                     self.tile_resolution_y,
-                    1,
                 );
 
                 gpu.cmd_barrier(
@@ -1533,6 +1562,24 @@ impl<'gpu> DrawState<'gpu> {
 
                 gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.draw_2d_rasterize_pipeline);
                 gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
+                gpu.cmd_push_constants(
+                    cmd_encoder,
+                    ShaderStageFlags::COMPUTE,
+                    0,
+                    &Draw2dRasterizeConstants {
+                        screen_resolution_x: self.width,
+                        screen_resolution_y: self.height,
+                        tile_resolution_x: self.tile_resolution_x,
+                        tile_resolution_y: self.tile_resolution_y,
+                        atlas_resolution_x: atlas_width,
+                        atlas_resolution_y: atlas_height,
+                        draw_buffer_address,
+                        glyph_buffer_address,
+                        coarse_buffer_address,
+                        fine_buffer_address: tmp_buffer_address,
+                        tile_buffer_address,
+                    },
+                );
                 gpu.cmd_dispatch(cmd_encoder, (self.width + 7) / 8, (self.height + 7) / 8, 1);
 
                 gpu.cmd_end_debug_marker(cmd_encoder);
@@ -1542,7 +1589,7 @@ impl<'gpu> DrawState<'gpu> {
             {
                 gpu.cmd_begin_debug_marker(
                     cmd_encoder,
-                    "display transform",
+                    "composite",
                     microshades::GREEN_RGBA_F32[3],
                 );
 
@@ -1573,7 +1620,16 @@ impl<'gpu> DrawState<'gpu> {
 
                 gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.composite_pipeline);
                 gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
-
+                gpu.cmd_push_constants(
+                    cmd_encoder,
+                    ShaderStageFlags::COMPUTE,
+                    0,
+                    &CompositeConstants {
+                        tile_resolution_x: self.tile_resolution_x,
+                        tile_resolution_y: self.tile_resolution_y,
+                        tile_buffer_address,
+                    },
+                );
                 gpu.cmd_dispatch(cmd_encoder, (self.width + 7) / 8, (self.height + 7) / 8, 1);
 
                 gpu.cmd_end_debug_marker(cmd_encoder);
@@ -1753,10 +1809,16 @@ pub fn main() {
             let base_y = (base_y + 1.0) * 0.5;
 
             for _ in 0..100 {
-                ui_state.rect(0.0, 0.0, width as f32, height as f32);
+                ui_state.rect(
+                    100.0,
+                    100.0,
+                    width as f32 - 200.0,
+                    height as f32 - 200.0,
+                    0x88008800,
+                );
             }
 
-            for i in 0..80 {
+            for i in 0..1 {
                 let i = i as f32;
                 ui_state.text_fmt(
                     base_x * 100.0 * scale - 5.0,
@@ -1780,16 +1842,10 @@ pub fn main() {
                     );
             }
 
-            ui_state.rect(base_x * 60.0, base_y * 60.0, 120.0, 120.0);
-            ui_state.rect(base_x * 500.0, base_y * 100.0, 120.0, 120.0);
-            ui_state.rect(base_x * 90.0, base_y * 290.0, 140.0, 120.0);
-            ui_state.rect(base_x * 800.0, base_y * 320.0, 120.0, 120.0);
-            ui_state.rect(base_x * 200.0, base_y * 200.0, 120.0, 120.0);
-            ui_state.rect(base_x * 300.0, base_y * 120.0, 120.0, 170.0);
-            ui_state.rect(base_x * 1000.0, base_y * 30.0, 50.0, 120.0);
-            ui_state.rect(base_x * 340.0, base_y * 400.0, 120.0, 110.0);
-            ui_state.rect(base_x * 290.0, base_y * 80.0, 120.0, 10.0);
-            ui_state.rect(base_x * 310.0, base_y * 190.0, 10.0, 120.0);
+            for _ in 0..500 {
+                ui_state.rect(base_x * 60.0, base_y * 60.0, 2400.0, 900.0, 0xff00ff00);
+            }
+            ui_state.rect(base_x * 60.0, base_y * 60.0, 2400.0, 900.0, 0xffff0000);
 
             draw_state.draw(
                 thread_token,