git.nega.tv - josh/narcissus/commitdiff
shark: Switch to single-pass binning for primitive-2d
author Josh Simmons <josh@nega.tv>
Fri, 31 May 2024 04:41:11 +0000 (06:41 +0200)
committer Josh Simmons <josh@nega.tv>
Fri, 31 May 2024 04:41:11 +0000 (06:41 +0200)
12 files changed:
title/shark-shaders/build.rs
title/shark-shaders/shaders/display_transform.comp.glsl
title/shark-shaders/shaders/primitive_2d.h
title/shark-shaders/shaders/primitive_2d_bin.comp.glsl [new file with mode: 0644]
title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl [deleted file]
title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl [deleted file]
title/shark-shaders/shaders/primitive_2d_bindings.h
title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl [deleted file]
title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl
title/shark/src/main.rs
title/shark/src/pipelines/display_transform.rs
title/shark/src/pipelines/primitive_2d.rs

index c6eb9d1f2c295a58e907acc42231d098d5cac88d..af405407d052fdb37013c06e989409f43bb7d5a5 100644 (file)
@@ -20,15 +20,7 @@ const SHADERS: &[Shader] = &[
     },
     Shader {
         stage: "comp",
-        name: "primitive_2d_bin_coarse",
-    },
-    Shader {
-        stage: "comp",
-        name: "primitive_2d_bin_fine",
-    },
-    Shader {
-        stage: "comp",
-        name: "primitive_2d_clear_fine",
+        name: "primitive_2d_bin",
     },
     Shader {
         stage: "comp",
index 08f6a86b1100c9e757c25762b69ec6ba56ba57c7..f9277132a9afc93bfdcce3d3793f7bea928ef27a 100644 (file)
@@ -9,10 +9,7 @@ struct PrimitiveUniforms {
     uint num_primitives;
     uint num_primitives_32;
     uint num_primitives_1024;
-
-    uint tile_stride_fine;
-
-    uvec2 tile_offset;
+    uint tile_stride;
 };
 
 layout(std430, push_constant) uniform uniformBuffer {
@@ -28,10 +25,6 @@ layout (set = 0, binding = 3, rgba16f) uniform readonly image2D layer_ui;
 
 layout (set = 0, binding = 4, rgba16f) uniform writeonly image2D composited_output;
 
-layout(std430, set = 0, binding = 5) readonly buffer fineTileCountBufferRead {
-    uint fine_count_ro[];
-};
-
 float srgb_oetf(float a) {
     return (.0031308f >= a) ? 12.92f * a : 1.055f * pow(a, .4166666666666667f) - .055f;
 }
@@ -50,18 +43,12 @@ vec3 tony_mc_mapface(vec3 stimulus) {
 layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
 void main() {
-    const uvec2 tile_coord = gl_WorkGroupID.xy >> 1;
-    const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_fine + tile_coord.x;
-
     const vec3 stimulus = imageLoad(layer_rt, ivec2(gl_GlobalInvocationID.xy)).rgb;
     const vec3 transformed = tony_mc_mapface(stimulus);
     vec3 composited = srgb_oetf(transformed);
 
-    [[branch]]
-    if (fine_count_ro[tile_index] != 0) {
-        const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba;
-        composited = ui.rgb + (composited * (1.0 - ui.a));
-    }
+    const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba; // UI layer is now read for every pixel; the per-tile count check is gone.
+    composited = ui.rgb + (composited * (1.0 - ui.a)); // ui.rgb is added unscaled - presumably premultiplied alpha; confirm against the rasterizer output.
 
     imageStore(composited_output, ivec2(gl_GlobalInvocationID.xy), vec4(composited, 1.0));
 }
index 16a2c18cfcb9376da9ab66d0b8b8b017cd0322a4..b8a3d0c161ffb8a1ecd84b2a255288d7152934d2 100644 (file)
@@ -1,16 +1,11 @@
-#define MAX_PRIMS 0x20000u
-#define TILE_SIZE_COARSE 64
-#define TILE_SIZE_FINE 16
-#define TILE_SIZE_MUL (TILE_SIZE_COARSE / TILE_SIZE_FINE)
+#define TILE_SIZE 32 // Tile edge length, in the same units as screen_resolution.
+
+#define MAX_PRIMS (1 << 18) // Bitmap capacity: the L0 bitmap holds one bit per primitive.
 #define TILE_BITMAP_L1_WORDS (MAX_PRIMS / 32 / 32)
 #define TILE_BITMAP_L0_WORDS (MAX_PRIMS / 32)
-#define TILE_STRIDE_COARSE TILE_BITMAP_L0_WORDS
-#define TILE_STRIDE_FINE (TILE_BITMAP_L0_WORDS + TILE_BITMAP_L1_WORDS)
-#define TILE_BITMAP_OFFSET_COARSE 0
-#define TILE_BITMAP_L1_OFFSET_FINE 0
-#define TILE_BITMAP_L0_OFFSET_FINE TILE_BITMAP_L1_WORDS
-
-#define TILE_DISPATCH_X 15
+#define TILE_STRIDE (TILE_BITMAP_L0_WORDS + TILE_BITMAP_L1_WORDS) // Words of bitmap storage per tile (L1 and L0 combined).
+#define TILE_BITMAP_L1_OFFSET 0 // L1 words sit at the start of a tile's slice.
+#define TILE_BITMAP_L0_OFFSET TILE_BITMAP_L1_WORDS // L0 words follow directly after the L1 words.
 
 struct PrimitiveUniforms {
     uvec2 screen_resolution;
@@ -19,9 +14,7 @@ struct PrimitiveUniforms {
     uint num_primitives;
     uint num_primitives_32;
     uint num_primitives_1024;
-    uint tile_stride_fine;
-
-    uvec2 tile_offset_coarse;
+    uint tile_stride;
 };
 
 struct Glyph {
diff --git a/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl
new file mode 100644 (file)
index 0000000..894ebb5
--- /dev/null
@@ -0,0 +1,86 @@
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#extension GL_KHR_shader_subgroup_vote : require
+#extension GL_KHR_shader_subgroup_ballot : require
+
+#include "primitive_2d.h"
+
+// TODO: Spec constant support for different subgroup sizes.
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in; // NOTE(review): the ballots below use only .x/.y and index by gl_SubgroupInvocationID, so this appears to assume one 64-wide subgroup per workgroup.
+
+shared uint bitmap_0[64]; // L0 bitmap of this workgroup's 2048 primitives against the whole bin: 64 words x 32 bits.
+
+void main() { // Bins 2048 primitives per workgroup into an 8x8 block of tiles, writing two-level bitmaps per tile.
+    const uvec2 bin_coord = gl_GlobalInvocationID.yz; // Dispatch y/z select the bin.
+    const uvec2 bin_min = bin_coord * TILE_SIZE * 8; // A bin spans 8x8 tiles.
+    const uvec2 bin_max = min(bin_min + TILE_SIZE * 8, primitive_uniforms.screen_resolution); // Clamp to the screen.
+
+    for (uint i = 0; i < 2048; i += 64) { // 32 rounds of 64 primitives each.
+        const uint prim_index = gl_WorkGroupID.x * 2048 + i + gl_SubgroupInvocationID;
+        bool intersects = false;
+        if (prim_index < primitive_uniforms.num_primitives) {
+            intersects = test_glyph(prim_index, bin_min, bin_max);
+        }
+        const uvec4 ballot = subgroupBallot(intersects); // One bit per invocation.
+        bitmap_0[i / 32 + 0] = ballot.x;
+        bitmap_0[i / 32 + 1] = ballot.y;
+    }
+
+    memoryBarrierShared(); // NOTE(review): memory barrier only, no barrier(); confirm this is sufficient for the intended subgroup configuration.
+
+    uint bitmap_1[2]; // L1 bitmap: bit k is set when bitmap_0[k] is non-zero.
+    {
+        const uvec4 ballot = subgroupBallot(bitmap_0[gl_SubgroupInvocationID] != 0);
+        bitmap_1[0] = ballot.x;
+        bitmap_1[1] = ballot.y;
+    }
+
+    for (uint y = 0; y < 8; y++) { // NOTE(review): these loops do not depend on the invocation index, so every invocation appears to repeat the same tile work - confirm intended.
+        for (uint x = 0; x < 8; x++) {
+            const uvec2 tile_coord = gl_GlobalInvocationID.yz * 8 + uvec2(x, y);
+            const uvec2 tile_min = tile_coord * TILE_SIZE;
+            const uvec2 tile_max = min(tile_min + TILE_SIZE, primitive_uniforms.screen_resolution);
+            [[branch]]
+            if (any(greaterThanEqual(tile_min, tile_max))) { // Tile starts at or beyond the screen edge on some axis.
+                continue;
+            }
+
+            const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride + tile_coord.x; // Row-major tile index.
+
+            for (uint i = 0; i < 2; i++) {
+                uint out_1 = 0;
+
+                uint word_1 = bitmap_1[i];
+                while (word_1 != 0) {
+                    const uint bit_1 = findLSB(word_1);
+                    word_1 ^= word_1 & -word_1; // Clear the lowest set bit.
+
+                    uint out_0 = 0;
+                    uint index_0 = i * 32 + bit_1;
+                    uint word_0 = bitmap_0[index_0];
+                    while (word_0 != 0) {
+                        const uint bit_0 = findLSB(word_0);
+                        word_0 ^= word_0 & -word_0;
+
+                        const uint prim_index = gl_WorkGroupID.x * 2048 + index_0 * 32 + bit_0; // Rebuild the primitive index from (workgroup, word, bit).
+                        if (test_glyph(prim_index, tile_min, tile_max)) { // Re-test against the individual tile.
+                            out_0 |= 1 << bit_0;
+                        }
+                    }
+
+                    if (out_0 != 0) {
+                        out_1 |= 1 << bit_1;
+                    }
+                    tile_bitmap_wo[tile_index * TILE_STRIDE + TILE_BITMAP_L0_OFFSET + gl_WorkGroupID.x * 64 + index_0] = out_0; // Written for every word flagged at bin level, even when out_0 is zero.
+                }
+
+                tile_bitmap_wo[tile_index * TILE_STRIDE + TILE_BITMAP_L1_OFFSET + gl_WorkGroupID.x * 2 + i] = out_1; // Each L1 word summarises 32 L0 words.
+            }
+        }
+    }
+}
diff --git a/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl
deleted file mode 100644 (file)
index 00fa31c..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_control_flow_attributes : require
-
-#extension GL_KHR_shader_subgroup_vote : require
-#extension GL_KHR_shader_subgroup_ballot : require
-
-#include "primitive_2d.h"
-
-// TODO: Spec constant support for different subgroup sizes.
-layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
-
-void main() {
-    const uvec2 tile_coord = gl_GlobalInvocationID.yz;
-    const uvec2 tile_coord_global = tile_coord + primitive_uniforms.tile_offset_coarse;
-
-    const uvec2 tile_min = tile_coord_global * TILE_SIZE_COARSE;
-    const uvec2 tile_max = min(tile_min + TILE_SIZE_COARSE, primitive_uniforms.screen_resolution);
-
-    const uint primitive_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
-
-    bool intersects = false;
-    if (primitive_index < primitive_uniforms.num_primitives) {
-        intersects = test_glyph(primitive_index, tile_min, tile_max);
-    }
-
-    uvec4 ballot_result = subgroupBallot(intersects);
-    if (subgroupElect()) { // managed democracy wins again
-        const uint tile_index = tile_coord.y * (primitive_uniforms.tile_stride_fine / TILE_SIZE_MUL)  + tile_coord.x;
-        const uint tile_offset = tile_index * TILE_STRIDE_COARSE;
-        coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
-        coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
-    }
-}
diff --git a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl
deleted file mode 100644 (file)
index 06c9a7b..0000000
+++ /dev/null
@@ -1,65 +0,0 @@
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_control_flow_attributes : require
-
-#extension GL_KHR_shader_subgroup_vote : require
-#extension GL_KHR_shader_subgroup_ballot : require
-
-#include "primitive_2d.h"
-
-// TODO: Spec constant support for different subgroup sizes.
-layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
-
-void main() {
-    const uvec2 tile_coord = gl_WorkGroupID.yz;
-    const uint tile_index = tile_coord.y * TILE_DISPATCH_X * TILE_SIZE_MUL + tile_coord.x;
-
-    const uvec2 tile_coord_global = tile_coord + primitive_uniforms.tile_offset_coarse * TILE_SIZE_MUL;
-    const uint tile_index_global = tile_coord_global.y * primitive_uniforms.tile_stride_fine + tile_coord_global.x;
-
-    const uvec2 tile_min = tile_coord_global * TILE_SIZE_FINE;
-    const uvec2 tile_max = min(tile_min + TILE_SIZE_FINE, primitive_uniforms.screen_resolution);
-
-    const uint index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
-
-    uint bitmap_l0 = 0;
-    if (index < primitive_uniforms.num_primitives_32) {
-        const uvec2 tile_coord_coarse = (tile_coord / TILE_SIZE_MUL) + primitive_uniforms.tile_offset_coarse;
-        const uint tile_index_coarse = tile_coord_coarse.y * (primitive_uniforms.tile_stride_fine / TILE_SIZE_MUL) + tile_coord_coarse.x;
-        const uint tile_base_coarse = tile_index_coarse * TILE_STRIDE_COARSE;
-        const uint tile_bitmap_base_coarse = tile_base_coarse + TILE_BITMAP_OFFSET_COARSE;
-
-        uint bitmap_coarse = coarse_bitmap_ro[tile_bitmap_base_coarse + index];
-        while (bitmap_coarse != 0) {
-            const uint i = findLSB(bitmap_coarse);
-            bitmap_coarse ^= bitmap_coarse & -bitmap_coarse;
-
-            const uint primitive_index = index * 32 + i;
-            if (test_glyph(primitive_index, tile_min, tile_max)) {
-                bitmap_l0 |= 1 << i;
-            }
-        }
-    }
-
-    const uint tile_base_fine = tile_index * TILE_STRIDE_FINE;
-    const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE;
-
-    // Write the L0 per-primitive bitmap.
-    fine_bitmap_wo[tile_bitmap_l0_base_fine + index] = bitmap_l0;
-
-    uvec4 ballot_result = subgroupBallot(bitmap_l0 != 0);
-    if (subgroupElect()) {
-        // Write the L1 per-bitmap-word bitmap.
-        const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE;
-        fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
-        fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
-
-        const uint count = uint(ballot_result.x != 0) + uint(ballot_result.y != 0);
-        if (count != 0) {
-            atomicAdd(fine_count_wo[tile_index_global], count);
-        }
-    }
-}
index e54758b73d8a99b9838f219278e6c23f37247894..c08cece0dd47cc73425533dc7d0c28511e93a714 100644 (file)
@@ -16,28 +16,12 @@ layout(std430, set = 0, binding = 3) readonly buffer glyphInstanceBuffer {
     GlyphInstance glyph_instances[];
 };
 
-layout(std430, set = 0, binding = 4) readonly buffer coarseTileBufferRead {
-    uint coarse_bitmap_ro[];
+layout(std430, set = 0, binding = 4) readonly buffer tileBufferRead {
+    uint tile_bitmap_ro[];
 };
 
-layout(std430, set = 0, binding = 4) writeonly buffer coarseTileBufferWrite {
-    uint coarse_bitmap_wo[];
+layout(std430, set = 0, binding = 4) writeonly buffer tileBufferWrite {
+    uint tile_bitmap_wo[];
 };
 
-layout(std430, set = 0, binding = 5) readonly buffer fineTileBitmapBufferRead {
-    uint fine_bitmap_ro[];
-};
-
-layout(std430, set = 0, binding = 5) writeonly buffer fineTileBitmapBufferWrite {
-    uint fine_bitmap_wo[];
-};
-
-layout(std430, set = 0, binding = 6) readonly buffer fineTileCountBufferRead {
-    uint fine_count_ro[];
-};
-
-layout(std430, set = 0, binding = 6) writeonly buffer fineTileCountBufferWrite {
-    uint fine_count_wo[];
-};
-
-layout (set = 0, binding = 7, rgba16f) uniform writeonly image2D ui_image;
+layout (set = 0, binding = 5, rgba16f) uniform writeonly image2D ui_image;
diff --git a/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_clear_fine.comp.glsl
deleted file mode 100644 (file)
index 3d486b6..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_control_flow_attributes : require
-
-#include "primitive_2d.h"
-
-layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
-
-void main() {
-    fine_count_wo[gl_GlobalInvocationID.x] = 0;
-}
index 813eddbc43f082031f4650049f383972a70fb9e0..01bb88224fe9d66848d1ab3b9f8d96d56a6d445f 100644 (file)
@@ -10,7 +10,7 @@
 
 #include "primitive_2d.h"
 
-layout (local_size_x = TILE_SIZE_FINE, local_size_y = TILE_SIZE_FINE, local_size_z = 1) in;
+layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
 #define DEBUG_SHOW_TILES 0
 
@@ -31,23 +31,19 @@ vec3 plasma_quintic(float x)
 #endif
 
 void main() {
-    const uvec2 tile_coord = gl_WorkGroupID.xy;
-    const uint tile_index = tile_coord.y * TILE_DISPATCH_X * TILE_SIZE_MUL + tile_coord.x;
-
-    const uvec2 tile_coord_global = tile_coord + primitive_uniforms.tile_offset_coarse * TILE_SIZE_MUL;
-    const uint tile_index_global = tile_coord_global.y * primitive_uniforms.tile_stride_fine + tile_coord_global.x;
-
-    const uint tile_base_fine = tile_index * TILE_STRIDE_FINE;
-    const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE;
-    const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE;
+    const uvec2 tile_coord = gl_WorkGroupID.xy / 4;
+    const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride + tile_coord.x;
+    const uint tile_base = tile_index * TILE_STRIDE;
+    const uint tile_bitmap_l1_base_fine = tile_base + TILE_BITMAP_L1_OFFSET;
+    const uint tile_bitmap_l0_base_fine = tile_base + TILE_BITMAP_L0_OFFSET;
 
 #if DEBUG_SHOW_TILES == 1
 
-    uint count = 0;
+    int count = 0;
     // For each tile, iterate over all words in the L1 bitmap.
     for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
         // For each word, iterate all set bits.
-        uint bitmap_l1 = fine_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
+        uint bitmap_l1 = tile_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
 
         while (bitmap_l1 != 0) {
             const uint i = findLSB(bitmap_l1);
@@ -56,31 +52,23 @@ void main() {
             // For each set bit in the L1 bitmap, iterate the set bits in the
             // corresponding L0 bitmap.
             const uint index_l0 = index_l1 * 32 + i;
-            uint bitmap_l0 = fine_bitmap_ro[tile_bitmap_l0_base_fine + index_l0];
+            uint bitmap_l0 = tile_bitmap_ro[tile_bitmap_l0_base_fine + index_l0];
 
             count += bitCount(bitmap_l0);
         }
     }
 
-    const vec3 color = plasma_quintic(float(count) / 50.0);
-    imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy + primitive_uniforms.tile_offset_coarse * TILE_SIZE_COARSE), vec4(color, 1.0));
+    const vec3 color = plasma_quintic(float(count) / 100.0);
+    imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
 
 #else
 
     vec4 accum = vec4(0.0);
 
-    uint word_count = fine_count_ro[tile_index_global];
-    if (word_count == 0) {
-        return;
-    }
-
     // For each tile, iterate over all words in the L1 bitmap. 
-    for (int index_l1 = 0; word_count != 0 && index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
+    for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
         // For each word, iterate all set bits.
-        uint bitmap_l1 = fine_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
-
-        if (bitmap_l1 != 0)
-            word_count -= 1;
+        uint bitmap_l1 = tile_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
 
         while (bitmap_l1 != 0) {
             const uint i = findLSB(bitmap_l1);
@@ -89,7 +77,7 @@ void main() {
             // For each set bit in the L1 bitmap, iterate the set bits in the
             // corresponding L0 bitmap.
             const uint index_l0 = index_l1 * 32 + i;
-            uint bitmap_l0 = fine_bitmap_ro[tile_bitmap_l0_base_fine + index_l0];
+            uint bitmap_l0 = tile_bitmap_ro[tile_bitmap_l0_base_fine + index_l0];
             while (bitmap_l0 != 0) {
                 const uint j = findLSB(bitmap_l0);
                 bitmap_l0 ^= bitmap_l0 & -bitmap_l0;
@@ -101,7 +89,8 @@ void main() {
                 const Glyph gl = glyphs[gi.index];
                 const vec2 glyph_min = gi.position + gl.offset_min;
                 const vec2 glyph_max = gi.position + gl.offset_max;
-                const vec2 sample_center = gl_GlobalInvocationID.xy + primitive_uniforms.tile_offset_coarse * TILE_SIZE_COARSE + vec2(0.5);
+                const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5);
+                [[branch]]
                 if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) {
                     const vec2 glyph_size = gl.offset_max - gl.offset_min;
                     const vec4 color = unpackUnorm4x8(gi.color).bgra;
@@ -114,7 +103,7 @@ void main() {
         }
     }
 
-    imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy + primitive_uniforms.tile_offset_coarse * TILE_SIZE_COARSE), accum);
+    imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy), accum);
 
 #endif
 }
index 64227b81ef8c5d8a06e410624863b3ea07e08aa3..fd621b6b3115523e7c3fbc79b83ba2c69ee18ce4 100644 (file)
@@ -31,10 +31,7 @@ use pipelines::primitive_2d::{GlyphInstance, Primitive2dPipeline};
 use spring::simple_spring_damper_exact;
 
 use crate::pipelines::basic::BasicUniforms;
-use crate::pipelines::primitive_2d::{
-    PrimitiveUniforms, TILE_DISPATCH_COARSE_X, TILE_DISPATCH_COARSE_Y, TILE_DISPATCH_FINE_X,
-    TILE_DISPATCH_FINE_Y, TILE_SIZE_COARSE, TILE_SIZE_FINE, TILE_STRIDE_COARSE, TILE_STRIDE_FINE,
-};
+use crate::pipelines::primitive_2d::{PrimitiveUniforms, TILE_SIZE, TILE_STRIDE};
 
 mod fonts;
 mod helpers;
@@ -853,18 +850,14 @@ struct DrawState<'gpu> {
     width: u32,
     height: u32,
 
-    tile_resolution_coarse_x: u32,
-    tile_resolution_coarse_y: u32,
-    tile_resolution_fine_x: u32,
-    tile_resolution_fine_y: u32,
+    tile_resolution_x: u32,
+    tile_resolution_y: u32,
 
     depth_image: Image,
     rt_image: Image,
     ui_image: Image,
 
-    coarse_tile_bitmap_buffer: Buffer,
-    fine_tile_bitmap_buffer: Buffer,
-    fine_tile_color_buffer: Buffer,
+    tile_bitmap_buffer: Buffer,
 
     glyph_atlas_image: Image,
 
@@ -885,18 +878,6 @@ impl<'gpu> DrawState<'gpu> {
         let models = Models::load(gpu);
         let images = Images::load(gpu, thread_token);
 
-        let fine_bitmap_buffer_size = TILE_DISPATCH_FINE_X
-            * TILE_DISPATCH_FINE_Y
-            * TILE_STRIDE_FINE
-            * std::mem::size_of::<u32>() as u32;
-
-        let fine_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
-            memory_location: MemoryLocation::Device,
-            host_mapped: false,
-            usage: BufferUsageFlags::STORAGE,
-            size: fine_bitmap_buffer_size.widen(),
-        });
-
         Self {
             gpu,
             basic_pipeline,
@@ -904,16 +885,12 @@ impl<'gpu> DrawState<'gpu> {
             display_transform_pipeline,
             width: 0,
             height: 0,
-            tile_resolution_coarse_x: 0,
-            tile_resolution_coarse_y: 0,
-            tile_resolution_fine_x: 0,
-            tile_resolution_fine_y: 0,
+            tile_resolution_x: 0,
+            tile_resolution_y: 0,
             depth_image: default(),
             rt_image: default(),
             ui_image: default(),
-            coarse_tile_bitmap_buffer: default(),
-            fine_tile_bitmap_buffer,
-            fine_tile_color_buffer: default(),
+            tile_bitmap_buffer: default(),
             glyph_atlas_image: default(),
             samplers,
             models,
@@ -1024,47 +1001,30 @@ impl<'gpu> DrawState<'gpu> {
                 gpu.destroy_image(frame, self.rt_image);
                 gpu.destroy_image(frame, self.ui_image);
 
-                let tile_resolution_coarse_x = (width + (TILE_SIZE_COARSE - 1)) / TILE_SIZE_COARSE;
-                let tile_resolution_coarse_y = (height + (TILE_SIZE_COARSE - 1)) / TILE_SIZE_COARSE;
-                let tile_resolution_fine_x = (width + (TILE_SIZE_FINE - 1)) / TILE_SIZE_FINE;
-                let tile_resolution_fine_y = (height + (TILE_SIZE_FINE - 1)) / TILE_SIZE_FINE;
+                let tile_resolution_x = (width + (TILE_SIZE - 1)) / TILE_SIZE;
+                let tile_resolution_y = (height + (TILE_SIZE - 1)) / TILE_SIZE;
 
-                if tile_resolution_coarse_x != self.tile_resolution_coarse_x
-                    || tile_resolution_coarse_y != self.tile_resolution_coarse_y
-                    || tile_resolution_fine_x != self.tile_resolution_fine_x
-                    || tile_resolution_fine_y != self.tile_resolution_fine_y
+                if tile_resolution_x != self.tile_resolution_x
+                    || tile_resolution_y != self.tile_resolution_y
                 {
-                    gpu.destroy_buffer(frame, self.fine_tile_color_buffer);
-                    gpu.destroy_buffer(frame, self.coarse_tile_bitmap_buffer);
+                    gpu.destroy_buffer(frame, self.tile_bitmap_buffer);
 
-                    let coarse_bitmap_buffer_size = tile_resolution_coarse_x
-                        * tile_resolution_coarse_y
-                        * TILE_STRIDE_COARSE
+                    let bitmap_buffer_size = tile_resolution_x
+                        * tile_resolution_y
+                        * TILE_STRIDE
                         * std::mem::size_of::<u32>() as u32;
 
-                    self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
+                    self.tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
                         memory_location: MemoryLocation::Device,
                         host_mapped: false,
                         usage: BufferUsageFlags::STORAGE,
-                        size: coarse_bitmap_buffer_size.widen(),
+                        size: bitmap_buffer_size.widen(),
                     });
 
-                    // align to the workgroup size to simplify shader
-                    let fine_color_buffer_size =
-                        ((tile_resolution_fine_x * tile_resolution_fine_y + 63) & !63)
-                            * std::mem::size_of::<u32>() as u32;
+                    println!("tile_resolution: ({tile_resolution_x},{tile_resolution_y})");
 
-                    self.fine_tile_color_buffer = gpu.create_buffer(&BufferDesc {
-                        memory_location: MemoryLocation::Device,
-                        host_mapped: false,
-                        usage: BufferUsageFlags::STORAGE,
-                        size: fine_color_buffer_size.widen(),
-                    });
-
-                    self.tile_resolution_coarse_x = tile_resolution_coarse_x;
-                    self.tile_resolution_coarse_y = tile_resolution_coarse_y;
-                    self.tile_resolution_fine_x = tile_resolution_fine_x;
-                    self.tile_resolution_fine_y = tile_resolution_fine_y;
+                    self.tile_resolution_x = tile_resolution_x;
+                    self.tile_resolution_y = tile_resolution_y;
                 }
 
                 self.depth_image = gpu.create_image(&ImageDesc {
@@ -1330,8 +1290,6 @@ impl<'gpu> DrawState<'gpu> {
 
             // Render UI
             {
-                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_clear_pipeline);
-
                 let glyph_buffer = gpu.request_transient_buffer_with_data(
                     frame,
                     thread_token,
@@ -1345,6 +1303,14 @@ impl<'gpu> DrawState<'gpu> {
                     ui_state.primitive_instances.as_slice(),
                 );
 
+                let num_primitives = ui_state.primitive_instances.len() as u32;
+                let num_primitives_32 = (num_primitives + 31) / 32;
+                let num_primitives_1024 = (num_primitives_32 + 31) / 32;
+
+                ui_state.primitive_instances.clear();
+
+                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.bin_pipeline);
+
                 gpu.cmd_set_bind_group(
                     frame,
                     cmd_encoder,
@@ -1377,27 +1343,11 @@ impl<'gpu> DrawState<'gpu> {
                         Bind {
                             binding: 4,
                             array_element: 0,
-                            typed: TypedBind::StorageBuffer(&[self
-                                .coarse_tile_bitmap_buffer
-                                .to_arg()]),
+                            typed: TypedBind::StorageBuffer(&[self.tile_bitmap_buffer.to_arg()]),
                         },
                         Bind {
                             binding: 5,
                             array_element: 0,
-                            typed: TypedBind::StorageBuffer(&[self
-                                .fine_tile_bitmap_buffer
-                                .to_arg()]),
-                        },
-                        Bind {
-                            binding: 6,
-                            array_element: 0,
-                            typed: TypedBind::StorageBuffer(&[self
-                                .fine_tile_color_buffer
-                                .to_arg()]),
-                        },
-                        Bind {
-                            binding: 7,
-                            array_element: 0,
                             typed: TypedBind::StorageImage(&[(
                                 ImageLayout::General,
                                 self.ui_image,
@@ -1406,21 +1356,6 @@ impl<'gpu> DrawState<'gpu> {
                     ],
                 );
 
-                gpu.cmd_dispatch(
-                    cmd_encoder,
-                    (self.tile_resolution_fine_x * self.tile_resolution_fine_y + 63) / 64,
-                    1,
-                    1,
-                );
-
-                let num_primitives = ui_state.primitive_instances.len() as u32;
-                let num_primitives_32 = (num_primitives + 31) / 32;
-                let num_primitives_1024 = (num_primitives_32 + 31) / 32;
-
-                ui_state.primitive_instances.clear();
-
-                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.coarse_bin_pipeline);
-
                 gpu.cmd_push_constants(
                     cmd_encoder,
                     ShaderStageFlags::COMPUTE,
@@ -1433,17 +1368,15 @@ impl<'gpu> DrawState<'gpu> {
                         num_primitives,
                         num_primitives_32,
                         num_primitives_1024,
-                        tile_stride_fine: self.tile_resolution_fine_x,
-                        tile_offset_x: 0,
-                        tile_offset_y: 0,
+                        tile_stride: self.tile_resolution_x,
                     },
                 );
 
                 gpu.cmd_dispatch(
                     cmd_encoder,
-                    (num_primitives + 63) / 64,
-                    self.tile_resolution_coarse_x,
-                    self.tile_resolution_coarse_y,
+                    (num_primitives + 2047) / 2048,
+                    (self.tile_resolution_x + 3) / 4,
+                    (self.tile_resolution_y + 3) / 4,
                 );
 
                 gpu.cmd_barrier(
@@ -1455,73 +1388,9 @@ impl<'gpu> DrawState<'gpu> {
                     &[],
                 );
 
-                for tile_offset_y in
-                    (0..self.tile_resolution_coarse_y).step_by(TILE_DISPATCH_COARSE_Y as usize)
-                {
-                    for tile_offset_x in
-                        (0..self.tile_resolution_coarse_x).step_by(TILE_DISPATCH_COARSE_X as usize)
-                    {
-                        let tile_offset_fine_x =
-                            tile_offset_x * (TILE_SIZE_COARSE / TILE_SIZE_FINE);
-
-                        let tile_offset_fine_y =
-                            tile_offset_y * (TILE_SIZE_COARSE / TILE_SIZE_FINE);
-
-                        let fine_dispatch_x = (tile_offset_fine_x + TILE_DISPATCH_FINE_X)
-                            .min(self.tile_resolution_fine_x)
-                            - tile_offset_fine_x;
-
-                        let fine_dispatch_y = (tile_offset_fine_y + TILE_DISPATCH_FINE_Y)
-                            .min(self.tile_resolution_fine_y)
-                            - tile_offset_fine_y;
-
-                        gpu.cmd_push_constants(
-                            cmd_encoder,
-                            ShaderStageFlags::COMPUTE,
-                            0,
-                            &PrimitiveUniforms {
-                                screen_resolution_x: self.width,
-                                screen_resolution_y: self.height,
-                                atlas_resolution_x: atlas_width,
-                                atlas_resolution_y: atlas_height,
-                                num_primitives,
-                                num_primitives_32,
-                                num_primitives_1024,
-                                tile_stride_fine: self.tile_resolution_fine_x,
-                                tile_offset_x,
-                                tile_offset_y,
-                            },
-                        );
-
-                        gpu.cmd_set_pipeline(
-                            cmd_encoder,
-                            self.primitive_2d_pipeline.fine_bin_pipeline,
-                        );
-
-                        gpu.cmd_dispatch(
-                            cmd_encoder,
-                            (num_primitives_32 + 63) / 64,
-                            fine_dispatch_x,
-                            fine_dispatch_y,
-                        );
-
-                        gpu.cmd_barrier(
-                            cmd_encoder,
-                            Some(&GlobalBarrier {
-                                prev_access: &[Access::ShaderWrite],
-                                next_access: &[Access::ShaderOtherRead],
-                            }),
-                            &[],
-                        );
-
-                        gpu.cmd_set_pipeline(
-                            cmd_encoder,
-                            self.primitive_2d_pipeline.rasterize_pipeline,
-                        );
-
-                        gpu.cmd_dispatch(cmd_encoder, fine_dispatch_x, fine_dispatch_y, 1);
-                    }
-                }
+                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.rasterize_pipeline);
+
+                gpu.cmd_dispatch(cmd_encoder, (self.width + 7) / 8, (self.height + 7) / 8, 1);
             }
 
             // Display transform and composite
@@ -1596,13 +1465,6 @@ impl<'gpu> DrawState<'gpu> {
                                 swapchain_image,
                             )]),
                         },
-                        Bind {
-                            binding: 5,
-                            array_element: 0,
-                            typed: TypedBind::StorageBuffer(&[self
-                                .fine_tile_color_buffer
-                                .to_arg()]),
-                        },
                     ],
                 );
 
@@ -1793,7 +1655,7 @@ pub fn main() {
                 );
             }
 
-            for i in 0..180 {
+            for i in 0..224 {
                 let i = i as f32;
                 ui_state.text_fmt(
                         5.0,
index ae018645d1b45b6877417b55910752e0c3ee29b1..7a602c2c8f2b603ee7778f995d7f4b3fee04a40d 100644 (file)
@@ -25,8 +25,6 @@ impl DisplayTransformPipeline {
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
             // Composited Output
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
-            // Tile color buffer
-            BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
         ]);
 
         let layout = &PipelineLayout {
index c9e85e37b609938273b721247f138c8339cf3fbc..fcf828b62f9007d3dbf98c54efafe9f16241876f 100644 (file)
@@ -6,18 +6,11 @@ use narcissus_gpu::{
 
 use crate::Gpu;
 
-pub const MAX_PRIMS: u32 = 0x20000;
-pub const TILE_SIZE_COARSE: u32 = 64;
-pub const TILE_SIZE_FINE: u32 = 16;
+pub const TILE_SIZE: u32 = 32;
+pub const MAX_PRIMS: u32 = 1 << 18;
 pub const TILE_BITMAP_WORDS_L1: u32 = MAX_PRIMS / 32 / 32;
 pub const TILE_BITMAP_WORDS_L0: u32 = MAX_PRIMS / 32;
-pub const TILE_STRIDE_COARSE: u32 = TILE_BITMAP_WORDS_L0;
-pub const TILE_STRIDE_FINE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1;
-
-pub const TILE_DISPATCH_COARSE_X: u32 = 15;
-pub const TILE_DISPATCH_COARSE_Y: u32 = 15;
-pub const TILE_DISPATCH_FINE_X: u32 = TILE_DISPATCH_COARSE_X * (TILE_SIZE_COARSE / TILE_SIZE_FINE);
-pub const TILE_DISPATCH_FINE_Y: u32 = TILE_DISPATCH_COARSE_Y * (TILE_SIZE_COARSE / TILE_SIZE_FINE);
+pub const TILE_STRIDE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1;
 
 #[allow(unused)]
 #[repr(C)]
@@ -31,10 +24,7 @@ pub struct PrimitiveUniforms {
     pub num_primitives_32: u32,
     pub num_primitives_1024: u32,
 
-    pub tile_stride_fine: u32,
-
-    pub tile_offset_x: u32,
-    pub tile_offset_y: u32,
+    pub tile_stride: u32,
 }
 
 #[allow(unused)]
@@ -48,9 +38,7 @@ pub struct GlyphInstance {
 
 pub struct Primitive2dPipeline {
     pub bind_group_layout: BindGroupLayout,
-    pub coarse_bin_pipeline: Pipeline,
-    pub fine_bin_pipeline: Pipeline,
-    pub fine_clear_pipeline: Pipeline,
+    pub bin_pipeline: Pipeline,
     pub rasterize_pipeline: Pipeline,
 }
 
@@ -65,11 +53,7 @@ impl Primitive2dPipeline {
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
             // Glyph Instances
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
-            // Coarse Tiles
-            BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
-            // Fine Tiles
-            BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
-            // Fine Color
+            // Tiles
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
             // UI Image Output
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
@@ -84,26 +68,10 @@ impl Primitive2dPipeline {
             }],
         };
 
-        let coarse_bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
-            shader: ShaderDesc {
-                entry: c"main",
-                code: shark_shaders::PRIMITIVE_2D_BIN_COARSE_COMP_SPV,
-            },
-            layout,
-        });
-
-        let fine_bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
-            shader: ShaderDesc {
-                entry: c"main",
-                code: shark_shaders::PRIMITIVE_2D_BIN_FINE_COMP_SPV,
-            },
-            layout,
-        });
-
-        let fine_clear_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
+        let bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
             shader: ShaderDesc {
                 entry: c"main",
-                code: shark_shaders::PRIMITIVE_2D_CLEAR_FINE_COMP_SPV,
+                code: shark_shaders::PRIMITIVE_2D_BIN_COMP_SPV,
             },
             layout,
         });
@@ -118,9 +86,7 @@ impl Primitive2dPipeline {
 
         Self {
             bind_group_layout,
-            coarse_bin_pipeline,
-            fine_bin_pipeline,
-            fine_clear_pipeline,
+            bin_pipeline,
             rasterize_pipeline,
         }
     }