git.nega.tv - josh/narcissus/commitdiff
shark: Two level binning for big performance
author Josh Simmons <josh@nega.tv>
Fri, 24 May 2024 15:28:59 +0000 (17:28 +0200)
committer Josh Simmons <josh@nega.tv>
Fri, 24 May 2024 15:28:59 +0000 (17:28 +0200)
13 files changed:
title/shark-shaders/build.rs
title/shark-shaders/shaders/primitive_2d.h [new file with mode: 0644]
title/shark-shaders/shaders/primitive_2d_bin.comp.glsl [deleted file]
title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl [new file with mode: 0644]
title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl [new file with mode: 0644]
title/shark-shaders/shaders/primitive_2d_bindings.h [new file with mode: 0644]
title/shark-shaders/shaders/primitive_2d_constants.h [deleted file]
title/shark-shaders/shaders/primitive_2d_rasterize.comp.glsl
title/shark-shaders/shaders/primitive_2d_types.h [deleted file]
title/shark/src/helpers.rs
title/shark/src/main.rs
title/shark/src/pipelines/mod.rs
title/shark/src/pipelines/primitive_2d.rs

index 5990ab93375d8599631990989a0bc5f047de7cef..10d140ada5f3efcb6ab0d614ef29d7e38355fa0f 100644 (file)
@@ -20,7 +20,11 @@ const SHADERS: &[Shader] = &[
     },
     Shader {
         stage: "comp",
-        name: "primitive_2d_bin",
+        name: "primitive_2d_bin_coarse",
+    },
+    Shader {
+        stage: "comp",
+        name: "primitive_2d_bin_fine",
     },
     Shader {
         stage: "comp",
@@ -78,7 +82,7 @@ fn main() {
         assert!(status.success());
     }
 
-    for Shader { stage, name } in SHADERS {
+    for &Shader { stage, name } in SHADERS {
         let depfile = std::fs::read_to_string(format!("{out_dir}/{name}.{stage}.d")).unwrap();
 
         struct Lexer<'a> {
diff --git a/title/shark-shaders/shaders/primitive_2d.h b/title/shark-shaders/shaders/primitive_2d.h
new file mode 100644 (file)
index 0000000..c1d045b
--- /dev/null
@@ -0,0 +1,49 @@
+#define MAX_PRIMS 0x20000u // Capacity of each per-tile bitmap, in primitives (one bit per primitive).
+#define TILE_SIZE_COARSE 128 // Edge length of a coarse binning tile, in the same units as screen_resolution.
+#define TILE_SIZE_FINE 16 // Edge length of a fine tile; also the rasterize shader's local size in x and y.
+#define TILE_SIZE_SHIFT 3 // Right-shift that maps a fine tile coordinate to its coarse tile (128 / 16 == 1 << 3).
+#define TILE_BITMAP_WORDS_L1 (MAX_PRIMS / 32 / 32) // 32-bit words in a tile's L1 bitmap: one bit per L0 word.
+#define TILE_BITMAP_WORDS_L0 (MAX_PRIMS / 32) // 32-bit words in a tile's L0 bitmap: one bit per primitive.
+#define TILE_STRIDE_COARSE TILE_BITMAP_WORDS_L0 // Words per coarse tile: an L0 bitmap only.
+#define TILE_STRIDE_FINE (TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1) // Words per fine tile: L1 words first, then L0 words.
+
+struct PrimitiveUniforms { // Uniform data shared by the binning and rasterize shaders (declared at set 0, binding 0).
+    uvec2 screen_resolution; // Upper clamp applied to tile bounds in the binning shaders.
+    uvec2 tile_resolution_coarse; // Coarse tile grid dimensions; .x is the row pitch when indexing coarse tiles.
+    uvec2 tile_resolution_fine; // Fine tile grid dimensions; .x is the row pitch when indexing fine tiles.
+    uvec2 atlas_resolution; // Divisor that turns glyph atlas coordinates into UVs in the rasterize shader.
+
+    uint num_primitives; // Number of primitives tested by the coarse binning pass.
+    uint num_primitives_32; // Number of L0 bitmap words in use; the host computes (num_primitives + 31) / 32.
+    uint num_primitives_1024; // Number of L1 bitmap words in use; the host computes (num_primitives_32 + 31) / 32.
+    uint pad_1; // Padding; not read by any shader shown in this change.
+};
+
+struct Glyph { // Per-glyph data, looked up through GlyphInstance.index.
+    ivec2 atlas_min; // Atlas coordinate sampled at the glyph's minimum corner (before division by atlas_resolution).
+    ivec2 atlas_max; // Atlas coordinate sampled at the glyph's maximum corner (before division by atlas_resolution).
+
+    vec2 offset_min; // Added to the instance position to get the glyph's minimum corner.
+    vec2 offset_max; // Added to the instance position to get the glyph's maximum corner.
+};
+
+struct GlyphInstance { // One placed glyph; glyph_instances[] is indexed by primitive index in test_glyph and the rasterizer.
+    vec2 position; // Origin that the glyph's offset_min / offset_max are added to.
+    uint index; // Index into glyphs[].
+    uint color; // Packed color; the rasterize shader decodes it with unpackUnorm4x8(color).bgra.
+};
+
+struct PrimitiveInstance { // Element type of primitive_instances[]; no shader shown in this change reads it.
+    uint type; // NOTE(review): presumably a primitive kind tag - confirm against the host code.
+    uint index; // NOTE(review): presumably an index into a per-type array such as glyph_instances[] - confirm.
+};
+
+#include "primitive_2d_bindings.h"
+
+bool test_glyph(uint index, uvec2 tile_min, uvec2 tile_max) { // True when glyph instance `index` overlaps the box [tile_min, tile_max].
+    const GlyphInstance gi = glyph_instances[index];
+    const Glyph gl = glyphs[gi.index];
+    const vec2 glyph_min = gi.position + gl.offset_min; // Corners of the glyph's bounding box.
+    const vec2 glyph_max = gi.position + gl.offset_max;
+    return !(any(lessThan(tile_max, glyph_min)) || any(greaterThan(tile_min, glyph_max))); // Rejected only when separated on some axis; touching edges count as overlap.
+}
diff --git a/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin.comp.glsl
deleted file mode 100644 (file)
index d618644..0000000
+++ /dev/null
@@ -1,68 +0,0 @@
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_control_flow_attributes : require
-
-#extension GL_KHR_shader_subgroup_vote : require
-#extension GL_KHR_shader_subgroup_ballot : require
-
-#include "primitive_2d_types.h"
-#include "primitive_2d_constants.h"
-
-layout(std430, set = 0, binding = 0) uniform uniformBuffer {
-    ivec2 screen_size;
-    ivec2 atlas_size;
-    uint num_primitives;
-};
-
-layout(std430, set = 0, binding = 3) readonly buffer glyphBuffer {
-    Glyph glyphs[];
-};
-
-layout(std430, set = 0, binding = 4) readonly buffer glyphInstanceBuffer {
-    GlyphInstance glyph_instances[];
-};
-
-layout(std430, set = 0, binding = 5) readonly buffer primitiveInstanceBuffer {
-    PrimitiveInstance primitive_instances[];
-};
-
-bool test_glyph(uint index, ivec2 tile_min, ivec2 tile_max) {
-    const GlyphInstance gi = glyph_instances[index];
-    const Glyph gl = glyphs[gi.index];
-    const vec2 glyph_min = gi.position + gl.offset_min;
-    const vec2 glyph_max = gi.position + gl.offset_max;
-    return !(any(lessThan(tile_max, glyph_min)) || any(greaterThan(tile_min, glyph_max)));
-}
-
-layout(std430, set = 0, binding = 6) writeonly buffer tileBuffer {
-    uint tile_bitmap[];
-};
-
-// TODO: Spec constant support for different subgroup sizes.
-layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
-
-void main() {
-    const ivec2 tile_size = ivec2(TILE_SIZE, TILE_SIZE);
-    const ivec2 tile_coord = ivec2(gl_GlobalInvocationID.yz);
-    const ivec2 tile_min = ivec2(tile_coord * tile_size);
-    const ivec2 tile_max = min(tile_min + tile_size, screen_size);
-
-    const uint local_index = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
-    const uint primitive_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + local_index;
-
-    bool intersects = false;
-    if (primitive_index < num_primitives) {
-        intersects = test_glyph(primitive_index, tile_min, tile_max);
-    }
-
-    uvec4 ballot_result = subgroupBallot(intersects);
-    if (subgroupElect()) { // managed democracy wins again
-        const int tile_index = tile_coord.y * MAX_TILES_X + tile_coord.x;
-        const uint bitmap_base_offset = uint(tile_index * TILE_STRIDE); 
-        tile_bitmap[bitmap_base_offset + 2u * gl_WorkGroupID.x + 0u] = ballot_result.x;
-        tile_bitmap[bitmap_base_offset + 2u * gl_WorkGroupID.x + 1u] = ballot_result.y;
-    }
-}
diff --git a/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl
new file mode 100644 (file)
index 0000000..637a730
--- /dev/null
@@ -0,0 +1,36 @@
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#extension GL_KHR_shader_subgroup_vote : require
+#extension GL_KHR_shader_subgroup_ballot : require
+
+#include "primitive_2d.h"
+
+// TODO: Spec constant support for different subgroup sizes.
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+void main() { // Coarse pass: one invocation per (primitive, coarse tile) pair; emits one bit per primitive per coarse tile.
+    const uvec2 tile_size = uvec2(TILE_SIZE_COARSE, TILE_SIZE_COARSE);
+    const uvec2 tile_coord = gl_GlobalInvocationID.yz; // Dispatch y/z select the tile; x runs over primitives.
+    const uvec2 tile_min = tile_coord * tile_size;
+    const uvec2 tile_max = min(tile_min + tile_size, primitive_uniforms.screen_resolution); // Clamp edge tiles to the screen.
+
+    const uint primitive_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; // Built from subgroup IDs so ballot bit k belongs to this subgroup's k-th primitive.
+
+    bool intersects = false; // Invocations past the end of the primitive list vote false.
+    if (primitive_index < primitive_uniforms.num_primitives) {
+        intersects = test_glyph(primitive_index, tile_min, tile_max);
+    }
+
+    uvec4 ballot_result = subgroupBallot(intersects); // One bit per invocation in the subgroup.
+    if (subgroupElect()) { // managed democracy wins again
+        const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord.x;
+        const uint bitmap_offset = tile_index * TILE_STRIDE_COARSE;
+        coarse_bitmap_wo[bitmap_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x; // NOTE(review): two words per workgroup assumes one 64-wide subgroup per workgroup - see the TODO on the layout line.
+        coarse_bitmap_wo[bitmap_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
+    }
+}
diff --git a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl
new file mode 100644 (file)
index 0000000..d421615
--- /dev/null
@@ -0,0 +1,53 @@
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#extension GL_KHR_shader_subgroup_vote : require
+#extension GL_KHR_shader_subgroup_ballot : require
+
+#include "primitive_2d.h"
+
+// TODO: Spec constant support for different subgroup sizes.
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+void main() { // Fine pass: one invocation per (L0 bitmap word, fine tile) pair; refines the coarse bitmap into per-tile L0 and L1 bitmaps.
+    const uvec2 tile_size = uvec2(TILE_SIZE_FINE, TILE_SIZE_FINE);
+    const uvec2 tile_coord = gl_GlobalInvocationID.yz; // Dispatch y/z select the fine tile; x runs over L0 words.
+    const uvec2 tile_min = tile_coord * tile_size;
+    const uvec2 tile_max = min(tile_min + tile_size, primitive_uniforms.screen_resolution); // Clamp edge tiles to the screen.
+    const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x;
+
+    const uint bitmap_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; // Index of the L0 word (32 primitives) this invocation owns.
+
+    uint bitmap_l0 = 0; // Words past num_primitives_32 are written as zero.
+    if (bitmap_index < primitive_uniforms.num_primitives_32) {
+        const uvec2 tile_coord_coarse = tile_coord >> TILE_SIZE_SHIFT; // Coarse tile containing this fine tile.
+        const uint tile_index_coarse = tile_coord_coarse.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord_coarse.x;
+        const uint bitmap_offset_coarse = tile_index_coarse * TILE_STRIDE_COARSE + bitmap_index;
+
+        uint bitmap_coarse = coarse_bitmap_ro[bitmap_offset_coarse]; // Only primitives that hit the coarse tile are re-tested.
+        while (bitmap_coarse != 0) {
+            const uint i = findLSB(bitmap_coarse);
+            const uint primitive_index = bitmap_index * 32 + i;
+            bitmap_coarse ^= bitmap_coarse & -bitmap_coarse; // Clear the lowest set bit.
+
+            if (test_glyph(primitive_index, tile_min, tile_max)) {
+                bitmap_l0 |= 1u << i; // Unsigned literal keeps the shift and the OR in uint, including i == 31.
+            }
+        }
+    }
+
+    const uint fine_bitmap_l0_offset = tile_index * TILE_STRIDE_FINE + TILE_BITMAP_WORDS_L1 + bitmap_index; // L0 words follow the tile's L1 words.
+    fine_bitmap_wo[fine_bitmap_l0_offset] = bitmap_l0;
+
+    const bool bit_l1 = bitmap_l0 != 0; // An L1 bit is set when its L0 word is non-empty.
+    uvec4 ballot_result = subgroupBallot(bit_l1);
+    if (subgroupElect()) {
+        const uint fine_bitmap_l1_offset = tile_index * TILE_STRIDE_FINE;
+        fine_bitmap_wo[fine_bitmap_l1_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x; // NOTE(review): two words per workgroup assumes one 64-wide subgroup per workgroup - see the TODO on the layout line.
+        fine_bitmap_wo[fine_bitmap_l1_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
+    }
+}
diff --git a/title/shark-shaders/shaders/primitive_2d_bindings.h b/title/shark-shaders/shaders/primitive_2d_bindings.h
new file mode 100644 (file)
index 0000000..5962fef
--- /dev/null
@@ -0,0 +1,39 @@
+
+
+layout(std430, set = 0, binding = 0) uniform uniformBuffer { // Uniform data read by all primitive_2d shaders.
+    PrimitiveUniforms primitive_uniforms; 
+};
+
+layout (set = 0, binding = 1) uniform sampler bilinear_sampler; // Combined with glyph_atlas in the rasterize shader.
+
+layout (set = 0, binding = 2) uniform texture2D glyph_atlas; // Glyph coverage is read from its .r channel.
+
+layout(std430, set = 0, binding = 3) readonly buffer glyphBuffer { // Indexed by GlyphInstance.index.
+    Glyph glyphs[];
+};
+
+layout(std430, set = 0, binding = 4) readonly buffer glyphInstanceBuffer { // Indexed by primitive index.
+    GlyphInstance glyph_instances[];
+};
+
+layout(std430, set = 0, binding = 5) readonly buffer primitiveInstanceBuffer { // Declared but not read by the shaders shown in this change.
+    PrimitiveInstance primitive_instances[];
+};
+
+layout(std430, set = 0, binding = 6) readonly buffer coarseTileBufferRead { // Read-only view of binding 6, used by the fine binning pass.
+    uint coarse_bitmap_ro[];
+};
+
+layout(std430, set = 0, binding = 6) writeonly buffer coarseTileBufferWrite { // Write-only view of the same binding, used by the coarse binning pass.
+    uint coarse_bitmap_wo[];
+};
+
+layout(std430, set = 0, binding = 7) readonly buffer fineTileBufferRead { // Read-only view of binding 7, used by the rasterize shader.
+    uint fine_bitmap_ro[];
+};
+
+layout(std430, set = 0, binding = 7) writeonly buffer fineTileBufferWrite { // Write-only view of the same binding, used by the fine binning pass.
+    uint fine_bitmap_wo[];
+};
+
+layout (set = 0, binding = 8, rgba16f) uniform writeonly image2D ui_image; // Output image for the UI.
diff --git a/title/shark-shaders/shaders/primitive_2d_constants.h b/title/shark-shaders/shaders/primitive_2d_constants.h
deleted file mode 100644 (file)
index 5830e6f..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#define MAX_PRIMS 0x10000u
-#define TILE_SIZE 16
-#define TILE_STRIDE (MAX_PRIMS / 32u)
-#define MAX_TILES_X 180
-#define MAX_TILES_Y 113
-#define NUM_TILES (MAX_TILES_X * MAX_TILES_Y)
\ No newline at end of file
index 127dd44873a8b437489aec8a52e0f13c7103690d..16e94acaae483a23df2621ec3e296a07c0119809 100644 (file)
@@ -5,66 +5,40 @@
 #extension GL_EXT_scalar_block_layout : require
 #extension GL_EXT_control_flow_attributes : require
 
-#include "primitive_2d_constants.h"
-#include "primitive_2d_types.h"
+#include "primitive_2d.h"
 
-layout(std430, set = 0, binding = 0) uniform uniformBuffer {
-    uint screen_width;
-    uint screen_height;
-    uint atlas_width;
-    uint atlas_height;
-    uint num_primitives;
-};
-
-layout (set = 0, binding = 1) uniform sampler bilinear_sampler;
-layout (set = 0, binding = 2) uniform texture2D glyph_atlas;
-
-layout(std430, set = 0, binding = 3) readonly buffer glyphBuffer {
-    Glyph glyphs[];
-};
-
-layout(std430, set = 0, binding = 4) readonly buffer glyphInstanceBuffer {
-    GlyphInstance glyph_instances[];
-};
-
-layout(std430, set = 0, binding = 5) readonly buffer primitiveInstanceBuffer {
-    PrimitiveInstance primitive_instances[];
-};
-
-layout(std430, set = 0, binding = 6) readonly buffer tileBuffer {
-    uint tile_bitmap[];
-};
-
-layout (set = 0, binding = 7, rgba16f) uniform writeonly image2D ui_image;
-
-layout (local_size_x = 16, local_size_y = 16, local_size_z = 1) in;
+layout (local_size_x = TILE_SIZE_FINE, local_size_y = TILE_SIZE_FINE, local_size_z = 1) in;
 
 void main() {
     vec4 accum = vec4(0.0);
 
-    const ivec2 tile_coord = ivec2(gl_WorkGroupID.xy);
-    const int tile_index = tile_coord.y * MAX_TILES_X + tile_coord.x;
-    const uint bitmap_base_offset = uint(tile_index * TILE_STRIDE);
-
-    for (int i = 0; i < num_primitives / 32; i++) {
-        uint bitmap = tile_bitmap[bitmap_base_offset + i];
-        while (bitmap != 0) {
-            const uint t = bitmap & -bitmap;
-            const int index = i * 32 + findLSB(bitmap);
-            bitmap ^= t;
-
-            const GlyphInstance gi = glyph_instances[index];
-            const Glyph gl = glyphs[gi.index];
-            const vec4 color = unpackUnorm4x8(gi.color).bgra;
-            const vec2 glyph_min = gi.position + gl.offset_min;
-            const vec2 glyph_max = gi.position + gl.offset_max;
-            const vec2 glyph_size = gl.offset_max - gl.offset_min;
-            const vec2 sample_center = gl_GlobalInvocationID.xy; // half pixel offset goes here?
-            if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) {
-                const vec2 uv = mix(vec2(gl.atlas_min), vec2(gl.atlas_max), (sample_center - glyph_min) / glyph_size) / vec2(atlas_width, atlas_height);
-                const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r * color.a;
-                accum.rgb = (coverage * color.rgb) + accum.rgb * (1.0 - coverage);
-                accum.a = coverage + accum.a * (1.0 - coverage);
+    const uvec2 tile_coord = gl_WorkGroupID.xy;
+    const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x;
+    const uint bitmap_offset = tile_index * TILE_STRIDE_FINE;
+
+    for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
+        uint bitmap_l1 = fine_bitmap_ro[bitmap_offset + index_l1];
+        while (bitmap_l1 != 0) {
+            const uint index_l0 = index_l1 * 32 + findLSB(bitmap_l1);
+            uint bitmap_l0 = fine_bitmap_ro[bitmap_offset + TILE_BITMAP_WORDS_L1 + index_l0];
+            bitmap_l1 ^= bitmap_l1 & -bitmap_l1;
+            while (bitmap_l0 != 0) {
+                const uint primitive_index = index_l0 * 32 + findLSB(bitmap_l0);
+                bitmap_l0 ^= bitmap_l0 & -bitmap_l0;
+
+                const GlyphInstance gi = glyph_instances[primitive_index];
+                const Glyph gl = glyphs[gi.index];
+                const vec2 glyph_min = gi.position + gl.offset_min;
+                const vec2 glyph_max = gi.position + gl.offset_max;
+                const vec2 sample_center = gl_GlobalInvocationID.xy; // half pixel offset goes here?
+                if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) {
+                    const vec2 glyph_size = gl.offset_max - gl.offset_min;
+                    const vec4 color = unpackUnorm4x8(gi.color).bgra;
+                    const vec2 uv = mix(gl.atlas_min, gl.atlas_max, (sample_center - glyph_min) / glyph_size) / primitive_uniforms.atlas_resolution;
+                    const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r * color.a;
+                    accum.rgb = (coverage * color.rgb) + accum.rgb * (1.0 - coverage);
+                    accum.a = coverage + accum.a * (1.0 - coverage);
+                }
             }
         }
     }
diff --git a/title/shark-shaders/shaders/primitive_2d_types.h b/title/shark-shaders/shaders/primitive_2d_types.h
deleted file mode 100644 (file)
index 6b531a8..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-
-struct Glyph {
-    ivec2 atlas_min;
-    ivec2 atlas_max;
-
-    vec2 offset_min;
-    vec2 offset_max;
-};
-
-struct GlyphInstance {
-    vec2 position;
-    uint index;
-    uint color;
-};
-
-struct PrimitiveInstance {
-    uint type;
-    uint index;
-};
index e45e0ba8f64843a56a5280043baf450bbebc84e2..d211c6f4ea271bc75ae9317fba879e831013ed8b 100644 (file)
@@ -3,7 +3,7 @@ use std::path::Path;
 use narcissus_core::{obj, Widen};
 use narcissus_maths::{vec2, vec3, vec4, Vec2, Vec3};
 
-use crate::pipelines::Vertex;
+use crate::pipelines::basic::Vertex;
 
 pub fn load_obj<P: AsRef<Path>>(path: P) -> (Vec<Vertex>, Vec<u16>) {
     #[derive(Default)]
index a3f68fda7709c7e5199e3a6b78bb1ad7e3454739..15f83c31b0ebc7fd398338e34f384b7db2b9b780 100644 (file)
@@ -3,7 +3,9 @@ use std::ops::Index;
 use std::path::Path;
 use std::time::{Duration, Instant};
 
-use narcissus_core::dds;
+use narcissus_core::{dds, Widen as _};
+use pipelines::basic::BasicPipeline;
+use pipelines::display_transform::DisplayTransformPipeline;
 use renderdoc_sys as rdoc;
 
 use fonts::{FontFamily, Fonts};
@@ -12,7 +14,7 @@ use narcissus_app::{create_app, Event, Key, PressedState, WindowDesc};
 use narcissus_core::{box_assume_init, default, rand::Pcg64, zeroed_box, BitIter};
 use narcissus_font::{FontCollection, GlyphCache, HorizontalMetrics};
 use narcissus_gpu::{
-    create_device, Access, Bind, BufferDesc, BufferImageCopy, BufferUsageFlags, ClearValue,
+    create_device, Access, Bind, Buffer, BufferDesc, BufferImageCopy, BufferUsageFlags, ClearValue,
     CmdEncoder, ColorSpace, Device, DeviceExt, Extent2d, Extent3d, Frame, GlobalBarrier, Image,
     ImageAspectFlags, ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout,
     ImageSubresourceRange, ImageTiling, ImageUsageFlags, IndexType, LoadOp, MemoryLocation,
@@ -24,12 +26,13 @@ use narcissus_image as image;
 use narcissus_maths::{
     clamp, perlin_noise3, sin_pi_f32, vec3, Affine3, Deg, HalfTurn, Mat3, Mat4, Point3, Vec3,
 };
-use pipelines::{
-    BasicPipeline, BasicUniforms, DisplayTransformPipeline, GlyphInstance, Primitive2dPipeline,
-};
+use pipelines::primitive_2d::{GlyphInstance, Primitive2dPipeline};
 use spring::simple_spring_damper_exact;
 
-use crate::pipelines::PrimitiveUniforms;
+use crate::pipelines::basic::BasicUniforms;
+use crate::pipelines::primitive_2d::{
+    PrimitiveUniforms, TILE_SIZE_COARSE, TILE_SIZE_FINE, TILE_STRIDE_COARSE, TILE_STRIDE_FINE,
+};
 
 mod fonts;
 mod helpers;
@@ -848,11 +851,17 @@ struct DrawState<'gpu> {
     width: u32,
     height: u32,
 
+    tile_resolution_coarse_x: u32,
+    tile_resolution_coarse_y: u32,
+    tile_resolution_fine_x: u32,
+    tile_resolution_fine_y: u32,
+
     depth_image: Image,
     rt_image: Image,
     ui_image: Image,
 
-    tile_bitmap_buffer: PersistentBuffer<'gpu>,
+    coarse_tile_bitmap_buffer: Buffer,
+    fine_tile_bitmap_buffer: Buffer,
 
     glyph_atlas_image: Image,
 
@@ -873,20 +882,6 @@ impl<'gpu> DrawState<'gpu> {
         let models = Models::load(gpu);
         let images = Images::load(gpu, thread_token);
 
-        const MAX_PRIMS: usize = 0x10000;
-        const TILE_STRIDE: usize = MAX_PRIMS / 32;
-        const MAX_TILES_X: usize = 180;
-        const MAX_TILES_Y: usize = 113;
-
-        const BITMAP_SIZE: usize = (MAX_TILES_X * MAX_TILES_Y * TILE_STRIDE) * 4;
-
-        let tile_bitmap_buffer = gpu.create_persistent_buffer(&BufferDesc {
-            memory_location: MemoryLocation::Device,
-            host_mapped: true,
-            usage: BufferUsageFlags::STORAGE,
-            size: BITMAP_SIZE,
-        });
-
         Self {
             gpu,
             basic_pipeline,
@@ -894,10 +889,15 @@ impl<'gpu> DrawState<'gpu> {
             display_transform_pipeline,
             width: 0,
             height: 0,
+            tile_resolution_coarse_x: 0,
+            tile_resolution_coarse_y: 0,
+            tile_resolution_fine_x: 0,
+            tile_resolution_fine_y: 0,
             depth_image: default(),
             rt_image: default(),
             ui_image: default(),
-            tile_bitmap_buffer,
+            coarse_tile_bitmap_buffer: default(),
+            fine_tile_bitmap_buffer: default(),
             glyph_atlas_image: default(),
             samplers,
             models,
@@ -1008,6 +1008,49 @@ impl<'gpu> DrawState<'gpu> {
                 gpu.destroy_image(frame, self.rt_image);
                 gpu.destroy_image(frame, self.ui_image);
 
+                let tile_resolution_coarse_x = (width + (TILE_SIZE_COARSE - 1)) / TILE_SIZE_COARSE;
+                let tile_resolution_coarse_y = (height + (TILE_SIZE_COARSE - 1)) / TILE_SIZE_COARSE;
+                let tile_resolution_fine_x = (width + (TILE_SIZE_FINE - 1)) / TILE_SIZE_FINE;
+                let tile_resolution_fine_y = (height + (TILE_SIZE_FINE - 1)) / TILE_SIZE_FINE;
+
+                if tile_resolution_coarse_x != self.tile_resolution_coarse_x
+                    || tile_resolution_coarse_y != self.tile_resolution_coarse_y
+                    || tile_resolution_fine_x != self.tile_resolution_fine_x
+                    || tile_resolution_fine_y != self.tile_resolution_fine_y
+                {
+                    gpu.destroy_buffer(frame, self.coarse_tile_bitmap_buffer);
+                    gpu.destroy_buffer(frame, self.fine_tile_bitmap_buffer);
+
+                    let coarse_bitmap_buffer_size = tile_resolution_coarse_x
+                        * tile_resolution_coarse_y
+                        * TILE_STRIDE_COARSE
+                        * std::mem::size_of::<u32>() as u32;
+
+                    let fine_bitmap_buffer_size = tile_resolution_fine_x
+                        * tile_resolution_fine_y
+                        * TILE_STRIDE_FINE
+                        * std::mem::size_of::<u32>() as u32;
+
+                    self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
+                        memory_location: MemoryLocation::Device,
+                        host_mapped: false,
+                        usage: BufferUsageFlags::STORAGE,
+                        size: coarse_bitmap_buffer_size.widen(),
+                    });
+
+                    self.fine_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
+                        memory_location: MemoryLocation::Device,
+                        host_mapped: false,
+                        usage: BufferUsageFlags::STORAGE,
+                        size: fine_bitmap_buffer_size.widen(),
+                    });
+
+                    self.tile_resolution_coarse_x = tile_resolution_coarse_x;
+                    self.tile_resolution_coarse_y = tile_resolution_coarse_y;
+                    self.tile_resolution_fine_x = tile_resolution_fine_x;
+                    self.tile_resolution_fine_y = tile_resolution_fine_y;
+                }
+
                 self.depth_image = gpu.create_image(&ImageDesc {
                     memory_location: MemoryLocation::Device,
                     host_mapped: false,
@@ -1271,16 +1314,26 @@ impl<'gpu> DrawState<'gpu> {
 
             // Render UI
             {
+                let num_primitives = ui_state.primitive_instances.len() as u32;
+                let num_primitives_32 = (num_primitives + 31) / 32;
+                let num_primitives_1024 = (num_primitives_32 + 31) / 32;
                 let uniforms_buffer = gpu.request_transient_buffer_with_data(
                     frame,
                     thread_token,
                     BufferUsageFlags::UNIFORM,
                     &PrimitiveUniforms {
-                        screen_width: width,
-                        screen_height: height,
-                        atlas_width,
-                        atlas_height,
-                        num_primitives: ui_state.primitive_instances.len() as u32,
+                        screen_resolution_x: self.width,
+                        screen_resolution_y: self.height,
+                        tile_resolution_coarse_x: self.tile_resolution_coarse_x,
+                        tile_resolution_coarse_y: self.tile_resolution_coarse_y,
+                        tile_resolution_fine_x: self.tile_resolution_fine_x,
+                        tile_resolution_fine_y: self.tile_resolution_fine_y,
+                        atlas_resolution_x: atlas_width,
+                        atlas_resolution_y: atlas_height,
+                        num_primitives,
+                        num_primitives_32,
+                        num_primitives_1024,
+                        _pad0: 0,
                     },
                 );
                 let glyph_buffer = gpu.request_transient_buffer_with_data(
@@ -1302,7 +1355,7 @@ impl<'gpu> DrawState<'gpu> {
                     &[0u32],
                 );
 
-                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.bin_pipeline);
+                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.coarse_bin_pipeline);
 
                 gpu.cmd_set_bind_group(
                     frame,
@@ -1346,11 +1399,20 @@ impl<'gpu> DrawState<'gpu> {
                         Bind {
                             binding: 6,
                             array_element: 0,
-                            typed: TypedBind::StorageBuffer(&[self.tile_bitmap_buffer.to_arg()]),
+                            typed: TypedBind::StorageBuffer(&[self
+                                .coarse_tile_bitmap_buffer
+                                .to_arg()]),
                         },
                         Bind {
                             binding: 7,
                             array_element: 0,
+                            typed: TypedBind::StorageBuffer(&[self
+                                .fine_tile_bitmap_buffer
+                                .to_arg()]),
+                        },
+                        Bind {
+                            binding: 8,
+                            array_element: 0,
                             typed: TypedBind::StorageImage(&[(
                                 ImageLayout::General,
                                 self.ui_image,
@@ -1361,9 +1423,27 @@ impl<'gpu> DrawState<'gpu> {
 
                 gpu.cmd_dispatch(
                     cmd_encoder,
-                    (ui_state.primitive_instances.len() as u32 + 63) / 64,
-                    (self.width + 15) / 16,
-                    (self.height + 15) / 16,
+                    (num_primitives + 63) / 64,
+                    self.tile_resolution_coarse_x,
+                    self.tile_resolution_coarse_y,
+                );
+
+                gpu.cmd_barrier(
+                    cmd_encoder,
+                    Some(&GlobalBarrier {
+                        prev_access: &[Access::ShaderWrite],
+                        next_access: &[Access::ShaderOtherRead],
+                    }),
+                    &[],
+                );
+
+                gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_bin_pipeline);
+
+                gpu.cmd_dispatch(
+                    cmd_encoder,
+                    (num_primitives_32 + 63) / 64,
+                    self.tile_resolution_fine_x,
+                    self.tile_resolution_fine_y,
                 );
 
                 gpu.cmd_barrier(
@@ -1379,8 +1459,8 @@ impl<'gpu> DrawState<'gpu> {
 
                 gpu.cmd_dispatch(
                     cmd_encoder,
-                    (self.width + 15) / 16,
-                    (self.height + 15) / 16,
+                    self.tile_resolution_fine_x,
+                    self.tile_resolution_fine_y,
                     1,
                 );
 
index d6b57ecd68c0565a20a31678e3a1a2981028a2bf..2c861f88973876efe00ea68f3c93854b8d0726b7 100644 (file)
@@ -1,7 +1,3 @@
-mod basic;
-mod display_transform;
-mod primitive_2d;
-
-pub use basic::{BasicPipeline, BasicUniforms, Vertex};
-pub use display_transform::DisplayTransformPipeline;
-pub use primitive_2d::{GlyphInstance, Primitive2dPipeline, PrimitiveUniforms};
+pub mod basic;
+pub mod display_transform;
+pub mod primitive_2d;
index 8e0fa6476c7f2e8850ab389dea511f78122af5de..c91b00d0c305fbbd622ba96cd16104335b4f923f 100644 (file)
@@ -6,14 +6,31 @@ use narcissus_gpu::{
 
 use crate::Gpu;
 
+pub const MAX_PRIMS: u32 = 0x20000; // Same value as MAX_PRIMS in primitive_2d.h; one bitmap bit per primitive.
+pub const TILE_SIZE_COARSE: u32 = 128; // Same value as TILE_SIZE_COARSE in primitive_2d.h.
+pub const TILE_SIZE_FINE: u32 = 16; // Same value as TILE_SIZE_FINE in primitive_2d.h.
+pub const TILE_BITMAP_WORDS_L1: u32 = MAX_PRIMS / 32 / 32; // u32 words in a tile's L1 bitmap (one bit per L0 word).
+pub const TILE_BITMAP_WORDS_L0: u32 = MAX_PRIMS / 32; // u32 words in a tile's L0 bitmap (one bit per primitive).
+pub const TILE_STRIDE_COARSE: u32 = TILE_BITMAP_WORDS_L0; // u32 words per coarse tile; used to size the coarse bitmap buffer.
+pub const TILE_STRIDE_FINE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1; // u32 words per fine tile; used to size the fine bitmap buffer.
+
 #[allow(unused)]
 #[repr(C)]
 pub struct PrimitiveUniforms { // Host-side uniform data; each _x/_y pair lines up with a uvec2 in the GLSL PrimitiveUniforms.
-    pub screen_width: u32,
-    pub screen_height: u32,
-    pub atlas_width: u32,
-    pub atlas_height: u32,
+    pub screen_resolution_x: u32, // GLSL: screen_resolution.x
+    pub screen_resolution_y: u32, // GLSL: screen_resolution.y
+    pub tile_resolution_coarse_x: u32, // GLSL: tile_resolution_coarse.x
+    pub tile_resolution_coarse_y: u32, // GLSL: tile_resolution_coarse.y
+    pub tile_resolution_fine_x: u32, // GLSL: tile_resolution_fine.x
+    pub tile_resolution_fine_y: u32, // GLSL: tile_resolution_fine.y
+    pub atlas_resolution_x: u32, // GLSL: atlas_resolution.x
+    pub atlas_resolution_y: u32, // GLSL: atlas_resolution.y
+
     pub num_primitives: u32,
+    pub num_primitives_32: u32, // Filled by the caller with (num_primitives + 31) / 32.
+    pub num_primitives_1024: u32, // Filled by the caller with (num_primitives_32 + 31) / 32.
+
+    pub _pad0: u32, // Occupies the slot of pad_1 in the GLSL struct; the caller sets it to 0.
 }
 
 #[allow(unused)]
@@ -27,7 +44,8 @@ pub struct GlyphInstance {
 
 pub struct Primitive2dPipeline { // Bind group layout plus the compute pipelines used to bin and rasterize 2D primitives.
     pub bind_group_layout: BindGroupLayout,
-    pub bin_pipeline: Pipeline,
+    pub coarse_bin_pipeline: Pipeline, // Built from PRIMITIVE_2D_BIN_COARSE_COMP_SPV.
+    pub fine_bin_pipeline: Pipeline, // Built from PRIMITIVE_2D_BIN_FINE_COMP_SPV.
     pub rasterize_pipeline: Pipeline,
 }
 
@@ -46,16 +64,26 @@ impl Primitive2dPipeline {
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
             // Primitive Instances
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
-            // Tiles
+            // Coarse Tiles
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
-            // UI
+            // Fine Tiles
+            BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
+            // UI Image Output
             BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
         ]);
 
-        let bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
+        let coarse_bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
+            shader: ShaderDesc {
+                entry: c"main",
+                code: shark_shaders::PRIMITIVE_2D_BIN_COARSE_COMP_SPV,
+            },
+            bind_group_layouts: &[bind_group_layout],
+        });
+
+        let fine_bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
             shader: ShaderDesc {
                 entry: c"main",
-                code: shark_shaders::PRIMITIVE_2D_BIN_COMP_SPV,
+                code: shark_shaders::PRIMITIVE_2D_BIN_FINE_COMP_SPV,
             },
             bind_group_layouts: &[bind_group_layout],
         });
@@ -70,7 +98,8 @@ impl Primitive2dPipeline {
 
         Self {
             bind_group_layout,
-            bin_pipeline,
+            coarse_bin_pipeline,
+            fine_bin_pipeline,
             rasterize_pipeline,
         }
     }