},
Shader {
stage: "comp",
- name: "primitive_2d_bin_coarse",
- },
- Shader {
- stage: "comp",
- name: "primitive_2d_bin_fine",
- },
- Shader {
- stage: "comp",
- name: "primitive_2d_clear_fine",
+ name: "primitive_2d_bin",
},
Shader {
stage: "comp",
uint num_primitives;
uint num_primitives_32;
uint num_primitives_1024;
-
- uint tile_stride_fine;
-
- uvec2 tile_offset;
+ uint tile_stride;
};
layout(std430, push_constant) uniform uniformBuffer {
layout (set = 0, binding = 4, rgba16f) uniform writeonly image2D composited_output;
-layout(std430, set = 0, binding = 5) readonly buffer fineTileCountBufferRead {
- uint fine_count_ro[];
-};
-
float srgb_oetf(float a) {
return (.0031308f >= a) ? 12.92f * a : 1.055f * pow(a, .4166666666666667f) - .055f;
}
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
void main() {
- const uvec2 tile_coord = gl_WorkGroupID.xy >> 1;
- const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_fine + tile_coord.x;
-
const vec3 stimulus = imageLoad(layer_rt, ivec2(gl_GlobalInvocationID.xy)).rgb;
const vec3 transformed = tony_mc_mapface(stimulus);
vec3 composited = srgb_oetf(transformed);
- [[branch]]
- if (fine_count_ro[tile_index] != 0) {
- const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba;
- composited = ui.rgb + (composited * (1.0 - ui.a));
- }
+ const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba;
+ composited = ui.rgb + (composited * (1.0 - ui.a));
imageStore(composited_output, ivec2(gl_GlobalInvocationID.xy), vec4(composited, 1.0));
}
-#define MAX_PRIMS 0x20000u
-#define TILE_SIZE_COARSE 64
-#define TILE_SIZE_FINE 16
-#define TILE_SIZE_MUL (TILE_SIZE_COARSE / TILE_SIZE_FINE)
+#define TILE_SIZE 32
+
+#define MAX_PRIMS (1 << 18)
#define TILE_BITMAP_L1_WORDS (MAX_PRIMS / 32 / 32)
#define TILE_BITMAP_L0_WORDS (MAX_PRIMS / 32)
-#define TILE_STRIDE_COARSE TILE_BITMAP_L0_WORDS
-#define TILE_STRIDE_FINE (TILE_BITMAP_L0_WORDS + TILE_BITMAP_L1_WORDS)
-#define TILE_BITMAP_OFFSET_COARSE 0
-#define TILE_BITMAP_L1_OFFSET_FINE 0
-#define TILE_BITMAP_L0_OFFSET_FINE TILE_BITMAP_L1_WORDS
-
-#define TILE_DISPATCH_X 15
+#define TILE_STRIDE (TILE_BITMAP_L0_WORDS + TILE_BITMAP_L1_WORDS)
+#define TILE_BITMAP_L1_OFFSET 0
+#define TILE_BITMAP_L0_OFFSET TILE_BITMAP_L1_WORDS
struct PrimitiveUniforms {
uvec2 screen_resolution;
uint num_primitives;
uint num_primitives_32;
uint num_primitives_1024;
- uint tile_stride_fine;
-
- uvec2 tile_offset_coarse;
+ uint tile_stride;
};
struct Glyph {
--- /dev/null
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#extension GL_KHR_shader_subgroup_vote : require
+#extension GL_KHR_shader_subgroup_ballot : require
+
+#include "primitive_2d.h"
+
+// TODO: Spec constant support for different subgroup sizes.
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+shared uint bitmap_0[64];
+
+// Bins one batch of 2048 primitives into the tiles of one 8x8-tile bin.
+//
+// gl_WorkGroupID.x selects the batch of 2048 primitives and
+// gl_GlobalInvocationID.yz selects the bin. For every on-screen tile in the
+// bin this writes a two-level bitmap into tile_bitmap_wo:
+//   - L0: one bit per primitive (64 words per batch), set when test_glyph
+//     reports the primitive against the tile bounds.
+//   - L1: one bit per L0 word (2 words per batch), set when that L0 word is
+//     non-zero.
+// L0 words are only written for words that had at least one hit at bin level;
+// the L1 words are always written.
+//
+// NOTE(review): indexing bitmap_0 with gl_SubgroupInvocationID and consuming
+// only ballot.xy appears to assume a subgroup size of 64 (equal to the
+// workgroup size) - see the TODO on the layout declaration; confirm.
+void main() {
+    const uvec2 bin_coord = gl_GlobalInvocationID.yz;
+    const uvec2 bin_min = bin_coord * TILE_SIZE * 8;
+    const uvec2 bin_max = min(bin_min + TILE_SIZE * 8, primitive_uniforms.screen_resolution);
+
+    // Pass 1: test every primitive of the batch against the whole bin, 64
+    // primitives per iteration, and store the ballot as two 32-bit words.
+    for (uint i = 0; i < 2048; i += 64) {
+        const uint prim_index = gl_WorkGroupID.x * 2048 + i + gl_SubgroupInvocationID;
+        bool intersects = false;
+        if (prim_index < primitive_uniforms.num_primitives) {
+            intersects = test_glyph(prim_index, bin_min, bin_max);
+        }
+        const uvec4 ballot = subgroupBallot(intersects);
+        bitmap_0[i / 32 + 0] = ballot.x;
+        bitmap_0[i / 32 + 1] = ballot.y;
+    }
+
+    // bitmap_0 is read across invocations below. memoryBarrierShared() only
+    // orders this invocation's shared-memory accesses; barrier() is needed so
+    // that every invocation has finished writing before any of them reads.
+    // Control flow is uniform here, so the barrier is legal.
+    memoryBarrierShared();
+    barrier();
+
+    // Bin-level L1 bitmap: bit n is set when bitmap_0[n] is non-zero.
+    uint bitmap_1[2];
+    {
+        const uvec4 ballot = subgroupBallot(bitmap_0[gl_SubgroupInvocationID] != 0);
+        bitmap_1[0] = ballot.x;
+        bitmap_1[1] = ballot.y;
+    }
+
+    // Pass 2: refine the bin-level result for each of the 8x8 tiles in the bin.
+    for (uint y = 0; y < 8; y++) {
+        for (uint x = 0; x < 8; x++) {
+            const uvec2 tile_coord = gl_GlobalInvocationID.yz * 8 + uvec2(x, y);
+            const uvec2 tile_min = tile_coord * TILE_SIZE;
+            const uvec2 tile_max = min(tile_min + TILE_SIZE, primitive_uniforms.screen_resolution);
+            // Skip tiles that start at or beyond the screen edge.
+            [[branch]]
+            if (any(greaterThanEqual(tile_min, tile_max))) {
+                continue;
+            }
+
+            const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride + tile_coord.x;
+
+            for (uint i = 0; i < 2; i++) {
+                uint out_1 = 0;
+
+                // Visit only the L0 words that had a hit at bin level.
+                uint word_1 = bitmap_1[i];
+                while (word_1 != 0) {
+                    const uint bit_1 = findLSB(word_1);
+                    // Clear the lowest set bit.
+                    word_1 ^= word_1 & -word_1;
+
+                    uint out_0 = 0;
+                    uint index_0 = i * 32 + bit_1;
+                    uint word_0 = bitmap_0[index_0];
+                    while (word_0 != 0) {
+                        const uint bit_0 = findLSB(word_0);
+                        // Clear the lowest set bit.
+                        word_0 ^= word_0 & -word_0;
+
+                        const uint prim_index = gl_WorkGroupID.x * 2048 + index_0 * 32 + bit_0;
+                        if (test_glyph(prim_index, tile_min, tile_max)) {
+                            out_0 |= 1u << bit_0;
+                        }
+                    }
+
+                    if (out_0 != 0) {
+                        out_1 |= 1u << bit_1;
+                    }
+                    tile_bitmap_wo[tile_index * TILE_STRIDE + TILE_BITMAP_L0_OFFSET + gl_WorkGroupID.x * 64 + index_0] = out_0;
+                }
+
+                tile_bitmap_wo[tile_index * TILE_STRIDE + TILE_BITMAP_L1_OFFSET + gl_WorkGroupID.x * 2 + i] = out_1;
+            }
+        }
+    }
+}
+++ /dev/null
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_control_flow_attributes : require
-
-#extension GL_KHR_shader_subgroup_vote : require
-#extension GL_KHR_shader_subgroup_ballot : require
-
-#include "primitive_2d.h"
-
-// TODO: Spec constant support for different subgroup sizes.
-layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
-
-void main() {
- const uvec2 tile_coord = gl_GlobalInvocationID.yz;
- const uvec2 tile_coord_global = tile_coord + primitive_uniforms.tile_offset_coarse;
-
- const uvec2 tile_min = tile_coord_global * TILE_SIZE_COARSE;
- const uvec2 tile_max = min(tile_min + TILE_SIZE_COARSE, primitive_uniforms.screen_resolution);
-
- const uint primitive_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
-
- bool intersects = false;
- if (primitive_index < primitive_uniforms.num_primitives) {
- intersects = test_glyph(primitive_index, tile_min, tile_max);
- }
-
- uvec4 ballot_result = subgroupBallot(intersects);
- if (subgroupElect()) { // managed democracy wins again
- const uint tile_index = tile_coord.y * (primitive_uniforms.tile_stride_fine / TILE_SIZE_MUL) + tile_coord.x;
- const uint tile_offset = tile_index * TILE_STRIDE_COARSE;
- coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
- coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
- }
-}
+++ /dev/null
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_control_flow_attributes : require
-
-#extension GL_KHR_shader_subgroup_vote : require
-#extension GL_KHR_shader_subgroup_ballot : require
-
-#include "primitive_2d.h"
-
-// TODO: Spec constant support for different subgroup sizes.
-layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
-
-void main() {
- const uvec2 tile_coord = gl_WorkGroupID.yz;
- const uint tile_index = tile_coord.y * TILE_DISPATCH_X * TILE_SIZE_MUL + tile_coord.x;
-
- const uvec2 tile_coord_global = tile_coord + primitive_uniforms.tile_offset_coarse * TILE_SIZE_MUL;
- const uint tile_index_global = tile_coord_global.y * primitive_uniforms.tile_stride_fine + tile_coord_global.x;
-
- const uvec2 tile_min = tile_coord_global * TILE_SIZE_FINE;
- const uvec2 tile_max = min(tile_min + TILE_SIZE_FINE, primitive_uniforms.screen_resolution);
-
- const uint index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
-
- uint bitmap_l0 = 0;
- if (index < primitive_uniforms.num_primitives_32) {
- const uvec2 tile_coord_coarse = (tile_coord / TILE_SIZE_MUL) + primitive_uniforms.tile_offset_coarse;
- const uint tile_index_coarse = tile_coord_coarse.y * (primitive_uniforms.tile_stride_fine / TILE_SIZE_MUL) + tile_coord_coarse.x;
- const uint tile_base_coarse = tile_index_coarse * TILE_STRIDE_COARSE;
- const uint tile_bitmap_base_coarse = tile_base_coarse + TILE_BITMAP_OFFSET_COARSE;
-
- uint bitmap_coarse = coarse_bitmap_ro[tile_bitmap_base_coarse + index];
- while (bitmap_coarse != 0) {
- const uint i = findLSB(bitmap_coarse);
- bitmap_coarse ^= bitmap_coarse & -bitmap_coarse;
-
- const uint primitive_index = index * 32 + i;
- if (test_glyph(primitive_index, tile_min, tile_max)) {
- bitmap_l0 |= 1 << i;
- }
- }
- }
-
- const uint tile_base_fine = tile_index * TILE_STRIDE_FINE;
- const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE;
-
- // Write the L0 per-primitive bitmap.
- fine_bitmap_wo[tile_bitmap_l0_base_fine + index] = bitmap_l0;
-
- uvec4 ballot_result = subgroupBallot(bitmap_l0 != 0);
- if (subgroupElect()) {
- // Write the L1 per-bitmap-word bitmap.
- const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE;
- fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
- fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
-
- const uint count = uint(ballot_result.x != 0) + uint(ballot_result.y != 0);
- if (count != 0) {
- atomicAdd(fine_count_wo[tile_index_global], count);
- }
- }
-}
GlyphInstance glyph_instances[];
};
-layout(std430, set = 0, binding = 4) readonly buffer coarseTileBufferRead {
- uint coarse_bitmap_ro[];
+layout(std430, set = 0, binding = 4) readonly buffer tileBufferRead {
+ uint tile_bitmap_ro[];
};
-layout(std430, set = 0, binding = 4) writeonly buffer coarseTileBufferWrite {
- uint coarse_bitmap_wo[];
+layout(std430, set = 0, binding = 4) writeonly buffer tileBufferWrite {
+ uint tile_bitmap_wo[];
};
-layout(std430, set = 0, binding = 5) readonly buffer fineTileBitmapBufferRead {
- uint fine_bitmap_ro[];
-};
-
-layout(std430, set = 0, binding = 5) writeonly buffer fineTileBitmapBufferWrite {
- uint fine_bitmap_wo[];
-};
-
-layout(std430, set = 0, binding = 6) readonly buffer fineTileCountBufferRead {
- uint fine_count_ro[];
-};
-
-layout(std430, set = 0, binding = 6) writeonly buffer fineTileCountBufferWrite {
- uint fine_count_wo[];
-};
-
-layout (set = 0, binding = 7, rgba16f) uniform writeonly image2D ui_image;
+layout (set = 0, binding = 5, rgba16f) uniform writeonly image2D ui_image;
+++ /dev/null
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_control_flow_attributes : require
-
-#include "primitive_2d.h"
-
-layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
-
-void main() {
- fine_count_wo[gl_GlobalInvocationID.x] = 0;
-}
#include "primitive_2d.h"
-layout (local_size_x = TILE_SIZE_FINE, local_size_y = TILE_SIZE_FINE, local_size_z = 1) in;
+layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
#define DEBUG_SHOW_TILES 0
#endif
void main() {
- const uvec2 tile_coord = gl_WorkGroupID.xy;
- const uint tile_index = tile_coord.y * TILE_DISPATCH_X * TILE_SIZE_MUL + tile_coord.x;
-
- const uvec2 tile_coord_global = tile_coord + primitive_uniforms.tile_offset_coarse * TILE_SIZE_MUL;
- const uint tile_index_global = tile_coord_global.y * primitive_uniforms.tile_stride_fine + tile_coord_global.x;
-
- const uint tile_base_fine = tile_index * TILE_STRIDE_FINE;
- const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE;
- const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE;
+ const uvec2 tile_coord = gl_WorkGroupID.xy / 4;
+ const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride + tile_coord.x;
+ const uint tile_base = tile_index * TILE_STRIDE;
+ const uint tile_bitmap_l1_base_fine = tile_base + TILE_BITMAP_L1_OFFSET;
+ const uint tile_bitmap_l0_base_fine = tile_base + TILE_BITMAP_L0_OFFSET;
#if DEBUG_SHOW_TILES == 1
- uint count = 0;
+ int count = 0;
// For each tile, iterate over all words in the L1 bitmap.
for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
// For each word, iterate all set bits.
- uint bitmap_l1 = fine_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
+ uint bitmap_l1 = tile_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
while (bitmap_l1 != 0) {
const uint i = findLSB(bitmap_l1);
// For each set bit in the L1 bitmap, iterate the set bits in the
// corresponding L0 bitmap.
const uint index_l0 = index_l1 * 32 + i;
- uint bitmap_l0 = fine_bitmap_ro[tile_bitmap_l0_base_fine + index_l0];
+ uint bitmap_l0 = tile_bitmap_ro[tile_bitmap_l0_base_fine + index_l0];
count += bitCount(bitmap_l0);
}
}
- const vec3 color = plasma_quintic(float(count) / 50.0);
- imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy + primitive_uniforms.tile_offset_coarse * TILE_SIZE_COARSE), vec4(color, 1.0));
+ const vec3 color = plasma_quintic(float(count) / 100.0);
+ imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
#else
vec4 accum = vec4(0.0);
- uint word_count = fine_count_ro[tile_index_global];
- if (word_count == 0) {
- return;
- }
-
// For each tile, iterate over all words in the L1 bitmap.
- for (int index_l1 = 0; word_count != 0 && index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
+ for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
// For each word, iterate all set bits.
- uint bitmap_l1 = fine_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
-
- if (bitmap_l1 != 0)
- word_count -= 1;
+ uint bitmap_l1 = tile_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
while (bitmap_l1 != 0) {
const uint i = findLSB(bitmap_l1);
// For each set bit in the L1 bitmap, iterate the set bits in the
// corresponding L0 bitmap.
const uint index_l0 = index_l1 * 32 + i;
- uint bitmap_l0 = fine_bitmap_ro[tile_bitmap_l0_base_fine + index_l0];
+ uint bitmap_l0 = tile_bitmap_ro[tile_bitmap_l0_base_fine + index_l0];
while (bitmap_l0 != 0) {
const uint j = findLSB(bitmap_l0);
bitmap_l0 ^= bitmap_l0 & -bitmap_l0;
const Glyph gl = glyphs[gi.index];
const vec2 glyph_min = gi.position + gl.offset_min;
const vec2 glyph_max = gi.position + gl.offset_max;
- const vec2 sample_center = gl_GlobalInvocationID.xy + primitive_uniforms.tile_offset_coarse * TILE_SIZE_COARSE + vec2(0.5);
+ const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5);
+ [[branch]]
if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) {
const vec2 glyph_size = gl.offset_max - gl.offset_min;
const vec4 color = unpackUnorm4x8(gi.color).bgra;
}
}
- imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy + primitive_uniforms.tile_offset_coarse * TILE_SIZE_COARSE), accum);
+ imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy), accum);
#endif
}
use spring::simple_spring_damper_exact;
use crate::pipelines::basic::BasicUniforms;
-use crate::pipelines::primitive_2d::{
- PrimitiveUniforms, TILE_DISPATCH_COARSE_X, TILE_DISPATCH_COARSE_Y, TILE_DISPATCH_FINE_X,
- TILE_DISPATCH_FINE_Y, TILE_SIZE_COARSE, TILE_SIZE_FINE, TILE_STRIDE_COARSE, TILE_STRIDE_FINE,
-};
+use crate::pipelines::primitive_2d::{PrimitiveUniforms, TILE_SIZE, TILE_STRIDE};
mod fonts;
mod helpers;
width: u32,
height: u32,
- tile_resolution_coarse_x: u32,
- tile_resolution_coarse_y: u32,
- tile_resolution_fine_x: u32,
- tile_resolution_fine_y: u32,
+ tile_resolution_x: u32,
+ tile_resolution_y: u32,
depth_image: Image,
rt_image: Image,
ui_image: Image,
- coarse_tile_bitmap_buffer: Buffer,
- fine_tile_bitmap_buffer: Buffer,
- fine_tile_color_buffer: Buffer,
+ tile_bitmap_buffer: Buffer,
glyph_atlas_image: Image,
let models = Models::load(gpu);
let images = Images::load(gpu, thread_token);
- let fine_bitmap_buffer_size = TILE_DISPATCH_FINE_X
- * TILE_DISPATCH_FINE_Y
- * TILE_STRIDE_FINE
- * std::mem::size_of::<u32>() as u32;
-
- let fine_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
- memory_location: MemoryLocation::Device,
- host_mapped: false,
- usage: BufferUsageFlags::STORAGE,
- size: fine_bitmap_buffer_size.widen(),
- });
-
Self {
gpu,
basic_pipeline,
display_transform_pipeline,
width: 0,
height: 0,
- tile_resolution_coarse_x: 0,
- tile_resolution_coarse_y: 0,
- tile_resolution_fine_x: 0,
- tile_resolution_fine_y: 0,
+ tile_resolution_x: 0,
+ tile_resolution_y: 0,
depth_image: default(),
rt_image: default(),
ui_image: default(),
- coarse_tile_bitmap_buffer: default(),
- fine_tile_bitmap_buffer,
- fine_tile_color_buffer: default(),
+ tile_bitmap_buffer: default(),
glyph_atlas_image: default(),
samplers,
models,
gpu.destroy_image(frame, self.rt_image);
gpu.destroy_image(frame, self.ui_image);
- let tile_resolution_coarse_x = (width + (TILE_SIZE_COARSE - 1)) / TILE_SIZE_COARSE;
- let tile_resolution_coarse_y = (height + (TILE_SIZE_COARSE - 1)) / TILE_SIZE_COARSE;
- let tile_resolution_fine_x = (width + (TILE_SIZE_FINE - 1)) / TILE_SIZE_FINE;
- let tile_resolution_fine_y = (height + (TILE_SIZE_FINE - 1)) / TILE_SIZE_FINE;
+ let tile_resolution_x = (width + (TILE_SIZE - 1)) / TILE_SIZE;
+ let tile_resolution_y = (height + (TILE_SIZE - 1)) / TILE_SIZE;
- if tile_resolution_coarse_x != self.tile_resolution_coarse_x
- || tile_resolution_coarse_y != self.tile_resolution_coarse_y
- || tile_resolution_fine_x != self.tile_resolution_fine_x
- || tile_resolution_fine_y != self.tile_resolution_fine_y
+ if tile_resolution_x != self.tile_resolution_x
+ || tile_resolution_y != self.tile_resolution_y
{
- gpu.destroy_buffer(frame, self.fine_tile_color_buffer);
- gpu.destroy_buffer(frame, self.coarse_tile_bitmap_buffer);
+ gpu.destroy_buffer(frame, self.tile_bitmap_buffer);
- let coarse_bitmap_buffer_size = tile_resolution_coarse_x
- * tile_resolution_coarse_y
- * TILE_STRIDE_COARSE
+ let bitmap_buffer_size = tile_resolution_x
+ * tile_resolution_y
+ * TILE_STRIDE
* std::mem::size_of::<u32>() as u32;
- self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
+ self.tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
memory_location: MemoryLocation::Device,
host_mapped: false,
usage: BufferUsageFlags::STORAGE,
- size: coarse_bitmap_buffer_size.widen(),
+ size: bitmap_buffer_size.widen(),
});
- // align to the workgroup size to simplify shader
- let fine_color_buffer_size =
- ((tile_resolution_fine_x * tile_resolution_fine_y + 63) & !63)
- * std::mem::size_of::<u32>() as u32;
+ println!("tile_resolution: ({tile_resolution_x},{tile_resolution_y})");
- self.fine_tile_color_buffer = gpu.create_buffer(&BufferDesc {
- memory_location: MemoryLocation::Device,
- host_mapped: false,
- usage: BufferUsageFlags::STORAGE,
- size: fine_color_buffer_size.widen(),
- });
-
- self.tile_resolution_coarse_x = tile_resolution_coarse_x;
- self.tile_resolution_coarse_y = tile_resolution_coarse_y;
- self.tile_resolution_fine_x = tile_resolution_fine_x;
- self.tile_resolution_fine_y = tile_resolution_fine_y;
+ self.tile_resolution_x = tile_resolution_x;
+ self.tile_resolution_y = tile_resolution_y;
}
self.depth_image = gpu.create_image(&ImageDesc {
// Render UI
{
- gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_clear_pipeline);
-
let glyph_buffer = gpu.request_transient_buffer_with_data(
frame,
thread_token,
ui_state.primitive_instances.as_slice(),
);
+ let num_primitives = ui_state.primitive_instances.len() as u32;
+ let num_primitives_32 = (num_primitives + 31) / 32;
+ let num_primitives_1024 = (num_primitives_32 + 31) / 32;
+
+ ui_state.primitive_instances.clear();
+
+ gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.bin_pipeline);
+
gpu.cmd_set_bind_group(
frame,
cmd_encoder,
Bind {
binding: 4,
array_element: 0,
- typed: TypedBind::StorageBuffer(&[self
- .coarse_tile_bitmap_buffer
- .to_arg()]),
+ typed: TypedBind::StorageBuffer(&[self.tile_bitmap_buffer.to_arg()]),
},
Bind {
binding: 5,
array_element: 0,
- typed: TypedBind::StorageBuffer(&[self
- .fine_tile_bitmap_buffer
- .to_arg()]),
- },
- Bind {
- binding: 6,
- array_element: 0,
- typed: TypedBind::StorageBuffer(&[self
- .fine_tile_color_buffer
- .to_arg()]),
- },
- Bind {
- binding: 7,
- array_element: 0,
typed: TypedBind::StorageImage(&[(
ImageLayout::General,
self.ui_image,
],
);
- gpu.cmd_dispatch(
- cmd_encoder,
- (self.tile_resolution_fine_x * self.tile_resolution_fine_y + 63) / 64,
- 1,
- 1,
- );
-
- let num_primitives = ui_state.primitive_instances.len() as u32;
- let num_primitives_32 = (num_primitives + 31) / 32;
- let num_primitives_1024 = (num_primitives_32 + 31) / 32;
-
- ui_state.primitive_instances.clear();
-
- gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.coarse_bin_pipeline);
-
gpu.cmd_push_constants(
cmd_encoder,
ShaderStageFlags::COMPUTE,
num_primitives,
num_primitives_32,
num_primitives_1024,
- tile_stride_fine: self.tile_resolution_fine_x,
- tile_offset_x: 0,
- tile_offset_y: 0,
+ tile_stride: self.tile_resolution_x,
},
);
gpu.cmd_dispatch(
cmd_encoder,
- (num_primitives + 63) / 64,
- self.tile_resolution_coarse_x,
- self.tile_resolution_coarse_y,
+ (num_primitives + 2047) / 2048,
+ (self.tile_resolution_x + 3) / 4,
+ (self.tile_resolution_y + 3) / 4,
);
gpu.cmd_barrier(
&[],
);
- for tile_offset_y in
- (0..self.tile_resolution_coarse_y).step_by(TILE_DISPATCH_COARSE_Y as usize)
- {
- for tile_offset_x in
- (0..self.tile_resolution_coarse_x).step_by(TILE_DISPATCH_COARSE_X as usize)
- {
- let tile_offset_fine_x =
- tile_offset_x * (TILE_SIZE_COARSE / TILE_SIZE_FINE);
-
- let tile_offset_fine_y =
- tile_offset_y * (TILE_SIZE_COARSE / TILE_SIZE_FINE);
-
- let fine_dispatch_x = (tile_offset_fine_x + TILE_DISPATCH_FINE_X)
- .min(self.tile_resolution_fine_x)
- - tile_offset_fine_x;
-
- let fine_dispatch_y = (tile_offset_fine_y + TILE_DISPATCH_FINE_Y)
- .min(self.tile_resolution_fine_y)
- - tile_offset_fine_y;
-
- gpu.cmd_push_constants(
- cmd_encoder,
- ShaderStageFlags::COMPUTE,
- 0,
- &PrimitiveUniforms {
- screen_resolution_x: self.width,
- screen_resolution_y: self.height,
- atlas_resolution_x: atlas_width,
- atlas_resolution_y: atlas_height,
- num_primitives,
- num_primitives_32,
- num_primitives_1024,
- tile_stride_fine: self.tile_resolution_fine_x,
- tile_offset_x,
- tile_offset_y,
- },
- );
-
- gpu.cmd_set_pipeline(
- cmd_encoder,
- self.primitive_2d_pipeline.fine_bin_pipeline,
- );
-
- gpu.cmd_dispatch(
- cmd_encoder,
- (num_primitives_32 + 63) / 64,
- fine_dispatch_x,
- fine_dispatch_y,
- );
-
- gpu.cmd_barrier(
- cmd_encoder,
- Some(&GlobalBarrier {
- prev_access: &[Access::ShaderWrite],
- next_access: &[Access::ShaderOtherRead],
- }),
- &[],
- );
-
- gpu.cmd_set_pipeline(
- cmd_encoder,
- self.primitive_2d_pipeline.rasterize_pipeline,
- );
-
- gpu.cmd_dispatch(cmd_encoder, fine_dispatch_x, fine_dispatch_y, 1);
- }
- }
+ gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.rasterize_pipeline);
+
+ gpu.cmd_dispatch(cmd_encoder, (self.width + 7) / 8, (self.height + 7) / 8, 1);
}
// Display transform and composite
swapchain_image,
)]),
},
- Bind {
- binding: 5,
- array_element: 0,
- typed: TypedBind::StorageBuffer(&[self
- .fine_tile_color_buffer
- .to_arg()]),
- },
],
);
);
}
- for i in 0..180 {
+ for i in 0..224 {
let i = i as f32;
ui_state.text_fmt(
5.0,
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
// Composited Output
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
- // Tile color buffer
- BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
]);
let layout = &PipelineLayout {
use crate::Gpu;
-pub const MAX_PRIMS: u32 = 0x20000;
-pub const TILE_SIZE_COARSE: u32 = 64;
-pub const TILE_SIZE_FINE: u32 = 16;
+pub const TILE_SIZE: u32 = 32;
+pub const MAX_PRIMS: u32 = 1 << 18;
pub const TILE_BITMAP_WORDS_L1: u32 = MAX_PRIMS / 32 / 32;
pub const TILE_BITMAP_WORDS_L0: u32 = MAX_PRIMS / 32;
-pub const TILE_STRIDE_COARSE: u32 = TILE_BITMAP_WORDS_L0;
-pub const TILE_STRIDE_FINE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1;
-
-pub const TILE_DISPATCH_COARSE_X: u32 = 15;
-pub const TILE_DISPATCH_COARSE_Y: u32 = 15;
-pub const TILE_DISPATCH_FINE_X: u32 = TILE_DISPATCH_COARSE_X * (TILE_SIZE_COARSE / TILE_SIZE_FINE);
-pub const TILE_DISPATCH_FINE_Y: u32 = TILE_DISPATCH_COARSE_Y * (TILE_SIZE_COARSE / TILE_SIZE_FINE);
+pub const TILE_STRIDE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1;
#[allow(unused)]
#[repr(C)]
pub num_primitives_32: u32,
pub num_primitives_1024: u32,
- pub tile_stride_fine: u32,
-
- pub tile_offset_x: u32,
- pub tile_offset_y: u32,
+ pub tile_stride: u32,
}
#[allow(unused)]
pub struct Primitive2dPipeline {
pub bind_group_layout: BindGroupLayout,
- pub coarse_bin_pipeline: Pipeline,
- pub fine_bin_pipeline: Pipeline,
- pub fine_clear_pipeline: Pipeline,
+ pub bin_pipeline: Pipeline,
pub rasterize_pipeline: Pipeline,
}
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
// Glyph Instances
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
- // Coarse Tiles
- BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
- // Fine Tiles
- BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
- // Fine Color
+ // Tiles
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
// UI Image Output
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
}],
};
- let coarse_bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
- shader: ShaderDesc {
- entry: c"main",
- code: shark_shaders::PRIMITIVE_2D_BIN_COARSE_COMP_SPV,
- },
- layout,
- });
-
- let fine_bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
- shader: ShaderDesc {
- entry: c"main",
- code: shark_shaders::PRIMITIVE_2D_BIN_FINE_COMP_SPV,
- },
- layout,
- });
-
- let fine_clear_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
+ let bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
shader: ShaderDesc {
entry: c"main",
- code: shark_shaders::PRIMITIVE_2D_CLEAR_FINE_COMP_SPV,
+ code: shark_shaders::PRIMITIVE_2D_BIN_COMP_SPV,
},
layout,
});
Self {
bind_group_layout,
- coarse_bin_pipeline,
- fine_bin_pipeline,
- fine_clear_pipeline,
+ bin_pipeline,
rasterize_pipeline,
}
}