#define MAX_PRIMS 0x20000u
-#define TILE_SIZE_COARSE 128
+#define TILE_SIZE_COARSE 64
#define TILE_SIZE_FINE 16
-#define TILE_SIZE_SHIFT 3
+#define TILE_SIZE_MUL (TILE_SIZE_COARSE / TILE_SIZE_FINE)
#define TILE_BITMAP_L1_WORDS (MAX_PRIMS / 32 / 32)
#define TILE_BITMAP_L0_WORDS (MAX_PRIMS / 32)
#define TILE_STRIDE_COARSE TILE_BITMAP_L0_WORDS
#define TILE_BITMAP_L1_OFFSET_FINE 0
#define TILE_BITMAP_L0_OFFSET_FINE TILE_BITMAP_L1_WORDS
-#define TILE_DISPATCH_X 8
+#define TILE_DISPATCH_X 15
struct PrimitiveUniforms {
uvec2 screen_resolution;
uint color;
};
-struct PrimitiveInstance {
- uint type;
- uint index;
-};
-
#include "primitive_2d_bindings.h"
bool test_glyph(uint index, uvec2 tile_min, uvec2 tile_max) {
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
void main() {
- const uvec2 tile_coord = gl_GlobalInvocationID.yz;
- const uint tile_index = tile_coord.y * (TILE_DISPATCH_X << TILE_SIZE_SHIFT) + tile_coord.x;
+ const uvec2 tile_coord = gl_WorkGroupID.yz;
+ const uint tile_index = tile_coord.y * TILE_DISPATCH_X * TILE_SIZE_MUL + tile_coord.x;
- const uvec2 tile_coord_global = tile_coord + (primitive_uniforms.tile_offset_coarse << TILE_SIZE_SHIFT);
+ const uvec2 tile_coord_global = tile_coord + primitive_uniforms.tile_offset_coarse * TILE_SIZE_MUL;
const uint tile_index_global = tile_coord_global.y * primitive_uniforms.tile_stride_fine + tile_coord_global.x;
const uvec2 tile_min = tile_coord_global * TILE_SIZE_FINE;
uint bitmap_l0 = 0;
if (index < primitive_uniforms.num_primitives_32) {
- const uvec2 tile_coord_coarse = tile_coord >> TILE_SIZE_SHIFT;
+ const uvec2 tile_coord_coarse = tile_coord / TILE_SIZE_MUL;
const uint tile_index_coarse = tile_coord_coarse.y * TILE_DISPATCH_X + tile_coord_coarse.x;
const uint tile_base_coarse = tile_index_coarse * TILE_STRIDE_COARSE;
const uint tile_bitmap_base_coarse = tile_base_coarse + TILE_BITMAP_OFFSET_COARSE;
}
const uint tile_base_fine = tile_index * TILE_STRIDE_FINE;
+ const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE;
// Write the L0 per-primitive bitmap.
- const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE;
fine_bitmap_wo[tile_bitmap_l0_base_fine + index] = bitmap_l0;
- // Write the L1 per-bitmap-word bitmap.
uvec4 ballot_result = subgroupBallot(bitmap_l0 != 0);
if (subgroupElect()) {
+ // Write the L1 per-bitmap-word bitmap.
const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE;
fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
GlyphInstance glyph_instances[];
};
-layout(std430, set = 0, binding = 4) readonly buffer primitiveInstanceBuffer {
- PrimitiveInstance primitive_instances[];
-};
-
-layout(std430, set = 0, binding = 5) readonly buffer coarseTileBufferRead {
+layout(std430, set = 0, binding = 4) readonly buffer coarseTileBufferRead {
uint coarse_bitmap_ro[];
};
-layout(std430, set = 0, binding = 5) writeonly buffer coarseTileBufferWrite {
+layout(std430, set = 0, binding = 4) writeonly buffer coarseTileBufferWrite {
uint coarse_bitmap_wo[];
};
-layout(std430, set = 0, binding = 6) readonly buffer fineTileBitmapBufferRead {
+layout(std430, set = 0, binding = 5) readonly buffer fineTileBitmapBufferRead {
uint fine_bitmap_ro[];
};
-layout(std430, set = 0, binding = 6) writeonly buffer fineTileBitmapBufferWrite {
+layout(std430, set = 0, binding = 5) writeonly buffer fineTileBitmapBufferWrite {
uint fine_bitmap_wo[];
};
-layout(std430, set = 0, binding = 7) readonly buffer fineTileCountBufferRead {
+layout(std430, set = 0, binding = 6) readonly buffer fineTileCountBufferRead {
uint fine_count_ro[];
};
-layout(std430, set = 0, binding = 7) writeonly buffer fineTileCountBufferWrite {
+layout(std430, set = 0, binding = 6) writeonly buffer fineTileCountBufferWrite {
uint fine_count_wo[];
};
-layout (set = 0, binding = 8, rgba16f) uniform writeonly image2D ui_image;
+layout (set = 0, binding = 7, rgba16f) uniform writeonly image2D ui_image;
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_control_flow_attributes : require
+#extension GL_KHR_shader_subgroup_vote : require
+#extension GL_KHR_shader_subgroup_ballot : require
+
#include "primitive_2d.h"
layout (local_size_x = TILE_SIZE_FINE, local_size_y = TILE_SIZE_FINE, local_size_z = 1) in;
+#define DEBUG_SHOW_TILES 0
+
+#if DEBUG_SHOW_TILES != 0
+
+// Debug color ramp: maps a scalar to an RGB color.
+//
+// `x` is clamped to [0, 1]; each output channel is then evaluated as a
+// degree-5 polynomial in x, with one row of coefficients per channel.
+// NOTE(review): the name suggests a quintic fit of the "plasma" color map;
+// confirm where the coefficients come from before changing them.
+vec3 plasma_quintic(float x)
+{
+ x = clamp(x, 0.0, 1.0);
+ vec4 x1 = vec4(1.0, x, x * x, x * x * x); // 1 x x2 x3
+ vec4 x2 = x1 * x1.w * x; // x4 x5 x6 x7
+ // Only x2.xy (x^4 and x^5) are consumed below; x^6 and x^7 are unused.
+ return vec3(
+ dot(x1.xyzw, vec4(+0.063861086, +1.992659096, -1.023901152, -0.490832805)) + dot(x2.xy, vec2(+1.308442123, -0.914547012)),
+ dot(x1.xyzw, vec4(+0.049718590, -0.791144343, +2.892305078, +0.811726816)) + dot(x2.xy, vec2(-4.686502417, +2.717794514)),
+ dot(x1.xyzw, vec4(+0.513275779, +1.580255060, -5.164414457, +4.559573646)) + dot(x2.xy, vec2(-1.916810682, +0.570638854))
+ );
+}
+
+#endif
+
void main() {
const uvec2 tile_coord = gl_WorkGroupID.xy;
- const uint tile_index = tile_coord.y * (TILE_DISPATCH_X << TILE_SIZE_SHIFT) + tile_coord.x;
+ const uint tile_index = tile_coord.y * TILE_DISPATCH_X * TILE_SIZE_MUL + tile_coord.x;
- const uvec2 tile_coord_global = tile_coord + (primitive_uniforms.tile_offset_coarse << TILE_SIZE_SHIFT);
+ const uvec2 tile_coord_global = tile_coord + primitive_uniforms.tile_offset_coarse * TILE_SIZE_MUL;
const uint tile_index_global = tile_coord_global.y * primitive_uniforms.tile_stride_fine + tile_coord_global.x;
const uint tile_base_fine = tile_index * TILE_STRIDE_FINE;
const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE;
const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE;
+#if DEBUG_SHOW_TILES == 1
+
+ uint count = 0;
+ // For each tile, iterate over all words in the L1 bitmap.
+ for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
+ // For each word, iterate all set bits.
+ uint bitmap_l1 = fine_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
+
+ while (bitmap_l1 != 0) {
+ const uint i = findLSB(bitmap_l1);
+ bitmap_l1 ^= bitmap_l1 & -bitmap_l1;
+
+ // For each set bit in the L1 bitmap, iterate the set bits in the
+ // corresponding L0 bitmap.
+ const uint index_l0 = index_l1 * 32 + i;
+ uint bitmap_l0 = fine_bitmap_ro[tile_bitmap_l0_base_fine + index_l0];
+
+ count += bitCount(bitmap_l0);
+ }
+ }
+
+ const vec3 color = plasma_quintic(float(count) / 50.0);
+ imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy + primitive_uniforms.tile_offset_coarse * TILE_SIZE_COARSE), vec4(color, 1.0));
+
+#else
+
vec4 accum = vec4(0.0);
uint word_count = fine_count_ro[tile_index_global];
+ if (word_count == 0) {
+ return;
+ }
// For each tile, iterate over all words in the L1 bitmap.
for (int index_l1 = 0; word_count != 0 && index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
}
imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy + primitive_uniforms.tile_offset_coarse * TILE_SIZE_COARSE), accum);
+
+#endif
}
BufferUsageFlags::STORAGE,
ui_state.primitive_instances.as_slice(),
);
- let primitive_instance_buffer = gpu.request_transient_buffer_with_data(
- frame,
- thread_token,
- BufferUsageFlags::STORAGE,
- &[0u32],
- );
gpu.cmd_set_bind_group(
frame,
Bind {
binding: 4,
array_element: 0,
- typed: TypedBind::StorageBuffer(&[primitive_instance_buffer.to_arg()]),
- },
- Bind {
- binding: 5,
- array_element: 0,
typed: TypedBind::StorageBuffer(&[self
.coarse_tile_bitmap_buffer
.to_arg()]),
},
Bind {
- binding: 6,
+ binding: 5,
array_element: 0,
typed: TypedBind::StorageBuffer(&[self
.fine_tile_bitmap_buffer
.to_arg()]),
},
Bind {
- binding: 7,
+ binding: 6,
array_element: 0,
typed: TypedBind::StorageBuffer(&[self
.fine_tile_color_buffer
.to_arg()]),
},
Bind {
- binding: 8,
+ binding: 7,
array_element: 0,
typed: TypedBind::StorageImage(&[(
ImageLayout::General,
base_x * 100.0 * scale + 5.0,
base_y * 100.0 * scale + i * 15.0 * scale,
FontFamily::RobotoRegular,
- 40.0,
+ 20.0,
format_args!("tick: {:?}", tick_duration),
);
}
- for i in 0..80 {
- for j in 0..11 {
- let i = i as f32;
- let j = j as f32;
- let x = 200.0 + j * 200.0;
- let y = 100.0 + j * 100.0;
- ui_state.text_fmt(
- base_x * 100.0 * scale +x * scale,
- base_y * 100.0 * scale +(y + i * 15.0) * scale,
+ for i in 0..120 {
+ let i = i as f32;
+ ui_state.text_fmt(
+ 5.0,
+ (15.0 + i * 15.0) * scale,
FontFamily::NotoSansJapanese,
- 15.0,
+ 8.0,
format_args!(
- "お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████"
+ "お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████お握り The Quick Brown Fox Jumped Over The Lazy Dog. ████████"
),
);
- }
}
draw_state.draw(
use crate::Gpu;
+// NOTE: MAX_PRIMS, the tile sizes and the coarse X dispatch width are duplicated
+// as `#define`s on the shader side; keep both sets of values in sync.
pub const MAX_PRIMS: u32 = 0x20000;
-pub const TILE_SIZE_COARSE: u32 = 128;
+pub const TILE_SIZE_COARSE: u32 = 64;
pub const TILE_SIZE_FINE: u32 = 16;
+// Two-level bitmap sizing in 32-bit words: L0 holds one bit per primitive,
+// L1 holds one bit per L0 word.
pub const TILE_BITMAP_WORDS_L1: u32 = MAX_PRIMS / 32 / 32;
pub const TILE_BITMAP_WORDS_L0: u32 = MAX_PRIMS / 32;
+// Words reserved per tile: a coarse tile stores only an L0 bitmap, a fine tile
+// stores both its L0 and L1 bitmaps.
pub const TILE_STRIDE_COARSE: u32 = TILE_BITMAP_WORDS_L0;
pub const TILE_STRIDE_FINE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1;
-pub const TILE_DISPATCH_COARSE_X: u32 = 8;
-pub const TILE_DISPATCH_COARSE_Y: u32 = 5;
+pub const TILE_DISPATCH_COARSE_X: u32 = 15;
+pub const TILE_DISPATCH_COARSE_Y: u32 = 15;
+// Fine dispatch size = coarse dispatch size times the number of fine tiles
+// along one edge of a coarse tile.
pub const TILE_DISPATCH_FINE_X: u32 = TILE_DISPATCH_COARSE_X * (TILE_SIZE_COARSE / TILE_SIZE_FINE);
pub const TILE_DISPATCH_FINE_Y: u32 = TILE_DISPATCH_COARSE_Y * (TILE_SIZE_COARSE / TILE_SIZE_FINE);
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
// Glyph Instances
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
- // Primitive Instances
- BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
// Coarse Tiles
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
// Fine Tiles