// Compute entry point, shown as a diff excerpt: lines between hunks are elided
// (e.g. `ballot_result` is defined in code not visible here).
// NOTE(review): presumably the fine binning pass, judging by fine_count_wo — confirm.
// The change separates two tile indices: a dispatch-local `tile_index` and a
// `tile_index_global` that includes primitive_uniforms.tile_offset_coarse.
void main() {
// Tile coordinate of this invocation, taken from the y/z invocation IDs.
const uvec2 tile_coord = gl_GlobalInvocationID.yz;
- const uvec2 tile_min = tile_coord * TILE_SIZE_FINE;
// Dispatch-local linear index; the row stride is TILE_DISPATCH_X << TILE_SIZE_SHIFT.
+ const uint tile_index = tile_coord.y * (TILE_DISPATCH_X << TILE_SIZE_SHIFT) + tile_coord.x;
+
// Global tile coordinate: local coordinate plus the coarse offset shifted left by
// TILE_SIZE_SHIFT (presumably converting coarse tile units to fine tile units — confirm).
+ const uvec2 tile_coord_global = tile_coord + (primitive_uniforms.tile_offset_coarse << TILE_SIZE_SHIFT);
// Global linear index; the row stride is the tile_stride_fine uniform.
+ const uint tile_index_global = tile_coord_global.y * primitive_uniforms.tile_stride_fine + tile_coord_global.x;
+
// Tile bounds are now derived from the global coordinate; the upper bound is
// clamped to screen_resolution.
+ const uvec2 tile_min = tile_coord_global * TILE_SIZE_FINE;
const uvec2 tile_max = min(tile_min + TILE_SIZE_FINE, primitive_uniforms.screen_resolution);
- const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_fine + tile_coord.x;
// Flat index along x, built from the workgroup, subgroup and subgroup-invocation IDs.
const uint index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
uint bitmap_l0 = 0;
// Only indices below num_primitives_32 do any work.
if (index < primitive_uniforms.num_primitives_32) {
// Coarse tile coordinate is derived from the dispatch-local tile coordinate.
const uvec2 tile_coord_coarse = tile_coord >> TILE_SIZE_SHIFT;
- const uint tile_index_coarse = tile_coord_coarse.y * primitive_uniforms.tile_stride_coarse + tile_coord_coarse.x;
// Coarse index now uses the constant TILE_DISPATCH_X as row stride instead of the
// tile_stride_coarse uniform.
+ const uint tile_index_coarse = tile_coord_coarse.y * TILE_DISPATCH_X + tile_coord_coarse.x;
const uint tile_base_coarse = tile_index_coarse * TILE_STRIDE_COARSE;
const uint tile_bitmap_base_coarse = tile_base_coarse + TILE_BITMAP_OFFSET_COARSE;
// Number of non-zero components among ballot_result.x and ballot_result.y.
const uint count = uint(ballot_result.x != 0) + uint(ballot_result.y != 0);
if (count != 0) {
- atomicAdd(fine_count_wo[tile_index], count);
// The per-tile counter is addressed with the global index, not the dispatch-local one.
+ atomicAdd(fine_count_wo[tile_index_global], count);
}
}
}
// Compute entry point, shown as a diff excerpt: lines between hunks are elided
// (e.g. `gi` is defined in code not visible here).
// NOTE(review): presumably the rasterize pass, since it writes ui_image — confirm.
// Bitmap offsets use the dispatch-local tile index, while the per-tile word count
// and all pixel positions use coordinates offset by primitive_uniforms.tile_offset_coarse.
void main() {
// One workgroup per tile: the tile coordinate is the workgroup ID.
const uvec2 tile_coord = gl_WorkGroupID.xy;
- const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_fine + tile_coord.x;
// Dispatch-local linear index; the row stride is TILE_DISPATCH_X << TILE_SIZE_SHIFT.
+ const uint tile_index = tile_coord.y * (TILE_DISPATCH_X << TILE_SIZE_SHIFT) + tile_coord.x;
+
// Global tile coordinate and index, including the coarse offset shifted left by
// TILE_SIZE_SHIFT (presumably converting coarse tile units to fine tile units — confirm).
+ const uvec2 tile_coord_global = tile_coord + (primitive_uniforms.tile_offset_coarse << TILE_SIZE_SHIFT);
+ const uint tile_index_global = tile_coord_global.y * primitive_uniforms.tile_stride_fine + tile_coord_global.x;
+
// Base offsets for this tile's L1 and L0 bitmaps, derived from the dispatch-local index.
const uint tile_base_fine = tile_index * TILE_STRIDE_FINE;
const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE;
const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE;
vec4 accum = vec4(0.0);
- uint word_count = fine_count_ro[tile_index];
// The word count is read with the global index.
+ uint word_count = fine_count_ro[tile_index_global];
// For each tile, iterate over all words in the L1 bitmap.
// The loop also stops as soon as word_count reaches zero.
for (int index_l1 = 0; word_count != 0 && index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
const Glyph gl = glyphs[gi.index];
// Glyph rectangle: instance position plus the glyph's min/max offsets.
const vec2 glyph_min = gi.position + gl.offset_min;
const vec2 glyph_max = gi.position + gl.offset_max;
- const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5);
// Sample position: this invocation's xy, shifted by tile_offset_coarse * TILE_SIZE_COARSE,
// plus 0.5 on each axis.
+ const vec2 sample_center = gl_GlobalInvocationID.xy + primitive_uniforms.tile_offset_coarse * TILE_SIZE_COARSE + vec2(0.5);
// Inclusive containment test against the glyph rectangle.
if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) {
const vec2 glyph_size = gl.offset_max - gl.offset_min;
// Unpack the 8-bit-per-channel color and swizzle it with .bgra.
const vec4 color = unpackUnorm4x8(gi.color).bgra;
}
}
- imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy), accum);
// The store uses the same tile_offset_coarse * TILE_SIZE_COARSE shift as sample_center.
+ imageStore(ui_image, ivec2(gl_GlobalInvocationID.xy + primitive_uniforms.tile_offset_coarse * TILE_SIZE_COARSE), accum);
}
use crate::pipelines::basic::BasicUniforms;
use crate::pipelines::primitive_2d::{
- PrimitiveUniforms, TILE_SIZE_COARSE, TILE_SIZE_FINE, TILE_STRIDE_COARSE, TILE_STRIDE_FINE,
+ PrimitiveUniforms, TILE_DISPATCH_COARSE_X, TILE_DISPATCH_COARSE_Y, TILE_DISPATCH_FINE_X,
+ TILE_DISPATCH_FINE_Y, TILE_SIZE_COARSE, TILE_SIZE_FINE, TILE_STRIDE_COARSE, TILE_STRIDE_FINE,
};
mod fonts;
let models = Models::load(gpu);
let images = Images::load(gpu, thread_token);
+ let coarse_bitmap_buffer_size = TILE_DISPATCH_COARSE_X
+ * TILE_DISPATCH_COARSE_Y
+ * TILE_STRIDE_COARSE
+ * std::mem::size_of::<u32>() as u32;
+
+ let fine_bitmap_buffer_size = TILE_DISPATCH_FINE_X
+ * TILE_DISPATCH_FINE_Y
+ * TILE_STRIDE_FINE
+ * std::mem::size_of::<u32>() as u32;
+
+ let coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
+ memory_location: MemoryLocation::Device,
+ host_mapped: false,
+ usage: BufferUsageFlags::STORAGE,
+ size: coarse_bitmap_buffer_size.widen(),
+ });
+
+ let fine_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
+ memory_location: MemoryLocation::Device,
+ host_mapped: false,
+ usage: BufferUsageFlags::STORAGE,
+ size: fine_bitmap_buffer_size.widen(),
+ });
+
Self {
gpu,
basic_pipeline,
depth_image: default(),
rt_image: default(),
ui_image: default(),
- coarse_tile_bitmap_buffer: default(),
- fine_tile_bitmap_buffer: default(),
+ coarse_tile_bitmap_buffer,
+ fine_tile_bitmap_buffer,
fine_tile_color_buffer: default(),
glyph_atlas_image: default(),
samplers,
|| tile_resolution_fine_x != self.tile_resolution_fine_x
|| tile_resolution_fine_y != self.tile_resolution_fine_y
{
- gpu.destroy_buffer(frame, self.coarse_tile_bitmap_buffer);
- gpu.destroy_buffer(frame, self.fine_tile_bitmap_buffer);
gpu.destroy_buffer(frame, self.fine_tile_color_buffer);
- let coarse_bitmap_buffer_size = tile_resolution_coarse_x
- * tile_resolution_coarse_y
- * TILE_STRIDE_COARSE
- * std::mem::size_of::<u32>() as u32;
-
- let fine_bitmap_buffer_size = tile_resolution_fine_x
- * tile_resolution_fine_y
- * TILE_STRIDE_FINE
- * std::mem::size_of::<u32>() as u32;
-
// align to the workgroup size to simplify shader
let fine_color_buffer_size =
((tile_resolution_fine_x * tile_resolution_fine_y + 63) & !63)
* std::mem::size_of::<u32>() as u32;
- self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
- memory_location: MemoryLocation::Device,
- host_mapped: false,
- usage: BufferUsageFlags::STORAGE,
- size: coarse_bitmap_buffer_size.widen(),
- });
-
- self.fine_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
- memory_location: MemoryLocation::Device,
- host_mapped: false,
- usage: BufferUsageFlags::STORAGE,
- size: fine_bitmap_buffer_size.widen(),
- });
-
self.fine_tile_color_buffer = gpu.create_buffer(&BufferDesc {
memory_location: MemoryLocation::Device,
host_mapped: false,
gpu.cmd_dispatch(
cmd_encoder,
- (self.tile_resolution_coarse_x * self.tile_resolution_coarse_y + 63) / 64,
+ (self.tile_resolution_fine_x * self.tile_resolution_fine_y + 63) / 64,
1,
1,
);
ui_state.primitive_instances.clear();
- for _pass_y in 0..1 {
- for _pass_x in 0..1 {
+ for tile_offset_y in
+ (0..self.tile_resolution_coarse_y).step_by(TILE_DISPATCH_COARSE_Y as usize)
+ {
+ for tile_offset_x in
+ (0..self.tile_resolution_coarse_x).step_by(TILE_DISPATCH_COARSE_X as usize)
+ {
+ let coarse_dispatch_x = (tile_offset_x + TILE_DISPATCH_COARSE_X)
+ .min(self.tile_resolution_coarse_x)
+ - tile_offset_x;
+ let coarse_dispatch_y = (tile_offset_y + TILE_DISPATCH_COARSE_Y)
+ .min(self.tile_resolution_coarse_y)
+ - tile_offset_y;
+
+ let tile_offset_fine_x =
+ tile_offset_x * (TILE_SIZE_COARSE / TILE_SIZE_FINE);
+
+ let tile_offset_fine_y =
+ tile_offset_y * (TILE_SIZE_COARSE / TILE_SIZE_FINE);
+
+ let fine_dispatch_x = (tile_offset_fine_x + TILE_DISPATCH_FINE_X)
+ .min(self.tile_resolution_fine_x)
+ - tile_offset_fine_x;
+
+ let fine_dispatch_y = (tile_offset_fine_y + TILE_DISPATCH_FINE_Y)
+ .min(self.tile_resolution_fine_y)
+ - tile_offset_fine_y;
+
gpu.cmd_set_pipeline(
cmd_encoder,
self.primitive_2d_pipeline.coarse_bin_pipeline,
num_primitives,
num_primitives_32,
num_primitives_1024,
- tile_stride_coarse: self.tile_resolution_coarse_x,
tile_stride_fine: self.tile_resolution_fine_x,
+ tile_offset_x,
+ tile_offset_y,
},
);
gpu.cmd_dispatch(
cmd_encoder,
(num_primitives + 63) / 64,
- self.tile_resolution_coarse_x,
- self.tile_resolution_coarse_y,
+ coarse_dispatch_x,
+ coarse_dispatch_y,
);
gpu.cmd_barrier(
gpu.cmd_dispatch(
cmd_encoder,
(num_primitives_32 + 63) / 64,
- self.tile_resolution_fine_x,
- self.tile_resolution_fine_y,
+ fine_dispatch_x,
+ fine_dispatch_y,
);
gpu.cmd_barrier(
self.primitive_2d_pipeline.rasterize_pipeline,
);
- gpu.cmd_dispatch(
- cmd_encoder,
- self.tile_resolution_fine_x,
- self.tile_resolution_fine_y,
- 1,
- );
+ gpu.cmd_dispatch(cmd_encoder, fine_dispatch_x, fine_dispatch_y, 1);
}
}
}