layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
void main() {
- const uvec2 tile_size = uvec2(TILE_SIZE_COARSE, TILE_SIZE_COARSE);
const uvec2 tile_coord = gl_GlobalInvocationID.yz;
- const uvec2 tile_min = tile_coord * tile_size;
- const uvec2 tile_max = min(tile_min + tile_size, primitive_uniforms.screen_resolution);
+ const uvec2 tile_min = tile_coord * TILE_SIZE_COARSE;
+ const uvec2 tile_max = min(tile_min + TILE_SIZE_COARSE, primitive_uniforms.screen_resolution);
const uint primitive_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
uvec4 ballot_result = subgroupBallot(intersects);
if (subgroupElect()) { // managed democracy wins again
- const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord.x;
+ const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_coarse + tile_coord.x;
const uint tile_offset = tile_index * TILE_STRIDE_COARSE;
coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
void main() {
- const uvec2 tile_size = uvec2(TILE_SIZE_FINE, TILE_SIZE_FINE);
const uvec2 tile_coord = gl_GlobalInvocationID.yz;
- const uvec2 tile_min = tile_coord * tile_size;
- const uvec2 tile_max = min(tile_min + tile_size, primitive_uniforms.screen_resolution);
- const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x;
+ const uvec2 tile_min = tile_coord * TILE_SIZE_FINE;
+ const uvec2 tile_max = min(tile_min + TILE_SIZE_FINE, primitive_uniforms.screen_resolution);
+ const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride_fine + tile_coord.x;
const uint index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
uint bitmap_l0 = 0;
if (index < primitive_uniforms.num_primitives_32) {
const uvec2 tile_coord_coarse = tile_coord >> TILE_SIZE_SHIFT;
- const uint tile_index_coarse = tile_coord_coarse.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord_coarse.x;
+ const uint tile_index_coarse = tile_coord_coarse.y * primitive_uniforms.tile_stride_coarse + tile_coord_coarse.x;
const uint tile_base_coarse = tile_index_coarse * TILE_STRIDE_COARSE;
const uint tile_bitmap_base_coarse = tile_base_coarse + TILE_BITMAP_OFFSET_COARSE;
* TILE_STRIDE_FINE
* std::mem::size_of::<u32>() as u32;
- let fine_color_buffer_size = tile_resolution_fine_x
- * tile_resolution_fine_y
- * std::mem::size_of::<u32>() as u32;
+ // round the tile count up to a multiple of 64 (the workgroup size) to simplify the shader
+ let fine_color_buffer_size =
+ ((tile_resolution_fine_x * tile_resolution_fine_y + 63) & !63)
+ * std::mem::size_of::<u32>() as u32;
self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
memory_location: MemoryLocation::Device,
// Render UI
{
- gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.coarse_bin_pipeline);
-
- let num_primitives = ui_state.primitive_instances.len() as u32;
- let num_primitives_32 = (num_primitives + 31) / 32;
- let num_primitives_1024 = (num_primitives_32 + 31) / 32;
-
- gpu.cmd_push_constants(
- cmd_encoder,
- ShaderStageFlags::COMPUTE,
- 0,
- &PrimitiveUniforms {
- screen_resolution_x: self.width,
- screen_resolution_y: self.height,
- tile_resolution_coarse_x: self.tile_resolution_coarse_x,
- tile_resolution_coarse_y: self.tile_resolution_coarse_y,
- tile_resolution_fine_x: self.tile_resolution_fine_x,
- tile_resolution_fine_y: self.tile_resolution_fine_y,
- atlas_resolution_x: atlas_width,
- atlas_resolution_y: atlas_height,
- num_primitives,
- num_primitives_32,
- num_primitives_1024,
- _pad0: 0,
- },
- );
+ gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_clear_pipeline);
let glyph_buffer = gpu.request_transient_buffer_with_data(
frame,
],
);
- gpu.cmd_dispatch(
- cmd_encoder,
- (num_primitives + 63) / 64,
- self.tile_resolution_coarse_x,
- self.tile_resolution_coarse_y,
- );
-
- gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_clear_pipeline);
-
gpu.cmd_dispatch(
cmd_encoder,
(self.tile_resolution_coarse_x * self.tile_resolution_coarse_y + 63) / 64,
1,
);
- gpu.cmd_barrier(
- cmd_encoder,
- Some(&GlobalBarrier {
- prev_access: &[Access::ShaderWrite],
- next_access: &[Access::ShaderOtherRead],
- }),
- &[],
- );
-
- gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_bin_pipeline);
-
- gpu.cmd_dispatch(
- cmd_encoder,
- (num_primitives_32 + 63) / 64,
- self.tile_resolution_fine_x,
- self.tile_resolution_fine_y,
- );
-
- gpu.cmd_barrier(
- cmd_encoder,
- Some(&GlobalBarrier {
- prev_access: &[Access::ShaderWrite],
- next_access: &[Access::ShaderOtherRead],
- }),
- &[],
- );
-
- gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.rasterize_pipeline);
-
- gpu.cmd_dispatch(
- cmd_encoder,
- self.tile_resolution_fine_x,
- self.tile_resolution_fine_y,
- 1,
- );
+ let num_primitives = ui_state.primitive_instances.len() as u32;
+ let num_primitives_32 = (num_primitives + 31) / 32;
+ let num_primitives_1024 = (num_primitives_32 + 31) / 32;
- // Cleanup
ui_state.primitive_instances.clear();
+
+ for _pass_y in 0..1 {
+ for _pass_x in 0..1 {
+ gpu.cmd_set_pipeline(
+ cmd_encoder,
+ self.primitive_2d_pipeline.coarse_bin_pipeline,
+ );
+
+ gpu.cmd_push_constants(
+ cmd_encoder,
+ ShaderStageFlags::COMPUTE,
+ 0,
+ &PrimitiveUniforms {
+ screen_resolution_x: self.width,
+ screen_resolution_y: self.height,
+ atlas_resolution_x: atlas_width,
+ atlas_resolution_y: atlas_height,
+ num_primitives,
+ num_primitives_32,
+ num_primitives_1024,
+ tile_stride_coarse: self.tile_resolution_coarse_x,
+ tile_stride_fine: self.tile_resolution_fine_x,
+ },
+ );
+
+ gpu.cmd_dispatch(
+ cmd_encoder,
+ (num_primitives + 63) / 64,
+ self.tile_resolution_coarse_x,
+ self.tile_resolution_coarse_y,
+ );
+
+ gpu.cmd_barrier(
+ cmd_encoder,
+ Some(&GlobalBarrier {
+ prev_access: &[Access::ShaderWrite],
+ next_access: &[Access::ShaderOtherRead],
+ }),
+ &[],
+ );
+
+ gpu.cmd_set_pipeline(
+ cmd_encoder,
+ self.primitive_2d_pipeline.fine_bin_pipeline,
+ );
+
+ gpu.cmd_dispatch(
+ cmd_encoder,
+ (num_primitives_32 + 63) / 64,
+ self.tile_resolution_fine_x,
+ self.tile_resolution_fine_y,
+ );
+
+ gpu.cmd_barrier(
+ cmd_encoder,
+ Some(&GlobalBarrier {
+ prev_access: &[Access::ShaderWrite],
+ next_access: &[Access::ShaderOtherRead],
+ }),
+ &[],
+ );
+
+ gpu.cmd_set_pipeline(
+ cmd_encoder,
+ self.primitive_2d_pipeline.rasterize_pipeline,
+ );
+
+ gpu.cmd_dispatch(
+ cmd_encoder,
+ self.tile_resolution_fine_x,
+ self.tile_resolution_fine_y,
+ 1,
+ );
+ }
+ }
}
// Display transform and composite