From: Josh Simmons Date: Mon, 27 May 2024 06:43:34 +0000 (+0200) Subject: shark: Try single pass for coarse culling X-Git-Url: https://git.nega.tv//gitweb.cgi?a=commitdiff_plain;h=874cbd727a3879d08ce2fe157046aeb16052dc55;p=josh%2Fnarcissus shark: Try single pass for coarse culling --- diff --git a/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl index 6be407c..00fa31c 100644 --- a/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_bin_coarse.comp.glsl @@ -29,7 +29,7 @@ void main() { uvec4 ballot_result = subgroupBallot(intersects); if (subgroupElect()) { // managed democracy wins again - const uint tile_index = tile_coord.y * TILE_DISPATCH_X + tile_coord.x; + const uint tile_index = tile_coord.y * (primitive_uniforms.tile_stride_fine / TILE_SIZE_MUL) + tile_coord.x; const uint tile_offset = tile_index * TILE_STRIDE_COARSE; coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x; coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y; diff --git a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl index b4c8c33..06c9a7b 100644 --- a/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl +++ b/title/shark-shaders/shaders/primitive_2d_bin_fine.comp.glsl @@ -27,8 +27,8 @@ void main() { uint bitmap_l0 = 0; if (index < primitive_uniforms.num_primitives_32) { - const uvec2 tile_coord_coarse = tile_coord / TILE_SIZE_MUL; - const uint tile_index_coarse = tile_coord_coarse.y * TILE_DISPATCH_X + tile_coord_coarse.x; + const uvec2 tile_coord_coarse = (tile_coord / TILE_SIZE_MUL) + primitive_uniforms.tile_offset_coarse; + const uint tile_index_coarse = tile_coord_coarse.y * (primitive_uniforms.tile_stride_fine / TILE_SIZE_MUL) + tile_coord_coarse.x; const uint tile_base_coarse = tile_index_coarse * TILE_STRIDE_COARSE; const uint 
tile_bitmap_base_coarse = tile_base_coarse + TILE_BITMAP_OFFSET_COARSE; diff --git a/title/shark/src/main.rs b/title/shark/src/main.rs index c5b378b..e50ab41 100644 --- a/title/shark/src/main.rs +++ b/title/shark/src/main.rs @@ -885,23 +885,11 @@ impl<'gpu> DrawState<'gpu> { let models = Models::load(gpu); let images = Images::load(gpu, thread_token); - let coarse_bitmap_buffer_size = TILE_DISPATCH_COARSE_X - * TILE_DISPATCH_COARSE_Y - * TILE_STRIDE_COARSE - * std::mem::size_of::<u32>() as u32; - let fine_bitmap_buffer_size = TILE_DISPATCH_FINE_X * TILE_DISPATCH_FINE_Y * TILE_STRIDE_FINE * std::mem::size_of::<u32>() as u32; - let coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc { - memory_location: MemoryLocation::Device, - host_mapped: false, - usage: BufferUsageFlags::STORAGE, - size: coarse_bitmap_buffer_size.widen(), - }); - let fine_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc { memory_location: MemoryLocation::Device, host_mapped: false, @@ -923,7 +911,7 @@ impl<'gpu> DrawState<'gpu> { depth_image: default(), rt_image: default(), ui_image: default(), - coarse_tile_bitmap_buffer, + coarse_tile_bitmap_buffer: default(), fine_tile_bitmap_buffer, fine_tile_color_buffer: default(), glyph_atlas_image: default(), @@ -1047,6 +1035,19 @@ impl<'gpu> DrawState<'gpu> { || tile_resolution_fine_y != self.tile_resolution_fine_y { gpu.destroy_buffer(frame, self.fine_tile_color_buffer); + gpu.destroy_buffer(frame, self.coarse_tile_bitmap_buffer); + + let coarse_bitmap_buffer_size = tile_resolution_coarse_x + * tile_resolution_coarse_y + * TILE_STRIDE_COARSE + * std::mem::size_of::<u32>() as u32; + + self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc { + memory_location: MemoryLocation::Device, + host_mapped: false, + usage: BufferUsageFlags::STORAGE, + size: coarse_bitmap_buffer_size.widen(), + }); // align to the workgroup size to simplify shader let fine_color_buffer_size = @@ -1418,19 +1419,48 @@ impl<'gpu> DrawState<'gpu> { 
ui_state.primitive_instances.clear(); + gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.coarse_bin_pipeline); + + gpu.cmd_push_constants( + cmd_encoder, + ShaderStageFlags::COMPUTE, + 0, + &PrimitiveUniforms { + screen_resolution_x: self.width, + screen_resolution_y: self.height, + atlas_resolution_x: atlas_width, + atlas_resolution_y: atlas_height, + num_primitives, + num_primitives_32, + num_primitives_1024, + tile_stride_fine: self.tile_resolution_fine_x, + tile_offset_x: 0, + tile_offset_y: 0, + }, + ); + + gpu.cmd_dispatch( + cmd_encoder, + (num_primitives + 63) / 64, + self.tile_resolution_coarse_x, + self.tile_resolution_coarse_y, + ); + + gpu.cmd_barrier( + cmd_encoder, + Some(&GlobalBarrier { + prev_access: &[Access::ShaderWrite], + next_access: &[Access::ShaderOtherRead], + }), + &[], + ); + for tile_offset_y in (0..self.tile_resolution_coarse_y).step_by(TILE_DISPATCH_COARSE_Y as usize) { for tile_offset_x in (0..self.tile_resolution_coarse_x).step_by(TILE_DISPATCH_COARSE_X as usize) { - let coarse_dispatch_x = (tile_offset_x + TILE_DISPATCH_COARSE_X) - .min(self.tile_resolution_coarse_x) - - tile_offset_x; - let coarse_dispatch_y = (tile_offset_y + TILE_DISPATCH_COARSE_Y) - .min(self.tile_resolution_coarse_y) - - tile_offset_y; - let tile_offset_fine_x = tile_offset_x * (TILE_SIZE_COARSE / TILE_SIZE_FINE); @@ -1445,11 +1475,6 @@ impl<'gpu> DrawState<'gpu> { .min(self.tile_resolution_fine_y) - tile_offset_fine_y; - gpu.cmd_set_pipeline( - cmd_encoder, - self.primitive_2d_pipeline.coarse_bin_pipeline, - ); - gpu.cmd_push_constants( cmd_encoder, ShaderStageFlags::COMPUTE, @@ -1468,22 +1493,6 @@ impl<'gpu> DrawState<'gpu> { }, ); - gpu.cmd_dispatch( - cmd_encoder, - (num_primitives + 63) / 64, - coarse_dispatch_x, - coarse_dispatch_y, - ); - - gpu.cmd_barrier( - cmd_encoder, - Some(&GlobalBarrier { - prev_access: &[Access::ShaderWrite], - next_access: &[Access::ShaderOtherRead], - }), - &[], - ); - gpu.cmd_set_pipeline( cmd_encoder, 
self.primitive_2d_pipeline.fine_bin_pipeline,