stage: "comp",
name: "primitive_2d_bin",
},
+ Shader {
+ stage: "comp",
+ name: "primitive_2d_bin_clear",
+ },
Shader {
stage: "comp",
name: "primitive_2d_rasterize",
#extension GL_EXT_control_flow_attributes : require
+const uint MAX_PRIMS = 1 << 18;
+const uint TILE_BITMAP_L1_WORDS = (MAX_PRIMS / 32 / 32);
+const uint TILE_BITMAP_L0_WORDS = (MAX_PRIMS / 32);
+const uint TILE_STRIDE = (TILE_BITMAP_L0_WORDS + TILE_BITMAP_L1_WORDS + 2);
+const uint TILE_BITMAP_RANGE_OFFSET = 0;
+
struct PrimitiveUniforms {
uvec2 screen_resolution;
uvec2 atlas_resolution;
layout (set = 0, binding = 1) uniform texture3D tony_mc_mapface_lut;
-layout (set = 0, binding = 2, rgba16f) uniform readonly image2D layer_rt;
-layout (set = 0, binding = 3, rgba16f) uniform readonly image2D layer_ui;
+layout(std430, set = 0, binding = 2) readonly buffer tileBufferRead {
+ uint tile_bitmap_ro[];
+};
+
+layout (set = 0, binding = 3, rgba16f) uniform readonly image2D layer_rt;
+layout (set = 0, binding = 4, rgba16f) uniform readonly image2D layer_ui;
-layout (set = 0, binding = 4, rgba16f) uniform writeonly image2D composited_output;
+layout (set = 0, binding = 5, rgba16f) uniform writeonly image2D composited_output;
float srgb_oetf(float a) {
return (.0031308f >= a) ? 12.92f * a : 1.055f * pow(a, .4166666666666667f) - .055f;
const vec3 transformed = tony_mc_mapface(stimulus);
vec3 composited = srgb_oetf(transformed);
- const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba;
- composited = ui.rgb + (composited * (1.0 - ui.a));
+ const uvec2 tile_coord = gl_WorkGroupID.xy / 4;
+ const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride + tile_coord.x;
+ const uint tile_base = tile_index * TILE_STRIDE;
+
+ const uint first = tile_bitmap_ro[tile_base + TILE_BITMAP_RANGE_OFFSET + 0];
+ const uint last = tile_bitmap_ro[tile_base + TILE_BITMAP_RANGE_OFFSET + 1];
+ if (first <= last) {
+ const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba;
+ composited = ui.rgb + (composited * (1.0 - ui.a));
+ }
imageStore(composited_output, ivec2(gl_GlobalInvocationID.xy), vec4(composited, 1.0));
}
const uint MAX_PRIMS = 1 << 18;
const uint TILE_BITMAP_L1_WORDS = (MAX_PRIMS / 32 / 32);
const uint TILE_BITMAP_L0_WORDS = (MAX_PRIMS / 32);
-const uint TILE_STRIDE = (TILE_BITMAP_L0_WORDS + TILE_BITMAP_L1_WORDS);
-const uint TILE_BITMAP_L1_OFFSET = 0;
-const uint TILE_BITMAP_L0_OFFSET = TILE_BITMAP_L1_WORDS;
+const uint TILE_STRIDE = (TILE_BITMAP_L0_WORDS + TILE_BITMAP_L1_WORDS + 2);
+const uint TILE_BITMAP_RANGE_OFFSET = 0;
+const uint TILE_BITMAP_L1_OFFSET = 2;
+const uint TILE_BITMAP_L0_OFFSET = TILE_BITMAP_L1_OFFSET + TILE_BITMAP_L1_WORDS;
struct PrimitiveUniforms {
uvec2 screen_resolution;
}
tile_bitmap_wo[tile_index * TILE_STRIDE + TILE_BITMAP_L1_OFFSET + gl_WorkGroupID.x * 2 + i] = out_1;
+
+ if (out_1 != 0) {
+ atomicMin(tile_bitmap_wo[tile_index * TILE_STRIDE + TILE_BITMAP_RANGE_OFFSET + 0], gl_WorkGroupID.x * 2 + i);
+ atomicMax(tile_bitmap_wo[tile_index * TILE_STRIDE + TILE_BITMAP_RANGE_OFFSET + 1], gl_WorkGroupID.x * 2 + i);
+ }
}
}
}
--- /dev/null
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#extension GL_KHR_shader_subgroup_vote : require
+#extension GL_KHR_shader_subgroup_ballot : require
+
+#include "primitive_2d.h"
+
+// TODO: Spec constant support for different subgroup sizes.
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+void main() { // Resets one tile's [first, last] range words to "empty"; invocation x handles tile index x.
+ tile_bitmap_wo[gl_GlobalInvocationID.x * TILE_STRIDE + TILE_BITMAP_RANGE_OFFSET + 0] = 0xffffffff; // first = max uint, so a later atomicMin on this slot can only lower it
+ tile_bitmap_wo[gl_GlobalInvocationID.x * TILE_STRIDE + TILE_BITMAP_RANGE_OFFSET + 1] = 0; // last = 0, so last < first until something is recorded; readers skip the tile in that state
+} // NOTE(review): no bounds check here, and the dispatch rounds the tile count up to a multiple of 64 - confirm the buffer is padded for the trailing invocations.
const uvec2 tile_coord = gl_WorkGroupID.xy / 4;
const uint tile_index = tile_coord.y * primitive_uniforms.tile_stride + tile_coord.x;
const uint tile_base = tile_index * TILE_STRIDE;
- const uint tile_bitmap_l1_base_fine = tile_base + TILE_BITMAP_L1_OFFSET;
- const uint tile_bitmap_l0_base_fine = tile_base + TILE_BITMAP_L0_OFFSET;
+
+ const uint first = tile_bitmap_ro[tile_base + TILE_BITMAP_RANGE_OFFSET + 0];
+ const uint last = tile_bitmap_ro[tile_base + TILE_BITMAP_RANGE_OFFSET + 1];
+
+ [[branch]]
+ if (last < first) {
+ return;
+ }
#if DEBUG_SHOW_TILES == 1
int count = 0;
// For each tile, iterate over the L1 bitmap words in the tile's [first, last] range.
- for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
+ for (uint index_l1 = first; index_l1 <= last; index_l1++) {
// For each word, iterate all set bits.
- uint bitmap_l1 = tile_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
+ uint bitmap_l1 = tile_bitmap_ro[tile_base + TILE_BITMAP_L1_OFFSET + index_l1];
while (bitmap_l1 != 0) {
const uint i = findLSB(bitmap_l1);
// For each set bit in the L1 bitmap, iterate the set bits in the
// corresponding L0 bitmap.
const uint index_l0 = index_l1 * 32 + i;
- uint bitmap_l0 = tile_bitmap_ro[tile_bitmap_l0_base_fine + index_l0];
+ uint bitmap_l0 = tile_bitmap_ro[tile_base + TILE_BITMAP_L0_OFFSET + index_l0];
count += bitCount(bitmap_l0);
}
vec4 accum = vec4(0.0);
// For each tile, iterate over the L1 bitmap words in the tile's [first, last] range.
- for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
+ for (uint index_l1 = first; index_l1 <= last; index_l1++) {
// For each word, iterate all set bits.
- uint bitmap_l1 = tile_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
+ uint bitmap_l1 = tile_bitmap_ro[tile_base + TILE_BITMAP_L1_OFFSET + index_l1];
while (bitmap_l1 != 0) {
const uint i = findLSB(bitmap_l1);
// For each set bit in the L1 bitmap, iterate the set bits in the
// corresponding L0 bitmap.
const uint index_l0 = index_l1 * 32 + i;
- uint bitmap_l0 = tile_bitmap_ro[tile_bitmap_l0_base_fine + index_l0];
+ uint bitmap_l0 = tile_bitmap_ro[tile_base + TILE_BITMAP_L0_OFFSET + index_l0];
while (bitmap_l0 != 0) {
const uint j = findLSB(bitmap_l0);
bitmap_l0 ^= bitmap_l0 & -bitmap_l0;
ui_state.primitive_instances.clear();
- gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.bin_pipeline);
+ gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.bin_clear_pipeline);
gpu.cmd_set_bind_group(
frame,
},
);
+ gpu.cmd_dispatch(
+ cmd_encoder,
+ (self.tile_resolution_y * self.tile_resolution_x + 63) / 64,
+ 1,
+ 1,
+ );
+
+ gpu.cmd_barrier(
+ cmd_encoder,
+ Some(&GlobalBarrier {
+ prev_access: &[Access::ComputeWrite],
+ next_access: &[Access::ComputeOtherRead],
+ }),
+ &[],
+ );
+
+ gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.bin_pipeline);
+
gpu.cmd_dispatch(
cmd_encoder,
(num_primitives + 2047) / 2048,
gpu.cmd_barrier(
cmd_encoder,
Some(&GlobalBarrier {
- prev_access: &[Access::ShaderWrite],
- next_access: &[Access::ShaderOtherRead],
+ prev_access: &[Access::ComputeWrite],
+ next_access: &[Access::ComputeOtherRead],
}),
&[],
);
Bind {
binding: 2,
array_element: 0,
+ typed: TypedBind::StorageBuffer(&[self.tile_bitmap_buffer.to_arg()]),
+ },
+ Bind {
+ binding: 3,
+ array_element: 0,
typed: TypedBind::StorageImage(&[(
ImageLayout::General,
self.rt_image,
)]),
},
Bind {
- binding: 3,
+ binding: 4,
array_element: 0,
typed: TypedBind::StorageImage(&[(
ImageLayout::General,
)]),
},
Bind {
- binding: 4,
+ binding: 5,
array_element: 0,
typed: TypedBind::StorageImage(&[(
ImageLayout::General,
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::Sampler),
// Tony Mc'mapface LUT
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::SampledImage),
+ // Tiles
+ BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
// Layer RT
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
// Layer UI
pub const MAX_PRIMS: u32 = 1 << 18;
pub const TILE_BITMAP_WORDS_L1: u32 = MAX_PRIMS / 32 / 32;
pub const TILE_BITMAP_WORDS_L0: u32 = MAX_PRIMS / 32;
-pub const TILE_STRIDE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1;
+pub const TILE_STRIDE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1 + 2;
#[allow(unused)]
#[repr(C)]
/// Bundles the bind group layout with the `bin_clear`, `bin` and `rasterize` pipelines.
pub struct Primitive2dPipeline {
pub bind_group_layout: BindGroupLayout,
+ pub bin_clear_pipeline: Pipeline, // compute pipeline created from `PRIMITIVE_2D_BIN_CLEAR_COMP_SPV`
pub bin_pipeline: Pipeline,
pub rasterize_pipeline: Pipeline,
}
}],
};
+ let bin_clear_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
+ shader: ShaderDesc {
+ entry: c"main",
+ code: shark_shaders::PRIMITIVE_2D_BIN_CLEAR_COMP_SPV,
+ },
+ layout,
+ });
+
let bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
shader: ShaderDesc {
entry: c"main",
Self {
bind_group_layout,
+ bin_clear_pipeline,
bin_pipeline,
rasterize_pipeline,
}