stage: "comp",
name: "primitive_2d_bin_fine",
},
+ Shader {
+ stage: "comp",
+ name: "primitive_2d_clear_fine",
+ },
Shader {
stage: "comp",
name: "primitive_2d_rasterize",
#version 460
+#extension GL_EXT_control_flow_attributes : require
+
+struct PrimitiveUniforms { // Uniform parameters delivered to this shader through the push-constant block below.
+ uvec2 screen_resolution; // presumably the output size in pixels — TODO confirm
+ uvec2 tile_resolution_coarse; // presumably the coarse tile grid dimensions — TODO confirm
+ uvec2 tile_resolution_fine; // .x is used as the row stride when computing tile_index in main()
+ uvec2 atlas_resolution; // presumably the glyph atlas size — TODO confirm
+
+ uint num_primitives;
+ uint num_primitives_32; // NOTE(review): looks like num_primitives rounded up in units of 32 — confirm against the host code
+ uint num_primitives_1024; // NOTE(review): looks like num_primitives rounded up in units of 1024 — confirm against the host code
+ uint pad_1; // presumably padding to keep the struct size aligned — TODO confirm
+};
+
+layout(std430, push_constant) uniform uniformBuffer { // Push-constant block; the host must supply a matching range.
+ PrimitiveUniforms primitive_uniforms;
+};
+
layout (set = 0, binding = 0) uniform sampler bilinear_sampler;
layout (set = 0, binding = 1) uniform texture3D tony_mc_mapface_lut;
layout (set = 0, binding = 4, rgba16f) uniform writeonly image2D composited_output;
+layout(std430, set = 0, binding = 5) readonly buffer fineTileCountBufferRead {
+ uint fine_count_ro[];
+};
+
// Encodes a single linear channel value with the sRGB transfer curve:
// a linear segment at or below 0.0031308, and a power segment above it.
float srgb_oetf(float a) {
    if (a <= .0031308f) {
        return 12.92f * a;
    }
    return 1.055f * pow(a, .4166666666666667f) - .055f;
}
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
void main() { // Per pixel: load layer_rt, apply tony_mc_mapface and srgb_oetf, optionally blend layer_ui on top, store to composited_output.
+ const uvec2 tile_coord = gl_WorkGroupID.xy >> 1; // Two workgroups (8x8 invocations each) map to one tile along each axis.
+ const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x; // Row-major index into fine_count_ro.
+
 const vec3 stimulus = imageLoad(layer_rt, ivec2(gl_GlobalInvocationID.xy)).rgb; // Scene colour for this pixel.
 const vec3 transformed = tony_mc_mapface(stimulus);
- const vec3 srgb = srgb_oetf(transformed);
- const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba;
- const vec3 composited = ui.rgb + (srgb * (1.0 - ui.a));
+ vec3 composited = srgb_oetf(transformed); // Starts as the encoded scene colour; replaced below when the tile has UI content.
+
+ [[branch]]
+ if (fine_count_ro[tile_index] != 0) { // Only read and blend the UI layer when this tile's count is non-zero.
+ const vec4 ui = imageLoad(layer_ui, ivec2(gl_GlobalInvocationID.xy)).rgba;
+ composited = ui.rgb + (composited * (1.0 - ui.a)); // UI colour plus scene colour scaled by (1 - ui.a).
+ }
+
 imageStore(composited_output, ivec2(gl_GlobalInvocationID.xy), vec4(composited, 1.0)); // Output alpha is always 1.0.
}
const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE;
fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
+
+ const uint count = uint(ballot_result.x != 0) + uint(ballot_result.y != 0);
+ if (count != 0) {
+ atomicAdd(fine_count_wo[tile_index], count);
+ }
}
}
uint coarse_bitmap_wo[];
};
-layout(std430, set = 0, binding = 6) readonly buffer fineTileBufferRead {
+layout(std430, set = 0, binding = 6) readonly buffer fineTileBitmapBufferRead {
uint fine_bitmap_ro[];
};
-layout(std430, set = 0, binding = 6) writeonly buffer fineTileBufferWrite {
+layout(std430, set = 0, binding = 6) writeonly buffer fineTileBitmapBufferWrite {
uint fine_bitmap_wo[];
};
-layout (set = 0, binding = 7, rgba16f) uniform writeonly image2D ui_image;
+layout(std430, set = 0, binding = 7) readonly buffer fineTileCountBufferRead {
+ uint fine_count_ro[];
+};
+
+layout(std430, set = 0, binding = 7) writeonly buffer fineTileCountBufferWrite {
+ uint fine_count_wo[];
+};
+
+layout (set = 0, binding = 8, rgba16f) uniform writeonly image2D ui_image;
--- /dev/null
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#include "primitive_2d.h"
+
+layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
+
+// Zeroes one entry of fine_count_wo per invocation; invocations past the end
+// of the fine tile grid do nothing.
+void main() {
+    const uint fine_tile = gl_GlobalInvocationID.x;
+    const uint fine_tile_total = primitive_uniforms.tile_resolution_fine.x * primitive_uniforms.tile_resolution_fine.y;
+
+    if (fine_tile >= fine_tile_total) {
+        return;
+    }
+
+    fine_count_wo[fine_tile] = 0;
+}
vec4 accum = vec4(0.0);
- // For each tile, iterate over all words in the L1 bitmap.
- //
- // TODO: Count the non-zero words in the tile with atomics, so we can early out on empty tiles?
- for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
+ uint word_count = fine_count_ro[tile_index];
+
+ // For each tile, iterate over all words in the L1 bitmap.
+ for (int index_l1 = 0; word_count != 0 && index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
// For each word, iterate all set bits.
uint bitmap_l1 = fine_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
+
+ if (bitmap_l1 != 0)
+ word_count -= 1;
+
while (bitmap_l1 != 0) {
const uint i = findLSB(bitmap_l1);
bitmap_l1 ^= bitmap_l1 & -bitmap_l1;
const Glyph gl = glyphs[gi.index];
const vec2 glyph_min = gi.position + gl.offset_min;
const vec2 glyph_max = gi.position + gl.offset_max;
- const vec2 sample_center = gl_GlobalInvocationID.xy; // half pixel offset goes here?
+ const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5);
if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) {
const vec2 glyph_size = gl.offset_max - gl.offset_min;
const vec4 color = unpackUnorm4x8(gi.color).bgra;
};
use narcissus_image as image;
use narcissus_maths::{
- clamp, perlin_noise3, sin_pi_f32, vec3, Affine3, Deg, HalfTurn, Mat3, Mat4, Point3, Vec3,
+ clamp, perlin_noise3, sin_cos_pi_f32, sin_pi_f32, vec3, Affine3, Deg, HalfTurn, Mat3, Mat4,
+ Point3, Vec3,
};
use pipelines::primitive_2d::{GlyphInstance, Primitive2dPipeline};
use spring::simple_spring_damper_exact;
coarse_tile_bitmap_buffer: Buffer,
fine_tile_bitmap_buffer: Buffer,
+ fine_tile_color_buffer: Buffer,
glyph_atlas_image: Image,
ui_image: default(),
coarse_tile_bitmap_buffer: default(),
fine_tile_bitmap_buffer: default(),
+ fine_tile_color_buffer: default(),
glyph_atlas_image: default(),
samplers,
models,
{
gpu.destroy_buffer(frame, self.coarse_tile_bitmap_buffer);
gpu.destroy_buffer(frame, self.fine_tile_bitmap_buffer);
+ gpu.destroy_buffer(frame, self.fine_tile_color_buffer);
let coarse_bitmap_buffer_size = tile_resolution_coarse_x
* tile_resolution_coarse_y
* TILE_STRIDE_FINE
* std::mem::size_of::<u32>() as u32;
+ let fine_color_buffer_size = tile_resolution_fine_x
+ * tile_resolution_fine_y
+ * std::mem::size_of::<u32>() as u32;
+
self.coarse_tile_bitmap_buffer = gpu.create_buffer(&BufferDesc {
memory_location: MemoryLocation::Device,
host_mapped: false,
size: fine_bitmap_buffer_size.widen(),
});
+ self.fine_tile_color_buffer = gpu.create_buffer(&BufferDesc {
+ memory_location: MemoryLocation::Device,
+ host_mapped: false,
+ usage: BufferUsageFlags::STORAGE,
+ size: fine_color_buffer_size.widen(),
+ });
+
self.tile_resolution_coarse_x = tile_resolution_coarse_x;
self.tile_resolution_coarse_y = tile_resolution_coarse_y;
self.tile_resolution_fine_x = tile_resolution_fine_x;
Bind {
binding: 7,
array_element: 0,
+ typed: TypedBind::StorageBuffer(&[self
+ .fine_tile_color_buffer
+ .to_arg()]),
+ },
+ Bind {
+ binding: 8,
+ array_element: 0,
typed: TypedBind::StorageImage(&[(
ImageLayout::General,
self.ui_image,
self.tile_resolution_coarse_y,
);
+ // Zero the per-fine-tile word counts before fine binning accumulates into
+ // them with atomicAdd.
+ gpu.cmd_set_pipeline(cmd_encoder, self.primitive_2d_pipeline.fine_clear_pipeline);
+
+ // The clear shader handles one fine tile per invocation with 64 invocations
+ // per workgroup, and the count buffer holds one u32 per fine tile, so the
+ // dispatch must be sized by the fine tile grid rather than the coarse one.
+ gpu.cmd_dispatch(
+     cmd_encoder,
+     (self.tile_resolution_fine_x * self.tile_resolution_fine_y + 63) / 64,
+     1,
+     1,
+ );
+
gpu.cmd_barrier(
cmd_encoder,
Some(&GlobalBarrier {
swapchain_image,
)]),
},
+ Bind {
+ binding: 5,
+ array_element: 0,
+ typed: TypedBind::StorageBuffer(&[self
+ .fine_tile_color_buffer
+ .to_arg()]),
+ },
],
);
let tick_duration = Instant::now() - tick_start;
+ let (base_x, base_y) = sin_cos_pi_f32(game_state.time);
+ let base_x = (base_x + 1.0) * 0.5;
+ let base_y = (base_y + 1.0) * 0.5;
+
for i in 0..80 {
let i = i as f32;
ui_state.text_fmt(
- 5.0,
- i * 15.0 * scale,
+ base_x * 100.0 * scale + 5.0,
+ base_y * 100.0 * scale + i * 15.0 * scale,
FontFamily::RobotoRegular,
40.0,
format_args!("tick: {:?}", tick_duration),
let x = 200.0 + j * 200.0;
let y = 100.0 + j * 100.0;
ui_state.text_fmt(
- x * scale,
- (y + i * 15.0) * scale,
+ base_x * 100.0 * scale +x * scale,
+ base_y * 100.0 * scale +(y + i * 15.0) * scale,
FontFamily::NotoSansJapanese,
15.0,
format_args!(
use narcissus_gpu::{
BindDesc, BindGroupLayout, BindingType, ComputePipelineDesc, Pipeline, PipelineLayout,
- ShaderDesc, ShaderStageFlags,
+ PushConstantRange, ShaderDesc, ShaderStageFlags,
};
use crate::Gpu;
+use super::primitive_2d::PrimitiveUniforms;
+
pub struct DisplayTransformPipeline {
pub bind_group_layout: BindGroupLayout,
pub pipeline: Pipeline,
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
// Composited Output
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
+ // Fine tile count buffer (bound as fine_count_ro in the shader)
+ BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
]);
let layout = &PipelineLayout {
bind_group_layouts: &[bind_group_layout],
- push_constant_ranges: &[],
+ push_constant_ranges: &[PushConstantRange {
+ stage_flags: ShaderStageFlags::COMPUTE,
+ offset: 0,
+ size: std::mem::size_of::<PrimitiveUniforms>() as u32,
+ }],
};
let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
pub bind_group_layout: BindGroupLayout,
pub coarse_bin_pipeline: Pipeline,
pub fine_bin_pipeline: Pipeline,
+ pub fine_clear_pipeline: Pipeline,
pub rasterize_pipeline: Pipeline,
}
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
// Fine Tiles
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
+ // Fine Tile Counts
+ BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageBuffer),
// UI Image Output
BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
]);
layout,
});
+ let fine_clear_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
+ shader: ShaderDesc {
+ entry: c"main",
+ code: shark_shaders::PRIMITIVE_2D_CLEAR_FINE_COMP_SPV,
+ },
+ layout,
+ });
+
let rasterize_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
shader: ShaderDesc {
entry: c"main",
bind_group_layout,
coarse_bin_pipeline,
fine_bin_pipeline,
+ fine_clear_pipeline,
rasterize_pipeline,
}
}