[[package]]
name = "shark-shaders"
version = "0.1.0"
+dependencies = [
+ "narcissus-core",
+ "narcissus-font",
+ "narcissus-gpu",
+ "narcissus-maths",
+]
[[package]]
name = "sqlite-sys"
pub use mapped_buffer::{PersistentBuffer, TransientBuffer};
+pub type Gpu = dyn Device + 'static;
+
pub enum DeviceBackend {
Vulkan,
}
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
+narcissus-core = { path = "../../engine/narcissus-core" }
+narcissus-font = { path = "../../engine/narcissus-font" }
+narcissus-maths = { path = "../../engine/narcissus-maths" }
+narcissus-gpu = { path = "../../engine/narcissus-gpu" }
\ No newline at end of file
+pub mod pipelines;
+
include!(concat!(env!("OUT_DIR"), "/shaders.rs"));
use narcissus_font::TouchedGlyphIndex;
use narcissus_gpu::{
BindDesc, BindGroupLayout, BindingType, BlendMode, BufferAddress, CompareOp,
- ComputePipelineDesc, CullingMode, FrontFace, GraphicsPipelineAttachments, GraphicsPipelineDesc,
- ImageFormat, Pipeline, PipelineLayout, PolygonMode, PushConstantRange, Sampler,
- SamplerAddressMode, SamplerDesc, SamplerFilter, ShaderDesc, ShaderStageFlags, SpecConstant,
- Topology,
+ ComputePipelineDesc, CullingMode, FrontFace, Gpu, GraphicsPipelineAttachments,
+ GraphicsPipelineDesc, ImageFormat, Pipeline, PipelineLayout, PolygonMode, PushConstantRange,
+ Sampler, SamplerAddressMode, SamplerDesc, SamplerFilter, ShaderDesc, ShaderStageFlags,
+ SpecConstant, Topology,
};
use narcissus_maths::Mat4;
-use crate::Gpu;
-
pub const DRAW_2D_TILE_SIZE: u32 = 32;
#[allow(unused)]
pub dst_buffer_address: BufferAddress<'a>,
}
+pub const RADIX_ITEMS_PER_WGP: usize = 4096;
+pub const RADIX_DIGITS: usize = 256;
+
+pub fn calcuate_workgroup_count(count: usize) -> usize {
+ (count + (RADIX_ITEMS_PER_WGP - 1)) / RADIX_ITEMS_PER_WGP
+}
+
+/// Returns the size of the spine required to sort the given count in units of u32 words.
+pub fn calculate_spine_size(count: usize) -> usize {
+ calcuate_workgroup_count(count) * RADIX_DIGITS
+}
+
pub struct Pipelines {
_samplers: Samplers,
let basic_pipeline = gpu.create_graphics_pipeline(&GraphicsPipelineDesc {
vertex_shader: ShaderDesc {
- code: shark_shaders::BASIC_VERT_SPV,
+ code: crate::BASIC_VERT_SPV,
..default()
},
fragment_shader: ShaderDesc {
- code: shark_shaders::BASIC_FRAG_SPV,
+ code: crate::BASIC_FRAG_SPV,
..default()
},
layout: PipelineLayout {
};
let draw_2d_bin_0_clear_pipeline = create_compute_pipeline(
- shark_shaders::DRAW_2D_BIN_0_CLEAR_COMP_SPV,
+ crate::DRAW_2D_BIN_0_CLEAR_COMP_SPV,
"draw2d_bin_clear",
0,
std::mem::size_of::<Draw2dClearConstants>(),
let draw_2d_bin_1_scatter_pipeline_workgroup_size = 32;
let draw_2d_bin_1_scatter_pipeline = create_compute_pipeline(
- shark_shaders::DRAW_2D_BIN_1_SCATTER_COMP_SPV,
+ crate::DRAW_2D_BIN_1_SCATTER_COMP_SPV,
"draw2d_bin_scatter",
draw_2d_bin_1_scatter_pipeline_workgroup_size,
std::mem::size_of::<Draw2dScatterConstants>(),
);
let draw_2d_bin_2_sort_pipeline = create_compute_pipeline(
- shark_shaders::DRAW_2D_BIN_2_SORT_COMP_SPV,
+ crate::DRAW_2D_BIN_2_SORT_COMP_SPV,
"draw2d_bin_sort",
0,
std::mem::size_of::<Draw2dSortConstants>(),
);
let draw_2d_bin_3_resolve_pipeline = create_compute_pipeline(
- shark_shaders::DRAW_2D_BIN_3_RESOLVE_COMP_SPV,
+ crate::DRAW_2D_BIN_3_RESOLVE_COMP_SPV,
"draw2d_bin_resolve",
0,
0,
);
- let draw_2d_rasterize_pipeline = create_compute_pipeline(
- shark_shaders::DRAW_2D_RASTERIZE_COMP_SPV,
- "draw2d_rasterize",
- 0,
- 0,
- );
+ let draw_2d_rasterize_pipeline =
+ create_compute_pipeline(crate::DRAW_2D_RASTERIZE_COMP_SPV, "draw2d_rasterize", 0, 0);
let radix_sort_0_upsweep_pipeline = create_compute_pipeline(
- shark_shaders::RADIX_SORT_0_UPSWEEP_COMP_SPV,
+ crate::RADIX_SORT_0_UPSWEEP_COMP_SPV,
"radix_sort_upsweep",
32,
std::mem::size_of::<RadixSortUpsweepConstants>(),
);
let radix_sort_1_spine_pipeline = create_compute_pipeline(
- shark_shaders::RADIX_SORT_1_SPINE_COMP_SPV,
+ crate::RADIX_SORT_1_SPINE_COMP_SPV,
"radix_sort_spine",
32,
std::mem::size_of::<RadixSortSpineConstants>(),
);
let radix_sort_2_downsweep_pipeline = create_compute_pipeline(
- shark_shaders::RADIX_SORT_2_DOWNSWEEP_COMP_SPV,
+ crate::RADIX_SORT_2_DOWNSWEEP_COMP_SPV,
"radix_sort_downsweep",
32,
std::mem::size_of::<RadixSortDownsweepConstants>(),
);
let composite_pipeline =
- create_compute_pipeline(shark_shaders::COMPOSITE_COMP_SPV, "composite", 0, 0);
+ create_compute_pipeline(crate::COMPOSITE_COMP_SPV, "composite", 0, 0);
Self {
_samplers: samplers,
use narcissus_core::{obj, Widen};
use narcissus_maths::{vec2, vec3, vec4, Vec2, Vec3};
-use crate::pipelines::Vertex;
+use shark_shaders::pipelines::Vertex;
pub fn load_obj<P: AsRef<Path>>(path: P) -> (Vec<Vertex>, Vec<u16>) {
#[derive(Default)]
use narcissus_core::dds;
-use pipelines::{
- BasicConstants, ComputeBinds, Draw2dClearConstants, Draw2dCmd, Draw2dScatterConstants,
- Draw2dSortConstants, GraphicsBinds, Pipelines, RadixSortDownsweepConstants,
- RadixSortSpineConstants, RadixSortUpsweepConstants, DRAW_2D_TILE_SIZE,
+use shark_shaders::pipelines::{
+ calculate_spine_size, BasicConstants, ComputeBinds, Draw2dClearConstants, Draw2dCmd,
+ Draw2dScatterConstants, Draw2dSortConstants, GraphicsBinds, Pipelines,
+ RadixSortDownsweepConstants, RadixSortSpineConstants, RadixSortUpsweepConstants,
+ DRAW_2D_TILE_SIZE,
};
use renderdoc_sys as rdoc;
use narcissus_font::{FontCollection, GlyphCache, HorizontalMetrics};
use narcissus_gpu::{
create_device, Access, Bind, BufferImageCopy, BufferUsageFlags, ClearValue, CmdEncoder,
- ColorSpace, Device, DeviceExt, Extent2d, Extent3d, Frame, GlobalBarrier, Image,
- ImageAspectFlags, ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout,
- ImageSubresourceRange, ImageTiling, ImageUsageFlags, IndexType, LoadOp, MemoryLocation,
- Offset2d, PersistentBuffer, PresentMode, RenderingAttachment, RenderingDesc, Scissor,
- ShaderStageFlags, StoreOp, SwapchainConfigurator, SwapchainImage, ThreadToken, TypedBind,
- Viewport,
+ ColorSpace, DeviceExt, Extent2d, Extent3d, Frame, GlobalBarrier, Gpu, Image, ImageAspectFlags,
+ ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout, ImageSubresourceRange,
+ ImageTiling, ImageUsageFlags, IndexType, LoadOp, MemoryLocation, Offset2d, PersistentBuffer,
+ PresentMode, RenderingAttachment, RenderingDesc, Scissor, ShaderStageFlags, StoreOp,
+ SwapchainConfigurator, SwapchainImage, ThreadToken, TypedBind, Viewport,
};
use narcissus_image as image;
use narcissus_maths::{
mod fonts;
mod helpers;
pub mod microshades;
-mod pipelines;
mod spring;
const SQRT_2: f32 = 0.70710677;
}
}
-type Gpu = dyn Device + 'static;
-
struct DrawState<'gpu> {
gpu: &'gpu Gpu,
3 * std::mem::size_of::<u32>(),
);
- let sort_tmp_buffer = gpu.request_transient_buffer(
+ let tmp_buffer = gpu.request_transient_buffer(
frame,
thread_token,
BufferUsageFlags::STORAGE,
frame,
thread_token,
BufferUsageFlags::STORAGE,
- (COARSE_BUFFER_LEN / (32 * 16)) * 256 * std::mem::size_of::<u32>(), // TODO: Fix size
+ calculate_spine_size(COARSE_BUFFER_LEN) * std::mem::size_of::<u32>(), // TODO: Fix size
);
let draw_buffer_address = gpu.get_buffer_address(draw_buffer.to_arg());
let coarse_buffer_address = gpu.get_buffer_address(coarse_buffer.to_arg());
let indirect_dispatch_buffer_address =
gpu.get_buffer_address(indirect_dispatch_buffer.to_arg());
- let sort_tmp_buffer_address = gpu.get_buffer_address(sort_tmp_buffer.to_arg());
+ let tmp_buffer_address = gpu.get_buffer_address(tmp_buffer.to_arg());
let spine_buffer_address = gpu.get_buffer_address(spine_buffer.to_arg());
gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.draw_2d_bin_0_clear_pipeline);
&[],
);
- // let mut sort_data = Vec::new();
- // let count = 8192u32;
- // sort_data.push(count);
- // for i in 0..count {
- // sort_data.push(255 - i / 32);
- // }
-
- // let sort_buffer = gpu.request_transient_buffer_with_data(
- // frame,
- // thread_token,
- // BufferUsageFlags::STORAGE,
- // sort_data.as_slice(),
- // );
- // let sort_buffer_address = gpu.get_buffer_address(sort_buffer.to_arg());
-
gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.draw_2d_bin_2_sort_pipeline);
gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
gpu.cmd_push_constants(
let count_buffer_address = coarse_buffer_address;
// Then the elements we want to sort follow.
let mut src_buffer_address = count_buffer_address.byte_add(4);
- let mut dst_buffer_address = sort_tmp_buffer_address;
+ let mut dst_buffer_address = tmp_buffer_address;
for pass in 0..4 {
let shift = pass * 8;
--- /dev/null
+use narcissus_core::rand::Pcg64;
+use narcissus_gpu::{
+ create_device, Access, BufferDesc, BufferUsageFlags, DeviceExt, GlobalBarrier, MemoryLocation,
+ ShaderStageFlags, ThreadToken,
+};
+use shark_shaders::pipelines::{
+ calcuate_workgroup_count, calculate_spine_size, Pipelines, RadixSortDownsweepConstants,
+ RadixSortSpineConstants, RadixSortUpsweepConstants,
+};
+
+fn gpu_sort(values: &mut [u32]) {
+ let gpu = create_device(narcissus_gpu::DeviceBackend::Vulkan);
+ let gpu = gpu.as_ref();
+
+ let pipelines = Pipelines::load(gpu);
+
+ let count_buffer = gpu.create_persistent_buffer_with_data(
+ MemoryLocation::Device,
+ BufferUsageFlags::STORAGE,
+ &(values.len() as u32),
+ );
+
+ let sort_buffer = gpu.create_persistent_buffer_with_data(
+ MemoryLocation::Device,
+ BufferUsageFlags::STORAGE,
+ values,
+ );
+
+ let tmp_buffer = gpu.create_buffer(&BufferDesc {
+ memory_location: MemoryLocation::Device,
+ host_mapped: false,
+ usage: BufferUsageFlags::STORAGE,
+ size: std::mem::size_of_val(values),
+ });
+
+ let spine_buffer = gpu.create_buffer(&BufferDesc {
+ memory_location: MemoryLocation::Device,
+ host_mapped: false,
+ usage: BufferUsageFlags::STORAGE,
+ size: calculate_spine_size(values.len()) * std::mem::size_of::<u32>(),
+ });
+
+ let count_buffer_address = gpu.get_buffer_address(count_buffer.to_arg());
+ let spine_buffer_address = gpu.get_buffer_address(spine_buffer.to_arg());
+ let mut src_buffer_address = gpu.get_buffer_address(sort_buffer.to_arg());
+ let mut dst_buffer_address = gpu.get_buffer_address(tmp_buffer.to_arg());
+
+ let thread_token = ThreadToken::new();
+ let thread_token = &thread_token;
+ let frame = gpu.begin_frame();
+ {
+ let frame = &frame;
+ let mut cmd_encoder = gpu.request_cmd_encoder(frame, thread_token);
+
+ {
+ let cmd_encoder = &mut cmd_encoder;
+
+ for pass in 0..4 {
+ let shift = pass * 8;
+
+ // Upsweep
+ gpu.cmd_set_pipeline(cmd_encoder, pipelines.radix_sort_0_upsweep_pipeline);
+ gpu.cmd_push_constants(
+ cmd_encoder,
+ ShaderStageFlags::COMPUTE,
+ 0,
+ &RadixSortUpsweepConstants {
+ shift,
+ _pad: 0,
+ count_buffer_address,
+ src_buffer_address,
+ spine_buffer_address,
+ },
+ );
+ gpu.cmd_dispatch(
+ cmd_encoder,
+ calcuate_workgroup_count(values.len()) as u32,
+ 1,
+ 1,
+ );
+
+ gpu.cmd_barrier(
+ cmd_encoder,
+ Some(&GlobalBarrier {
+ prev_access: &[Access::ComputeWrite],
+ next_access: &[Access::ComputeOtherRead],
+ }),
+ &[],
+ );
+
+ // Exclusive sum of the spine
+ gpu.cmd_set_pipeline(cmd_encoder, pipelines.radix_sort_1_spine_pipeline);
+ gpu.cmd_push_constants(
+ cmd_encoder,
+ ShaderStageFlags::COMPUTE,
+ 0,
+ &RadixSortSpineConstants {
+ count_buffer_address,
+ spine_buffer_address,
+ },
+ );
+ gpu.cmd_dispatch(cmd_encoder, 1, 1, 1);
+
+ gpu.cmd_barrier(
+ cmd_encoder,
+ Some(&GlobalBarrier {
+ prev_access: &[Access::ComputeWrite],
+ next_access: &[Access::ComputeOtherRead],
+ }),
+ &[],
+ );
+
+ // Downsweep
+ gpu.cmd_set_pipeline(cmd_encoder, pipelines.radix_sort_2_downsweep_pipeline);
+ gpu.cmd_push_constants(
+ cmd_encoder,
+ ShaderStageFlags::COMPUTE,
+ 0,
+ &RadixSortDownsweepConstants {
+ shift,
+ _pad: 0,
+ count_buffer_address,
+ spine_buffer_address,
+ src_buffer_address,
+ dst_buffer_address,
+ },
+ );
+ gpu.cmd_dispatch(
+ cmd_encoder,
+ calcuate_workgroup_count(values.len()) as u32,
+ 1,
+ 1,
+ );
+
+ gpu.cmd_barrier(
+ cmd_encoder,
+ Some(&GlobalBarrier {
+ prev_access: &[Access::ComputeWrite],
+ next_access: &[Access::ComputeOtherRead],
+ }),
+ &[],
+ );
+
+ std::mem::swap(&mut src_buffer_address, &mut dst_buffer_address);
+ }
+ }
+
+ gpu.submit(frame, cmd_encoder);
+ }
+
+ gpu.end_frame(frame);
+
+ gpu.wait_idle();
+
+ unsafe { sort_buffer.copy_to_slice(values) };
+}
+
+// This test requires a GPU, so ignore the test by default.
+#[ignore]
+#[test]
+pub fn sort_random_input() {
+ let mut rng = Pcg64::new();
+
+ let count = 15 * 1024 * 1024 + 3;
+
+ let mut values = vec![];
+ values.reserve_exact(count);
+ for _ in 0..count / 2 {
+ let i = rng.next_u64();
+ values.push((i & 0xffff_ffff) as u32);
+ values.push(((i >> 32) & 0xffff_ffff) as u32);
+ }
+
+ values.push((rng.next_u64() & 0xffff_ffff) as u32);
+
+ gpu_sort(&mut values);
+
+ assert!(values.is_sorted());
+}
+
+#[ignore]
+#[test]
+pub fn sort_single() {
+ let mut values = vec![5];
+ let mut sorted = values.clone();
+ sorted.sort();
+
+ gpu_sort(&mut values);
+
+ assert!(values == sorted);
+}
+
+#[ignore]
+#[test]
+pub fn sort_double() {
+ let mut values = vec![u32::MAX, 0];
+ let mut sorted = values.clone();
+ sorted.sort();
+
+ gpu_sort(&mut values);
+
+ assert!(values.is_sorted());
+}
+
+#[ignore]
+#[test]
+pub fn sort_short_input() {
+ let mut values = vec![5, 4, 3, 2, 1];
+ let mut sorted = values.clone();
+ sorted.sort();
+
+ assert!(!values.is_sorted());
+
+ gpu_sort(&mut values);
+
+ assert!(values.is_sorted());
+ assert!(values == sorted);
+}
+
+#[ignore]
+#[test]
+pub fn sort_u32_max() {
+ let mut values = vec![u32::MAX; 10_000];
+
+ gpu_sort(&mut values);
+
+ assert!(values.is_sorted());
+ assert!(values.iter().all(|&x| x == u32::MAX));
+}
+
+#[ignore]
+#[test]
+pub fn sort_u32_zero() {
+ let mut values = vec![0; 10_000];
+
+ gpu_sort(&mut values);
+
+ assert!(values.is_sorted());
+ assert!(values.iter().all(|&x| x == 0));
+}