Add radix sort pipelines.
Add buffer address abstraction.
Add spec constants.
Add subgroup size control.
Add `cmd_dispatch_indirect`.
Change bind group API to separate allocation and binding.
Surely I won't regret the mega-commit...
[[package]]
name = "libc"
-version = "0.2.150"
+version = "0.2.161"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c"
+checksum = "8e9489c2807c139ffd9c1794f4af0ebe86a828db53ecdc7fea2111d0fed085d1"
[[package]]
name = "memchr"
};
use narcissus_core::{
- box_assume_init, default, is_aligned_to,
+ default, is_aligned_to,
manual_arc::{self, ManualArc},
raw_window::AsRawWindow,
- zeroed_box, Arc, Arena, HybridArena, Mutex, PhantomUnsend, Pool, Widen,
+ Arc, Arena, HybridArena, Mutex, PhantomUnsend, Pool, Widen,
};
-use vulkan_sys as vk;
+use vulkan_sys::{self as vk};
use crate::{
- frame_counter::FrameCounter, Bind, BindDesc, BindGroupLayout, BindingType, Buffer, BufferArg,
- BufferDesc, BufferImageCopy, BufferUsageFlags, CmdEncoder, ComputePipelineDesc, Device,
- Extent2d, Extent3d, Frame, GlobalBarrier, GpuConcurrent, GraphicsPipelineDesc, Image,
- ImageBarrier, ImageBlit, ImageDesc, ImageDimension, ImageLayout, ImageTiling, ImageViewDesc,
- IndexType, MemoryLocation, Offset2d, Offset3d, PersistentBuffer, Pipeline, PipelineLayout,
- Sampler, SamplerAddressMode, SamplerCompareOp, SamplerDesc, SamplerFilter, ShaderStageFlags,
+ frame_counter::FrameCounter, mapped_buffer::TransientBindGroup, Bind, BindDesc,
+ BindGroupLayout, BindingType, Buffer, BufferAddress, BufferArg, BufferDesc, BufferImageCopy,
+ BufferUsageFlags, CmdEncoder, ComputePipelineDesc, Device, Extent2d, Extent3d, Frame,
+ GlobalBarrier, GpuConcurrent, GraphicsPipelineDesc, Image, ImageBarrier, ImageBlit, ImageDesc,
+ ImageDimension, ImageLayout, ImageTiling, ImageViewDesc, IndexType, MemoryLocation, Offset2d,
+ Offset3d, PersistentBuffer, Pipeline, PipelineLayout, Sampler, SamplerAddressMode,
+ SamplerCompareOp, SamplerDesc, SamplerFilter, ShaderStageFlags, SpecConstant,
SwapchainConfigurator, SwapchainImage, SwapchainOutOfDateError, ThreadToken, TransientBuffer,
TypedBind,
};
physical_device_properties: Box<vk::PhysicalDeviceProperties2>,
_physical_device_properties_11: Box<vk::PhysicalDeviceVulkan11Properties>,
_physical_device_properties_12: Box<vk::PhysicalDeviceVulkan12Properties>,
- _physical_device_properties_13: Box<vk::PhysicalDeviceVulkan13Properties>,
+ physical_device_properties_13: Box<vk::PhysicalDeviceVulkan13Properties>,
+ physical_device_memory_properties: Box<vk::PhysicalDeviceMemoryProperties>,
+
_physical_device_features: Box<vk::PhysicalDeviceFeatures2>,
_physical_device_features_11: Box<vk::PhysicalDeviceVulkan11Features>,
_physical_device_features_12: Box<vk::PhysicalDeviceVulkan12Features>,
_physical_device_features_13: Box<vk::PhysicalDeviceVulkan13Features>,
- physical_device_memory_properties: Box<vk::PhysicalDeviceMemoryProperties>,
_global_fn: vk::GlobalFunctions,
instance_fn: vk::InstanceFunctions,
instance_fn.enumerate_physical_devices(instance, count, ptr)
});
- let mut physical_device_properties =
- unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceProperties2>()) };
- let mut physical_device_properties_11 =
- unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceVulkan11Properties>()) };
- let mut physical_device_properties_12 =
- unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceVulkan12Properties>()) };
- let mut physical_device_properties_13 =
- unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceVulkan13Properties>()) };
-
- physical_device_properties._type = vk::StructureType::PhysicalDeviceProperties2;
- physical_device_properties_11._type = vk::StructureType::PhysicalDeviceVulkan11Properties;
- physical_device_properties_12._type = vk::StructureType::PhysicalDeviceVulkan12Properties;
- physical_device_properties_13._type = vk::StructureType::PhysicalDeviceVulkan13Properties;
-
- physical_device_properties_12._next = physical_device_properties_13.as_mut()
- as *mut vk::PhysicalDeviceVulkan13Properties
- as *mut _;
- physical_device_properties_11._next = physical_device_properties_12.as_mut()
- as *mut vk::PhysicalDeviceVulkan12Properties
- as *mut _;
- physical_device_properties._next = physical_device_properties_11.as_mut()
- as *mut vk::PhysicalDeviceVulkan11Properties
- as *mut _;
-
- let mut physical_device_features =
- unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceFeatures2>()) };
- let mut physical_device_features_11 =
- unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceVulkan11Features>()) };
- let mut physical_device_features_12 =
- unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceVulkan12Features>()) };
- let mut physical_device_features_13 =
- unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceVulkan13Features>()) };
-
- physical_device_features._type = vk::StructureType::PhysicalDeviceFeatures2;
- physical_device_features_11._type = vk::StructureType::PhysicalDeviceVulkan11Features;
- physical_device_features_12._type = vk::StructureType::PhysicalDeviceVulkan12Features;
- physical_device_features_13._type = vk::StructureType::PhysicalDeviceVulkan13Features;
-
- physical_device_features_12._next = physical_device_features_13.as_mut()
- as *mut vk::PhysicalDeviceVulkan13Features
- as *mut _;
- physical_device_features_11._next = physical_device_features_12.as_mut()
- as *mut vk::PhysicalDeviceVulkan12Features
- as *mut _;
- physical_device_features._next = physical_device_features_11.as_mut()
- as *mut vk::PhysicalDeviceVulkan11Features
- as *mut _;
+ let mut physical_device_properties: Box<vk::PhysicalDeviceProperties2> = default();
+ let mut physical_device_properties_11: Box<vk::PhysicalDeviceVulkan11Properties> =
+ default();
+ let mut physical_device_properties_12: Box<vk::PhysicalDeviceVulkan12Properties> =
+ default();
+ let mut physical_device_properties_13: Box<vk::PhysicalDeviceVulkan13Properties> =
+ default();
+
+ physical_device_properties_12._next =
+ physical_device_properties_13.as_mut() as *mut _ as *mut _;
+ physical_device_properties_11._next =
+ physical_device_properties_12.as_mut() as *mut _ as *mut _;
+ physical_device_properties._next =
+ physical_device_properties_11.as_mut() as *mut _ as *mut _;
+
+ let mut physical_device_features: Box<vk::PhysicalDeviceFeatures2> = default();
+ let mut physical_device_features_11: Box<vk::PhysicalDeviceVulkan11Features> = default();
+ let mut physical_device_features_12: Box<vk::PhysicalDeviceVulkan12Features> = default();
+ let mut physical_device_features_13: Box<vk::PhysicalDeviceVulkan13Features> = default();
+
+ physical_device_features_12._next =
+ physical_device_features_13.as_mut() as *mut _ as *mut _;
+ physical_device_features_11._next =
+ physical_device_features_12.as_mut() as *mut _ as *mut _;
+ physical_device_features._next = physical_device_features_11.as_mut() as *mut _ as *mut _;
let physical_device = physical_devices
.iter()
physical_device_properties.properties.api_version >= vk::VERSION_1_3
&& physical_device_features_13.dynamic_rendering == vk::Bool32::True
+ && physical_device_features_13.subgroup_size_control == vk::Bool32::True
+ && physical_device_features_13.maintenance4 == vk::Bool32::True
+ && physical_device_features_13.compute_full_subgroups == vk::Bool32::True
&& physical_device_features_12.timeline_semaphore == vk::Bool32::True
&& physical_device_features_12.descriptor_indexing == vk::Bool32::True
&& physical_device_features_12.descriptor_binding_partially_bound
let enabled_features_13 = vk::PhysicalDeviceVulkan13Features {
dynamic_rendering: vk::Bool32::True,
synchronization2: vk::Bool32::True,
+ subgroup_size_control: vk::Bool32::True,
+ compute_full_subgroups: vk::Bool32::True,
+ maintenance4: vk::Bool32::True,
..default()
};
let enabled_features_12 = vk::PhysicalDeviceVulkan12Features {
physical_device_properties,
_physical_device_properties_11: physical_device_properties_11,
_physical_device_properties_12: physical_device_properties_12,
- _physical_device_properties_13: physical_device_properties_13,
+ physical_device_properties_13,
+ physical_device_memory_properties,
+
_physical_device_features: physical_device_features,
_physical_device_features_11: physical_device_features_11,
_physical_device_features_12: physical_device_features_12,
_physical_device_features_13: physical_device_features_13,
- physical_device_memory_properties,
_global_fn: global_fn,
instance_fn,
hasher.update(&bind_desc.count.to_le_bytes());
}
- let layout_bindings =
- arena.alloc_slice_fill_iter(binds_desc.iter().enumerate().map(|(i, bind_desc)| {
+ let layout_bindings: &mut [vulkan_sys::DescriptorSetLayoutBinding] = arena
+ .alloc_slice_fill_iter(binds_desc.iter().enumerate().map(|(i, bind_desc)| {
let immutable_samplers = if !bind_desc.immutable_samplers.is_empty() {
assert_eq!(
bind_desc.binding_type,
}
fn create_graphics_pipeline(&self, pipeline_desc: &GraphicsPipelineDesc) -> Pipeline {
- let pipeline_layout = self.cache_pipeline_layout(pipeline_desc.layout);
+ let pipeline_layout = self.cache_pipeline_layout(&pipeline_desc.layout);
let arena = HybridArena::<1024>::new();
pipeline_desc.fragment_shader.code,
);
+ assert!(
+ !(pipeline_desc.vertex_shader.required_subgroup_size.is_some()
+ || pipeline_desc
+ .fragment_shader
+ .required_subgroup_size
+ .is_some()
+ || pipeline_desc.vertex_shader.allow_varying_subgroup_size
+ || pipeline_desc.fragment_shader.allow_varying_subgroup_size),
+ "subgroup size control features not implemented for graphics shader stages"
+ );
+
let stages = &[
vk::PipelineShaderStageCreateInfo {
stage: vk::ShaderStageFlags::VERTEX,
}
fn create_compute_pipeline(&self, pipeline_desc: &ComputePipelineDesc) -> Pipeline {
- let pipeline_layout = self.cache_pipeline_layout(pipeline_desc.layout);
+ let arena = HybridArena::<1024>::new();
+
+ let pipeline_layout = self.cache_pipeline_layout(&pipeline_desc.layout);
let module = vulkan_shader_module(&self.device_fn, self.device, pipeline_desc.shader.code);
- let stage = vk::PipelineShaderStageCreateInfo {
- stage: vk::ShaderStageFlags::COMPUTE,
- name: pipeline_desc.shader.entry.as_ptr(),
- module,
- ..default()
- };
+ let mut shader_stage_create_flags = default();
+
+ if pipeline_desc.shader.require_full_subgroups {
+ shader_stage_create_flags |= vk::PipelineShaderStageCreateFlags::REQUIRE_FULL_SUBGROUPS
+ }
+
+ if pipeline_desc.shader.allow_varying_subgroup_size {
+ shader_stage_create_flags |=
+ vk::PipelineShaderStageCreateFlags::ALLOW_VARYING_SUBGROUP_SIZE;
+ }
- let create_infos = &[vk::ComputePipelineCreateInfo {
+ let specialization_info: Option<&vk::SpecializationInfo> =
+ if !pipeline_desc.shader.spec_constants.is_empty() {
+ let block_len = pipeline_desc
+ .shader
+ .spec_constants
+ .iter()
+ .map(|spec_constant| match spec_constant {
+ SpecConstant::Bool { id: _, value: _ }
+ | SpecConstant::U32 { id: _, value: _ }
+ | SpecConstant::I32 { id: _, value: _ }
+ | SpecConstant::F32 { id: _, value: _ } => 4,
+ })
+ .sum::<usize>();
+
+ let block = arena.alloc_slice_fill_copy(block_len, 0u8);
+
+ let mut offset = 0;
+ let map_entries =
+ arena.alloc_slice_fill_iter(pipeline_desc.shader.spec_constants.iter().map(
+ |spec_constant| {
+ let constant_id;
+ let value_size;
+ match *spec_constant {
+ SpecConstant::Bool { id, value } => {
+ constant_id = id;
+ let value = if value {
+ vk::Bool32::True
+ } else {
+ vk::Bool32::False
+ } as u32;
+ value_size = std::mem::size_of_val(&value);
+ block[offset..offset + value_size]
+ .copy_from_slice(&value.to_ne_bytes())
+ }
+ SpecConstant::U32 { id, value } => {
+ constant_id = id;
+ value_size = std::mem::size_of_val(&value);
+ block[offset..offset + value_size]
+ .copy_from_slice(&value.to_ne_bytes());
+ }
+ SpecConstant::I32 { id, value } => {
+ constant_id = id;
+ value_size = std::mem::size_of_val(&value);
+ block[offset..offset + value_size]
+ .copy_from_slice(&value.to_ne_bytes());
+ }
+ SpecConstant::F32 { id, value } => {
+ constant_id = id;
+ value_size = std::mem::size_of_val(&value);
+ block[offset..offset + value_size]
+ .copy_from_slice(&value.to_ne_bytes());
+ }
+ }
+
+ let map_entry = vk::SpecializationMapEntry {
+ constant_id,
+ offset: offset as u32,
+ size: value_size,
+ };
+
+ offset += value_size;
+
+ map_entry
+ },
+ ));
+
+ Some(arena.alloc(vk::SpecializationInfo {
+ data: block.into(),
+ map_entries: map_entries.into(),
+ }))
+ } else {
+ None
+ };
+
+ let compute_pipeline_create_info = arena.alloc(vk::ComputePipelineCreateInfo {
layout: pipeline_layout.pipeline_layout,
- stage,
+ stage: vk::PipelineShaderStageCreateInfo {
+ stage: vk::ShaderStageFlags::COMPUTE,
+ name: pipeline_desc.shader.entry.as_ptr(),
+ module,
+ flags: shader_stage_create_flags,
+ specialization_info,
+ ..default()
+ },
..default()
- }];
+ });
+
+ if let Some(required_subgroup_size) = pipeline_desc.shader.required_subgroup_size {
+ assert!(self
+ .physical_device_properties_13
+ .required_subgroup_size_stages
+ .contains(vk::ShaderStageFlags::COMPUTE));
+ assert!(
+ required_subgroup_size >= self.physical_device_properties_13.min_subgroup_size
+ && required_subgroup_size
+ <= self.physical_device_properties_13.max_subgroup_size
+ );
+
+ let shader_stage_required_subgroup_size_create_info =
+ arena.alloc(vk::PipelineShaderStageRequiredSubgroupSizeCreateInfo {
+ required_subgroup_size,
+ ..default()
+ });
+
+ // SAFETY: Both are arena allocations and therefore have identical lifetimes.
+ compute_pipeline_create_info.stage._next =
+ shader_stage_required_subgroup_size_create_info as *const _ as *const _;
+ }
let mut pipelines = [vk::Pipeline::null()];
vk_check!(self.device_fn.create_compute_pipelines(
self.device,
vk::PipelineCache::null(),
- create_infos,
+ std::slice::from_ref(compute_pipeline_create_info),
None,
&mut pipelines
));
}
}
+ fn debug_name_bind_group_layout(&self, bind_group_layout: BindGroupLayout, name: &str) {
+ #[cfg(feature = "debug_markers")]
+ if let Some(debug_utils_fn) = &self.debug_utils_fn {
+ let descriptor_set_layout;
+ {
+ let bind_group_layout_pool = self.bind_group_layout_pool.lock();
+ let Some(bind_group_layout) = bind_group_layout_pool.get(bind_group_layout.0)
+ else {
+ return;
+ };
+
+ descriptor_set_layout = bind_group_layout.descriptor_set_layout;
+ }
+
+ let arena = HybridArena::<512>::new();
+ let object_name = arena.alloc_cstr_from_str(name);
+
+ let image_name_info = vk::DebugUtilsObjectNameInfoExt {
+ object_type: vk::ObjectType::DescriptorSetLayout,
+ object_handle: descriptor_set_layout.as_raw(),
+ object_name: object_name.as_ptr(),
+ ..default()
+ };
+ unsafe { debug_utils_fn.set_debug_utils_object_name_ext(self.device, &image_name_info) }
+ }
+ }
+
+ fn debug_name_pipeline(&self, pipeline: Pipeline, name: &str) {
+ #[cfg(feature = "debug_markers")]
+ if let Some(debug_utils_fn) = &self.debug_utils_fn {
+ let pipeline_handle;
+ let pipeline_layout_handle;
+ {
+ let pipeline_pool = self.pipeline_pool.lock();
+ let Some(pipeline) = pipeline_pool.get(pipeline.0) else {
+ return;
+ };
+
+ pipeline_handle = pipeline.pipeline;
+ pipeline_layout_handle = pipeline.pipeline_layout.pipeline_layout;
+ }
+
+ let arena = HybridArena::<512>::new();
+ let object_name = arena.alloc_cstr_from_str(name);
+
+ let image_name_info = vk::DebugUtilsObjectNameInfoExt {
+ object_type: vk::ObjectType::Pipeline,
+ object_handle: pipeline_handle.as_raw(),
+ object_name: object_name.as_ptr(),
+ ..default()
+ };
+ unsafe { debug_utils_fn.set_debug_utils_object_name_ext(self.device, &image_name_info) }
+
+ let image_view_name_info = vk::DebugUtilsObjectNameInfoExt {
+ object_type: vk::ObjectType::PipelineLayout,
+ object_handle: pipeline_layout_handle.as_raw(),
+ object_name: object_name.as_ptr(),
+ ..default()
+ };
+ unsafe {
+ debug_utils_fn.set_debug_utils_object_name_ext(self.device, &image_view_name_info)
+ }
+ }
+ }
+
fn destroy_buffer(&self, frame: &Frame, buffer: Buffer) {
if let Some(buffer) = self.buffer_pool.lock().remove(buffer.0) {
assert_eq!(
self.request_transient_buffer(frame, thread_token, usage, size as u64)
}
- fn request_cmd_encoder<'a, 'thread>(
+ fn request_transient_bind_group<'a>(
&self,
- frame: &'a Frame,
+ frame: &'a Frame<'a>,
thread_token: &'a ThreadToken,
- ) -> CmdEncoder<'a> {
- let frame = self.frame(frame);
- let per_thread = frame.per_thread.get(thread_token);
- let mut cmd_buffer_pool = per_thread.cmd_buffer_pool.borrow_mut();
-
- // We have consumed all available command buffers, need to allocate a new one.
- if cmd_buffer_pool.next_free_index >= cmd_buffer_pool.command_buffers.len() {
- let mut cmd_buffers = [vk::CommandBuffer::null(); 4];
- let allocate_info = vk::CommandBufferAllocateInfo {
- command_pool: cmd_buffer_pool.command_pool,
- level: vk::CommandBufferLevel::Primary,
- command_buffer_count: cmd_buffers.len() as u32,
- ..default()
- };
- vk_check!(self.device_fn.allocate_command_buffers(
- self.device,
- &allocate_info,
- cmd_buffers.as_mut_ptr()
- ));
- cmd_buffer_pool.command_buffers.extend(cmd_buffers.iter());
- }
-
- let index = cmd_buffer_pool.next_free_index;
- cmd_buffer_pool.next_free_index += 1;
- let command_buffer = cmd_buffer_pool.command_buffers[index];
-
- vk_check!(self.device_fn.begin_command_buffer(
- command_buffer,
- &vk::CommandBufferBeginInfo {
- flags: vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT,
- ..default()
- }
- ));
-
- let vulkan_cmd_encoder = per_thread.arena.alloc(VulkanCmdEncoder {
- command_buffer,
- ..default()
- });
-
- CmdEncoder {
- cmd_encoder_addr: vulkan_cmd_encoder as *mut _ as usize,
- thread_token,
- phantom_unsend: PhantomUnsend {},
- }
- }
-
- fn cmd_insert_debug_marker(
- &self,
- cmd_encoder: &mut CmdEncoder,
- label_name: &str,
- color: [f32; 4],
- ) {
- #[cfg(feature = "debug_markers")]
- if let Some(debug_utils_fn) = &self.debug_utils_fn {
- let arena = HybridArena::<256>::new();
- let label_name = arena.alloc_cstr_from_str(label_name);
-
- let command_buffer = self.cmd_encoder_mut(cmd_encoder).command_buffer;
- let label_info = vk::DebugUtilsLabelExt {
- label_name: label_name.as_ptr(),
- color,
- ..default()
- };
- unsafe {
- debug_utils_fn.cmd_insert_debug_utils_label_ext(command_buffer, &label_info);
- }
- }
- }
-
- fn cmd_begin_debug_marker(
- &self,
- cmd_encoder: &mut CmdEncoder,
- label_name: &str,
- color: [f32; 4],
- ) {
- #[cfg(feature = "debug_markers")]
- if let Some(debug_utils_fn) = &self.debug_utils_fn {
- let arena = HybridArena::<256>::new();
- let label_name = arena.alloc_cstr_from_str(label_name);
-
- let command_buffer = self.cmd_encoder_mut(cmd_encoder).command_buffer;
- let label_info = vk::DebugUtilsLabelExt {
- label_name: label_name.as_ptr(),
- color,
- ..default()
- };
- unsafe {
- debug_utils_fn.cmd_begin_debug_utils_label_ext(command_buffer, &label_info);
- }
- }
- }
-
- fn cmd_end_debug_marker(&self, cmd_encoder: &mut CmdEncoder) {
- #[cfg(feature = "debug_markers")]
- if let Some(debug_utils_fn) = &self.debug_utils_fn {
- let command_buffer = self.cmd_encoder_mut(cmd_encoder).command_buffer;
- unsafe { debug_utils_fn.cmd_end_debug_utils_label_ext(command_buffer) }
- }
- }
-
- fn cmd_set_bind_group(
- &self,
- frame: &Frame,
- cmd_encoder: &mut CmdEncoder,
layout: BindGroupLayout,
- bind_group_index: u32,
bindings: &[Bind],
- ) {
+ ) -> TransientBindGroup<'a> {
let arena = HybridArena::<4096>::new();
let descriptor_set_layout = self
.descriptor_set_layout;
let frame = self.frame(frame);
- let per_thread = frame.per_thread.get(cmd_encoder.thread_token);
+ let per_thread = frame.per_thread.get(thread_token);
let mut descriptor_pool = per_thread.descriptor_pool.get();
let mut allocated_pool = false;
.update_descriptor_sets(self.device, write_descriptors, &[])
};
+ TransientBindGroup {
+ bind_group: descriptor_set.as_raw(),
+ phantom: PhantomData,
+ }
+ }
+
+ fn request_cmd_encoder<'a, 'thread>(
+ &self,
+ frame: &'a Frame,
+ thread_token: &'a ThreadToken,
+ ) -> CmdEncoder<'a> {
+ let frame = self.frame(frame);
+ let per_thread = frame.per_thread.get(thread_token);
+ let mut cmd_buffer_pool = per_thread.cmd_buffer_pool.borrow_mut();
+
+ // We have consumed all available command buffers, need to allocate a new one.
+ if cmd_buffer_pool.next_free_index >= cmd_buffer_pool.command_buffers.len() {
+ let mut cmd_buffers = [vk::CommandBuffer::null(); 4];
+ let allocate_info = vk::CommandBufferAllocateInfo {
+ command_pool: cmd_buffer_pool.command_pool,
+ level: vk::CommandBufferLevel::Primary,
+ command_buffer_count: cmd_buffers.len() as u32,
+ ..default()
+ };
+ vk_check!(self.device_fn.allocate_command_buffers(
+ self.device,
+ &allocate_info,
+ cmd_buffers.as_mut_ptr()
+ ));
+ cmd_buffer_pool.command_buffers.extend(cmd_buffers.iter());
+ }
+
+ let index = cmd_buffer_pool.next_free_index;
+ cmd_buffer_pool.next_free_index += 1;
+ let command_buffer = cmd_buffer_pool.command_buffers[index];
+
+ vk_check!(self.device_fn.begin_command_buffer(
+ command_buffer,
+ &vk::CommandBufferBeginInfo {
+ flags: vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT,
+ ..default()
+ }
+ ));
+
+ let vulkan_cmd_encoder = per_thread.arena.alloc(VulkanCmdEncoder {
+ command_buffer,
+ ..default()
+ });
+
+ CmdEncoder {
+ cmd_encoder_addr: vulkan_cmd_encoder as *mut _ as usize,
+ phantom: PhantomData,
+ phantom_unsend: PhantomUnsend {},
+ }
+ }
+
+ fn cmd_insert_debug_marker(
+ &self,
+ cmd_encoder: &mut CmdEncoder,
+ label_name: &str,
+ color: [f32; 4],
+ ) {
+ #[cfg(feature = "debug_markers")]
+ if let Some(debug_utils_fn) = &self.debug_utils_fn {
+ let arena = HybridArena::<256>::new();
+ let label_name = arena.alloc_cstr_from_str(label_name);
+
+ let command_buffer = self.cmd_encoder_mut(cmd_encoder).command_buffer;
+ let label_info = vk::DebugUtilsLabelExt {
+ label_name: label_name.as_ptr(),
+ color,
+ ..default()
+ };
+ unsafe {
+ debug_utils_fn.cmd_insert_debug_utils_label_ext(command_buffer, &label_info);
+ }
+ }
+ }
+
+ fn cmd_begin_debug_marker(
+ &self,
+ cmd_encoder: &mut CmdEncoder,
+ label_name: &str,
+ color: [f32; 4],
+ ) {
+ #[cfg(feature = "debug_markers")]
+ if let Some(debug_utils_fn) = &self.debug_utils_fn {
+ let arena = HybridArena::<256>::new();
+ let label_name = arena.alloc_cstr_from_str(label_name);
+
+ let command_buffer = self.cmd_encoder_mut(cmd_encoder).command_buffer;
+ let label_info = vk::DebugUtilsLabelExt {
+ label_name: label_name.as_ptr(),
+ color,
+ ..default()
+ };
+ unsafe {
+ debug_utils_fn.cmd_begin_debug_utils_label_ext(command_buffer, &label_info);
+ }
+ }
+ }
+
+ fn cmd_end_debug_marker(&self, cmd_encoder: &mut CmdEncoder) {
+ #[cfg(feature = "debug_markers")]
+ if let Some(debug_utils_fn) = &self.debug_utils_fn {
+ let command_buffer = self.cmd_encoder_mut(cmd_encoder).command_buffer;
+ unsafe { debug_utils_fn.cmd_end_debug_utils_label_ext(command_buffer) }
+ }
+ }
+
+ fn cmd_set_bind_group(
+ &self,
+ cmd_encoder: &mut CmdEncoder,
+ bind_group_index: u32,
+ bind_group: &TransientBindGroup,
+ ) {
let cmd_encoder = self.cmd_encoder_mut(cmd_encoder);
- let VulkanBoundPipeline {
- pipeline_layout,
- pipeline_bind_point,
- } = cmd_encoder
+
+ let bound_pipeline = cmd_encoder
.bound_pipeline
.as_ref()
- .expect("cannot set bind groups without a pipeline bound")
- .clone();
+ .expect("cannot set bind group without a pipeline bound");
let command_buffer = cmd_encoder.command_buffer;
+ let descriptor_set = vk::DescriptorSet::from_raw(bind_group.bind_group);
unsafe {
self.device_fn.cmd_bind_descriptor_sets(
command_buffer,
- pipeline_bind_point,
- pipeline_layout,
+ bound_pipeline.pipeline_bind_point,
+ bound_pipeline.pipeline_layout,
bind_group_index,
&[descriptor_set],
&[],
}
}
+ fn cmd_dispatch_indirect(&self, cmd_encoder: &mut CmdEncoder, buffer: BufferArg, offset: u64) {
+ let (buffer, base_offset, _range) = self.unwrap_buffer_arg(&buffer);
+
+ let command_buffer = self.cmd_encoder_mut(cmd_encoder).command_buffer;
+ unsafe {
+ self.device_fn
+ .cmd_dispatch_indirect(command_buffer, buffer, base_offset + offset);
+ }
+ }
+
fn submit(&self, frame: &Frame, mut cmd_encoder: CmdEncoder) {
let fence = self.universal_queue_fence.fetch_add(1, Ordering::SeqCst) + 1;
self.frame_counter.release(frame);
}
- fn get_buffer_address(&self, buffer: BufferArg) -> u64 {
+ fn get_buffer_address<'a>(&self, buffer: BufferArg<'a>) -> BufferAddress<'a> {
let buffer = match buffer {
BufferArg::Unmanaged(buffer) => buffer.0,
BufferArg::Persistent(buffer) => buffer.buffer.0,
};
let buffer_pool = self.buffer_pool.lock();
let buffer = buffer_pool.get(buffer).unwrap();
- buffer.address
+ BufferAddress {
+ value: buffer.address,
+ phantom: PhantomData,
+ }
}
}
)
};
+ let address = BufferAddress {
+ value: address,
+ phantom: PhantomData,
+ };
+
let ptr = NonNull::new(memory.mapped_ptr()).unwrap();
frame.destroyed_buffers.lock().push_back(buffer);
buffer: buffer.as_raw(),
address,
offset: 0,
- phantom: PhantomData,
};
}
let current = allocator.current.as_ref().unwrap();
+ let address = BufferAddress {
+ value: current.address,
+ phantom: PhantomData,
+ };
+
+ let address = address.byte_add(allocator.offset);
+
TransientBuffer {
ptr: NonNull::new(
current
.unwrap(),
len: size as usize,
buffer: current.buffer.as_raw(),
- address: current.address + allocator.offset,
+ address,
offset: allocator.offset,
- phantom: PhantomData,
}
}
use std::{ffi::CStr, marker::PhantomData};
use backend::vulkan;
+use mapped_buffer::TransientBindGroup;
use narcissus_core::{
default, flags_def, raw_window::AsRawWindow, thread_token_def, Handle, PhantomUnsend,
};
}
}
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub enum PipelineBindPoint {
+ Graphics,
+ Compute,
+}
+
flags_def!(ShaderStageFlags);
impl ShaderStageFlags {
pub const VERTEX: Self = Self(1 << 0);
pub dst_offset_max: Offset3d,
}
+pub enum SpecConstant {
+ Bool { id: u32, value: bool },
+ U32 { id: u32, value: u32 },
+ I32 { id: u32, value: i32 },
+ F32 { id: u32, value: f32 },
+}
+
pub struct ShaderDesc<'a> {
pub entry: &'a CStr,
+ pub require_full_subgroups: bool,
+ pub allow_varying_subgroup_size: bool,
+ pub required_subgroup_size: Option<u32>,
+ pub spec_constants: &'a [SpecConstant],
pub code: &'a [u8],
}
+impl<'a> Default for ShaderDesc<'a> {
+ fn default() -> Self {
+ Self {
+ entry: c"main",
+ require_full_subgroups: false,
+ allow_varying_subgroup_size: false,
+ required_subgroup_size: None,
+ spec_constants: &[],
+ code: &[],
+ }
+ }
+}
+
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum SamplerFilter {
Point,
pub struct GraphicsPipelineDesc<'a> {
pub vertex_shader: ShaderDesc<'a>,
pub fragment_shader: ShaderDesc<'a>,
- pub layout: &'a PipelineLayout<'a>,
+ pub layout: PipelineLayout<'a>,
pub attachments: GraphicsPipelineAttachments<'a>,
pub topology: Topology,
pub primitive_restart: bool,
pub struct ComputePipelineDesc<'a> {
pub shader: ShaderDesc<'a>,
- pub layout: &'a PipelineLayout<'a>,
+ pub layout: PipelineLayout<'a>,
}
#[derive(Clone, Copy, Debug)]
StorageBuffer(&'a [BufferArg<'a>]),
}
+#[repr(C)]
+#[derive(Clone, Copy)]
+pub struct BufferAddress<'a> {
+ value: u64,
+ phantom: PhantomData<&'a [u8]>,
+}
+
+impl<'a> BufferAddress<'a> {
+ #[inline(always)]
+ #[must_use]
+ pub fn as_raw(self) -> u64 {
+ self.value
+ }
+
+ #[inline(always)]
+ #[must_use]
+ pub fn byte_add(self, count: u64) -> Self {
+ Self {
+ value: self.value.wrapping_add(count),
+ phantom: self.phantom,
+ }
+ }
+
+ #[inline(always)]
+ #[must_use]
+ pub fn byte_offset(self, count: i64) -> Self {
+ Self {
+ value: self.value.wrapping_add_signed(count),
+ phantom: self.phantom,
+ }
+ }
+}
+
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Access {
/// No access.
pub struct CmdEncoder<'a> {
cmd_encoder_addr: usize,
- thread_token: &'a ThreadToken,
+ phantom: PhantomData<&'a ()>,
phantom_unsend: PhantomUnsend,
}
fn debug_name_buffer(&self, buffer: BufferArg, label_name: &str);
fn debug_name_image(&self, image: Image, label_name: &str);
+ fn debug_name_bind_group_layout(&self, bind_group_layout: BindGroupLayout, label_name: &str);
+ fn debug_name_pipeline(&self, pipeline: Pipeline, label_name: &str);
- // Danger Zone
- fn get_buffer_address(&self, buffer: BufferArg) -> u64;
+ fn get_buffer_address<'a>(&self, buffer: BufferArg<'a>) -> BufferAddress<'a>;
fn destroy_buffer(&self, frame: &Frame, buffer: Buffer);
fn destroy_persistent_buffer(&self, frame: &Frame, buffer: PersistentBuffer);
size: usize,
) -> TransientBuffer<'a>;
+ #[must_use]
+ fn request_transient_bind_group<'a>(
+ &self,
+ frame: &'a Frame<'a>,
+ thread_token: &'a ThreadToken,
+ layout: BindGroupLayout,
+ bindings: &[Bind],
+ ) -> TransientBindGroup<'a>;
+
#[must_use]
fn request_cmd_encoder<'a>(
&'a self,
fn cmd_set_bind_group(
&self,
- frame: &Frame,
cmd_encoder: &mut CmdEncoder,
- layout: BindGroupLayout,
bind_group_index: u32,
- bindings: &[Bind],
+ bind_group: &TransientBindGroup,
);
fn cmd_set_index_buffer(
group_count_z: u32,
);
+ fn cmd_dispatch_indirect(&self, cmd_encoder: &mut CmdEncoder, buffer: BufferArg, offset: u64);
+
fn submit(&self, frame: &Frame, cmd_encoder: CmdEncoder);
fn begin_frame(&self) -> Frame;
use std::{marker::PhantomData, ptr::NonNull};
-use crate::{Buffer, BufferArg};
+use crate::{Buffer, BufferAddress, BufferArg};
#[cold]
fn overflow() -> ! {
pub(crate) offset: u64,
pub(crate) len: usize,
pub(crate) buffer: u64,
- pub(crate) address: u64,
- pub(crate) phantom: PhantomData<&'a u8>,
+ pub(crate) address: BufferAddress<'a>,
}
impl<'a> TransientBuffer<'a> {
unsafe { copy_from_with_offset(self.ptr, self.len, offset, src) }
}
}
+
+pub struct TransientBindGroup<'a> {
+ pub(crate) bind_group: u64,
+ pub(crate) phantom: PhantomData<&'a ()>,
+}
#[repr(C)]
pub struct SpecializationInfo<'a> {
pub map_entries: VulkanSlice1<'a, u32, SpecializationMapEntry, 4>,
- /// Size in bytes of pData
- pub data_size: usize,
- /// Pointer to SpecConstant data
- pub data: *const c_void,
+ pub data: VulkanSlice1<'a, usize, u8, 0>,
}
impl<'a> Default for SpecializationInfo<'a> {
}
}
+#[repr(C)]
+pub struct PipelineShaderStageRequiredSubgroupSizeCreateInfo {
+ pub _type: StructureType,
+ pub _next: *const c_void,
+ pub required_subgroup_size: u32,
+}
+
+impl Default for PipelineShaderStageRequiredSubgroupSizeCreateInfo {
+ fn default() -> Self {
+ let mut x = unsafe { MaybeUninit::<Self>::zeroed().assume_init() };
+ x._type = StructureType::PipelineShaderStageRequiredSubgroupSizeCreateInfo;
+ x
+ }
+}
+
#[repr(C)]
pub struct ComputePipelineCreateInfo<'a> {
pub _type: StructureType,
}
}
+#[repr(C)]
pub struct PhysicalDeviceSwapchainMaintenance1FeaturesEXT {
pub _type: StructureType,
pub _next: *mut c_void,
},
Shader {
stage: "comp",
- name: "primitive_2d_bin",
+ name: "draw_2d_bin_0_clear",
},
Shader {
stage: "comp",
- name: "primitive_2d_bin_clear",
+ name: "draw_2d_bin_1_scatter",
},
Shader {
stage: "comp",
- name: "primitive_2d_rasterize",
+ name: "draw_2d_bin_2_sort",
},
Shader {
stage: "comp",
- name: "display_transform",
+ name: "draw_2d_bin_3_resolve",
+ },
+ Shader {
+ stage: "comp",
+ name: "draw_2d_rasterize",
+ },
+ Shader {
+ stage: "comp",
+ name: "radix_sort_0_upsweep",
+ },
+ Shader {
+ stage: "comp",
+ name: "radix_sort_1_spine",
+ },
+ Shader {
+ stage: "comp",
+ name: "radix_sort_2_downsweep",
+ },
+ Shader {
+ stage: "comp",
+ name: "composite",
},
];
#version 460
-layout(set = 0, binding = 1) uniform sampler bilinear_sampler;
-layout(set = 1, binding = 2) uniform texture2D albedo;
+layout(set = 0, binding = 0) uniform sampler bilinear_sampler;
+layout(set = 0, binding = 1) uniform texture2D albedo;
layout(location = 0) in vec2 tex_coord;
layout(location = 1) in vec3 normal;
--- /dev/null
+#version 460
+
+#extension GL_EXT_buffer_reference : require
+#extension GL_EXT_buffer_reference2 : require
+#extension GL_EXT_scalar_block_layout : require
+
+struct Vertex {
+ vec4 position;
+ vec4 normal;
+ vec4 texcoord;
+};
+
+struct Transform {
+ vec4 transform[3];
+};
+
+layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer VertexRef {
+ Vertex values[];
+};
+
+layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer TransformRef {
+ Transform values[];
+};
+
+struct BasicConstants {
+ mat4 clip_from_camera;
+ VertexRef vertex_buffer;
+ TransformRef transform_buffer;
+};
+
+layout(std430, row_major, push_constant) uniform BasicConstantsBlock {
+ BasicConstants constants;
+};
+
+layout(location = 0) out vec2 out_texcoord;
+layout(location = 1) out vec3 out_normal;
+
+void main() {
+ const Transform td = constants.transform_buffer.values[gl_InstanceIndex];
+ const Vertex vd = constants.vertex_buffer.values[gl_VertexIndex];
+
+ const mat4 camera_from_model = mat4(
+ td.transform[0].x, td.transform[0].w, td.transform[1].z, 0.0,
+ td.transform[0].y, td.transform[1].x, td.transform[1].w, 0.0,
+ td.transform[0].z, td.transform[1].y, td.transform[2].x, 0.0,
+ td.transform[2].y, td.transform[2].z, td.transform[2].w, 1.0
+ );
+
+ const vec4 position_clip = constants.clip_from_camera * camera_from_model * vec4(vd.position.xyz, 1.0);
+
+ gl_Position = position_clip;
+
+ out_normal = vd.normal.xyz;
+ out_texcoord = vec2(vd.texcoord.x, 1.0 - vd.texcoord.y);
+}
+++ /dev/null
-#version 460
-
-#extension GL_EXT_scalar_block_layout : require
-
-struct VertexData {
- vec4 position;
- vec4 normal;
- vec4 texcoord;
-};
-
-struct TransformData {
- vec4 transform[3];
-};
-
-layout(std430, row_major, set = 0, binding = 0) uniform uniformBuffer {
- mat4 clip_from_camera;
-};
-
-layout(std430, set = 1, binding = 0) readonly buffer vertexBuffer {
- VertexData vertices[];
-};
-
-layout(std430, set = 1, binding = 1) readonly buffer transformBuffer {
- TransformData transforms[];
-};
-
-layout(location = 0) out vec2 out_texcoord;
-layout(location = 1) out vec3 out_normal;
-
-void main() {
- const TransformData td = transforms[gl_InstanceIndex];
- const VertexData vd = vertices[gl_VertexIndex];
-
- const mat4 camera_from_model = mat4(
- td.transform[0].x, td.transform[0].w, td.transform[1].z, 0.0,
- td.transform[0].y, td.transform[1].x, td.transform[1].w, 0.0,
- td.transform[0].z, td.transform[1].y, td.transform[2].x, 0.0,
- td.transform[2].y, td.transform[2].z, td.transform[2].w, 1.0
- );
-
- const vec4 position_clip = clip_from_camera * camera_from_model * vec4(vd.position.xyz, 1.0);
-
- gl_Position = position_clip;
-
- out_normal = vd.normal.xyz;
- out_texcoord = vec2(vd.texcoord.x, 1.0 - vd.texcoord.y);
-}
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
void main() {
- const uvec2 tile_coord = gl_WorkGroupID.xy / 4;
- const uint tile_index = tile_coord.y * uniforms.tile_stride + tile_coord.x;
- const uint tile_base = tile_index * TILE_STRIDE;
- TilesRead tiles_read = TilesRead(uniforms.tiles);
- const uint lo = tiles_read.values[tile_base + TILE_BITMAP_RANGE_LO_OFFSET];
- const uint hi = tiles_read.values[tile_base + TILE_BITMAP_RANGE_HI_OFFSET];
-
+    // const uvec2 tile_coord = gl_WorkGroupID.xy / 4;
+    // const uint tile_index = tile_coord.y * uniforms.tile_resolution.x + tile_coord.x;
+    // const uint tile_base = tile_index * TILE_STRIDE;
+    // TilesRead tiles_read = TilesRead(uniforms.tiles);
+    // const uint lo = tiles_read.values[tile_base + TILE_BITMAP_RANGE_LO_OFFSET];
+    // const uint hi = tiles_read.values[tile_base + TILE_BITMAP_RANGE_HI_OFFSET];
+
+ // Display transform
const vec3 stimulus = imageLoad(color_layer, ivec2(gl_GlobalInvocationID.xy)).rgb;
const vec3 transformed = tony_mc_mapface(stimulus);
vec3 composited = srgb_oetf(transformed);
- if (lo <= hi) {
- const vec4 ui = imageLoad(ui_layer_read, ivec2(gl_GlobalInvocationID.xy)).rgba;
- composited = ui.rgb + (composited * (1.0 - ui.a));
- }
+ // UI Composite
+ // if (lo <= hi) {
+ // const vec4 ui = imageLoad(ui_layer_read, ivec2(gl_GlobalInvocationID.xy)).rgba;
+ // composited = ui.rgb + (composited * (1.0 - ui.a));
+ // }
imageStore(composited_output, ivec2(gl_GlobalInvocationID.xy), vec4(composited, 1.0));
}
#ifndef COMPUTE_BINDINGS_INCLUDE
#define COMPUTE_BINDINGS_INCLUDE
-#include "primitive_2d.h"
-
-layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer PrimitiveInstances
-{
- PrimitiveInstance values[];
-};
-
-layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer Rects
-{
- Rect values[];
-};
-
-layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer Glyphs
-{
- Glyph values[];
-};
-
-layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer TilesRead
-{
- uint values[];
-};
-
-layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer TilesWrite
-{
- uint values[];
-};
-
-struct ComputeUniforms {
- uvec2 screen_resolution;
- uvec2 atlas_resolution;
- uvec2 tile_resolution;
-
- uint num_primitives;
- uint num_primitives_32;
- uint num_primitives_1024;
- uint tile_stride;
-
- PrimitiveInstances primitive_instances;
- Rects rects;
- Glyphs glyphs;
- TilesWrite tiles;
-};
-
-layout(std430, push_constant) uniform UniformBuffer {
- ComputeUniforms uniforms;
-};
-
layout (set = 0, binding = 0) uniform sampler bilinear_sampler;
layout (set = 0, binding = 1) uniform texture3D tony_mc_mapface_lut;
layout (set = 0, binding = 2) uniform texture2D glyph_atlas;
--- /dev/null
+#ifndef DRAW_2D_H
+#define DRAW_2D_H
+
+const uint TILE_SIZE = 32;
+
+const uint DRAW_2D_CMD_RECT = 0;
+const uint DRAW_2D_CMD_GLYPH = 1;
+
+struct Glyph {
+ ivec2 atlas_min;
+ ivec2 atlas_max;
+
+ vec2 offset_min;
+ vec2 offset_max;
+};
+
+struct Draw2dCmd {
+ uint type;
+ uint words[7];
+};
+
+struct Draw2dCmdRect {
+ uint border_width;
+ vec2 position;
+ vec2 half_extent;
+ uint background_color;
+ uint border_color;
+};
+
+struct Draw2dCmdGlyph {
+ uint index;
+ vec2 position;
+ uint color;
+};
+
+Draw2dCmdRect decode_rect(Draw2dCmd cmd) {
+ return Draw2dCmdRect(
+ cmd.words[0],
+ vec2(uintBitsToFloat(cmd.words[1]), uintBitsToFloat(cmd.words[2])),
+ vec2(uintBitsToFloat(cmd.words[3]), uintBitsToFloat(cmd.words[4])),
+ cmd.words[5],
+ cmd.words[6]
+ );
+}
+
+Draw2dCmdGlyph decode_glyph(Draw2dCmd cmd) {
+ return Draw2dCmdGlyph(
+ cmd.words[0],
+ vec2(uintBitsToFloat(cmd.words[1]), uintBitsToFloat(cmd.words[2])),
+ cmd.words[3]
+ );
+}
+
+layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer Draw2dCommandRef
+{
+ Draw2dCmd values[];
+};
+
+layout(buffer_reference, std430, buffer_reference_align = 16) readonly buffer GlyphRef
+{
+ Glyph values[];
+};
+
+layout(buffer_reference, std430, buffer_reference_align = 4) buffer CoarseRef
+{
+ uint values[];
+};
+
+#endif
\ No newline at end of file
--- /dev/null
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_buffer_reference : require
+#extension GL_EXT_buffer_reference2 : require
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#extension GL_KHR_shader_subgroup_vote : require
+#extension GL_KHR_shader_subgroup_ballot : require
+
+#include "draw_2d.h"
+#include "radix_sort.h"
+
+struct Draw2dClearConstants {
+ CoarseRef coarse_buffer;
+};
+
+layout(std430, push_constant) uniform Draw2dClearConstantsBlock {
+ Draw2dClearConstants constants;
+};
+
+layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+void main() {
+ constants.coarse_buffer.values[0] = 0;
+}
--- /dev/null
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_buffer_reference : require
+#extension GL_EXT_buffer_reference2 : require
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#extension GL_KHR_shader_subgroup_arithmetic : require
+#extension GL_KHR_shader_subgroup_ballot : require
+#extension GL_KHR_shader_subgroup_vote : require
+
+#include "draw_2d.h"
+
+struct Draw2dScatterConstants {
+ uvec2 screen_resolution;
+ uvec2 tile_resolution;
+
+ uint draw_buffer_len;
+ uint coarse_buffer_len;
+
+ Draw2dCommandRef draw_buffer;
+ GlyphRef glyph_buffer;
+ CoarseRef coarse_buffer;
+};
+
+layout(std430, push_constant) uniform Draw2dScatterConstantsBlock {
+ Draw2dScatterConstants constants;
+};
+
+layout (local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
+
+void main() {
+ const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
+ const uint draw_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + local_id;
+
+ // Bounds for this command, any tiles which intersect this AABB will be written.
+ vec2 cmd_min = vec2(99999.9);
+ vec2 cmd_max = vec2(-99999.9);
+
+ if (draw_index < constants.draw_buffer_len) {
+ const Draw2dCmd cmd = constants.draw_buffer.values[draw_index];
+ const uint type = cmd.type;
+ for (;;) {
+ const uint scalar_type = subgroupBroadcastFirst(type);
+ [[branch]]
+ if (scalar_type == type) {
+ switch (scalar_type) {
+ case DRAW_2D_CMD_RECT:
+ const Draw2dCmdRect cmd_rect = decode_rect(cmd);
+ cmd_min = cmd_rect.position - cmd_rect.half_extent - cmd_rect.border_width;
+ cmd_max = cmd_rect.position + cmd_rect.half_extent + cmd_rect.border_width;
+ break;
+ case DRAW_2D_CMD_GLYPH:
+ const Draw2dCmdGlyph cmd_glyph = decode_glyph(cmd);
+ const Glyph glyph = constants.glyph_buffer.values[cmd_glyph.index];
+ cmd_min = cmd_glyph.position + glyph.offset_min;
+ cmd_max = cmd_glyph.position + glyph.offset_max;
+ break;
+ }
+ break;
+ }
+ }
+ }
+
+ const vec2 cmds_min = subgroupMin(cmd_min);
+ const vec2 cmds_max = subgroupMax(cmd_max);
+
+ // Are all our commands entirely offscreen?
+ if (any(greaterThan(cmds_min, constants.screen_resolution)) || any(lessThan(cmds_max, vec2(0.0)))) {
+ return;
+ }
+
+ const uvec2 cmd_min_tile = uvec2(floor(max(min(cmd_min, constants.screen_resolution), 0.0) / TILE_SIZE));
+ const uvec2 cmd_max_tile = uvec2(floor(max(min(cmd_max, constants.screen_resolution), 0.0) / TILE_SIZE));
+ const uvec2 cmds_min_tile = subgroupMin(cmd_min_tile);
+ const uvec2 cmds_max_tile = subgroupMax(cmd_max_tile);
+
+ // Are any single commands responsible for the entire bounds?
+ const bool cmd_absolute_min = cmd_min_tile == cmds_min_tile;
+ const bool cmd_absolute_max = cmd_max_tile == cmds_max_tile;
+ const bool use_individual_bounds = !any(notEqual(subgroupBallot(cmd_absolute_min) & subgroupBallot(cmd_absolute_max), uvec4(0)));
+
+ if (false && use_individual_bounds) {
+
+ } else {
+ const uvec2 tile_count = cmds_max_tile - cmds_min_tile + uvec2(1);
+ const uint count = tile_count.x * tile_count.y;
+
+ uint offset;
+ if (subgroupElect()) {
+ offset = atomicAdd(constants.coarse_buffer.values[0], count) + 1;
+ }
+ offset = subgroupBroadcastFirst(offset);
+
+ if (offset >= constants.coarse_buffer_len) {
+ return;
+ }
+
+ for (uint y = 0; y < tile_count.y; y++) {
+ for (uint x = 0; x < tile_count.x; x += gl_SubgroupSize) {
+ const uint local_x = x + gl_SubgroupInvocationID;
+ if (local_x < tile_count.x) {
+ const uint yy = cmds_min_tile.y + y;
+ const uint xx = cmds_min_tile.x + local_x;
+ const uint packed = ((yy & 0xff) << 24) | ((xx & 0xff) << 16) | (gl_WorkGroupID.x & 0xffff);
+ constants.coarse_buffer.values[offset + local_x] = packed;
+ }
+ subgroupBarrier();
+ }
+ offset += tile_count.x;
+ }
+ }
+
+ // if (gl_SubgroupSize == 32 && fullscreen_ballot.x != 0) {
+ // uint offset;
+ // if (subgroupElect()) {
+ // const uint count = constants.tile_resolution.x * constants.tile_resolution.y;
+ // offset = atomicAdd(constants.coarse_buffer.values[0], count) + 1;
+ // }
+ // offset = subgroupBroadcastFirst(offset);
+
+ // if (offset >= constants.coarse_buffer_len) {
+ // return;
+ // }
+
+ // const uint word_index = gl_WorkGroupID.x;
+
+ // for (uint y = 0; y < constants.tile_resolution.y; y++) {
+ // for (uint x = 0; x < constants.tile_resolution.x; x++) {
+ // const uint tile_index = y * constants.tile_resolution.x + x;
+ // const uint packed = (tile_index << 16) | word_index;
+ // if (subgroupElect() && offset + tile_index < constants.coarse_buffer_len) {
+ // constants.coarse_buffer.values[offset + tile_index] = packed;
+ // }
+ // }
+ // }
+
+ // return;
+ // }
+
+ // uint count = 0;
+
+ // for (uint y = cmds_min_tile.y; y <= cmds_max_tile.y; y++) {
+ // for (uint x = cmds_min_tile.x; x <= cmds_max_tile.x; x++) {
+ // const vec2 tile_min = uvec2(x, y) * TILE_SIZE;
+ // const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution);
+
+ // const bool intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max)));
+ // const uvec4 ballot = subgroupBallot(intersects);
+
+ // if (subgroupElect()) {
+ // count += uint(ballot.x != 0);
+ // }
+ // }
+ // }
+
+ // if (count == 0) {
+ // return;
+ // }
+
+ // uint offset;
+ // if (subgroupElect()) {
+ // offset = atomicAdd(constants.coarse_buffer.values[0], count) + 1;
+ // }
+ // offset = subgroupBroadcastFirst(offset);
+
+ // if (offset >= constants.coarse_buffer_len) {
+ // return;
+ // }
+
+ // for (uint y = cmds_min_tile.y; y <= cmds_max_tile.y; y++) {
+ // for (uint x = cmds_min_tile.x; x <= cmds_max_tile.x; x++) {
+ // const vec2 tile_min = uvec2(x, y) * TILE_SIZE;
+ // const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution);
+ // const uint tile_index = y * constants.tile_resolution.x + x;
+
+ // const bool intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max)));
+ // const uvec4 ballot = subgroupBallot(intersects);
+
+ // if (subgroupElect() && ballot.x != 0 && offset < constants.coarse_buffer_len) {
+ // const uint word_index = gl_WorkGroupID.x;
+ // const uint packed = (tile_index << 16) | word_index;
+ // constants.coarse_buffer.values[offset++] = packed;
+ // }
+ // }
+ // }
+}
--- /dev/null
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_buffer_reference : require
+#extension GL_EXT_buffer_reference2 : require
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#extension GL_KHR_shader_subgroup_arithmetic : require
+#extension GL_KHR_shader_subgroup_ballot : require
+#extension GL_KHR_shader_subgroup_shuffle_relative: enable
+#extension GL_KHR_shader_subgroup_vote : require
+
+#include "compute_bindings.h"
+
+#include "draw_2d.h"
+#include "indirect.h"
+#include "radix_sort.h"
+
+layout(buffer_reference, std430, buffer_reference_align = 4) buffer VkDispatchIndirectCommandRef {
+ VkDispatchIndirectCommand dimensions;
+};
+
+struct Draw2dSortConstants {
+ uint coarse_buffer_len;
+ uint _pad;
+ VkDispatchIndirectCommandRef indirect_dispatch_buffer;
+ CoarseRef coarse_buffer;
+};
+
+layout(std430, push_constant) uniform Draw2dSortConstantsBlock {
+ Draw2dSortConstants constants;
+};
+
+layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
+
+void main() {
+ // We shouldn't overflow the coarse buffer in the scatter phase, but we can
+ // still end up with a count that's larger than the buffer size since we
+ // unconditionally atomicAdd. So we need to clamp to the actual size now
+ // before dispatching sort work.
+ const uint count = min(constants.coarse_buffer_len, constants.coarse_buffer.values[0]);
+ constants.coarse_buffer.values[0] = count;
+
+ constants.indirect_dispatch_buffer.dimensions.x = (count + (RADIX_ITEMS_PER_WGP - 1)) / RADIX_ITEMS_PER_WGP;
+ constants.indirect_dispatch_buffer.dimensions.y = 1;
+ constants.indirect_dispatch_buffer.dimensions.z = 1;
+}
--- /dev/null
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_buffer_reference : require
+#extension GL_EXT_buffer_reference2 : require
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#extension GL_KHR_shader_subgroup_arithmetic : require
+#extension GL_KHR_shader_subgroup_ballot : require
+#extension GL_KHR_shader_subgroup_vote : require
+
+#include "compute_bindings.h"
+
+const uint SUBGROUP_SIZE = 64;
+
+// TODO: Spec constant support for different subgroup sizes.
+layout (local_size_x = SUBGROUP_SIZE, local_size_y = 1, local_size_z = 1) in;
+
+void main() {
+}
--- /dev/null
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_buffer_reference : require
+#extension GL_EXT_buffer_reference2 : require
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#extension GL_KHR_shader_subgroup_vote : require
+#extension GL_KHR_shader_subgroup_ballot : require
+
+#include "compute_bindings.h"
+
+layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+// x = (((index >> 2) & 0x0007) & 0xFFFE) | (index & 0x0001)
+// y = ((index >> 1) & 0x0003) | (((index >> 3) & 0x0007) & 0xFFFC)
+
+#define DEBUG_SHOW_TILES 0
+
+#if DEBUG_SHOW_TILES != 0
+
+vec3 plasma_quintic(float x)
+{
+ x = clamp(x, 0.0, 1.0);
+ vec4 x1 = vec4(1.0, x, x * x, x * x * x); // 1 x x2 x3
+ vec4 x2 = x1 * x1.w * x; // x4 x5 x6 x7
+ return vec3(
+ dot(x1.xyzw, vec4(+0.063861086, +1.992659096, -1.023901152, -0.490832805)) + dot(x2.xy, vec2(+1.308442123, -0.914547012)),
+ dot(x1.xyzw, vec4(+0.049718590, -0.791144343, +2.892305078, +0.811726816)) + dot(x2.xy, vec2(-4.686502417, +2.717794514)),
+ dot(x1.xyzw, vec4(+0.513275779, +1.580255060, -5.164414457, +4.559573646)) + dot(x2.xy, vec2(-1.916810682, +0.570638854))
+ );
+}
+
+#endif
+
+void main() {
+    // const uvec2 tile_coord = gl_WorkGroupID.xy / (TILE_SIZE / gl_WorkGroupSize.xy);
+    // const uint tile_index = tile_coord.y * uniforms.tile_resolution.x + tile_coord.x;
+    // const uint tile_base = tile_index * TILE_STRIDE;
+    // TilesRead tiles_read = TilesRead(uniforms.tiles);
+
+    // const uint lo = tiles_read.values[tile_base + TILE_BITMAP_RANGE_LO_OFFSET];
+    // const uint hi = tiles_read.values[tile_base + TILE_BITMAP_RANGE_HI_OFFSET];
+
+ // if (hi < lo) {
+ // return;
+ // }
+
+// #if DEBUG_SHOW_TILES == 1
+
+// uint count = 0;
+// // For each tile, iterate over all words in the L1 bitmap.
+// for (uint index_l1 = lo; index_l1 <= hi; index_l1++) {
+// // For each word, iterate all set bits.
+// uint bitmap_l1 = tiles_read.values[tile_base + TILE_BITMAP_L1_OFFSET + index_l1];
+
+// while (bitmap_l1 != 0) {
+// const uint i = findLSB(bitmap_l1);
+// bitmap_l1 ^= bitmap_l1 & -bitmap_l1;
+
+// // For each set bit in the L1 bitmap, iterate the set bits in the
+// // corresponding L0 bitmap.
+// const uint index_l0 = index_l1 * 32 + i;
+// uint bitmap_l0 = tiles_read.values[tile_base + TILE_BITMAP_L0_OFFSET + index_l0];
+
+// count += bitCount(bitmap_l0);
+// }
+// }
+
+// const vec3 color = plasma_quintic(float(count) / 100.0);
+// imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
+
+// #elif DEBUG_SHOW_TILES == 2
+
+// uint count = hi - lo;
+// const vec3 color = plasma_quintic(float(count) / 100.0);
+// imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
+
+// #else
+
+// const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5);
+
+// vec4 accum = vec4(0.0);
+
+// // For each tile, iterate over all words in the L1 bitmap.
+// for (uint index_l1 = lo; index_l1 <= hi; index_l1++) {
+// // For each word, iterate all set bits.
+// uint bitmap_l1 = tiles_read.values[tile_base + TILE_BITMAP_L1_OFFSET + index_l1];
+
+// while (bitmap_l1 != 0) {
+// const uint i = findLSB(bitmap_l1);
+// bitmap_l1 ^= bitmap_l1 & -bitmap_l1;
+
+// // For each set bit in the L1 bitmap, iterate the set bits in the
+// // corresponding L0 bitmap.
+// const uint index_l0 = index_l1 * 32 + i;
+// uint bitmap_l0 = tiles_read.values[tile_base + TILE_BITMAP_L0_OFFSET + index_l0];
+// while (bitmap_l0 != 0) {
+// const uint j = findLSB(bitmap_l0);
+// bitmap_l0 ^= bitmap_l0 & -bitmap_l0;
+
+// // Set bits in the L0 bitmap indicate binned primitives for this tile.
+// const uint primitive_index = index_l0 * 32 + j;
+// const Primitive2d primitive = uniforms.primitives.values[primitive_index];
+// const uint type = primitive.type;
+
+// vec4 primitive_color = vec4(0.0);
+
+// switch (type) {
+// case PRIMITIVE_TYPE_RECT: {
+// // const Rect rect = uniforms.rects.values[offset];
+// // const vec2 rect_min = primitive_instance.position - rect.half_extent - rect.border_width;
+// // const vec2 rect_max = primitive_instance.position + rect.half_extent + rect.border_width;
+// if (all(greaterThanEqual(sample_center, rect_min)) && all(lessThanEqual(sample_center, rect_max))) {
+// primitive_color = unpackUnorm4x8(primitive_instance.color).bgra;
+// }
+// break;
+// }
+// case PRIMITIVE_TYPE_GLYPH: {
+// const Glyph glyph = uniforms.glyphs.values[offset];
+// const vec2 glyph_min = primitive_instance.position + glyph.offset_min;
+// const vec2 glyph_max = primitive_instance.position + glyph.offset_max;
+// if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) {
+// const vec2 glyph_size = glyph.offset_max - glyph.offset_min;
+// const vec2 uv = mix(glyph.atlas_min, glyph.atlas_max, (sample_center - glyph_min) / glyph_size) / uniforms.atlas_resolution;
+// const vec4 color = unpackUnorm4x8(primitive_instance.color).bgra;
+// const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r * color.a;
+// primitive_color = color * coverage;
+// }
+// break;
+// }
+// }
+
+// // does it blend?
+// accum.rgba = primitive_color.rgba + accum.rgba * (1.0 - primitive_color.a);
+// }
+// }
+// }
+
+// imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), accum);
+
+// #endif
+}
--- /dev/null
+#ifndef INDIRECT_H
+#define INDIRECT_H
+
+struct VkDispatchIndirectCommand {
+ uint x;
+ uint y;
+ uint z;
+};
+
+#endif
\ No newline at end of file
+++ /dev/null
-#ifndef PRIMITIVE_2D_INCLUDE
-#define PRIMITIVE_2D_INCLUDE
-
-const uint TILE_SIZE = 32;
-
-const uint MAX_PRIMS = 1 << 18;
-const uint TILE_BITMAP_L1_WORDS = (MAX_PRIMS / 32 / 32);
-const uint TILE_BITMAP_L0_WORDS = (MAX_PRIMS / 32);
-const uint TILE_STRIDE = (TILE_BITMAP_L0_WORDS + TILE_BITMAP_L1_WORDS + 2);
-const uint TILE_BITMAP_RANGE_LO_OFFSET = 0;
-const uint TILE_BITMAP_RANGE_HI_OFFSET = (TILE_BITMAP_RANGE_LO_OFFSET + 1);
-const uint TILE_BITMAP_L1_OFFSET = (TILE_BITMAP_RANGE_HI_OFFSET + 1);
-const uint TILE_BITMAP_L0_OFFSET = (TILE_BITMAP_L1_OFFSET + TILE_BITMAP_L1_WORDS);
-
-const uint PRIMITIVE_TYPE_RECT = 0;
-const uint PRIMITIVE_TYPE_GLYPH = 1;
-
-struct PrimitiveInstance {
- uint packed;
- uint color;
- vec2 position;
-};
-
-struct Rect {
- vec2 half_extent;
- float border_width;
- float border_radius;
-};
-
-struct Glyph {
- ivec2 atlas_min;
- ivec2 atlas_max;
-
- vec2 offset_min;
- vec2 offset_max;
-};
-
-#endif
\ No newline at end of file
+++ /dev/null
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_buffer_reference : require
-#extension GL_EXT_buffer_reference2 : require
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_control_flow_attributes : require
-
-#extension GL_KHR_shader_subgroup_arithmetic : require
-#extension GL_KHR_shader_subgroup_ballot : require
-#extension GL_KHR_shader_subgroup_vote : require
-
-#include "compute_bindings.h"
-
-const uint SUBGROUP_SIZE = 64;
-const uint NUM_SUBGROUPS = 16;
-const uint NUM_PRIMITIVES_WG = (SUBGROUP_SIZE * NUM_SUBGROUPS);
-
-// TODO: Spec constant support for different subgroup sizes.
-layout (local_size_x = SUBGROUP_SIZE, local_size_y = 1, local_size_z = 1) in;
-
-void main() {
- uint word_index = 0;
-
- for (uint i = 0; i < NUM_PRIMITIVES_WG; i += gl_SubgroupSize.x) {
- const uint primitive_index = gl_WorkGroupID.x * NUM_PRIMITIVES_WG + i + gl_SubgroupInvocationID;
-
- // Bounds for this primitive, any tiles which intersect this AABB will be written.
- vec2 primitive_min = vec2(99999.9);
- vec2 primitive_max = vec2(-99999.9);
-
- if (primitive_index < uniforms.num_primitives) {
- const PrimitiveInstance primitive_instance = uniforms.primitive_instances.values[primitive_index];
- const uint type = bitfieldExtract(primitive_instance.packed, 30, 2);
- const uint offset = bitfieldExtract(primitive_instance.packed, 0, 20);
-
- for (;;) {
- const uint scalar_type = subgroupBroadcastFirst(type);
- [[branch]]
- if (scalar_type == type) {
- switch (type) {
- case PRIMITIVE_TYPE_RECT:
- const Rect rect = uniforms.rects.values[offset];
- primitive_min = primitive_instance.position - rect.half_extent;
- primitive_max = primitive_instance.position + rect.half_extent;
- break;
- case PRIMITIVE_TYPE_GLYPH:
- const Glyph glyph = uniforms.glyphs.values[offset];
- primitive_min = primitive_instance.position + glyph.offset_min;
- primitive_max = primitive_instance.position + glyph.offset_max;
- break;
- }
- break;
- }
- }
- }
-
- const vec2 primitives_min = subgroupMin(primitive_min);
- const vec2 primitives_max = subgroupMax(primitive_max);
-
- if (any(greaterThan(primitives_min, uniforms.screen_resolution)) || any(lessThan(primitives_max, vec2(0.0)))) {
- word_index += 2;
- continue;
- }
-
- const ivec2 tile_start = ivec2(floor(max(min(primitives_min, uniforms.screen_resolution), 0.0) / TILE_SIZE));
- const ivec2 tile_end = ivec2(floor((max(min(primitives_max, uniforms.screen_resolution), 0.0) + (TILE_SIZE - 1)) / TILE_SIZE));
-
- for (int y = tile_start.y; y < tile_end.y; y++) {
- for (int x = tile_start.x; x < tile_end.x; x++) {
- const uvec2 tile_coord = uvec2(x, y);
- const vec2 tile_min = tile_coord * TILE_SIZE;
- const vec2 tile_max = min(tile_min + TILE_SIZE, uniforms.screen_resolution);
-
- const bool intersects = !(any(lessThan(tile_max, primitive_min)) || any(greaterThan(tile_min, primitive_max)));
- const uvec4 ballot = subgroupBallot(intersects);
-
- if (ballot.x == 0 && ballot.y == 0) {
- continue;
- }
-
- const uint tile_index = tile_coord.y * uniforms.tile_stride + tile_coord.x;
-
- if (ballot.x != 0) {
- uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_L0_OFFSET + gl_WorkGroupID.x * 32 + word_index + 0] = ballot.x;
- }
-
- if (ballot.y != 0) {
- uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_L0_OFFSET + gl_WorkGroupID.x * 32 + word_index + 1] = ballot.y;
- }
-
- if (subgroupElect()) {
- uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_L1_OFFSET + gl_WorkGroupID.x] |=
- (uint(ballot.x != 0) << (word_index + 0)) |
- (uint(ballot.y != 0) << (word_index + 1));
-
- atomicMin(uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_RANGE_LO_OFFSET], gl_WorkGroupID.x);
- atomicMax(uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_RANGE_HI_OFFSET], gl_WorkGroupID.x);
- }
- }
- }
-
- word_index += 2;
- }
-}
+++ /dev/null
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_buffer_reference : require
-#extension GL_EXT_buffer_reference2 : require
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_control_flow_attributes : require
-
-#extension GL_KHR_shader_subgroup_vote : require
-#extension GL_KHR_shader_subgroup_ballot : require
-
-#include "compute_bindings.h"
-
-// TODO: Spec constant support for different subgroup sizes.
-layout (local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
-
-void main() {
- const uint tile_index = gl_GlobalInvocationID.z * uniforms.tile_stride + gl_GlobalInvocationID.y;
-
- uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_RANGE_LO_OFFSET] = 0xffffffff;
- uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_RANGE_HI_OFFSET] = 0;
-
- if (gl_GlobalInvocationID.x < TILE_BITMAP_L1_WORDS) {
- uniforms.tiles.values[tile_index * TILE_STRIDE + TILE_BITMAP_L1_OFFSET + gl_GlobalInvocationID.x] = 0;
- }
-}
+++ /dev/null
-#version 460
-
-#extension GL_GOOGLE_include_directive : require
-
-#extension GL_EXT_buffer_reference : require
-#extension GL_EXT_buffer_reference2 : require
-#extension GL_EXT_scalar_block_layout : require
-#extension GL_EXT_control_flow_attributes : require
-
-#extension GL_KHR_shader_subgroup_vote : require
-#extension GL_KHR_shader_subgroup_ballot : require
-
-#include "compute_bindings.h"
-#include "primitive_2d.h"
-
-layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
-
-#define DEBUG_SHOW_TILES 0
-
-#if DEBUG_SHOW_TILES != 0
-
-vec3 plasma_quintic(float x)
-{
- x = clamp(x, 0.0, 1.0);
- vec4 x1 = vec4(1.0, x, x * x, x * x * x); // 1 x x2 x3
- vec4 x2 = x1 * x1.w * x; // x4 x5 x6 x7
- return vec3(
- dot(x1.xyzw, vec4(+0.063861086, +1.992659096, -1.023901152, -0.490832805)) + dot(x2.xy, vec2(+1.308442123, -0.914547012)),
- dot(x1.xyzw, vec4(+0.049718590, -0.791144343, +2.892305078, +0.811726816)) + dot(x2.xy, vec2(-4.686502417, +2.717794514)),
- dot(x1.xyzw, vec4(+0.513275779, +1.580255060, -5.164414457, +4.559573646)) + dot(x2.xy, vec2(-1.916810682, +0.570638854))
- );
-}
-
-#endif
-
-void main() {
- const uvec2 tile_coord = gl_WorkGroupID.xy / (TILE_SIZE / gl_WorkGroupSize.xy);
- const uint tile_index = tile_coord.y * uniforms.tile_stride + tile_coord.x;
- const uint tile_base = tile_index * TILE_STRIDE;
-
- TilesRead tiles_read = TilesRead(uniforms.tiles);
-
- const uint lo = tiles_read.values[tile_base + TILE_BITMAP_RANGE_LO_OFFSET];
- const uint hi = tiles_read.values[tile_base + TILE_BITMAP_RANGE_HI_OFFSET];
-
- if (hi < lo) {
- return;
- }
-
-#if DEBUG_SHOW_TILES == 1
-
- uint count = 0;
- // For each tile, iterate over all words in the L1 bitmap.
- for (uint index_l1 = lo; index_l1 <= hi; index_l1++) {
- // For each word, iterate all set bits.
- uint bitmap_l1 = tiles_read.values[tile_base + TILE_BITMAP_L1_OFFSET + index_l1];
-
- while (bitmap_l1 != 0) {
- const uint i = findLSB(bitmap_l1);
- bitmap_l1 ^= bitmap_l1 & -bitmap_l1;
-
- // For each set bit in the L1 bitmap, iterate the set bits in the
- // corresponding L0 bitmap.
- const uint index_l0 = index_l1 * 32 + i;
- uint bitmap_l0 = tiles_read.values[tile_base + TILE_BITMAP_L0_OFFSET + index_l0];
-
- count += bitCount(bitmap_l0);
- }
- }
-
- const vec3 color = plasma_quintic(float(count) / 100.0);
- imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
-
-#elif DEBUG_SHOW_TILES == 2
-
- uint count = hi - lo;
- const vec3 color = plasma_quintic(float(count) / 100.0);
- imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
-
-#else
-
- const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5);
-
- vec4 accum = vec4(0.0);
-
- // For each tile, iterate over all words in the L1 bitmap.
- for (uint index_l1 = lo; index_l1 <= hi; index_l1++) {
- // For each word, iterate all set bits.
- uint bitmap_l1 = tiles_read.values[tile_base + TILE_BITMAP_L1_OFFSET + index_l1];
-
- while (bitmap_l1 != 0) {
- const uint i = findLSB(bitmap_l1);
- bitmap_l1 ^= bitmap_l1 & -bitmap_l1;
-
- // For each set bit in the L1 bitmap, iterate the set bits in the
- // corresponding L0 bitmap.
- const uint index_l0 = index_l1 * 32 + i;
- uint bitmap_l0 = tiles_read.values[tile_base + TILE_BITMAP_L0_OFFSET + index_l0];
- while (bitmap_l0 != 0) {
- const uint j = findLSB(bitmap_l0);
- bitmap_l0 ^= bitmap_l0 & -bitmap_l0;
-
- // Set bits in the L0 bitmap indicate binned primitives for this tile.
- const uint primitive_index = index_l0 * 32 + j;
- const PrimitiveInstance primitive_instance = uniforms.primitive_instances.values[primitive_index];
- const uint type = bitfieldExtract(primitive_instance.packed, 30, 2);
- const uint offset = bitfieldExtract(primitive_instance.packed, 0, 20);
-
- switch (type) {
- case PRIMITIVE_TYPE_RECT: {
- const Rect rect = uniforms.rects.values[offset];
- const vec2 rect_min = primitive_instance.position - rect.half_extent;
- const vec2 rect_max = primitive_instance.position + rect.half_extent;
- if (all(greaterThanEqual(sample_center, rect_min)) && all(lessThanEqual(sample_center, rect_max))) {
- const vec4 color = unpackUnorm4x8(primitive_instance.color).bgra;
- accum.rgb = color.rgb * color.a + accum.rgb * (1.0 - color.a);
- accum.a = color.a + accum.a * (1.0 - color.a);
- }
- break;
- }
- case PRIMITIVE_TYPE_GLYPH: {
- const Glyph glyph = uniforms.glyphs.values[offset];
- const vec2 glyph_min = primitive_instance.position + glyph.offset_min;
- const vec2 glyph_max = primitive_instance.position + glyph.offset_max;
- if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) {
- const vec2 glyph_size = glyph.offset_max - glyph.offset_min;
- const vec2 uv = mix(glyph.atlas_min, glyph.atlas_max, (sample_center - glyph_min) / glyph_size) / uniforms.atlas_resolution;
- const vec4 color = unpackUnorm4x8(primitive_instance.color).bgra;
- const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r * color.a;
- accum.rgb = color.rgb * coverage + accum.rgb * (1.0 - coverage);
- accum.a = coverage + accum.a * (1.0 - coverage);
- }
- break;
- }
- }
- }
- }
- }
-
- imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), accum);
-
-#endif
-}
--- /dev/null
+// Shared tuning constants for the GPU radix sort passes (upsweep / spine /
+// downsweep). Keys are 32-bit and sorted 8 bits at a time, so a full sort
+// takes four passes.
+#ifndef RADIX_SORT_H
+#define RADIX_SORT_H
+
+const uint RADIX_BITS = 8;
+const uint RADIX_DIGITS = 1 << RADIX_BITS; // 256 buckets per pass.
+const uint RADIX_MASK = RADIX_DIGITS - 1;  // Extracts one digit after shifting.
+
+// Upsweep / downsweep workgroup shape: 256 invocations, 16 keys each, so one
+// workgroup owns a 4096-key slice of the input.
+const uint RADIX_WGP_SIZE = 256;
+const uint RADIX_ITEMS_PER_INVOCATION = 16;
+const uint RADIX_ITEMS_PER_WGP = RADIX_WGP_SIZE * RADIX_ITEMS_PER_INVOCATION;
+
+// The spine (prefix sum) pass runs as a single workgroup of this size.
+const uint RADIX_SPINE_WGP_SIZE = 256;
+
+#endif
--- /dev/null
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_buffer_reference : require
+#extension GL_EXT_buffer_reference2 : require
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#include "compute_bindings.h"
+
+#include "radix_sort.h"
+
+#include "draw_2d.h"
+#include "indirect.h"
+
+// Radix sort pass 0 (upsweep): each workgroup builds a 256-entry digit
+// histogram over its RADIX_ITEMS_PER_WGP-key slice of the source buffer,
+// then writes the histogram to the spine buffer in a striped (digit-major)
+// layout so the spine pass can prefix-sum it efficiently.
+
+layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer CountRef {
+    uint value;
+};
+
+layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer ValuesRef {
+    uint values[];
+};
+
+layout(buffer_reference, std430, buffer_reference_align = 4) writeonly buffer SpineRef {
+    uint values[];
+};
+
+struct RadixSortUpsweepConstants {
+    uint shift; // Bit offset of this pass's 8-bit digit (0, 8, 16, or 24).
+    uint _pad;
+    CountRef count_buffer; // Number of keys to sort.
+    ValuesRef src_buffer;  // Keys to histogram.
+    SpineRef spine_buffer; // Output: per-workgroup digit counts, striped by digit.
+};
+
+layout(std430, push_constant) uniform RadixSortUpsweepConstantsBlock {
+    RadixSortUpsweepConstants constants;
+};
+
+// Per-workgroup digit histogram, accumulated with shared-memory atomics.
+shared uint histogram[RADIX_DIGITS];
+
+layout (local_size_x = RADIX_DIGITS, local_size_y = 1, local_size_z = 1) in;
+
+void main() {
+    const uint count = constants.count_buffer.value;
+    const uint shift = constants.shift;
+
+    // Clear local histogram
+    // (one entry per invocation: local_size_x == RADIX_DIGITS).
+    histogram[gl_LocalInvocationID.x] = 0;
+
+    barrier();
+
+    // This workgroup's slice of the key array.
+    const uint start = gl_WorkGroupID.x * RADIX_ITEMS_PER_WGP;
+    const uint end = start + RADIX_ITEMS_PER_WGP;
+
+    // Only the last workgroup can overrun `count`; every other workgroup
+    // takes the branch with no per-element bounds check.
+    const bool skip_bounds_check = end <= count;
+
+    if (skip_bounds_check) {
+        // NOTE(review): the loop stride is RADIX_DIGITS, which is only
+        // correct because it happens to equal the workgroup size
+        // (RADIX_WGP_SIZE, both 256) — confirm intent before changing either.
+        for (uint i = start; i < end; i += RADIX_DIGITS) {
+            const uint index = i + gl_LocalInvocationID.x;
+            const uint value = constants.src_buffer.values[index];
+            const uint digit = (value >> shift) & RADIX_MASK;
+            atomicAdd(histogram[digit], 1);
+        }
+    } else {
+        for (uint i = start; i < end; i += RADIX_DIGITS) {
+            const uint index = i + gl_LocalInvocationID.x;
+            if (index < count) {
+                const uint value = constants.src_buffer.values[index];
+                const uint digit = (value >> shift) & RADIX_MASK;
+                atomicAdd(histogram[digit], 1);
+            }
+        }
+    }
+
+    barrier();
+
+    // Scatter to the spine, this is a striped layout so we can efficiently
+    // calculate the prefix sum. Re-calculate how many workgroups we dispatched
+    // to determine the stride we need to write at.
+    const uint wgp_count = (count + (RADIX_ITEMS_PER_WGP - 1)) / RADIX_ITEMS_PER_WGP;
+    constants.spine_buffer.values[(gl_LocalInvocationID.x * wgp_count) + gl_WorkGroupID.x] = histogram[gl_LocalInvocationID.x];
+}
\ No newline at end of file
--- /dev/null
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_buffer_reference : require
+#extension GL_EXT_buffer_reference2 : require
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#extension GL_KHR_shader_subgroup_arithmetic : require
+#extension GL_KHR_shader_subgroup_ballot : require
+#extension GL_KHR_shader_subgroup_shuffle_relative: enable
+#extension GL_KHR_shader_subgroup_vote : require
+
+//#extension GL_EXT_debug_printf : enable
+
+#include "compute_bindings.h"
+
+#include "radix_sort.h"
+
+#include "draw_2d.h"
+#include "indirect.h"
+
+// Radix sort pass 1 (spine): a single workgroup converts the striped
+// per-workgroup digit histograms written by the upsweep into exclusive
+// prefix sums, in place, walking the spine serially in RADIX_DIGITS-sized
+// chunks and carrying the running total between chunks.
+
+layout(buffer_reference, std430, buffer_reference_align = 4) buffer CountRef {
+    uint value;
+};
+
+layout(buffer_reference, std430, buffer_reference_align = 4) buffer SpineRef {
+    uint values[];
+};
+
+struct RadixSortSpineConstants {
+    CountRef count_buffer; // Number of keys being sorted.
+    SpineRef spine_buffer; // Histograms in, exclusive prefix sums out (in place).
+};
+
+layout(std430, push_constant) uniform RadixSortSpineConstantsBlock {
+    RadixSortSpineConstants constants;
+};
+
+// Specialized at pipeline creation; must equal the device's real subgroup
+// size — presumably pinned with subgroup size control on the host. Confirm.
+layout (constant_id = 0) const uint SUBGROUP_SIZE = 64;
+
+// NOTE(review): assumes RADIX_SPINE_WGP_SIZE is a multiple of SUBGROUP_SIZE
+// and NUM_SUBGROUPS <= SUBGROUP_SIZE, since the partial-sum scan below runs
+// inside a single subgroup.
+const uint NUM_SUBGROUPS = RADIX_SPINE_WGP_SIZE / SUBGROUP_SIZE;
+
+// Per-subgroup partial sums for the current chunk.
+shared uint sums[NUM_SUBGROUPS];
+// Running total carried across chunks; updated by one invocation per chunk.
+shared uint carry_in;
+
+layout (local_size_x = RADIX_SPINE_WGP_SIZE, local_size_y = 1, local_size_z = 1) in;
+
+void main() {
+    // Flat invocation index within the workgroup.
+    const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
+
+    const uint count = constants.count_buffer.value;
+
+    // Re-calculate how many workgroups pushed data into the spine
+    const uint upsweep_wgp_count = (count + (RADIX_ITEMS_PER_WGP - 1)) / RADIX_ITEMS_PER_WGP;
+
+    // Every invocation stores 0; benign because the stores agree, and the
+    // first read below is separated from here by a barrier().
+    carry_in = 0;
+    for (uint i = 0; i < upsweep_wgp_count; i++) {
+        const uint spine_index = i * RADIX_DIGITS + local_id;
+
+        // Load values and calculate partial sums
+        const uint value = constants.spine_buffer.values[spine_index];
+        const uint sum = subgroupAdd(value);
+        const uint scan = subgroupExclusiveAdd(value);
+
+        if (subgroupElect()) {
+            sums[gl_SubgroupID] = sum;
+        }
+
+        barrier();
+
+        // Scan partials
+        // (runs entirely within the first NUM_SUBGROUPS invocations).
+        if (local_id < NUM_SUBGROUPS) {
+            sums[local_id] = subgroupExclusiveAdd(sums[local_id]);
+        }
+
+        // Snapshot the carry before the barrier so the update at the bottom
+        // of this iteration cannot be observed early.
+        const uint carry = carry_in;
+
+        barrier();
+
+        // Write out the final prefix sum, combining the carry-in, subgroup sums, and local scan
+        constants.spine_buffer.values[spine_index] = carry + sums[gl_SubgroupID] + scan;
+
+        // One invocation of the last subgroup folds this chunk's grand total
+        // (exclusive sum up to the last subgroup + its own reduction) into
+        // the carry for the next chunk.
+        if (gl_SubgroupID == gl_NumSubgroups - 1 && subgroupElect()) {
+            carry_in += sums[gl_SubgroupID] + sum;
+        }
+
+        // NOTE(review): this is a memory barrier only, not an execution
+        // barrier. Cross-iteration ordering appears to rely on the barrier()
+        // matching above plus the collective subgroup ops at the top of the
+        // loop; a plain barrier() here would be far easier to reason about —
+        // confirm this was deliberate.
+        memoryBarrierShared();
+    }
+}
--- /dev/null
+#version 460
+
+#extension GL_GOOGLE_include_directive : require
+
+#extension GL_EXT_buffer_reference : require
+#extension GL_EXT_buffer_reference2 : require
+#extension GL_EXT_scalar_block_layout : require
+#extension GL_EXT_control_flow_attributes : require
+
+#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
+
+#extension GL_KHR_shader_subgroup_arithmetic : require
+#extension GL_KHR_shader_subgroup_ballot : require
+#extension GL_KHR_shader_subgroup_shuffle_relative: enable
+#extension GL_KHR_shader_subgroup_vote : require
+
+//#extension GL_EXT_debug_printf : enable
+
+#include "compute_bindings.h"
+
+#include "radix_sort.h"
+
+#include "draw_2d.h"
+#include "indirect.h"
+
+// Radix sort pass 2 (downsweep): each workgroup re-reads its key slice,
+// ranks keys per digit with subgroup match masks, and scatters them to the
+// destination buffer at spine[digit] + rank, advancing its local spine copy
+// batch by batch.
+
+// Specialized at pipeline creation; must equal the device's real subgroup
+// size — presumably pinned with subgroup size control on the host. Confirm.
+layout (constant_id = 0) const uint SUBGROUP_SIZE = 64;
+
+const uint NUM_SUBGROUPS = RADIX_WGP_SIZE / SUBGROUP_SIZE;
+
+layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer CountRef {
+    uint value;
+};
+
+layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SpineRef {
+    uint values[];
+};
+
+layout(buffer_reference, std430, buffer_reference_align = 4) buffer ValuesRef {
+    uint values[];
+};
+
+struct RadixSortDownsweepConstants {
+    uint shift; // Bit offset of this pass's 8-bit digit (0, 8, 16, or 24).
+    uint _pad;
+    CountRef count_buffer; // Number of keys to sort.
+    SpineRef spine_buffer; // Exclusive prefix sums from the spine pass.
+    ValuesRef src_buffer;  // Keys to scatter.
+    ValuesRef dst_buffer;  // Destination for this pass.
+};
+
+layout(std430, push_constant) uniform RadixSortDownsweepConstantsBlock {
+    RadixSortDownsweepConstants constants;
+};
+
+// Fix: removed unused `shared uint values[RADIX_WGP_SIZE];` — nothing in
+// this shader referenced it and it only inflated shared-memory usage.
+
+// This workgroup's running output cursor per digit, seeded from the spine.
+shared uint spine[RADIX_DIGITS];
+// One 32-bit occupancy mask per (subgroup, digit) for the current batch.
+// NOTE(review): single-uint masks and the use of gl_SubgroupLtMask.x below
+// assume the real subgroup size is <= 32, yet SUBGROUP_SIZE defaults to 64 —
+// confirm the host forces a matching (<= 32) subgroup size, or widen these.
+shared uint match_masks[NUM_SUBGROUPS][RADIX_DIGITS];
+
+layout (local_size_x = RADIX_WGP_SIZE, local_size_y = 1, local_size_z = 1) in;
+
+void main() {
+    const uint shift = constants.shift;
+    const uint count = constants.count_buffer.value;
+    const uint wgp_count = (count + (RADIX_ITEMS_PER_WGP - 1)) / RADIX_ITEMS_PER_WGP;
+
+    const uint start = gl_WorkGroupID.x * RADIX_ITEMS_PER_WGP;
+    const uint end = min(start + RADIX_ITEMS_PER_WGP, count);
+
+    const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
+
+    // Gather from spine.
+    // (The first barrier() inside the loop makes these writes visible before
+    // any invocation reads `spine`.)
+    spine[local_id] = constants.spine_buffer.values[(local_id * wgp_count) + gl_WorkGroupID.x];
+
+    for (uint value_base = start; value_base < end; value_base += RADIX_WGP_SIZE) {
+        // Clear this batch's per-subgroup match masks.
+        for (uint i = 0; i < NUM_SUBGROUPS; i++) {
+            match_masks[i][local_id] = 0;
+        }
+
+        barrier();
+
+        const uint global_offset = value_base + local_id;
+        uint value = 0xffffffff;
+        uint digit = 0xff;
+
+        // Load one key and flag its digit in this subgroup's match mask.
+        if (global_offset < end) {
+            value = constants.src_buffer.values[global_offset];
+            digit = (value >> shift) & RADIX_MASK;
+            atomicOr(match_masks[gl_SubgroupID][digit], 1 << gl_SubgroupInvocationID);
+        }
+
+        barrier();
+
+        if (global_offset < end) {
+            const uint peer_mask = match_masks[gl_SubgroupID][digit];
+
+            // Rank = same-digit peers before us in our subgroup, plus all
+            // same-digit keys in earlier subgroups.
+            uint peer_scan = bitCount(peer_mask & gl_SubgroupLtMask.x);
+            for (uint i = 0; i < gl_NumSubgroups; i++) {
+                if (i < gl_SubgroupID) {
+                    peer_scan += bitCount(match_masks[i][digit]);
+                }
+            }
+
+            const uint dst_index = spine[digit] + peer_scan;
+            constants.dst_buffer.values[dst_index] = value;
+        }
+
+        barrier();
+
+        // Increment the spine with the counts for the workgroup we just wrote out.
+        for (uint i = 0; i < NUM_SUBGROUPS; i++) {
+            atomicAdd(spine[local_id], bitCount(match_masks[i][local_id]));
+        }
+
+        // Fix: without this barrier the reads of match_masks in the spine
+        // increment above race with the next iteration's clear of
+        // match_masks, which happens before that iteration's first barrier().
+        barrier();
+    }
+}
use narcissus_core::{obj, Widen};
use narcissus_maths::{vec2, vec3, vec4, Vec2, Vec3};
-use crate::pipelines::basic::Vertex;
+use crate::pipelines::Vertex;
pub fn load_obj<P: AsRef<Path>>(path: P) -> (Vec<Vertex>, Vec<u16>) {
#[derive(Default)]
use std::path::Path;
use std::time::{Duration, Instant};
-use narcissus_core::{dds, Widen as _};
-use pipelines::basic::BasicPipeline;
-use pipelines::{PrimitiveInstance, PrimitiveUniforms, Rect, TILE_SIZE, TILE_STRIDE};
+use narcissus_core::dds;
+
+use pipelines::{
+ BasicConstants, ComputeBinds, Draw2dClearConstants, Draw2dCmd, Draw2dScatterConstants,
+ Draw2dSortConstants, GraphicsBinds, Pipelines, RadixSortDownsweepConstants,
+ RadixSortSpineConstants, RadixSortUpsweepConstants, DRAW_2D_TILE_SIZE,
+};
+
use renderdoc_sys as rdoc;
use fonts::{FontFamily, Fonts};
use narcissus_core::{box_assume_init, default, rand::Pcg64, zeroed_box, BitIter};
use narcissus_font::{FontCollection, GlyphCache, HorizontalMetrics};
use narcissus_gpu::{
- create_device, Access, Bind, BindDesc, BindGroupLayout, BindingType, Buffer, BufferDesc,
- BufferImageCopy, BufferUsageFlags, ClearValue, CmdEncoder, ColorSpace, ComputePipelineDesc,
- Device, DeviceExt, Extent2d, Extent3d, Frame, GlobalBarrier, Image, ImageAspectFlags,
- ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout, ImageSubresourceRange,
- ImageTiling, ImageUsageFlags, IndexType, LoadOp, MemoryLocation, Offset2d, PersistentBuffer,
- Pipeline, PipelineLayout, PresentMode, PushConstantRange, RenderingAttachment, RenderingDesc,
- Sampler, SamplerAddressMode, SamplerDesc, SamplerFilter, Scissor, ShaderDesc, ShaderStageFlags,
- StoreOp, SwapchainConfigurator, SwapchainImage, ThreadToken, TypedBind, Viewport,
+ create_device, Access, Bind, BufferImageCopy, BufferUsageFlags, ClearValue, CmdEncoder,
+ ColorSpace, Device, DeviceExt, Extent2d, Extent3d, Frame, GlobalBarrier, Image,
+ ImageAspectFlags, ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout,
+ ImageSubresourceRange, ImageTiling, ImageUsageFlags, IndexType, LoadOp, MemoryLocation,
+ Offset2d, PersistentBuffer, PresentMode, RenderingAttachment, RenderingDesc, Scissor,
+ ShaderStageFlags, StoreOp, SwapchainConfigurator, SwapchainImage, ThreadToken, TypedBind,
+ Viewport,
};
use narcissus_image as image;
use narcissus_maths::{
};
use spring::simple_spring_damper_exact;
-use crate::pipelines::basic::BasicUniforms;
-
mod fonts;
mod helpers;
pub mod microshades;
player_speed: 10.0,
- weapon_cooldown: 0.0,
+ weapon_cooldown: 0.2,
weapon_projectile_speed: 20.0,
- weapon_projectile_lifetime: 6.0,
+ weapon_projectile_lifetime: 3.0,
};
#[derive(Clone, Copy, Debug)]
tmp_string: String,
- primitive_instances: Vec<PrimitiveInstance>,
- rects: Vec<Rect>,
+ draw_cmds: Vec<Draw2dCmd>,
}
impl<'a> UiState<'a> {
fonts,
glyph_cache,
tmp_string: default(),
- primitive_instances: vec![],
- rects: vec![],
+ draw_cmds: vec![],
}
}
let center_x = x + half_extent_x;
let center_y = y + half_extent_y;
- let rect_index = self.rects.len() as u32;
-
- self.rects.push(Rect {
+ self.draw_cmds.push(Draw2dCmd::rect(
+ center_x,
+ center_y,
half_extent_x,
half_extent_y,
- border_width: 0.0,
- border_radius: 0.0,
- });
-
- self.primitive_instances.push(PrimitiveInstance::rect(
- rect_index, 0x4400ff00, center_x, center_y,
+ 1.0,
+ 0x4400ff00,
+ 0xffff0000,
))
}
x += advance * scale;
- self.primitive_instances.push(PrimitiveInstance::glyph(
- touched_glyph_index,
- 0x880000ff,
- x,
- y,
- ));
+ self.draw_cmds
+ .push(Draw2dCmd::glyph(touched_glyph_index, 0x880000ff, x, y));
x += advance_width * scale;
}
}
}
-enum SamplerRes {
- Bilinear,
-}
-
-pub struct Samplers {
- bilinear: Sampler,
-}
-
-impl Index<SamplerRes> for Samplers {
- type Output = Sampler;
-
- fn index(&self, index: SamplerRes) -> &Self::Output {
- match index {
- SamplerRes::Bilinear => &self.bilinear,
- }
- }
-}
-
-impl Samplers {
- fn load(gpu: &Gpu) -> Samplers {
- let bilinear = gpu.create_sampler(&SamplerDesc {
- filter: SamplerFilter::Bilinear,
- address_mode: SamplerAddressMode::Clamp,
- compare_op: None,
- mip_lod_bias: 0.0,
- min_lod: 0.0,
- max_lod: 0.0,
- });
- Samplers { bilinear }
- }
-}
-
struct Model<'a> {
indices: u32,
vertex_buffer: PersistentBuffer<'a>,
struct DrawState<'gpu> {
gpu: &'gpu Gpu,
- basic_pipeline: BasicPipeline,
-
- compute_bind_group_layout: BindGroupLayout,
- bin_clear_pipeline: Pipeline,
- bin_pipeline: Pipeline,
- rasterize_pipeline: Pipeline,
- display_transform_pipeline: Pipeline,
-
width: u32,
height: u32,
color_image: Image,
ui_image: Image,
- tiles_buffer: Buffer,
-
glyph_atlas_image: Image,
- _samplers: Samplers,
+ pipelines: Pipelines,
+
models: Models<'gpu>,
images: Images,
impl<'gpu> DrawState<'gpu> {
fn new(gpu: &'gpu Gpu, thread_token: &ThreadToken) -> Self {
- let samplers = Samplers::load(gpu);
- let immutable_samplers = &[samplers[SamplerRes::Bilinear]];
-
- let compute_bind_group_layout = gpu.create_bind_group_layout(&[
- // Samplers
- BindDesc::with_immutable_samplers(ShaderStageFlags::COMPUTE, immutable_samplers),
- // Tony mc mapface LUT
- BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::SampledImage),
- // Glyph Atlas
- BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::SampledImage),
- // UI Render Target
- BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
- // Color Render Target
- BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
- // Composited output
- BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
- ]);
-
- let compute_pipeline_layout = PipelineLayout {
- bind_group_layouts: &[compute_bind_group_layout],
- push_constant_ranges: &[PushConstantRange {
- stage_flags: ShaderStageFlags::COMPUTE,
- offset: 0,
- size: std::mem::size_of::<PrimitiveUniforms>() as u32,
- }],
- };
-
- let bin_clear_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
- shader: ShaderDesc {
- entry: c"main",
- code: shark_shaders::PRIMITIVE_2D_BIN_CLEAR_COMP_SPV,
- },
- layout: &compute_pipeline_layout,
- });
-
- let bin_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
- shader: ShaderDesc {
- entry: c"main",
- code: shark_shaders::PRIMITIVE_2D_BIN_COMP_SPV,
- },
- layout: &compute_pipeline_layout,
- });
-
- let rasterize_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
- shader: ShaderDesc {
- entry: c"main",
- code: shark_shaders::PRIMITIVE_2D_RASTERIZE_COMP_SPV,
- },
- layout: &compute_pipeline_layout,
- });
-
- let display_transform_pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
- shader: ShaderDesc {
- entry: c"main",
- code: shark_shaders::DISPLAY_TRANSFORM_COMP_SPV,
- },
- layout: &compute_pipeline_layout,
- });
-
- let basic_pipeline = BasicPipeline::new(gpu, immutable_samplers);
-
+ let pipelines = Pipelines::load(gpu);
let models = Models::load(gpu);
let images = Images::load(gpu, thread_token);
Self {
gpu,
- basic_pipeline,
- compute_bind_group_layout,
- bin_clear_pipeline,
- bin_pipeline,
- rasterize_pipeline,
- display_transform_pipeline,
width: 0,
height: 0,
tile_resolution_x: 0,
depth_image: default(),
color_image: default(),
ui_image: default(),
- tiles_buffer: default(),
glyph_atlas_image: default(),
- _samplers: samplers,
+ pipelines,
models,
images,
transforms: vec![],
let gpu = self.gpu;
let half_turn_y = Mat3::from_axis_rotation(Vec3::Y, HalfTurn::new(0.5));
- let scale = Mat3::from_scale(Vec3::splat(0.125));
+ let scale = Mat3::from_scale(Vec3::splat(0.4));
fn rotate_dir(dir: Vec3, up: Vec3) -> Mat3 {
let f = dir.normalized();
gpu.destroy_image(frame, self.color_image);
gpu.destroy_image(frame, self.ui_image);
- let tile_resolution_x = (width + (TILE_SIZE - 1)) / TILE_SIZE;
- let tile_resolution_y = (height + (TILE_SIZE - 1)) / TILE_SIZE;
-
- if tile_resolution_x != self.tile_resolution_x
- || tile_resolution_y != self.tile_resolution_y
- {
- gpu.destroy_buffer(frame, self.tiles_buffer);
-
- let bitmap_buffer_size = tile_resolution_x
- * tile_resolution_y
- * TILE_STRIDE
- * std::mem::size_of::<u32>() as u32;
-
- self.tiles_buffer = gpu.create_buffer(&BufferDesc {
- memory_location: MemoryLocation::Device,
- host_mapped: false,
- usage: BufferUsageFlags::STORAGE,
- size: bitmap_buffer_size.widen(),
- });
-
- gpu.debug_name_buffer(self.tiles_buffer.to_arg(), "tile bitmap");
-
- println!("tile_resolution: ({tile_resolution_x},{tile_resolution_y})");
-
- self.tile_resolution_x = tile_resolution_x;
- self.tile_resolution_y = tile_resolution_y;
- }
+ self.tile_resolution_x = (width + (DRAW_2D_TILE_SIZE - 1)) / DRAW_2D_TILE_SIZE;
+ self.tile_resolution_y = (height + (DRAW_2D_TILE_SIZE - 1)) / DRAW_2D_TILE_SIZE;
self.depth_image = gpu.create_image(&ImageDesc {
memory_location: MemoryLocation::Device,
// Render basic stuff.
{
- gpu.cmd_set_pipeline(cmd_encoder, self.basic_pipeline.pipeline);
+ let model = &self.models[ModelRes::Shark];
+ let image = self.images[ImageRes::Shark];
- let basic_uniforms = BasicUniforms { clip_from_model };
+ let instance_count = self.transforms.len() as u32;
- let uniform_buffer = gpu.request_transient_buffer_with_data(
+ let transform_buffer = gpu.request_transient_buffer_with_data(
frame,
thread_token,
- BufferUsageFlags::UNIFORM,
- &basic_uniforms,
+ BufferUsageFlags::STORAGE,
+ self.transforms.as_slice(),
);
- gpu.cmd_set_bind_group(
+ // We're done with you now!
+ self.transforms.clear();
+
+ let graphics_bind_group = gpu.request_transient_bind_group(
frame,
- cmd_encoder,
- self.basic_pipeline.uniforms_bind_group_layout,
- 0,
+ thread_token,
+ self.pipelines.graphics_bind_group_layout,
&[Bind {
- binding: 0,
+ binding: GraphicsBinds::Albedo as u32,
array_element: 0,
- typed: TypedBind::UniformBuffer(&[uniform_buffer.to_arg()]),
+ typed: TypedBind::SampledImage(&[(ImageLayout::Optimal, image)]),
}],
);
- {
- let model = &self.models[ModelRes::Shark];
- let image = self.images[ImageRes::Shark];
-
- let transform_buffer = gpu.request_transient_buffer_with_data(
- frame,
- thread_token,
- BufferUsageFlags::STORAGE,
- self.transforms.as_slice(),
- );
-
- gpu.cmd_set_bind_group(
- frame,
- cmd_encoder,
- self.basic_pipeline.storage_bind_group_layout,
- 1,
- &[
- Bind {
- binding: 0,
- array_element: 0,
- typed: TypedBind::StorageBuffer(&[model.vertex_buffer.to_arg()]),
- },
- Bind {
- binding: 1,
- array_element: 0,
- typed: TypedBind::StorageBuffer(&[transform_buffer.to_arg()]),
- },
- Bind {
- binding: 2,
- array_element: 0,
- typed: TypedBind::SampledImage(&[(ImageLayout::Optimal, image)]),
- },
- ],
- );
-
- gpu.cmd_set_index_buffer(
- cmd_encoder,
- model.index_buffer.to_arg(),
- 0,
- IndexType::U16,
- );
+ gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.basic_pipeline);
+ gpu.cmd_set_bind_group(cmd_encoder, 0, &graphics_bind_group);
+ gpu.cmd_push_constants(
+ cmd_encoder,
+ ShaderStageFlags::VERTEX,
+ 0,
+ &BasicConstants {
+ clip_from_model,
+ vertex_buffer_address: gpu.get_buffer_address(model.vertex_buffer.to_arg()),
+ transform_buffer_address: gpu.get_buffer_address(transform_buffer.to_arg()),
+ },
+ );
- gpu.cmd_draw_indexed(
- cmd_encoder,
- model.indices,
- self.transforms.len() as u32,
- 0,
- 0,
- 0,
- );
- }
+ gpu.cmd_set_index_buffer(
+ cmd_encoder,
+ model.index_buffer.to_arg(),
+ 0,
+ IndexType::U16,
+ );
- // We're done with you now!
- self.transforms.clear();
+ gpu.cmd_draw_indexed(cmd_encoder, model.indices, instance_count, 0, 0, 0);
}
gpu.cmd_end_rendering(cmd_encoder);
gpu.cmd_end_debug_marker(cmd_encoder);
+ let compute_bind_group = gpu.request_transient_bind_group(
+ frame,
+ thread_token,
+ self.pipelines.compute_bind_group_layout,
+ &[
+ Bind {
+ binding: ComputeBinds::TonyMcMapfaceLut as u32,
+ array_element: 0,
+ typed: TypedBind::SampledImage(&[(
+ ImageLayout::Optimal,
+ self.images[ImageRes::TonyMcMapfaceLut],
+ )]),
+ },
+ Bind {
+ binding: ComputeBinds::GlyphAtlas as u32,
+ array_element: 0,
+ typed: TypedBind::SampledImage(&[(
+ ImageLayout::Optimal,
+ self.glyph_atlas_image,
+ )]),
+ },
+ Bind {
+ binding: ComputeBinds::UiRenderTarget as u32,
+ array_element: 0,
+ typed: TypedBind::StorageImage(&[(ImageLayout::General, self.ui_image)]),
+ },
+ Bind {
+ binding: ComputeBinds::ColorRenderTarget as u32,
+ array_element: 0,
+ typed: TypedBind::StorageImage(&[(ImageLayout::General, self.color_image)]),
+ },
+ Bind {
+ binding: ComputeBinds::CompositedRenderTarget as u32,
+ array_element: 0,
+ typed: TypedBind::StorageImage(&[(ImageLayout::General, swapchain_image)]),
+ },
+ ],
+ );
+
// Render UI
{
gpu.cmd_begin_debug_marker(
microshades::PURPLE_RGBA_F32[3],
);
- let primitive_instance_buffer = gpu.request_transient_buffer_with_data(
+ let draw_buffer = gpu.request_transient_buffer_with_data(
frame,
thread_token,
BufferUsageFlags::STORAGE,
- ui_state.primitive_instances.as_slice(),
+ ui_state.draw_cmds.as_slice(),
);
+
+ let draw_buffer_len = ui_state.draw_cmds.len() as u32;
+ ui_state.draw_cmds.clear();
+
let glyph_buffer = gpu.request_transient_buffer_with_data(
frame,
thread_token,
BufferUsageFlags::STORAGE,
touched_glyphs,
);
- let rect_buffer = gpu.request_transient_buffer_with_data(
+
+ const COARSE_BUFFER_LEN: usize = 1 << 18;
+ let coarse_buffer = gpu.request_transient_buffer(
frame,
thread_token,
BufferUsageFlags::STORAGE,
- ui_state.rects.as_slice(),
+ COARSE_BUFFER_LEN * std::mem::size_of::<u32>(),
);
- let num_primitives = ui_state.primitive_instances.len() as u32;
- let num_primitives_32 = (num_primitives + 31) / 32;
- let num_primitives_1024 = (num_primitives_32 + 31) / 32;
-
- ui_state.primitive_instances.clear();
+ let indirect_dispatch_buffer = gpu.request_transient_buffer(
+ frame,
+ thread_token,
+ BufferUsageFlags::INDIRECT,
+ 3 * std::mem::size_of::<u32>(),
+ );
- gpu.cmd_set_pipeline(cmd_encoder, self.bin_clear_pipeline);
+ let sort_tmp_buffer = gpu.request_transient_buffer(
+ frame,
+ thread_token,
+ BufferUsageFlags::STORAGE,
+ COARSE_BUFFER_LEN * std::mem::size_of::<u32>(),
+ );
- gpu.cmd_set_bind_group(
+ let spine_buffer = gpu.request_transient_buffer(
frame,
+ thread_token,
+ BufferUsageFlags::STORAGE,
+ (COARSE_BUFFER_LEN / (32 * 16)) * 256 * std::mem::size_of::<u32>(), // TODO: Fix size
+ );
+
+ let draw_buffer_address = gpu.get_buffer_address(draw_buffer.to_arg());
+ let glyph_buffer_address = gpu.get_buffer_address(glyph_buffer.to_arg());
+ let coarse_buffer_address = gpu.get_buffer_address(coarse_buffer.to_arg());
+ let indirect_dispatch_buffer_address =
+ gpu.get_buffer_address(indirect_dispatch_buffer.to_arg());
+ let sort_tmp_buffer_address = gpu.get_buffer_address(sort_tmp_buffer.to_arg());
+ let spine_buffer_address = gpu.get_buffer_address(spine_buffer.to_arg());
+
+ gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.draw_2d_bin_0_clear_pipeline);
+ gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
+ gpu.cmd_push_constants(
cmd_encoder,
- self.compute_bind_group_layout,
+ ShaderStageFlags::COMPUTE,
0,
- &[
- Bind {
- binding: 1,
- array_element: 0,
- typed: TypedBind::SampledImage(&[(
- ImageLayout::Optimal,
- self.images[ImageRes::TonyMcMapfaceLut],
- )]),
- },
- Bind {
- binding: 2,
- array_element: 0,
- typed: TypedBind::SampledImage(&[(
- ImageLayout::Optimal,
- self.glyph_atlas_image,
- )]),
- },
- Bind {
- binding: 3,
- array_element: 0,
- typed: TypedBind::StorageImage(&[(
- ImageLayout::General,
- self.ui_image,
- )]),
- },
- Bind {
- binding: 4,
- array_element: 0,
- typed: TypedBind::StorageImage(&[(
- ImageLayout::General,
- self.color_image,
- )]),
- },
- Bind {
- binding: 5,
- array_element: 0,
- typed: TypedBind::StorageImage(&[(
- ImageLayout::General,
- swapchain_image,
- )]),
- },
- ],
+ &Draw2dClearConstants {
+ coarse_buffer_address,
+ },
+ );
+ gpu.cmd_dispatch(cmd_encoder, 1, 1, 1);
+
+ gpu.cmd_barrier(
+ cmd_encoder,
+ Some(&GlobalBarrier {
+ prev_access: &[Access::ComputeWrite],
+ next_access: &[Access::ComputeOtherRead],
+ }),
+ &[],
);
+ gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.draw_2d_bin_1_scatter_pipeline);
+ gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
gpu.cmd_push_constants(
cmd_encoder,
ShaderStageFlags::COMPUTE,
0,
- &PrimitiveUniforms {
+ &Draw2dScatterConstants {
screen_resolution_x: self.width,
screen_resolution_y: self.height,
- atlas_resolution_x: atlas_width,
- atlas_resolution_y: atlas_height,
- num_primitives,
- num_primitives_32,
- num_primitives_1024,
tile_resolution_x: self.tile_resolution_x,
tile_resolution_y: self.tile_resolution_y,
- tile_stride: self.tile_resolution_x,
- primitives_instances_buffer: gpu
- .get_buffer_address(primitive_instance_buffer.to_arg()),
- glyphs_buffer: gpu.get_buffer_address(glyph_buffer.to_arg()),
- rects_buffer: gpu.get_buffer_address(rect_buffer.to_arg()),
- tiles_buffer: gpu.get_buffer_address(self.tiles_buffer.to_arg()),
+ draw_buffer_len,
+ coarse_buffer_len: COARSE_BUFFER_LEN as u32,
+ draw_buffer_address,
+ glyph_buffer_address,
+ coarse_buffer_address,
},
);
- gpu.cmd_dispatch(
+ for _ in 0..4 {
+ gpu.cmd_dispatch(
+ cmd_encoder,
+ (draw_buffer_len
+ + (self.pipelines.draw_2d_bin_1_scatter_pipeline_workgroup_size - 1))
+ / self.pipelines.draw_2d_bin_1_scatter_pipeline_workgroup_size,
+ 1,
+ 1,
+ );
+ }
+
+ gpu.cmd_barrier(
cmd_encoder,
- (num_primitives_1024 + 63) / 64,
- self.tile_resolution_x,
- self.tile_resolution_y,
+ Some(&GlobalBarrier {
+ prev_access: &[Access::ComputeWrite],
+ next_access: &[Access::ComputeOtherRead],
+ }),
+ &[],
+ );
+
+ // let mut sort_data = Vec::new();
+ // let count = 8192u32;
+ // sort_data.push(count);
+ // for i in 0..count {
+ // sort_data.push(255 - i / 32);
+ // }
+
+ // let sort_buffer = gpu.request_transient_buffer_with_data(
+ // frame,
+ // thread_token,
+ // BufferUsageFlags::STORAGE,
+ // sort_data.as_slice(),
+ // );
+ // let sort_buffer_address = gpu.get_buffer_address(sort_buffer.to_arg());
+
+ gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.draw_2d_bin_2_sort_pipeline);
+ gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
+ gpu.cmd_push_constants(
+ cmd_encoder,
+ ShaderStageFlags::COMPUTE,
+ 0,
+ &Draw2dSortConstants {
+ // -1 due to the count taking up a single slot in the buffer.
+ coarse_buffer_len: COARSE_BUFFER_LEN as u32 - 1,
+ _pad: 0,
+ indirect_dispatch_buffer_address,
+ coarse_buffer_address,
+ },
);
+ gpu.cmd_dispatch(cmd_encoder, 1, 1, 1);
gpu.cmd_barrier(
cmd_encoder,
Some(&GlobalBarrier {
prev_access: &[Access::ComputeWrite],
- next_access: &[Access::ComputeOtherRead],
+ next_access: &[Access::ComputeOtherRead, Access::IndirectBuffer],
}),
&[],
);
- gpu.cmd_set_pipeline(cmd_encoder, self.bin_pipeline);
+ gpu.cmd_begin_debug_marker(
+ cmd_encoder,
+ "radix sort",
+ microshades::ORANGE_RGBA_F32[2],
+ );
- gpu.cmd_dispatch(cmd_encoder, (num_primitives + 1023) / 1024, 1, 1);
+ // First element in the scratch buffer is the count.
+ let count_buffer_address = coarse_buffer_address;
+ // Then the elements we want to sort follow.
+ let mut src_buffer_address = count_buffer_address.byte_add(4);
+ let mut dst_buffer_address = sort_tmp_buffer_address;
+
+ for pass in 0..4 {
+ let shift = pass * 8;
+
+ // Upsweep
+ gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.radix_sort_0_upsweep_pipeline);
+ gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
+ gpu.cmd_push_constants(
+ cmd_encoder,
+ ShaderStageFlags::COMPUTE,
+ 0,
+ &RadixSortUpsweepConstants {
+ shift,
+ _pad: 0,
+ count_buffer_address,
+ src_buffer_address,
+ spine_buffer_address,
+ },
+ );
+ gpu.cmd_dispatch_indirect(cmd_encoder, indirect_dispatch_buffer.to_arg(), 0);
+
+ gpu.cmd_barrier(
+ cmd_encoder,
+ Some(&GlobalBarrier {
+ prev_access: &[Access::ComputeWrite],
+ next_access: &[Access::ComputeOtherRead],
+ }),
+ &[],
+ );
+
+ // Exclusive sum of the spine
+ gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.radix_sort_1_spine_pipeline);
+ gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
+ gpu.cmd_push_constants(
+ cmd_encoder,
+ ShaderStageFlags::COMPUTE,
+ 0,
+ &RadixSortSpineConstants {
+ count_buffer_address,
+ spine_buffer_address,
+ },
+ );
+ gpu.cmd_dispatch(cmd_encoder, 1, 1, 1);
+
+ gpu.cmd_barrier(
+ cmd_encoder,
+ Some(&GlobalBarrier {
+ prev_access: &[Access::ComputeWrite],
+ next_access: &[Access::ComputeOtherRead],
+ }),
+ &[],
+ );
+
+ // Downsweep
+ gpu.cmd_set_pipeline(
+ cmd_encoder,
+ self.pipelines.radix_sort_2_downsweep_pipeline,
+ );
+ gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
+ gpu.cmd_push_constants(
+ cmd_encoder,
+ ShaderStageFlags::COMPUTE,
+ 0,
+ &RadixSortDownsweepConstants {
+ shift,
+ _pad: 0,
+ count_buffer_address,
+ src_buffer_address,
+ dst_buffer_address,
+ spine_buffer_address,
+ },
+ );
+ gpu.cmd_dispatch_indirect(cmd_encoder, indirect_dispatch_buffer.to_arg(), 0);
+
+ gpu.cmd_barrier(
+ cmd_encoder,
+ Some(&GlobalBarrier {
+ prev_access: &[Access::ComputeWrite],
+ next_access: &[Access::ComputeOtherRead],
+ }),
+ &[],
+ );
+
+ std::mem::swap(&mut src_buffer_address, &mut dst_buffer_address);
+ }
+
+ gpu.cmd_end_debug_marker(cmd_encoder);
+
+ gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.draw_2d_bin_3_resolve_pipeline);
+ gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
+ gpu.cmd_dispatch(
+ cmd_encoder,
+ self.tile_resolution_x,
+ self.tile_resolution_y,
+ 1,
+ );
gpu.cmd_barrier(
cmd_encoder,
&[],
);
- gpu.cmd_set_pipeline(cmd_encoder, self.rasterize_pipeline);
-
+ gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.draw_2d_rasterize_pipeline);
+ gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
gpu.cmd_dispatch(cmd_encoder, (self.width + 7) / 8, (self.height + 7) / 8, 1);
gpu.cmd_end_debug_marker(cmd_encoder);
gpu.cmd_compute_touch_swapchain(cmd_encoder, swapchain_image);
- gpu.cmd_set_pipeline(cmd_encoder, self.display_transform_pipeline);
+ gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.composite_pipeline);
+ gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
gpu.cmd_dispatch(cmd_encoder, (self.width + 7) / 8, (self.height + 7) / 8, 1);
--- /dev/null
+use narcissus_core::default;
+use narcissus_font::TouchedGlyphIndex;
+use narcissus_gpu::{
+ BindDesc, BindGroupLayout, BindingType, BlendMode, BufferAddress, CompareOp,
+ ComputePipelineDesc, CullingMode, FrontFace, GraphicsPipelineAttachments, GraphicsPipelineDesc,
+ ImageFormat, Pipeline, PipelineLayout, PolygonMode, PushConstantRange, Sampler,
+ SamplerAddressMode, SamplerDesc, SamplerFilter, ShaderDesc, ShaderStageFlags, SpecConstant,
+ Topology,
+};
+use narcissus_maths::Mat4;
+
+use crate::Gpu;
+
+pub const DRAW_2D_TILE_SIZE: u32 = 32;
+
+#[allow(unused)]
+#[repr(C)]
+pub struct Vertex {
+ pub position: [f32; 4],
+ pub normal: [f32; 4],
+ pub texcoord: [f32; 4],
+}
+
+#[repr(u32)]
+enum Draw2dCmdType {
+ Rect,
+ Glyph,
+}
+
+#[allow(unused)]
+#[repr(C)]
+pub union Draw2dCmd {
+ rect: CmdRect,
+ glyph: CmdGlyph,
+}
+
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct CmdGlyph {
+ r#type: u32,
+ index: u32,
+ x: f32,
+ y: f32,
+ color: u32,
+ _padding: [u8; 12],
+}
+
+const _: () = assert!(std::mem::size_of::<CmdGlyph>() == std::mem::size_of::<Draw2dCmd>());
+
+#[repr(C)]
+#[derive(Clone, Copy)]
+struct CmdRect {
+ r#type: u32,
+ border_width: f32,
+ x: f32,
+ y: f32,
+ half_extent_x: f32,
+ half_extent_y: f32,
+ background_color: u32,
+ border_color: u32,
+}
+
+const _: () = assert!(std::mem::size_of::<CmdRect>() == std::mem::size_of::<Draw2dCmd>());
+
+impl Draw2dCmd {
+ #[inline(always)]
+ pub fn glyph(glyph_index: TouchedGlyphIndex, color: u32, x: f32, y: f32) -> Self {
+ Self {
+ glyph: CmdGlyph {
+ r#type: Draw2dCmdType::Glyph as u32,
+ index: glyph_index.as_u32(),
+ x,
+ y,
+ color,
+ _padding: default(),
+ },
+ }
+ }
+
+ #[inline(always)]
+ pub fn rect(
+ x: f32,
+ y: f32,
+ half_extent_x: f32,
+ half_extent_y: f32,
+ border_width: f32,
+ background_color: u32,
+ border_color: u32,
+ ) -> Self {
+ Self {
+ rect: CmdRect {
+ r#type: Draw2dCmdType::Rect as u32,
+ border_width,
+ x,
+ y,
+ half_extent_x,
+ half_extent_y,
+ background_color,
+ border_color,
+ },
+ }
+ }
+}
+
+pub struct Samplers {
+ pub bilinear: Sampler,
+}
+
+impl Samplers {
+ fn load(gpu: &Gpu) -> Samplers {
+ let bilinear = gpu.create_sampler(&SamplerDesc {
+ filter: SamplerFilter::Bilinear,
+ address_mode: SamplerAddressMode::Clamp,
+ compare_op: None,
+ mip_lod_bias: 0.0,
+ min_lod: 0.0,
+ max_lod: 0.0,
+ });
+ Samplers { bilinear }
+ }
+}
+
+pub enum GraphicsBinds {
+ #[allow(unused)]
+ ImmutableSamplers,
+ Albedo,
+}
+
+pub enum ComputeBinds {
+ #[allow(unused)]
+ ImmutableSamplers,
+ TonyMcMapfaceLut,
+ GlyphAtlas,
+ UiRenderTarget,
+ ColorRenderTarget,
+ CompositedRenderTarget,
+}
+
+#[repr(C)]
+pub struct BasicConstants<'a> {
+ pub clip_from_model: Mat4,
+ pub vertex_buffer_address: BufferAddress<'a>,
+ pub transform_buffer_address: BufferAddress<'a>,
+}
+
+#[repr(C)]
+pub struct Draw2dClearConstants<'a> {
+ pub coarse_buffer_address: BufferAddress<'a>,
+}
+
+#[repr(C)]
+pub struct Draw2dScatterConstants<'a> {
+ pub screen_resolution_x: u32,
+ pub screen_resolution_y: u32,
+ pub tile_resolution_x: u32,
+ pub tile_resolution_y: u32,
+
+ pub draw_buffer_len: u32,
+ pub coarse_buffer_len: u32,
+
+ pub draw_buffer_address: BufferAddress<'a>,
+ pub glyph_buffer_address: BufferAddress<'a>,
+ pub coarse_buffer_address: BufferAddress<'a>,
+}
+
+#[repr(C)]
+pub struct Draw2dSortConstants<'a> {
+ pub coarse_buffer_len: u32,
+ pub _pad: u32,
+ pub indirect_dispatch_buffer_address: BufferAddress<'a>,
+ pub coarse_buffer_address: BufferAddress<'a>,
+}
+
+#[repr(C)]
+pub struct RadixSortUpsweepConstants<'a> {
+ pub shift: u32,
+ pub _pad: u32,
+ pub count_buffer_address: BufferAddress<'a>,
+ pub src_buffer_address: BufferAddress<'a>,
+ pub spine_buffer_address: BufferAddress<'a>,
+}
+
+#[repr(C)]
+pub struct RadixSortSpineConstants<'a> {
+ pub count_buffer_address: BufferAddress<'a>,
+ pub spine_buffer_address: BufferAddress<'a>,
+}
+
+#[repr(C)]
+pub struct RadixSortDownsweepConstants<'a> {
+ pub shift: u32,
+ pub _pad: u32,
+ pub count_buffer_address: BufferAddress<'a>,
+ pub spine_buffer_address: BufferAddress<'a>,
+ pub src_buffer_address: BufferAddress<'a>,
+ pub dst_buffer_address: BufferAddress<'a>,
+}
+
+pub struct Pipelines {
+ _samplers: Samplers,
+
+ pub graphics_bind_group_layout: BindGroupLayout,
+ pub compute_bind_group_layout: BindGroupLayout,
+
+ pub basic_pipeline: Pipeline,
+
+ pub draw_2d_bin_0_clear_pipeline: Pipeline,
+ pub draw_2d_bin_1_scatter_pipeline_workgroup_size: u32,
+ pub draw_2d_bin_1_scatter_pipeline: Pipeline,
+ pub draw_2d_bin_2_sort_pipeline: Pipeline,
+ pub draw_2d_bin_3_resolve_pipeline: Pipeline,
+ pub draw_2d_rasterize_pipeline: Pipeline,
+
+ pub radix_sort_0_upsweep_pipeline: Pipeline,
+ pub radix_sort_1_spine_pipeline: Pipeline,
+ pub radix_sort_2_downsweep_pipeline: Pipeline,
+
+ pub composite_pipeline: Pipeline,
+}
+
+impl Pipelines {
+ pub fn load(gpu: &Gpu) -> Self {
+ let samplers = Samplers::load(gpu);
+ let immutable_samplers = &[samplers.bilinear];
+
+ let graphics_bind_group_layout = gpu.create_bind_group_layout(&[
+ // Samplers
+ BindDesc::with_immutable_samplers(ShaderStageFlags::FRAGMENT, immutable_samplers),
+ // Albedo
+ BindDesc::new(ShaderStageFlags::FRAGMENT, BindingType::SampledImage),
+ ]);
+
+ gpu.debug_name_bind_group_layout(graphics_bind_group_layout, "graphics");
+
+ let compute_bind_group_layout = gpu.create_bind_group_layout(&[
+ // Samplers
+ BindDesc::with_immutable_samplers(ShaderStageFlags::COMPUTE, immutable_samplers),
+ // Tony mc mapface LUT
+ BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::SampledImage),
+ // Glyph Atlas
+ BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::SampledImage),
+ // UI Render Target
+ BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
+ // Color Render Target
+ BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
+ // Composited Render Target
+ BindDesc::new(ShaderStageFlags::COMPUTE, BindingType::StorageImage),
+ ]);
+
+ gpu.debug_name_bind_group_layout(compute_bind_group_layout, "compute");
+
+ let basic_pipeline = gpu.create_graphics_pipeline(&GraphicsPipelineDesc {
+ vertex_shader: ShaderDesc {
+ code: shark_shaders::BASIC_VERT_SPV,
+ ..default()
+ },
+ fragment_shader: ShaderDesc {
+ code: shark_shaders::BASIC_FRAG_SPV,
+ ..default()
+ },
+ layout: PipelineLayout {
+ bind_group_layouts: &[graphics_bind_group_layout],
+ push_constant_ranges: &[PushConstantRange {
+ stage_flags: ShaderStageFlags::VERTEX,
+ offset: 0,
+ size: std::mem::size_of::<BasicConstants>() as u32,
+ }],
+ },
+ attachments: GraphicsPipelineAttachments {
+ color_attachment_formats: &[ImageFormat::RGBA16_FLOAT],
+ depth_attachment_format: Some(ImageFormat::DEPTH_F32),
+ stencil_attachment_format: None,
+ },
+ topology: Topology::Triangles,
+ primitive_restart: false,
+ polygon_mode: PolygonMode::Fill,
+ culling_mode: CullingMode::Back,
+ front_face: FrontFace::CounterClockwise,
+ blend_mode: BlendMode::Opaque,
+ depth_bias: None,
+ depth_compare_op: CompareOp::GreaterOrEqual,
+ depth_test_enable: true,
+ depth_write_enable: true,
+ stencil_test_enable: false,
+ stencil_back: default(),
+ stencil_front: default(),
+ });
+
+ gpu.debug_name_pipeline(basic_pipeline, "basic");
+
+ let create_compute_pipeline = |code, name, workgroup_size, push_constant_size| {
+ let push_constant_range = PushConstantRange {
+ stage_flags: ShaderStageFlags::COMPUTE,
+ offset: 0,
+ size: push_constant_size as u32,
+ };
+
+ let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
+ shader: ShaderDesc {
+ code,
+ require_full_subgroups: workgroup_size != 0,
+ required_subgroup_size: if workgroup_size != 0 {
+ Some(workgroup_size)
+ } else {
+ None
+ },
+ spec_constants: &[SpecConstant::U32 {
+ id: 0,
+ value: workgroup_size,
+ }],
+ ..default()
+ },
+ layout: PipelineLayout {
+ bind_group_layouts: &[compute_bind_group_layout],
+ // Validation cries about push constant ranges with zero size.
+ push_constant_ranges: if push_constant_range.size != 0 {
+ std::slice::from_ref(&push_constant_range)
+ } else {
+ &[]
+ },
+ },
+ });
+
+ gpu.debug_name_pipeline(pipeline, name);
+
+ pipeline
+ };
+
+ let draw_2d_bin_0_clear_pipeline = create_compute_pipeline(
+ shark_shaders::DRAW_2D_BIN_0_CLEAR_COMP_SPV,
+ "draw2d_bin_clear",
+ 0,
+ std::mem::size_of::<Draw2dClearConstants>(),
+ );
+
+ let draw_2d_bin_1_scatter_pipeline_workgroup_size = 32;
+ let draw_2d_bin_1_scatter_pipeline = create_compute_pipeline(
+ shark_shaders::DRAW_2D_BIN_1_SCATTER_COMP_SPV,
+ "draw2d_bin_scatter",
+ draw_2d_bin_1_scatter_pipeline_workgroup_size,
+ std::mem::size_of::<Draw2dScatterConstants>(),
+ );
+
+ let draw_2d_bin_2_sort_pipeline = create_compute_pipeline(
+ shark_shaders::DRAW_2D_BIN_2_SORT_COMP_SPV,
+ "draw2d_bin_sort",
+ 0,
+ std::mem::size_of::<Draw2dSortConstants>(),
+ );
+
+ let draw_2d_bin_3_resolve_pipeline = create_compute_pipeline(
+ shark_shaders::DRAW_2D_BIN_3_RESOLVE_COMP_SPV,
+ "draw2d_bin_resolve",
+ 0,
+ 0,
+ );
+
+ let draw_2d_rasterize_pipeline = create_compute_pipeline(
+ shark_shaders::DRAW_2D_RASTERIZE_COMP_SPV,
+ "draw2d_rasterize",
+ 0,
+ 0,
+ );
+
+ let radix_sort_0_upsweep_pipeline = create_compute_pipeline(
+ shark_shaders::RADIX_SORT_0_UPSWEEP_COMP_SPV,
+ "radix_sort_upsweep",
+ 32,
+ std::mem::size_of::<RadixSortUpsweepConstants>(),
+ );
+
+ let radix_sort_1_spine_pipeline = create_compute_pipeline(
+ shark_shaders::RADIX_SORT_1_SPINE_COMP_SPV,
+ "radix_sort_spine",
+ 32,
+ std::mem::size_of::<RadixSortSpineConstants>(),
+ );
+
+ let radix_sort_2_downsweep_pipeline = create_compute_pipeline(
+ shark_shaders::RADIX_SORT_2_DOWNSWEEP_COMP_SPV,
+ "radix_sort_downsweep",
+ 32,
+ std::mem::size_of::<RadixSortDownsweepConstants>(),
+ );
+
+ let composite_pipeline =
+ create_compute_pipeline(shark_shaders::COMPOSITE_COMP_SPV, "composite", 0, 0);
+
+ Self {
+ _samplers: samplers,
+
+ graphics_bind_group_layout,
+ compute_bind_group_layout,
+
+ basic_pipeline,
+
+ draw_2d_bin_0_clear_pipeline,
+ draw_2d_bin_1_scatter_pipeline_workgroup_size,
+ draw_2d_bin_1_scatter_pipeline,
+ draw_2d_bin_2_sort_pipeline,
+ draw_2d_bin_3_resolve_pipeline,
+ draw_2d_rasterize_pipeline,
+
+ radix_sort_0_upsweep_pipeline,
+ radix_sort_1_spine_pipeline,
+ radix_sort_2_downsweep_pipeline,
+
+ composite_pipeline,
+ }
+ }
+}
+++ /dev/null
-use narcissus_core::default;
-use narcissus_gpu::{
- BindDesc, BindGroupLayout, BindingType, BlendMode, CompareOp, CullingMode, FrontFace,
- GraphicsPipelineAttachments, GraphicsPipelineDesc, ImageFormat, Pipeline, PipelineLayout,
- PolygonMode, Sampler, ShaderDesc, ShaderStageFlags, Topology,
-};
-use narcissus_maths::Mat4;
-
-use crate::Gpu;
-
-#[allow(unused)]
-#[repr(C)]
-pub struct BasicUniforms {
- pub clip_from_model: Mat4,
-}
-
-#[allow(unused)]
-#[repr(C)]
-pub struct Vertex {
- pub position: [f32; 4],
- pub normal: [f32; 4],
- pub texcoord: [f32; 4],
-}
-
-pub struct BasicPipeline {
- pub uniforms_bind_group_layout: BindGroupLayout,
- pub storage_bind_group_layout: BindGroupLayout,
- pub pipeline: Pipeline,
-}
-
-impl BasicPipeline {
- pub fn new(gpu: &Gpu, immutable_samplers: &[Sampler]) -> Self {
- let uniforms_bind_group_layout = gpu.create_bind_group_layout(&[
- // Uniforms
- BindDesc::new(ShaderStageFlags::ALL, BindingType::UniformBuffer),
- // Samplers
- BindDesc::with_immutable_samplers(ShaderStageFlags::ALL, immutable_samplers),
- ]);
-
- let storage_bind_group_layout = gpu.create_bind_group_layout(&[
- // Vertex Buffer
- BindDesc::new(ShaderStageFlags::ALL, BindingType::StorageBuffer),
- // Transform Buffer
- BindDesc::new(ShaderStageFlags::ALL, BindingType::StorageBuffer),
- // Albedo
- BindDesc::new(ShaderStageFlags::ALL, BindingType::SampledImage),
- ]);
-
- let layout = &PipelineLayout {
- bind_group_layouts: &[uniforms_bind_group_layout, storage_bind_group_layout],
- push_constant_ranges: &[],
- };
-
- let pipeline = gpu.create_graphics_pipeline(&GraphicsPipelineDesc {
- vertex_shader: ShaderDesc {
- entry: c"main",
- code: shark_shaders::BASIC_VERT_SPV,
- },
- fragment_shader: ShaderDesc {
- entry: c"main",
- code: shark_shaders::BASIC_FRAG_SPV,
- },
- layout,
- attachments: GraphicsPipelineAttachments {
- color_attachment_formats: &[ImageFormat::RGBA16_FLOAT],
- depth_attachment_format: Some(ImageFormat::DEPTH_F32),
- stencil_attachment_format: None,
- },
- topology: Topology::Triangles,
- primitive_restart: false,
- polygon_mode: PolygonMode::Fill,
- culling_mode: CullingMode::Back,
- front_face: FrontFace::CounterClockwise,
- blend_mode: BlendMode::Opaque,
- depth_bias: None,
- depth_compare_op: CompareOp::GreaterOrEqual,
- depth_test_enable: true,
- depth_write_enable: true,
- stencil_test_enable: false,
- stencil_back: default(),
- stencil_front: default(),
- });
-
- Self {
- uniforms_bind_group_layout,
- storage_bind_group_layout,
- pipeline,
- }
- }
-}
+++ /dev/null
-use narcissus_font::TouchedGlyphIndex;
-
-pub mod basic;
-
-pub const TILE_SIZE: u32 = 32;
-pub const MAX_PRIMS: u32 = 1 << 18;
-pub const TILE_BITMAP_WORDS_L1: u32 = MAX_PRIMS / 32 / 32;
-pub const TILE_BITMAP_WORDS_L0: u32 = MAX_PRIMS / 32;
-pub const TILE_STRIDE: u32 = TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1 + 2;
-
-#[allow(unused)]
-#[repr(C)]
-pub struct PrimitiveUniforms {
- pub screen_resolution_x: u32,
- pub screen_resolution_y: u32,
-
- pub atlas_resolution_x: u32,
- pub atlas_resolution_y: u32,
-
- pub tile_resolution_x: u32,
- pub tile_resolution_y: u32,
-
- pub num_primitives: u32,
- pub num_primitives_32: u32,
- pub num_primitives_1024: u32,
- pub tile_stride: u32,
-
- pub primitives_instances_buffer: u64,
- pub rects_buffer: u64,
- pub glyphs_buffer: u64,
- pub tiles_buffer: u64,
-}
-
-#[repr(u32)]
-pub enum PrimitiveType {
- Rect,
- Glyph,
-}
-
-#[allow(unused)]
-#[repr(C)]
-pub struct PrimitiveInstance {
- pub packed: u32,
- pub color: u32,
- pub x: f32,
- pub y: f32,
-}
-
-#[repr(C)]
-pub struct Rect {
- pub half_extent_x: f32,
- pub half_extent_y: f32,
- pub border_width: f32,
- pub border_radius: f32,
-}
-
-impl PrimitiveInstance {
- #[inline(always)]
- pub fn glyph(glyph_index: TouchedGlyphIndex, color: u32, x: f32, y: f32) -> Self {
- let packed = glyph_index.as_u32() | ((PrimitiveType::Glyph as u32) << 30);
- Self {
- packed,
- color,
- x,
- y,
- }
- }
-
- #[inline(always)]
- pub fn rect(rect_index: u32, color: u32, x: f32, y: f32) -> Self {
- let packed = rect_index | ((PrimitiveType::Rect as u32) << 30);
- Self {
- packed,
- color,
- x,
- y,
- }
- }
-}