From: Joshua Simmons Date: Sun, 9 Jul 2023 22:07:48 +0000 (+0200) Subject: narcissus-gpu: Add transient buffer allocator X-Git-Url: https://git.nega.tv//gitweb.cgi?a=commitdiff_plain;h=15549f07b5391904ee1b426a5404dd7875dfa97f;p=josh%2Fnarcissus narcissus-gpu: Add transient buffer allocator --- diff --git a/bins/narcissus/src/main.rs b/bins/narcissus/src/main.rs index a344d88..a89c59f 100644 --- a/bins/narcissus/src/main.rs +++ b/bins/narcissus/src/main.rs @@ -10,8 +10,8 @@ use narcissus_app::{create_app, Event, Key, PressedState, WindowDesc}; use narcissus_core::{default, rand::Pcg64, slice::array_windows}; use narcissus_font::{FontCollection, GlyphCache, HorizontalMetrics, TouchedGlyph}; use narcissus_gpu::{ - create_device, Access, BufferImageCopy, BufferUsageFlags, ClearValue, Extent2d, Extent3d, - ImageAspectFlags, ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout, + create_device, Access, BufferDesc, BufferImageCopy, BufferUsageFlags, ClearValue, Extent2d, + Extent3d, ImageAspectFlags, ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout, ImageUsageFlags, LoadOp, MemoryLocation, Offset2d, Offset3d, RenderingAttachment, RenderingDesc, Scissor, StoreOp, ThreadToken, Viewport, }; @@ -36,7 +36,14 @@ const MAX_GLYPHS: usize = 8192; /// # Safety /// /// Must not be applied to any types with padding -pub unsafe trait Blittable: Sized {} +pub unsafe trait Blittable: Sized { + fn as_bytes(&self) -> &[u8] { + // SAFETY: Safe whilst trait is correctly applied. + unsafe { + std::slice::from_raw_parts(self as *const _ as *const u8, std::mem::size_of::()) + } + } +} unsafe impl Blittable for u8 {} unsafe impl Blittable for u16 {} @@ -83,24 +90,12 @@ pub fn main() { blåhaj_image.as_slice(), ); - let mut basic_uniform_buffer = MappedBuffer::new( - device.as_ref(), - BufferUsageFlags::UNIFORM, - std::mem::size_of::(), - ); - let mut basic_transform_buffer = MappedBuffer::new( device.as_ref(), BufferUsageFlags::STORAGE, std::mem::size_of::() * MAX_SHARKS, ); - let mut text_uniform_buffer = MappedBuffer::new( - device.as_ref(), - BufferUsageFlags::UNIFORM, - std::mem::size_of::(), - ); - let mut glyph_instance_buffer = MappedBuffer::new( device.as_ref(), BufferUsageFlags::STORAGE, @@ -126,6 +121,25 @@ pub fn main() { mip_levels: 1, }); + let mut rng = Pcg64::new(); + let mut buffers = (0..4096) + .map(|_| { + device.create_buffer(&BufferDesc { + location: MemoryLocation::HostMapped, + usage: BufferUsageFlags::STORAGE, + size: 16 + rng.next_bound_usize(1024 - 16), + }) + }) + .collect::>(); + + buffers.extend((0..512).map(|_| { + device.create_buffer(&BufferDesc { + location: MemoryLocation::HostMapped, + usage: BufferUsageFlags::STORAGE, + size: 16 + rng.next_bound_usize(10 * 1024 * 1024 - 16), + }) + })); + { let frame = device.begin_frame(); let mut cmd_buffer = device.create_cmd_buffer(&frame, &thread_token); @@ -276,8 +290,6 @@ pub fn main() { ); let clip_from_model = clip_from_camera * camera_from_model; - basic_uniform_buffer.write(BasicUniforms { clip_from_model }); - // Do some Font Shit.' let line0 = "Snarfe, Blåhaj! And the Quick Brown Fox jumped Over the Lazy doge."; let line1 = "加盟国は、国際連合と協力して"; @@ -365,13 +377,6 @@ pub fn main() { let atlas_width = glyph_cache.width() as u32; let atlas_height = glyph_cache.height() as u32; - text_uniform_buffer.write(TextUniforms { - screen_width: width, - screen_height: height, - atlas_width, - atlas_height, - }); - glyph_instance_buffer.write_slice(&glyph_instances); let (touched_glyphs, texture) = glyph_cache.update_atlas(); @@ -481,8 +486,9 @@ pub fn main() { basic_pipeline.bind( device.as_ref(), &frame, + &thread_token, &mut cmd_buffer, - basic_uniform_buffer.buffer(), + &BasicUniforms { clip_from_model }, blåhaj_vertex_buffer, blåhaj_index_buffer, basic_transform_buffer.buffer(), @@ -502,8 +508,14 @@ pub fn main() { text_pipeline.bind( device.as_ref(), &frame, + &thread_token, &mut cmd_buffer, - text_uniform_buffer.buffer(), + &TextUniforms { + screen_width: width, + screen_height: height, + atlas_width, + atlas_height, + }, glyph_buffer.buffer(), glyph_instance_buffer.buffer(), glyph_atlas, @@ -517,4 +529,9 @@ pub fn main() { device.end_frame(frame); } + + #[cfg(debug_assertions)] + { + device.debug_allocator_dump_svg().unwrap(); + } } diff --git a/bins/narcissus/src/mapped_buffer.rs b/bins/narcissus/src/mapped_buffer.rs index 98300e7..77acf18 100644 --- a/bins/narcissus/src/mapped_buffer.rs +++ b/bins/narcissus/src/mapped_buffer.rs @@ -30,19 +30,6 @@ impl<'a> MappedBuffer<'a> { self.buffer } - pub fn write(&mut self, value: T) - where - T: Blittable, - { - unsafe { - let src = std::slice::from_raw_parts( - &value as *const T as *const u8, - std::mem::size_of::(), - ); - self.slice.copy_from_slice(src) - } - } - pub fn write_slice(&mut self, values: &[T]) where T: Blittable, diff --git a/bins/narcissus/src/pipelines/basic.rs b/bins/narcissus/src/pipelines/basic.rs index 3bb4ec0..4a8ffd6 100644 --- a/bins/narcissus/src/pipelines/basic.rs +++ b/bins/narcissus/src/pipelines/basic.rs @@ -4,7 +4,7 @@ use narcissus_gpu::{ Buffer, CmdBuffer, CompareOp, CullingMode, Device, Frame, FrontFace, GraphicsPipelineDesc, GraphicsPipelineLayout, Image, ImageFormat, ImageLayout, IndexType, Pipeline, PolygonMode, Sampler, SamplerAddressMode, SamplerDesc, SamplerFilter, ShaderDesc, ShaderStageFlags, - Topology, TypedBind, + ThreadToken, Topology, TypedBind, }; use narcissus_maths::Mat4; @@ -15,6 +15,7 @@ const FRAG_SPV: &[u8] = include_bytes_align!(4, "../shaders/basic.frag.spv"); #[allow(unused)] #[repr(C)] +#[repr(align(16))] pub struct BasicUniforms { pub clip_from_model: Mat4, } @@ -127,13 +128,23 @@ impl BasicPipeline { &self, device: &dyn Device, frame: &Frame, + thread_token: &ThreadToken, cmd_buffer: &mut CmdBuffer, - uniform_buffer: Buffer, + basic_uniforms: &BasicUniforms, vertex_buffer: Buffer, index_buffer: Buffer, transform_buffer: Buffer, texture: Image, ) { + let mut uniform_buffer = device.request_transient_uniform_buffer( + frame, + thread_token, + std::mem::size_of::(), + std::mem::align_of::(), + ); + + uniform_buffer.copy_from_slice(basic_uniforms.as_bytes()); + device.cmd_set_pipeline(cmd_buffer, self.pipeline); device.cmd_set_bind_group( @@ -144,7 +155,7 @@ impl BasicPipeline { &[Bind { binding: 0, array_element: 0, - typed: TypedBind::UniformBuffer(&[uniform_buffer]), + typed: TypedBind::UniformBuffer(&[uniform_buffer.into()]), }], ); @@ -157,12 +168,12 @@ impl BasicPipeline { Bind { binding: 0, array_element: 0, - typed: TypedBind::StorageBuffer(&[vertex_buffer]), + typed: TypedBind::StorageBuffer(&[vertex_buffer.into()]), }, Bind { binding: 1, array_element: 0, - typed: TypedBind::StorageBuffer(&[transform_buffer]), + typed: TypedBind::StorageBuffer(&[transform_buffer.into()]), }, Bind { binding: 2, diff --git a/bins/narcissus/src/pipelines/text.rs b/bins/narcissus/src/pipelines/text.rs index b38d249..65766e0 100644 --- a/bins/narcissus/src/pipelines/text.rs +++ b/bins/narcissus/src/pipelines/text.rs @@ -4,8 +4,8 @@ use narcissus_gpu::{ Bind, BindGroupLayout, BindGroupLayoutDesc, BindGroupLayoutEntryDesc, BindingType, BlendMode, Buffer, CmdBuffer, CompareOp, CullingMode, Device, Frame, FrontFace, GraphicsPipelineDesc, GraphicsPipelineLayout, Image, ImageFormat, ImageLayout, Pipeline, PolygonMode, Sampler, - SamplerAddressMode, SamplerDesc, SamplerFilter, ShaderDesc, ShaderStageFlags, Topology, - TypedBind, + SamplerAddressMode, SamplerDesc, SamplerFilter, ShaderDesc, ShaderStageFlags, ThreadToken, + Topology, TypedBind, }; use crate::Blittable; @@ -15,6 +15,7 @@ const FRAG_SPV: &[u8] = include_bytes_align!(4, "../shaders/text.frag.spv"); #[allow(unused)] #[repr(C)] +#[repr(align(16))] pub struct TextUniforms { pub screen_width: u32, pub screen_height: u32, @@ -126,12 +127,22 @@ impl TextPipeline { &self, device: &dyn Device, frame: &Frame, + thread_token: &ThreadToken, cmd_buffer: &mut CmdBuffer, - uniforms: Buffer, + text_uniforms: &TextUniforms, cached_glyphs: Buffer, glyph_instances: Buffer, atlas: Image, ) { + let mut uniforms = device.request_transient_uniform_buffer( + frame, + thread_token, + std::mem::size_of::(), + std::mem::align_of::(), + ); + + uniforms.copy_from_slice(text_uniforms.as_bytes()); + device.cmd_set_pipeline(cmd_buffer, self.pipeline); device.cmd_set_bind_group( frame, @@ -142,17 +153,17 @@ impl TextPipeline { Bind { binding: 0, array_element: 0, - typed: TypedBind::UniformBuffer(&[uniforms]), + typed: TypedBind::UniformBuffer(&[uniforms.into()]), }, Bind { binding: 1, array_element: 0, - typed: TypedBind::StorageBuffer(&[cached_glyphs]), + typed: TypedBind::StorageBuffer(&[cached_glyphs.into()]), }, Bind { binding: 2, array_element: 0, - typed: TypedBind::StorageBuffer(&[glyph_instances]), + typed: TypedBind::StorageBuffer(&[glyph_instances.into()]), }, Bind { binding: 3, diff --git a/libs/narcissus-gpu/src/backend/vulkan/mod.rs b/libs/narcissus-gpu/src/backend/vulkan/mod.rs index c4722b1..d70d4ab 100644 --- a/libs/narcissus-gpu/src/backend/vulkan/mod.rs +++ b/libs/narcissus-gpu/src/backend/vulkan/mod.rs @@ -8,10 +8,10 @@ use std::{ }; use narcissus_core::{ - cstr, cstr_from_bytes_until_nul, default, is_aligned_to, manual_arc, + box_assume_init, cstr, cstr_from_bytes_until_nul, default, is_aligned_to, manual_arc, manual_arc::ManualArc, raw_window::{AsRawWindow, RawWindow}, - Arena, HybridArena, Mutex, PhantomUnsend, Pool, Widen, + zeroed_box, Arena, HybridArena, Mutex, PhantomUnsend, Pool, Widen, }; use vulkan_sys as vk; @@ -20,15 +20,15 @@ use crate::{ delay_queue::DelayQueue, frame_counter::FrameCounter, tlsf::{self, Tlsf}, - Access, Bind, BindGroupLayout, BindGroupLayoutDesc, BindingType, BlendMode, Buffer, BufferDesc, - BufferImageCopy, BufferUsageFlags, ClearValue, CmdBuffer, CompareOp, ComputePipelineDesc, - CullingMode, Device, Extent2d, Extent3d, Frame, FrontFace, GlobalBarrier, GpuConcurrent, - GraphicsPipelineDesc, Image, ImageAspectFlags, ImageBarrier, ImageBlit, ImageDesc, - ImageDimension, ImageFormat, ImageLayout, ImageSubresourceLayers, ImageSubresourceRange, - ImageUsageFlags, ImageViewDesc, IndexType, LoadOp, MemoryLocation, Offset2d, Offset3d, - Pipeline, PolygonMode, Sampler, SamplerAddressMode, SamplerCompareOp, SamplerDesc, - SamplerFilter, ShaderStageFlags, StencilOp, StencilOpState, StoreOp, SwapchainOutOfDateError, - ThreadToken, Topology, TypedBind, + Access, Bind, BindGroupLayout, BindGroupLayoutDesc, BindingType, BlendMode, Buffer, BufferBind, + BufferDesc, BufferImageCopy, BufferUsageFlags, ClearValue, CmdBuffer, CompareOp, + ComputePipelineDesc, CullingMode, Device, Extent2d, Extent3d, Frame, FrontFace, GlobalBarrier, + GpuConcurrent, GraphicsPipelineDesc, Image, ImageAspectFlags, ImageBarrier, ImageBlit, + ImageDesc, ImageDimension, ImageFormat, ImageLayout, ImageSubresourceLayers, + ImageSubresourceRange, ImageUsageFlags, ImageViewDesc, IndexType, LoadOp, MemoryLocation, + Offset2d, Offset3d, Pipeline, PolygonMode, Sampler, SamplerAddressMode, SamplerCompareOp, + SamplerDesc, SamplerFilter, ShaderStageFlags, StencilOp, StencilOpState, StoreOp, + SwapchainOutOfDateError, ThreadToken, Topology, TransientBuffer, TypedBind, }; const NUM_FRAMES: usize = 2; @@ -38,6 +38,18 @@ const NUM_FRAMES: usize = 2; /// There's no correct answer here (spec bug) we're just picking a big number and hoping for the best. const SWAPCHAIN_DESTROY_DELAY_FRAMES: usize = 8; +pub struct VulkanConstants { + // How large should transient buffers be, this will limit the maximum size of transient allocations. + transient_buffer_size: u64, + // How should we align transient buffers, this will limit the maximum alignment of transient allocations. + transient_buffer_max_align: u64, +} + +const VULKAN_CONSTANTS: VulkanConstants = VulkanConstants { + transient_buffer_size: 2 * 1024 * 1024, + transient_buffer_max_align: 256, +}; + mod libc { use std::os::raw::{c_char, c_int, c_void}; @@ -907,21 +919,100 @@ struct VulkanBoundPipeline { pipeline_bind_point: vk::PipelineBindPoint, } +#[derive(Clone)] +struct VulkanTransientBuffer { + buffer: vk::Buffer, + memory: VulkanMemory, +} + +struct VulkanTransientAllocator { + usage: vk::BufferUsageFlags, + min_align: u64, + offset: u64, + current: Option, + used_buffers: Vec, +} + +impl VulkanTransientAllocator { + fn new(usage: vk::BufferUsageFlags, min_align: u64) -> Self { + Self { + usage, + min_align, + offset: 0, + current: None, + used_buffers: default(), + } + } + + fn alloc<'a>(&mut self, device: &VulkanDevice, size: u64, align: u64) -> TransientBuffer<'a> { + assert!(size <= VULKAN_CONSTANTS.transient_buffer_size); + assert!( + align != 0 + && align.is_power_of_two() + && align <= VULKAN_CONSTANTS.transient_buffer_max_align + ); + + let align = align.max(self.min_align); + + if self.offset < size || self.current.is_none() { + let transient_buffer = + device.request_transient_buffer(VULKAN_CONSTANTS.transient_buffer_size, self.usage); + + self.used_buffers.push(transient_buffer.clone()); + self.current = Some(transient_buffer); + self.offset = VULKAN_CONSTANTS.transient_buffer_size; + } + + let current = self.current.as_ref().unwrap(); + + self.offset = self.offset.wrapping_sub(size); + self.offset &= !(align - 1); + + TransientBuffer { + ptr: NonNull::new( + current + .memory + .mapped_ptr() + .wrapping_offset(self.offset as isize), + ) + .unwrap(), + len: size as usize, + buffer: current.buffer.as_raw(), + offset: self.offset, + _phantom: &PhantomData, + } + } +} + struct VulkanCmdBuffer { command_buffer: vk::CommandBuffer, bound_pipeline: Option, swapchains_touched: HashMap, } +impl Default for VulkanCmdBuffer { + fn default() -> Self { + Self { + command_buffer: default(), + bound_pipeline: default(), + swapchains_touched: default(), + } + } +} + struct VulkanCmdBufferPool { command_pool: vk::CommandPool, next_free_index: usize, command_buffers: Vec, } +#[repr(align(64))] struct VulkanPerThread { cmd_buffer_pool: RefCell, descriptor_pool: Cell, + transient_index_allocator: RefCell, + transient_storage_allocator: RefCell, + transient_uniform_allocator: RefCell, arena: Arena, } @@ -969,7 +1060,6 @@ type SwapchainDestroyQueue = DelayQueue<(vk::SwapchainKHR, vk::SurfaceKHR, Box<[ pub(crate) struct VulkanDevice { instance: vk::Instance, physical_device: vk::PhysicalDevice, - physical_device_memory_properties: Box, device: vk::Device, universal_queue: vk::Queue, @@ -994,8 +1084,22 @@ pub(crate) struct VulkanDevice { recycled_semaphores: Mutex>, recycled_descriptor_pools: Mutex>, + recycled_transient_index_buffers: Mutex>, + recycled_transient_storage_buffers: Mutex>, + recycled_transient_uniform_buffers: Mutex>, + allocators: [Option>; vk::MAX_MEMORY_TYPES as usize], + _physical_device_properties: Box, + _physical_device_properties_11: Box, + _physical_device_properties_12: Box, + _physical_device_properties_13: Box, + _physical_device_features: Box, + _physical_device_features_11: Box, + _physical_device_features_12: Box, + _physical_device_features_13: Box, + physical_device_memory_properties: Box, + _global_fn: vk::GlobalFunctions, instance_fn: vk::InstanceFunctions, xcb_surface_fn: Option, @@ -1120,72 +1224,85 @@ impl VulkanDevice { instance_fn.enumerate_physical_devices(instance, count, ptr) }); + let mut physical_device_properties = + unsafe { box_assume_init(zeroed_box::()) }; + let mut physical_device_properties_11 = + unsafe { box_assume_init(zeroed_box::()) }; + let mut physical_device_properties_12 = + unsafe { box_assume_init(zeroed_box::()) }; + let mut physical_device_properties_13 = + unsafe { box_assume_init(zeroed_box::()) }; + + physical_device_properties._type = vk::StructureType::PhysicalDeviceProperties2; + physical_device_properties_11._type = vk::StructureType::PhysicalDeviceVulkan11Properties; + physical_device_properties_12._type = vk::StructureType::PhysicalDeviceVulkan12Properties; + physical_device_properties_13._type = vk::StructureType::PhysicalDeviceVulkan13Properties; + + physical_device_properties_12._next = physical_device_properties_13.as_mut() + as *mut vk::PhysicalDeviceVulkan13Properties + as *mut _; + physical_device_properties_11._next = physical_device_properties_12.as_mut() + as *mut vk::PhysicalDeviceVulkan12Properties + as *mut _; + physical_device_properties._next = physical_device_properties_11.as_mut() + as *mut vk::PhysicalDeviceVulkan11Properties + as *mut _; + + let mut physical_device_features = + unsafe { box_assume_init(zeroed_box::()) }; + let mut physical_device_features_11 = + unsafe { box_assume_init(zeroed_box::()) }; + let mut physical_device_features_12 = + unsafe { box_assume_init(zeroed_box::()) }; + let mut physical_device_features_13 = + unsafe { box_assume_init(zeroed_box::()) }; + + physical_device_features._type = vk::StructureType::PhysicalDeviceFeatures2; + physical_device_features_11._type = vk::StructureType::PhysicalDeviceVulkan11Features; + physical_device_features_12._type = vk::StructureType::PhysicalDeviceVulkan12Features; + physical_device_features_13._type = vk::StructureType::PhysicalDeviceVulkan13Features; + + physical_device_features_12._next = physical_device_features_13.as_mut() + as *mut vk::PhysicalDeviceVulkan13Features + as *mut _; + physical_device_features_11._next = physical_device_features_12.as_mut() + as *mut vk::PhysicalDeviceVulkan12Features + as *mut _; + physical_device_features._next = physical_device_features_11.as_mut() + as *mut vk::PhysicalDeviceVulkan11Features + as *mut _; + let physical_device = physical_devices .iter() .copied() .find(|&physical_device| { - let (properties, _properties_11, _properties_12, _properties_13) = { - let mut properties_13 = vk::PhysicalDeviceVulkan13Properties::default(); - let mut properties_12 = vk::PhysicalDeviceVulkan12Properties { - _next: &mut properties_13 as *mut vk::PhysicalDeviceVulkan13Properties - as *mut _, - ..default() - }; - let mut properties_11 = vk::PhysicalDeviceVulkan11Properties { - _next: &mut properties_12 as *mut vk::PhysicalDeviceVulkan12Properties - as *mut _, - ..default() - }; - let mut properties = vk::PhysicalDeviceProperties2 { - _next: &mut properties_11 as *mut vk::PhysicalDeviceVulkan11Properties - as *mut _, - ..default() - }; - unsafe { - instance_fn - .get_physical_device_properties2(physical_device, &mut properties); - } - (properties, properties_11, properties_12, properties_13) - }; - - let (_features, _features_11, features_12, features_13) = { - let mut features_13 = vk::PhysicalDeviceVulkan13Features::default(); - let mut features_12 = vk::PhysicalDeviceVulkan12Features { - _next: &mut features_13 as *mut vk::PhysicalDeviceVulkan13Features - as *mut _, - ..default() - }; - let mut features_11 = vk::PhysicalDeviceVulkan11Features { - _next: &mut features_12 as *mut vk::PhysicalDeviceVulkan12Features - as *mut _, - ..default() - }; - let mut features = vk::PhysicalDeviceFeatures2 { - _next: &mut features_11 as *mut vk::PhysicalDeviceVulkan11Features - as *mut _, - ..default() - }; - - unsafe { - instance_fn.get_physical_device_features2(physical_device, &mut features); - } - (features.features, features_11, features_12, features_13) - }; + unsafe { + instance_fn.get_physical_device_properties2( + physical_device, + physical_device_properties.as_mut(), + ); + instance_fn.get_physical_device_features2( + physical_device, + physical_device_features.as_mut(), + ); + } - properties.properties.api_version >= vk::VERSION_1_3 - && features_13.dynamic_rendering == vk::Bool32::True - && features_12.timeline_semaphore == vk::Bool32::True - && features_12.descriptor_indexing == vk::Bool32::True - && features_12.descriptor_binding_partially_bound == vk::Bool32::True - && features_12.draw_indirect_count == vk::Bool32::True - && features_12.uniform_buffer_standard_layout == vk::Bool32::True + physical_device_properties.properties.api_version >= vk::VERSION_1_3 + && physical_device_features_13.dynamic_rendering == vk::Bool32::True + && physical_device_features_12.timeline_semaphore == vk::Bool32::True + && physical_device_features_12.descriptor_indexing == vk::Bool32::True + && physical_device_features_12.descriptor_binding_partially_bound + == vk::Bool32::True + && physical_device_features_12.draw_indirect_count == vk::Bool32::True + && physical_device_features_12.uniform_buffer_standard_layout + == vk::Bool32::True }) .expect("no supported physical devices reported"); let physical_device_memory_properties = unsafe { - let mut memory_properties = vk::PhysicalDeviceMemoryProperties::default(); + let mut memory_properties = Box::::default(); instance_fn - .get_physical_device_memory_properties(physical_device, &mut memory_properties); + .get_physical_device_memory_properties(physical_device, memory_properties.as_mut()); memory_properties }; @@ -1290,9 +1407,29 @@ impl VulkanDevice { command_buffers: Vec::new(), next_free_index: 0, }; + let transient_index_allocator = + VulkanTransientAllocator::new(vk::BufferUsageFlags::INDEX_BUFFER, 1); + let transient_storage_allocator = VulkanTransientAllocator::new( + vk::BufferUsageFlags::STORAGE_BUFFER, + physical_device_properties + .properties + .limits + .min_storage_buffer_offset_alignment, + ); + let transient_uniform_allocator = VulkanTransientAllocator::new( + vk::BufferUsageFlags::UNIFORM_BUFFER, + physical_device_properties + .properties + .limits + .min_uniform_buffer_offset_alignment, + ); + VulkanPerThread { cmd_buffer_pool: RefCell::new(cmd_buffer_pool), descriptor_pool: Cell::new(vk::DescriptorPool::null()), + transient_index_allocator: RefCell::new(transient_index_allocator), + transient_storage_allocator: RefCell::new(transient_storage_allocator), + transient_uniform_allocator: RefCell::new(transient_uniform_allocator), arena: Arena::new(), } }); @@ -1326,7 +1463,15 @@ impl VulkanDevice { Self { instance, physical_device, - physical_device_memory_properties: Box::new(physical_device_memory_properties), + _physical_device_properties: physical_device_properties, + _physical_device_properties_11: physical_device_properties_11, + _physical_device_properties_12: physical_device_properties_12, + _physical_device_properties_13: physical_device_properties_13, + _physical_device_features: physical_device_features, + _physical_device_features_11: physical_device_features_11, + _physical_device_features_12: physical_device_features_12, + _physical_device_features_13: physical_device_features_13, + physical_device_memory_properties, device, universal_queue, @@ -1349,6 +1494,9 @@ impl VulkanDevice { recycled_semaphores: default(), recycled_descriptor_pools: default(), + recycled_transient_index_buffers: default(), + recycled_transient_storage_buffers: default(), + recycled_transient_uniform_buffers: default(), allocators, @@ -2384,6 +2532,51 @@ impl Device for VulkanDevice { } } + fn request_transient_index_buffer<'a>( + &self, + frame: &'a Frame, + thread_token: &'a ThreadToken, + size: usize, + align: usize, + ) -> TransientBuffer<'a> { + let frame = self.frame(frame); + let per_thread = frame.per_thread.get(thread_token); + per_thread + .transient_index_allocator + .borrow_mut() + .alloc(self, size as u64, align as u64) + } + + fn request_transient_storage_buffer<'a>( + &self, + frame: &'a Frame, + thread_token: &'a ThreadToken, + size: usize, + align: usize, + ) -> TransientBuffer<'a> { + let frame = self.frame(frame); + let per_thread = frame.per_thread.get(thread_token); + per_thread + .transient_storage_allocator + .borrow_mut() + .alloc(self, size as u64, align as u64) + } + + fn request_transient_uniform_buffer<'a>( + &self, + frame: &'a Frame, + thread_token: &'a ThreadToken, + size: usize, + align: usize, + ) -> TransientBuffer<'a> { + let frame = self.frame(frame); + let per_thread = frame.per_thread.get(thread_token); + per_thread + .transient_uniform_allocator + .borrow_mut() + .alloc(self, size as u64, align as u64) + } + fn create_cmd_buffer<'a, 'thread>( &self, frame: &'a Frame, @@ -2424,8 +2617,7 @@ impl Device for VulkanDevice { let vulkan_cmd_buffer = per_thread.arena.alloc(VulkanCmdBuffer { command_buffer, - bound_pipeline: None, - swapchains_touched: HashMap::new(), + ..default() }); CmdBuffer { @@ -2677,13 +2869,20 @@ impl Device for VulkanDevice { } TypedBind::UniformBuffer(buffers) => { let buffer_pool = self.buffer_pool.lock(); - let buffer_infos_iter = buffers.iter().map(|buffer| { - let buffer = buffer_pool.get(buffer.0).unwrap().buffer; - vk::DescriptorBufferInfo { - buffer, - offset: 0, - range: vk::WHOLE_SIZE, + let buffer_infos_iter = buffers.iter().map(|buffer| match buffer { + BufferBind::Unmanaged(buffer) => { + let buffer = buffer_pool.get(buffer.0).unwrap().buffer; + vk::DescriptorBufferInfo { + buffer, + offset: 0, + range: vk::WHOLE_SIZE, + } } + BufferBind::Transient(transient) => vk::DescriptorBufferInfo { + buffer: vk::Buffer::from_raw(transient.buffer), + offset: transient.offset, + range: transient.len as u64, + }, }); let buffer_infos = arena.alloc_slice_fill_iter(buffer_infos_iter); vk::WriteDescriptorSet { @@ -2698,13 +2897,20 @@ impl Device for VulkanDevice { } TypedBind::StorageBuffer(buffers) => { let buffer_pool = self.buffer_pool.lock(); - let buffer_infos_iter = buffers.iter().map(|buffer| { - let buffer = buffer_pool.get(buffer.0).unwrap().buffer; - vk::DescriptorBufferInfo { - buffer, - offset: 0, - range: vk::WHOLE_SIZE, + let buffer_infos_iter = buffers.iter().map(|buffer| match buffer { + BufferBind::Unmanaged(buffer) => { + let buffer = buffer_pool.get(buffer.0).unwrap().buffer; + vk::DescriptorBufferInfo { + buffer, + offset: 0, + range: vk::WHOLE_SIZE, + } } + BufferBind::Transient(transient) => vk::DescriptorBufferInfo { + buffer: vk::Buffer::from_raw(transient.buffer), + offset: transient.offset, + range: transient.len as u64, + }, }); let buffer_infos = arena.alloc_slice_fill_iter(buffer_infos_iter); vk::WriteDescriptorSet { @@ -3111,6 +3317,38 @@ impl Device for VulkanDevice { )); cmd_buffer_pool.next_free_index = 0; } + + let used_index_buffers = + &mut per_thread.transient_index_allocator.get_mut().used_buffers; + + if !used_index_buffers.is_empty() { + self.recycled_transient_index_buffers + .lock() + .extend(used_index_buffers.drain(..)) + } + + let used_storage_buffers = &mut per_thread + .transient_storage_allocator + .get_mut() + .used_buffers; + + if !used_storage_buffers.is_empty() { + self.recycled_transient_storage_buffers + .lock() + .extend(used_storage_buffers.drain(..)) + } + + let used_uniform_buffers = &mut per_thread + .transient_uniform_allocator + .get_mut() + .used_buffers; + + if !used_uniform_buffers.is_empty() { + self.recycled_transient_uniform_buffers + .lock() + .extend(used_uniform_buffers.drain(..)) + } + per_thread.arena.reset() } @@ -3309,9 +3547,95 @@ impl Device for VulkanDevice { self.destroy_swapchain(surface) } } + + #[cfg(debug_assertions)] + fn debug_allocator_dump_svg(&self) -> Result<(), std::io::Error> { + for (i, allocator) in self + .allocators + .iter() + .filter_map(Option::as_deref) + .enumerate() + { + let mut bitmap_file = std::fs::File::create(format!("target/{i}_bitmap.svg")).unwrap(); + allocator.tlsf.lock().debug_bitmap_svg(&mut bitmap_file)?; + } + + Ok(()) + } } impl VulkanDevice { + fn request_transient_buffer( + &self, + size: u64, + usage: vk::BufferUsageFlags, + ) -> VulkanTransientBuffer { + if let Some(transient_buffer) = if usage == vk::BufferUsageFlags::INDEX_BUFFER { + self.recycled_transient_index_buffers.lock().pop_back() + } else if usage == vk::BufferUsageFlags::STORAGE_BUFFER { + self.recycled_transient_storage_buffers.lock().pop_back() + } else if usage == vk::BufferUsageFlags::UNIFORM_BUFFER { + self.recycled_transient_uniform_buffers.lock().pop_back() + } else { + panic!() + } { + return transient_buffer; + } + + let queue_family_indices = &[self.universal_queue_family_index]; + + let create_info = vk::BufferCreateInfo { + size, + usage, + queue_family_indices: queue_family_indices.into(), + sharing_mode: vk::SharingMode::Exclusive, + ..default() + }; + let mut buffer = vk::Buffer::null(); + vk_check!(self + .device_fn + .create_buffer(self.device, &create_info, None, &mut buffer)); + + let mut memory_requirements = vk::MemoryRequirements2::default(); + + self.device_fn.get_buffer_memory_requirements2( + self.device, + &vk::BufferMemoryRequirementsInfo2 { + buffer, + ..default() + }, + &mut memory_requirements, + ); + + let memory = self.allocate_memory(&VulkanMemoryDesc { + requirements: memory_requirements.memory_requirements, + memory_location: MemoryLocation::HostMapped, + _linear: true, + }); + + assert!(!memory.mapped_ptr().is_null()); + // SAFETY: The memory has just been allocated, so as long as the pointer is + // non-null, then we can create a slice for it. + unsafe { + let dst = std::slice::from_raw_parts_mut(memory.mapped_ptr(), memory.size().widen()); + dst.fill(0); + } + + unsafe { + self.device_fn.bind_buffer_memory2( + self.device, + &[vk::BindBufferMemoryInfo { + buffer, + memory: memory.device_memory(), + offset: memory.offset(), + ..default() + }], + ) + }; + + VulkanTransientBuffer { buffer, memory } + } + fn acquire_swapchain( &self, frame: &Frame, @@ -3655,9 +3979,49 @@ impl Drop for VulkanDevice { unsafe { device_fn.destroy_command_pool(device, cmd_buffer_pool.command_pool, None) } + + for &VulkanTransientBuffer { buffer, memory: _ } in + &per_thread.transient_index_allocator.get_mut().used_buffers + { + unsafe { device_fn.destroy_buffer(device, buffer, None) } + } + + for &VulkanTransientBuffer { buffer, memory: _ } in &per_thread + .transient_storage_allocator + .get_mut() + .used_buffers + { + unsafe { device_fn.destroy_buffer(device, buffer, None) } + } + + for &VulkanTransientBuffer { buffer, memory: _ } in &per_thread + .transient_uniform_allocator + .get_mut() + .used_buffers + { + unsafe { device_fn.destroy_buffer(device, buffer, None) } + } } } + for VulkanTransientBuffer { buffer, memory: _ } in + self.recycled_transient_index_buffers.get_mut() + { + unsafe { device_fn.destroy_buffer(device, *buffer, None) } + } + + for VulkanTransientBuffer { buffer, memory: _ } in + self.recycled_transient_storage_buffers.get_mut() + { + unsafe { device_fn.destroy_buffer(device, *buffer, None) } + } + + for VulkanTransientBuffer { buffer, memory: _ } in + self.recycled_transient_uniform_buffers.get_mut() + { + unsafe { device_fn.destroy_buffer(device, *buffer, None) } + } + for buffer in self.buffer_pool.get_mut().values() { unsafe { device_fn.destroy_buffer(device, buffer.buffer, None) } } diff --git a/libs/narcissus-gpu/src/lib.rs b/libs/narcissus-gpu/src/lib.rs index 7105184..c72142b 100644 --- a/libs/narcissus-gpu/src/lib.rs +++ b/libs/narcissus-gpu/src/lib.rs @@ -1,4 +1,4 @@ -use std::{ffi::CStr, marker::PhantomData}; +use std::{ffi::CStr, marker::PhantomData, ptr::NonNull}; use backend::vulkan; use narcissus_core::{ @@ -61,6 +61,21 @@ pub struct BindGroupLayout(Handle); #[derive(Clone, Copy, PartialEq, Eq, Hash)] pub struct Pipeline(Handle); +pub struct TransientBuffer<'a> { + ptr: NonNull, + len: usize, + buffer: u64, + offset: u64, + _phantom: &'a PhantomData<()>, +} + +impl<'a> TransientBuffer<'a> { + pub fn copy_from_slice(&mut self, bytes: &[u8]) { + unsafe { std::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.len) } + .copy_from_slice(bytes) + } +} + #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum MemoryLocation { HostMapped, @@ -464,11 +479,28 @@ pub struct Bind<'a> { pub typed: TypedBind<'a>, } +pub enum BufferBind<'a> { + Unmanaged(Buffer), + Transient(TransientBuffer<'a>), +} + +impl<'a> From for BufferBind<'a> { + fn from(value: Buffer) -> Self { + BufferBind::Unmanaged(value) + } +} + +impl<'a> From> for BufferBind<'a> { + fn from(value: TransientBuffer<'a>) -> Self { + BufferBind::Transient(value) + } +} + pub enum TypedBind<'a> { Sampler(&'a [Sampler]), Image(&'a [(ImageLayout, Image)]), - UniformBuffer(&'a [Buffer]), - StorageBuffer(&'a [Buffer]), + UniformBuffer(&'a [BufferBind<'a>]), + StorageBuffer(&'a [BufferBind<'a>]), } #[derive(Clone, Copy, PartialEq, Eq)] @@ -712,6 +744,33 @@ pub trait Device { /// any remaining references derived from that address. unsafe fn unmap_buffer(&self, buffer: Buffer); + #[must_use] + fn request_transient_uniform_buffer<'a>( + &self, + frame: &'a Frame<'a>, + thread_token: &'a ThreadToken, + size: usize, + align: usize, + ) -> TransientBuffer<'a>; + + #[must_use] + fn request_transient_storage_buffer<'a>( + &self, + frame: &'a Frame<'a>, + thread_token: &'a ThreadToken, + size: usize, + align: usize, + ) -> TransientBuffer<'a>; + + #[must_use] + fn request_transient_index_buffer<'a>( + &self, + frame: &'a Frame<'a>, + thread_token: &'a ThreadToken, + size: usize, + align: usize, + ) -> TransientBuffer<'a>; + #[must_use] fn create_cmd_buffer<'a, 'thread>( &'a self, @@ -804,4 +863,7 @@ pub trait Device { fn begin_frame(&self) -> Frame; fn end_frame<'device>(&'device self, frame: Frame<'device>); + + #[cfg(debug_assertions)] + fn debug_allocator_dump_svg(&self) -> Result<(), std::io::Error>; } diff --git a/libs/narcissus-gpu/src/tlsf.rs b/libs/narcissus-gpu/src/tlsf.rs index 12ec9d0..9892df2 100644 --- a/libs/narcissus-gpu/src/tlsf.rs +++ b/libs/narcissus-gpu/src/tlsf.rs @@ -601,7 +601,7 @@ where // The mask is a no-op if the alignment is already met, do it unconditionally. let offset = (self.blocks[block_index].offset as u64 + align - 1) & !(align - 1); - debug_assert_eq!(offset & align - 1, 0); + debug_assert_eq!(offset & (align - 1), 0); Some(Allocation { block_index, @@ -647,6 +647,97 @@ where // Insert the merged free block. self.insert_block(block_index); } + + #[cfg(debug_assertions)] + pub fn debug_bitmap_svg(&self, w: &mut dyn std::io::Write) -> Result<(), std::io::Error> { + use narcissus_core::svg::{self, svg_begin, svg_end}; + + struct Bytes { + bytes: u32, + } + + impl Bytes { + fn new(bytes: u32) -> Self { + Self { bytes } + } + } + + impl std::fmt::Display for Bytes { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if self.bytes < 1024 { + write!(f, "{}b", self.bytes) + } else if self.bytes < 1024 * 1024 { + write!(f, "{:.2}KiB", self.bytes as f32 / (1024.0)) + } else { + write!(f, "{:.2}MiB", self.bytes as f32 / (1024.0 * 1024.0)) + } + } + } + + write!(w, "{}", svg_begin(615.0, 375.0))?; + + const BOX_SIZE: f32 = 15.0; + const PAD: f32 = 30.0; + + let stroke = svg::stroke(svg::black(), 2.0, 1.0); + let fg = svg::style(svg::fill(svg::rgb(0xdf, 0x73, 0x1a), 1.0), stroke); + let bg = svg::style(svg::fill(svg::rgb(0xfe, 0xfe, 0xfe), 0.0), stroke); + + let mut y = 28.0; + let mut x = 0.0; + + for i in 0..BIN_COUNT { + let bin = Bin::new(i as u32, 0); + write!( + w, + "{}", + svg::text(x, y, 14.0, fg, &Bytes::new(bin.lower_bound())) + )?; + y += BOX_SIZE; + } + + y = PAD; + x = 100.0; + + for i in 0..BIN_COUNT { + let empty = self.bitmap_0 & 1 << i == 0; + write!( + w, + "{}", + svg::rect(x, y, BOX_SIZE, BOX_SIZE).style(if empty { bg } else { fg }) + )?; + y += BOX_SIZE; + } + + y = PAD; + x = 100.0 + PAD * 2.0; + + for (bin, bitmap) in self.bitmap_1.iter().enumerate() { + for sub_bin in 0..SUB_BIN_COUNT { + let bin = Bin::new(bin as u32, sub_bin as u32); + let lower_bound = Bytes::new(bin.lower_bound()); + let upper_bound = Bytes::new(bin.upper_bound()); + let range = format!("{lower_bound}-{upper_bound}"); + + let empty = bitmap & 1 << sub_bin == 0; + + write!( + w, + "{}", + svg::rect(x, y, BOX_SIZE, BOX_SIZE) + .style(if empty { bg } else { fg }) + .title(&range) + )?; + x += BOX_SIZE; + } + x = 100.0 + PAD * 2.0; + y += BOX_SIZE; + } + + write!(w, "{}", svg_end())?; + + Ok(()) + } } #[cfg(test)]