git.nega.tv - josh/narcissus/commitdiff
narcissus-gpu: Add transient buffer allocator
author Joshua Simmons <josh@nega.tv>
Sun, 9 Jul 2023 22:07:48 +0000 (00:07 +0200)
committer Joshua Simmons <josh@nega.tv>
Sun, 9 Jul 2023 22:08:49 +0000 (00:08 +0200)
bins/narcissus/src/main.rs
bins/narcissus/src/mapped_buffer.rs
bins/narcissus/src/pipelines/basic.rs
bins/narcissus/src/pipelines/text.rs
libs/narcissus-gpu/src/backend/vulkan/mod.rs
libs/narcissus-gpu/src/lib.rs
libs/narcissus-gpu/src/tlsf.rs

index a344d8852c373ed528e1e0b23324270be67dfe3a..a89c59fa76e09e47347c9112d7bf0436ca654203 100644 (file)
@@ -10,8 +10,8 @@ use narcissus_app::{create_app, Event, Key, PressedState, WindowDesc};
 use narcissus_core::{default, rand::Pcg64, slice::array_windows};
 use narcissus_font::{FontCollection, GlyphCache, HorizontalMetrics, TouchedGlyph};
 use narcissus_gpu::{
-    create_device, Access, BufferImageCopy, BufferUsageFlags, ClearValue, Extent2d, Extent3d,
-    ImageAspectFlags, ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout,
+    create_device, Access, BufferDesc, BufferImageCopy, BufferUsageFlags, ClearValue, Extent2d,
+    Extent3d, ImageAspectFlags, ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout,
     ImageUsageFlags, LoadOp, MemoryLocation, Offset2d, Offset3d, RenderingAttachment,
     RenderingDesc, Scissor, StoreOp, ThreadToken, Viewport,
 };
@@ -36,7 +36,14 @@ const MAX_GLYPHS: usize = 8192;
 /// # Safety
 ///
 /// Must not be applied to any types with padding
-pub unsafe trait Blittable: Sized {}
+pub unsafe trait Blittable: Sized {
+    fn as_bytes(&self) -> &[u8] { // views `self` as a byte slice `size_of::<Self>()` long
+        // SAFETY: implementors guarantee `Self` has no padding, so every byte read is initialized.
+        unsafe {
+            std::slice::from_raw_parts(self as *const _ as *const u8, std::mem::size_of::<Self>())
+        }
+    }
+}
 
 unsafe impl Blittable for u8 {}
 unsafe impl Blittable for u16 {}
@@ -83,24 +90,12 @@ pub fn main() {
         blåhaj_image.as_slice(),
     );
 
-    let mut basic_uniform_buffer = MappedBuffer::new(
-        device.as_ref(),
-        BufferUsageFlags::UNIFORM,
-        std::mem::size_of::<BasicUniforms>(),
-    );
-
     let mut basic_transform_buffer = MappedBuffer::new(
         device.as_ref(),
         BufferUsageFlags::STORAGE,
         std::mem::size_of::<Affine3>() * MAX_SHARKS,
     );
 
-    let mut text_uniform_buffer = MappedBuffer::new(
-        device.as_ref(),
-        BufferUsageFlags::UNIFORM,
-        std::mem::size_of::<TextUniforms>(),
-    );
-
     let mut glyph_instance_buffer = MappedBuffer::new(
         device.as_ref(),
         BufferUsageFlags::STORAGE,
@@ -126,6 +121,25 @@ pub fn main() {
         mip_levels: 1,
     });
 
+    let mut rng = Pcg64::new();
+    let mut buffers = (0..4096)
+        .map(|_| {
+            device.create_buffer(&BufferDesc {
+                location: MemoryLocation::HostMapped,
+                usage: BufferUsageFlags::STORAGE,
+                size: 16 + rng.next_bound_usize(1024 - 16),
+            })
+        })
+        .collect::<Vec<_>>();
+
+    buffers.extend((0..512).map(|_| {
+        device.create_buffer(&BufferDesc {
+            location: MemoryLocation::HostMapped,
+            usage: BufferUsageFlags::STORAGE,
+            size: 16 + rng.next_bound_usize(10 * 1024 * 1024 - 16),
+        })
+    }));
+
     {
         let frame = device.begin_frame();
         let mut cmd_buffer = device.create_cmd_buffer(&frame, &thread_token);
@@ -276,8 +290,6 @@ pub fn main() {
         );
         let clip_from_model = clip_from_camera * camera_from_model;
 
-        basic_uniform_buffer.write(BasicUniforms { clip_from_model });
-
         // Do some Font Shit.'
         let line0 = "Snarfe, Blåhaj! And the Quick Brown Fox jumped Over the Lazy doge.";
         let line1 = "加盟国は、国際連合と協力して";
@@ -365,13 +377,6 @@ pub fn main() {
         let atlas_width = glyph_cache.width() as u32;
         let atlas_height = glyph_cache.height() as u32;
 
-        text_uniform_buffer.write(TextUniforms {
-            screen_width: width,
-            screen_height: height,
-            atlas_width,
-            atlas_height,
-        });
-
         glyph_instance_buffer.write_slice(&glyph_instances);
 
         let (touched_glyphs, texture) = glyph_cache.update_atlas();
@@ -481,8 +486,9 @@ pub fn main() {
         basic_pipeline.bind(
             device.as_ref(),
             &frame,
+            &thread_token,
             &mut cmd_buffer,
-            basic_uniform_buffer.buffer(),
+            &BasicUniforms { clip_from_model },
             blåhaj_vertex_buffer,
             blåhaj_index_buffer,
             basic_transform_buffer.buffer(),
@@ -502,8 +508,14 @@ pub fn main() {
         text_pipeline.bind(
             device.as_ref(),
             &frame,
+            &thread_token,
             &mut cmd_buffer,
-            text_uniform_buffer.buffer(),
+            &TextUniforms {
+                screen_width: width,
+                screen_height: height,
+                atlas_width,
+                atlas_height,
+            },
             glyph_buffer.buffer(),
             glyph_instance_buffer.buffer(),
             glyph_atlas,
@@ -517,4 +529,9 @@ pub fn main() {
 
         device.end_frame(frame);
     }
+
+    #[cfg(debug_assertions)]
+    {
+        device.debug_allocator_dump_svg().unwrap();
+    }
 }
index 98300e7534660672c12b88e59abe24bc7898839a..77acf18f5f4e600d1920b00198a620ab29187bd4 100644 (file)
@@ -30,19 +30,6 @@ impl<'a> MappedBuffer<'a> {
         self.buffer
     }
 
-    pub fn write<T>(&mut self, value: T)
-    where
-        T: Blittable,
-    {
-        unsafe {
-            let src = std::slice::from_raw_parts(
-                &value as *const T as *const u8,
-                std::mem::size_of::<T>(),
-            );
-            self.slice.copy_from_slice(src)
-        }
-    }
-
     pub fn write_slice<T>(&mut self, values: &[T])
     where
         T: Blittable,
index 3bb4ec03c52f85838acf48a89215a5eb6de8eeb4..4a8ffd6fa7904241f79c13afb4289068f1a1f31f 100644 (file)
@@ -4,7 +4,7 @@ use narcissus_gpu::{
     Buffer, CmdBuffer, CompareOp, CullingMode, Device, Frame, FrontFace, GraphicsPipelineDesc,
     GraphicsPipelineLayout, Image, ImageFormat, ImageLayout, IndexType, Pipeline, PolygonMode,
     Sampler, SamplerAddressMode, SamplerDesc, SamplerFilter, ShaderDesc, ShaderStageFlags,
-    Topology, TypedBind,
+    ThreadToken, Topology, TypedBind,
 };
 use narcissus_maths::Mat4;
 
@@ -15,6 +15,7 @@ const FRAG_SPV: &[u8] = include_bytes_align!(4, "../shaders/basic.frag.spv");
 
 #[allow(unused)]
 #[repr(C)]
+#[repr(align(16))]
 pub struct BasicUniforms {
     pub clip_from_model: Mat4,
 }
@@ -127,13 +128,23 @@ impl BasicPipeline {
         &self,
         device: &dyn Device,
         frame: &Frame,
+        thread_token: &ThreadToken,
         cmd_buffer: &mut CmdBuffer,
-        uniform_buffer: Buffer,
+        basic_uniforms: &BasicUniforms,
         vertex_buffer: Buffer,
         index_buffer: Buffer,
         transform_buffer: Buffer,
         texture: Image,
     ) {
+        let mut uniform_buffer = device.request_transient_uniform_buffer(
+            frame,
+            thread_token,
+            std::mem::size_of::<BasicUniforms>(),
+            std::mem::align_of::<BasicUniforms>(),
+        );
+
+        uniform_buffer.copy_from_slice(basic_uniforms.as_bytes());
+
         device.cmd_set_pipeline(cmd_buffer, self.pipeline);
 
         device.cmd_set_bind_group(
@@ -144,7 +155,7 @@ impl BasicPipeline {
             &[Bind {
                 binding: 0,
                 array_element: 0,
-                typed: TypedBind::UniformBuffer(&[uniform_buffer]),
+                typed: TypedBind::UniformBuffer(&[uniform_buffer.into()]),
             }],
         );
 
@@ -157,12 +168,12 @@ impl BasicPipeline {
                 Bind {
                     binding: 0,
                     array_element: 0,
-                    typed: TypedBind::StorageBuffer(&[vertex_buffer]),
+                    typed: TypedBind::StorageBuffer(&[vertex_buffer.into()]),
                 },
                 Bind {
                     binding: 1,
                     array_element: 0,
-                    typed: TypedBind::StorageBuffer(&[transform_buffer]),
+                    typed: TypedBind::StorageBuffer(&[transform_buffer.into()]),
                 },
                 Bind {
                     binding: 2,
index b38d2497e45e742e99849996df7441adff4c1c8d..65766e05f11ed12e356321c70a771827abf91681 100644 (file)
@@ -4,8 +4,8 @@ use narcissus_gpu::{
     Bind, BindGroupLayout, BindGroupLayoutDesc, BindGroupLayoutEntryDesc, BindingType, BlendMode,
     Buffer, CmdBuffer, CompareOp, CullingMode, Device, Frame, FrontFace, GraphicsPipelineDesc,
     GraphicsPipelineLayout, Image, ImageFormat, ImageLayout, Pipeline, PolygonMode, Sampler,
-    SamplerAddressMode, SamplerDesc, SamplerFilter, ShaderDesc, ShaderStageFlags, Topology,
-    TypedBind,
+    SamplerAddressMode, SamplerDesc, SamplerFilter, ShaderDesc, ShaderStageFlags, ThreadToken,
+    Topology, TypedBind,
 };
 
 use crate::Blittable;
@@ -15,6 +15,7 @@ const FRAG_SPV: &[u8] = include_bytes_align!(4, "../shaders/text.frag.spv");
 
 #[allow(unused)]
 #[repr(C)]
+#[repr(align(16))]
 pub struct TextUniforms {
     pub screen_width: u32,
     pub screen_height: u32,
@@ -126,12 +127,22 @@ impl TextPipeline {
         &self,
         device: &dyn Device,
         frame: &Frame,
+        thread_token: &ThreadToken,
         cmd_buffer: &mut CmdBuffer,
-        uniforms: Buffer,
+        text_uniforms: &TextUniforms,
         cached_glyphs: Buffer,
         glyph_instances: Buffer,
         atlas: Image,
     ) {
+        let mut uniforms = device.request_transient_uniform_buffer(
+            frame,
+            thread_token,
+            std::mem::size_of::<TextUniforms>(),
+            std::mem::align_of::<TextUniforms>(),
+        );
+
+        uniforms.copy_from_slice(text_uniforms.as_bytes());
+
         device.cmd_set_pipeline(cmd_buffer, self.pipeline);
         device.cmd_set_bind_group(
             frame,
@@ -142,17 +153,17 @@ impl TextPipeline {
                 Bind {
                     binding: 0,
                     array_element: 0,
-                    typed: TypedBind::UniformBuffer(&[uniforms]),
+                    typed: TypedBind::UniformBuffer(&[uniforms.into()]),
                 },
                 Bind {
                     binding: 1,
                     array_element: 0,
-                    typed: TypedBind::StorageBuffer(&[cached_glyphs]),
+                    typed: TypedBind::StorageBuffer(&[cached_glyphs.into()]),
                 },
                 Bind {
                     binding: 2,
                     array_element: 0,
-                    typed: TypedBind::StorageBuffer(&[glyph_instances]),
+                    typed: TypedBind::StorageBuffer(&[glyph_instances.into()]),
                 },
                 Bind {
                     binding: 3,
index c4722b1f418c3e572eebd9146a526a1fbc21a733..d70d4ab87a6b5e6884482a9293422b7268093dbc 100644 (file)
@@ -8,10 +8,10 @@ use std::{
 };
 
 use narcissus_core::{
-    cstr, cstr_from_bytes_until_nul, default, is_aligned_to, manual_arc,
+    box_assume_init, cstr, cstr_from_bytes_until_nul, default, is_aligned_to, manual_arc,
     manual_arc::ManualArc,
     raw_window::{AsRawWindow, RawWindow},
-    Arena, HybridArena, Mutex, PhantomUnsend, Pool, Widen,
+    zeroed_box, Arena, HybridArena, Mutex, PhantomUnsend, Pool, Widen,
 };
 
 use vulkan_sys as vk;
@@ -20,15 +20,15 @@ use crate::{
     delay_queue::DelayQueue,
     frame_counter::FrameCounter,
     tlsf::{self, Tlsf},
-    Access, Bind, BindGroupLayout, BindGroupLayoutDesc, BindingType, BlendMode, Buffer, BufferDesc,
-    BufferImageCopy, BufferUsageFlags, ClearValue, CmdBuffer, CompareOp, ComputePipelineDesc,
-    CullingMode, Device, Extent2d, Extent3d, Frame, FrontFace, GlobalBarrier, GpuConcurrent,
-    GraphicsPipelineDesc, Image, ImageAspectFlags, ImageBarrier, ImageBlit, ImageDesc,
-    ImageDimension, ImageFormat, ImageLayout, ImageSubresourceLayers, ImageSubresourceRange,
-    ImageUsageFlags, ImageViewDesc, IndexType, LoadOp, MemoryLocation, Offset2d, Offset3d,
-    Pipeline, PolygonMode, Sampler, SamplerAddressMode, SamplerCompareOp, SamplerDesc,
-    SamplerFilter, ShaderStageFlags, StencilOp, StencilOpState, StoreOp, SwapchainOutOfDateError,
-    ThreadToken, Topology, TypedBind,
+    Access, Bind, BindGroupLayout, BindGroupLayoutDesc, BindingType, BlendMode, Buffer, BufferBind,
+    BufferDesc, BufferImageCopy, BufferUsageFlags, ClearValue, CmdBuffer, CompareOp,
+    ComputePipelineDesc, CullingMode, Device, Extent2d, Extent3d, Frame, FrontFace, GlobalBarrier,
+    GpuConcurrent, GraphicsPipelineDesc, Image, ImageAspectFlags, ImageBarrier, ImageBlit,
+    ImageDesc, ImageDimension, ImageFormat, ImageLayout, ImageSubresourceLayers,
+    ImageSubresourceRange, ImageUsageFlags, ImageViewDesc, IndexType, LoadOp, MemoryLocation,
+    Offset2d, Offset3d, Pipeline, PolygonMode, Sampler, SamplerAddressMode, SamplerCompareOp,
+    SamplerDesc, SamplerFilter, ShaderStageFlags, StencilOp, StencilOpState, StoreOp,
+    SwapchainOutOfDateError, ThreadToken, Topology, TransientBuffer, TypedBind,
 };
 
 const NUM_FRAMES: usize = 2;
@@ -38,6 +38,18 @@ const NUM_FRAMES: usize = 2;
 /// There's no correct answer here (spec bug) we're just picking a big number and hoping for the best.
 const SWAPCHAIN_DESTROY_DELAY_FRAMES: usize = 8;
 
+pub struct VulkanConstants {
+    // Size of each transient buffer; this also caps the size of a single transient allocation.
+    transient_buffer_size: u64,
+    // Alignment limit for transient buffers; this also caps the alignment a transient allocation may request.
+    transient_buffer_max_align: u64,
+}
+
+const VULKAN_CONSTANTS: VulkanConstants = VulkanConstants {
+    transient_buffer_size: 2 * 1024 * 1024, // 2 MiB; `VulkanTransientAllocator::alloc` asserts `size` fits
+    transient_buffer_max_align: 256, // largest `align` that `VulkanTransientAllocator::alloc` accepts
+};
+
 mod libc {
     use std::os::raw::{c_char, c_int, c_void};
 
@@ -907,21 +919,100 @@ struct VulkanBoundPipeline {
     pipeline_bind_point: vk::PipelineBindPoint,
 }
 
+#[derive(Clone)]
+struct VulkanTransientBuffer {
+    buffer: vk::Buffer,
+    memory: VulkanMemory,
+}
+
+struct VulkanTransientAllocator {
+    usage: vk::BufferUsageFlags, // usage passed along whenever `alloc` requests a new buffer
+    min_align: u64, // lower bound applied to every allocation's alignment
+    offset: u64, // bump position inside `current`; `alloc` moves it downward
+    current: Option<VulkanTransientBuffer>, // buffer being sub-allocated; `None` until `alloc` acquires one
+    used_buffers: Vec<VulkanTransientBuffer>, // every buffer `alloc` has acquired, including `current`
+}
+
+impl VulkanTransientAllocator {
+    fn new(usage: vk::BufferUsageFlags, min_align: u64) -> Self {
+        Self {
+            usage,
+            min_align,
+            offset: 0,
+            current: None, // no buffer yet, so the first `alloc` call has to request one
+            used_buffers: default(),
+        }
+    }
+
+    fn alloc<'a>(&mut self, device: &VulkanDevice, size: u64, align: u64) -> TransientBuffer<'a> {
+        assert!(size <= VULKAN_CONSTANTS.transient_buffer_size); // an allocation must fit in one buffer
+        assert!(
+            align != 0
+                && align.is_power_of_two() // needed for the mask-based rounding below
+                && align <= VULKAN_CONSTANTS.transient_buffer_max_align
+        );
+
+        let align = align.max(self.min_align); // never align less strictly than the allocator minimum
+
+        if self.offset < size || self.current.is_none() { // out of room, or no buffer acquired yet
+            let transient_buffer =
+                device.request_transient_buffer(VULKAN_CONSTANTS.transient_buffer_size, self.usage);
+
+            self.used_buffers.push(transient_buffer.clone());
+            self.current = Some(transient_buffer);
+            self.offset = VULKAN_CONSTANTS.transient_buffer_size; // allocations are carved from the end downward
+        }
+
+        let current = self.current.as_ref().unwrap(); // always `Some` here: set above if it was `None`
+
+        self.offset = self.offset.wrapping_sub(size); // `offset >= size` at this point, so this cannot wrap
+        self.offset &= !(align - 1); // round down to a multiple of `align`
+
+        TransientBuffer {
+            ptr: NonNull::new(
+                current
+                    .memory
+                    .mapped_ptr()
+                    .wrapping_offset(self.offset as isize),
+            )
+            .unwrap(), // panics if the resulting pointer is null
+            len: size as usize,
+            buffer: current.buffer.as_raw(),
+            offset: self.offset, // offset of this allocation within `buffer`
+            _phantom: &PhantomData,
+        }
+    }
+}
+
 struct VulkanCmdBuffer {
     command_buffer: vk::CommandBuffer,
     bound_pipeline: Option<VulkanBoundPipeline>,
     swapchains_touched: HashMap<vk::SurfaceKHR, (vk::Image, vk::PipelineStageFlags2)>,
 }
 
+impl Default for VulkanCmdBuffer {
+    fn default() -> Self {
+        Self {
+            command_buffer: default(),
+            bound_pipeline: default(),
+            swapchains_touched: default(),
+        }
+    }
+}
+
 struct VulkanCmdBufferPool {
     command_pool: vk::CommandPool,
     next_free_index: usize,
     command_buffers: Vec<vk::CommandBuffer>,
 }
 
+#[repr(align(64))]
 struct VulkanPerThread {
     cmd_buffer_pool: RefCell<VulkanCmdBufferPool>,
     descriptor_pool: Cell<vk::DescriptorPool>,
+    transient_index_allocator: RefCell<VulkanTransientAllocator>,
+    transient_storage_allocator: RefCell<VulkanTransientAllocator>,
+    transient_uniform_allocator: RefCell<VulkanTransientAllocator>,
     arena: Arena,
 }
 
@@ -969,7 +1060,6 @@ type SwapchainDestroyQueue = DelayQueue<(vk::SwapchainKHR, vk::SurfaceKHR, Box<[
 pub(crate) struct VulkanDevice {
     instance: vk::Instance,
     physical_device: vk::PhysicalDevice,
-    physical_device_memory_properties: Box<vk::PhysicalDeviceMemoryProperties>,
     device: vk::Device,
 
     universal_queue: vk::Queue,
@@ -994,8 +1084,22 @@ pub(crate) struct VulkanDevice {
     recycled_semaphores: Mutex<VecDeque<vk::Semaphore>>,
     recycled_descriptor_pools: Mutex<VecDeque<vk::DescriptorPool>>,
 
+    recycled_transient_index_buffers: Mutex<VecDeque<VulkanTransientBuffer>>,
+    recycled_transient_storage_buffers: Mutex<VecDeque<VulkanTransientBuffer>>,
+    recycled_transient_uniform_buffers: Mutex<VecDeque<VulkanTransientBuffer>>,
+
     allocators: [Option<Box<VulkanAllocator>>; vk::MAX_MEMORY_TYPES as usize],
 
+    _physical_device_properties: Box<vk::PhysicalDeviceProperties2>,
+    _physical_device_properties_11: Box<vk::PhysicalDeviceVulkan11Properties>,
+    _physical_device_properties_12: Box<vk::PhysicalDeviceVulkan12Properties>,
+    _physical_device_properties_13: Box<vk::PhysicalDeviceVulkan13Properties>,
+    _physical_device_features: Box<vk::PhysicalDeviceFeatures2>,
+    _physical_device_features_11: Box<vk::PhysicalDeviceVulkan11Features>,
+    _physical_device_features_12: Box<vk::PhysicalDeviceVulkan12Features>,
+    _physical_device_features_13: Box<vk::PhysicalDeviceVulkan13Features>,
+    physical_device_memory_properties: Box<vk::PhysicalDeviceMemoryProperties>,
+
     _global_fn: vk::GlobalFunctions,
     instance_fn: vk::InstanceFunctions,
     xcb_surface_fn: Option<vk::XcbSurfaceKHRFunctions>,
@@ -1120,72 +1224,85 @@ impl VulkanDevice {
             instance_fn.enumerate_physical_devices(instance, count, ptr)
         });
 
+        let mut physical_device_properties =
+            unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceProperties2>()) };
+        let mut physical_device_properties_11 =
+            unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceVulkan11Properties>()) };
+        let mut physical_device_properties_12 =
+            unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceVulkan12Properties>()) };
+        let mut physical_device_properties_13 =
+            unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceVulkan13Properties>()) };
+
+        physical_device_properties._type = vk::StructureType::PhysicalDeviceProperties2;
+        physical_device_properties_11._type = vk::StructureType::PhysicalDeviceVulkan11Properties;
+        physical_device_properties_12._type = vk::StructureType::PhysicalDeviceVulkan12Properties;
+        physical_device_properties_13._type = vk::StructureType::PhysicalDeviceVulkan13Properties;
+
+        physical_device_properties_12._next = physical_device_properties_13.as_mut()
+            as *mut vk::PhysicalDeviceVulkan13Properties
+            as *mut _;
+        physical_device_properties_11._next = physical_device_properties_12.as_mut()
+            as *mut vk::PhysicalDeviceVulkan12Properties
+            as *mut _;
+        physical_device_properties._next = physical_device_properties_11.as_mut()
+            as *mut vk::PhysicalDeviceVulkan11Properties
+            as *mut _;
+
+        let mut physical_device_features =
+            unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceFeatures2>()) };
+        let mut physical_device_features_11 =
+            unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceVulkan11Features>()) };
+        let mut physical_device_features_12 =
+            unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceVulkan12Features>()) };
+        let mut physical_device_features_13 =
+            unsafe { box_assume_init(zeroed_box::<vk::PhysicalDeviceVulkan13Features>()) };
+
+        physical_device_features._type = vk::StructureType::PhysicalDeviceFeatures2;
+        physical_device_features_11._type = vk::StructureType::PhysicalDeviceVulkan11Features;
+        physical_device_features_12._type = vk::StructureType::PhysicalDeviceVulkan12Features;
+        physical_device_features_13._type = vk::StructureType::PhysicalDeviceVulkan13Features;
+
+        physical_device_features_12._next = physical_device_features_13.as_mut()
+            as *mut vk::PhysicalDeviceVulkan13Features
+            as *mut _;
+        physical_device_features_11._next = physical_device_features_12.as_mut()
+            as *mut vk::PhysicalDeviceVulkan12Features
+            as *mut _;
+        physical_device_features._next = physical_device_features_11.as_mut()
+            as *mut vk::PhysicalDeviceVulkan11Features
+            as *mut _;
+
         let physical_device = physical_devices
             .iter()
             .copied()
             .find(|&physical_device| {
-                let (properties, _properties_11, _properties_12, _properties_13) = {
-                    let mut properties_13 = vk::PhysicalDeviceVulkan13Properties::default();
-                    let mut properties_12 = vk::PhysicalDeviceVulkan12Properties {
-                        _next: &mut properties_13 as *mut vk::PhysicalDeviceVulkan13Properties
-                            as *mut _,
-                        ..default()
-                    };
-                    let mut properties_11 = vk::PhysicalDeviceVulkan11Properties {
-                        _next: &mut properties_12 as *mut vk::PhysicalDeviceVulkan12Properties
-                            as *mut _,
-                        ..default()
-                    };
-                    let mut properties = vk::PhysicalDeviceProperties2 {
-                        _next: &mut properties_11 as *mut vk::PhysicalDeviceVulkan11Properties
-                            as *mut _,
-                        ..default()
-                    };
-                    unsafe {
-                        instance_fn
-                            .get_physical_device_properties2(physical_device, &mut properties);
-                    }
-                    (properties, properties_11, properties_12, properties_13)
-                };
-
-                let (_features, _features_11, features_12, features_13) = {
-                    let mut features_13 = vk::PhysicalDeviceVulkan13Features::default();
-                    let mut features_12 = vk::PhysicalDeviceVulkan12Features {
-                        _next: &mut features_13 as *mut vk::PhysicalDeviceVulkan13Features
-                            as *mut _,
-                        ..default()
-                    };
-                    let mut features_11 = vk::PhysicalDeviceVulkan11Features {
-                        _next: &mut features_12 as *mut vk::PhysicalDeviceVulkan12Features
-                            as *mut _,
-                        ..default()
-                    };
-                    let mut features = vk::PhysicalDeviceFeatures2 {
-                        _next: &mut features_11 as *mut vk::PhysicalDeviceVulkan11Features
-                            as *mut _,
-                        ..default()
-                    };
-
-                    unsafe {
-                        instance_fn.get_physical_device_features2(physical_device, &mut features);
-                    }
-                    (features.features, features_11, features_12, features_13)
-                };
+                unsafe {
+                    instance_fn.get_physical_device_properties2(
+                        physical_device,
+                        physical_device_properties.as_mut(),
+                    );
+                    instance_fn.get_physical_device_features2(
+                        physical_device,
+                        physical_device_features.as_mut(),
+                    );
+                }
 
-                properties.properties.api_version >= vk::VERSION_1_3
-                    && features_13.dynamic_rendering == vk::Bool32::True
-                    && features_12.timeline_semaphore == vk::Bool32::True
-                    && features_12.descriptor_indexing == vk::Bool32::True
-                    && features_12.descriptor_binding_partially_bound == vk::Bool32::True
-                    && features_12.draw_indirect_count == vk::Bool32::True
-                    && features_12.uniform_buffer_standard_layout == vk::Bool32::True
+                physical_device_properties.properties.api_version >= vk::VERSION_1_3
+                    && physical_device_features_13.dynamic_rendering == vk::Bool32::True
+                    && physical_device_features_12.timeline_semaphore == vk::Bool32::True
+                    && physical_device_features_12.descriptor_indexing == vk::Bool32::True
+                    && physical_device_features_12.descriptor_binding_partially_bound
+                        == vk::Bool32::True
+                    && physical_device_features_12.draw_indirect_count == vk::Bool32::True
+                    && physical_device_features_12.uniform_buffer_standard_layout
+                        == vk::Bool32::True
             })
             .expect("no supported physical devices reported");
 
         let physical_device_memory_properties = unsafe {
-            let mut memory_properties = vk::PhysicalDeviceMemoryProperties::default();
+            let mut memory_properties = Box::<vk::PhysicalDeviceMemoryProperties>::default();
             instance_fn
-                .get_physical_device_memory_properties(physical_device, &mut memory_properties);
+                .get_physical_device_memory_properties(physical_device, memory_properties.as_mut());
             memory_properties
         };
 
@@ -1290,9 +1407,29 @@ impl VulkanDevice {
                     command_buffers: Vec::new(),
                     next_free_index: 0,
                 };
+                let transient_index_allocator =
+                    VulkanTransientAllocator::new(vk::BufferUsageFlags::INDEX_BUFFER, 1);
+                let transient_storage_allocator = VulkanTransientAllocator::new(
+                    vk::BufferUsageFlags::STORAGE_BUFFER,
+                    physical_device_properties
+                        .properties
+                        .limits
+                        .min_storage_buffer_offset_alignment,
+                );
+                let transient_uniform_allocator = VulkanTransientAllocator::new(
+                    vk::BufferUsageFlags::UNIFORM_BUFFER,
+                    physical_device_properties
+                        .properties
+                        .limits
+                        .min_uniform_buffer_offset_alignment,
+                );
+
                 VulkanPerThread {
                     cmd_buffer_pool: RefCell::new(cmd_buffer_pool),
                     descriptor_pool: Cell::new(vk::DescriptorPool::null()),
+                    transient_index_allocator: RefCell::new(transient_index_allocator),
+                    transient_storage_allocator: RefCell::new(transient_storage_allocator),
+                    transient_uniform_allocator: RefCell::new(transient_uniform_allocator),
                     arena: Arena::new(),
                 }
             });
@@ -1326,7 +1463,15 @@ impl VulkanDevice {
         Self {
             instance,
             physical_device,
-            physical_device_memory_properties: Box::new(physical_device_memory_properties),
+            _physical_device_properties: physical_device_properties,
+            _physical_device_properties_11: physical_device_properties_11,
+            _physical_device_properties_12: physical_device_properties_12,
+            _physical_device_properties_13: physical_device_properties_13,
+            _physical_device_features: physical_device_features,
+            _physical_device_features_11: physical_device_features_11,
+            _physical_device_features_12: physical_device_features_12,
+            _physical_device_features_13: physical_device_features_13,
+            physical_device_memory_properties,
             device,
 
             universal_queue,
@@ -1349,6 +1494,9 @@ impl VulkanDevice {
 
             recycled_semaphores: default(),
             recycled_descriptor_pools: default(),
+            recycled_transient_index_buffers: default(),
+            recycled_transient_storage_buffers: default(),
+            recycled_transient_uniform_buffers: default(),
 
             allocators,
 
@@ -2384,6 +2532,51 @@ impl Device for VulkanDevice {
         }
     }
 
+    fn request_transient_index_buffer<'a>(
+        &self,
+        frame: &'a Frame,
+        thread_token: &'a ThreadToken,
+        size: usize,
+        align: usize,
+    ) -> TransientBuffer<'a> {
+        let frame = self.frame(frame);
+        let per_thread = frame.per_thread.get(thread_token); // per-thread state selected by `thread_token`
+        per_thread
+            .transient_index_allocator
+            .borrow_mut()
+            .alloc(self, size as u64, align as u64) // `alloc` asserts the size and alignment limits
+    }
+
+    fn request_transient_storage_buffer<'a>(
+        &self,
+        frame: &'a Frame,
+        thread_token: &'a ThreadToken,
+        size: usize,
+        align: usize,
+    ) -> TransientBuffer<'a> {
+        let frame = self.frame(frame);
+        let per_thread = frame.per_thread.get(thread_token); // per-thread state selected by `thread_token`
+        per_thread
+            .transient_storage_allocator
+            .borrow_mut()
+            .alloc(self, size as u64, align as u64) // `alloc` asserts the size and alignment limits
+    }
+
+    fn request_transient_uniform_buffer<'a>(
+        &self,
+        frame: &'a Frame,
+        thread_token: &'a ThreadToken,
+        size: usize,
+        align: usize,
+    ) -> TransientBuffer<'a> {
+        let frame = self.frame(frame);
+        let per_thread = frame.per_thread.get(thread_token); // per-thread state selected by `thread_token`
+        per_thread
+            .transient_uniform_allocator
+            .borrow_mut()
+            .alloc(self, size as u64, align as u64) // `alloc` asserts the size and alignment limits
+    }
+
     fn create_cmd_buffer<'a, 'thread>(
         &self,
         frame: &'a Frame,
@@ -2424,8 +2617,7 @@ impl Device for VulkanDevice {
 
         let vulkan_cmd_buffer = per_thread.arena.alloc(VulkanCmdBuffer {
             command_buffer,
-            bound_pipeline: None,
-            swapchains_touched: HashMap::new(),
+            ..default()
         });
 
         CmdBuffer {
@@ -2677,13 +2869,20 @@ impl Device for VulkanDevice {
             }
             TypedBind::UniformBuffer(buffers) => {
                 let buffer_pool = self.buffer_pool.lock();
-                let buffer_infos_iter = buffers.iter().map(|buffer| {
-                    let buffer = buffer_pool.get(buffer.0).unwrap().buffer;
-                    vk::DescriptorBufferInfo {
-                        buffer,
-                        offset: 0,
-                        range: vk::WHOLE_SIZE,
+                let buffer_infos_iter = buffers.iter().map(|buffer| match buffer {
+                    BufferBind::Unmanaged(buffer) => {
+                        let buffer = buffer_pool.get(buffer.0).unwrap().buffer;
+                        vk::DescriptorBufferInfo {
+                            buffer,
+                            offset: 0,
+                            range: vk::WHOLE_SIZE,
+                        }
                     }
+                    BufferBind::Transient(transient) => vk::DescriptorBufferInfo {
+                        buffer: vk::Buffer::from_raw(transient.buffer),
+                        offset: transient.offset,
+                        range: transient.len as u64,
+                    },
                 });
                 let buffer_infos = arena.alloc_slice_fill_iter(buffer_infos_iter);
                 vk::WriteDescriptorSet {
@@ -2698,13 +2897,20 @@ impl Device for VulkanDevice {
             }
             TypedBind::StorageBuffer(buffers) => {
                 let buffer_pool = self.buffer_pool.lock();
-                let buffer_infos_iter = buffers.iter().map(|buffer| {
-                    let buffer = buffer_pool.get(buffer.0).unwrap().buffer;
-                    vk::DescriptorBufferInfo {
-                        buffer,
-                        offset: 0,
-                        range: vk::WHOLE_SIZE,
+                let buffer_infos_iter = buffers.iter().map(|buffer| match buffer {
+                    BufferBind::Unmanaged(buffer) => {
+                        let buffer = buffer_pool.get(buffer.0).unwrap().buffer;
+                        vk::DescriptorBufferInfo {
+                            buffer,
+                            offset: 0,
+                            range: vk::WHOLE_SIZE,
+                        }
                     }
+                    BufferBind::Transient(transient) => vk::DescriptorBufferInfo {
+                        buffer: vk::Buffer::from_raw(transient.buffer),
+                        offset: transient.offset,
+                        range: transient.len as u64,
+                    },
                 });
                 let buffer_infos = arena.alloc_slice_fill_iter(buffer_infos_iter);
                 vk::WriteDescriptorSet {
@@ -3111,6 +3317,38 @@ impl Device for VulkanDevice {
                     ));
                     cmd_buffer_pool.next_free_index = 0;
                 }
+
+                let used_index_buffers =
+                    &mut per_thread.transient_index_allocator.get_mut().used_buffers;
+
+                if !used_index_buffers.is_empty() {
+                    self.recycled_transient_index_buffers
+                        .lock()
+                        .extend(used_index_buffers.drain(..))
+                }
+
+                let used_storage_buffers = &mut per_thread
+                    .transient_storage_allocator
+                    .get_mut()
+                    .used_buffers;
+
+                if !used_storage_buffers.is_empty() {
+                    self.recycled_transient_storage_buffers
+                        .lock()
+                        .extend(used_storage_buffers.drain(..))
+                }
+
+                let used_uniform_buffers = &mut per_thread
+                    .transient_uniform_allocator
+                    .get_mut()
+                    .used_buffers;
+
+                if !used_uniform_buffers.is_empty() {
+                    self.recycled_transient_uniform_buffers
+                        .lock()
+                        .extend(used_uniform_buffers.drain(..))
+                }
+
                 per_thread.arena.reset()
             }
 
@@ -3309,9 +3547,95 @@ impl Device for VulkanDevice {
             self.destroy_swapchain(surface)
         }
     }
+
+    /// Writes one SVG file per populated allocator showing its TLSF bitmaps.
+    ///
+    /// Empty allocator slots are filtered out before numbering, so `i` counts
+    /// only the allocators that exist. Each one is written to
+    /// `target/{i}_bitmap.svg`.
+    ///
+    /// # Errors
+    ///
+    /// Returns the underlying I/O error if a file cannot be created or written.
+    #[cfg(debug_assertions)]
+    fn debug_allocator_dump_svg(&self) -> Result<(), std::io::Error> {
+        for (i, allocator) in self
+            .allocators
+            .iter()
+            .filter_map(Option::as_deref)
+            .enumerate()
+        {
+            // Propagate creation failures (e.g. a missing `target` directory) to
+            // the caller instead of panicking; the signature already reports
+            // I/O errors.
+            let mut bitmap_file = std::fs::File::create(format!("target/{i}_bitmap.svg"))?;
+            allocator.tlsf.lock().debug_bitmap_svg(&mut bitmap_file)?;
+        }
+
+        Ok(())
+    }
 }
 
 impl VulkanDevice {
+    /// Returns a host-mapped buffer for use by a transient allocator.
+    ///
+    /// A buffer previously recycled for the same `usage` is reused when one is
+    /// available. Otherwise a new exclusive-sharing buffer of `size` is created,
+    /// backed by freshly allocated `HostMapped` memory that is zero-filled and
+    /// then bound to the buffer.
+    ///
+    /// NOTE(review): a recycled buffer is returned without looking at `size`;
+    /// presumably every caller requests the same size -- confirm against the
+    /// transient allocator.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `usage` is not exactly one of `INDEX_BUFFER`, `STORAGE_BUFFER`
+    /// or `UNIFORM_BUFFER`, or if the allocated memory has no mapped pointer.
+    fn request_transient_buffer(
+        &self,
+        size: u64,
+        usage: vk::BufferUsageFlags,
+    ) -> VulkanTransientBuffer {
+        // Each supported usage has its own list of recycled buffers.
+        let recycled_buffers = if usage == vk::BufferUsageFlags::INDEX_BUFFER {
+            &self.recycled_transient_index_buffers
+        } else if usage == vk::BufferUsageFlags::STORAGE_BUFFER {
+            &self.recycled_transient_storage_buffers
+        } else if usage == vk::BufferUsageFlags::UNIFORM_BUFFER {
+            &self.recycled_transient_uniform_buffers
+        } else {
+            panic!(
+                "request_transient_buffer: usage must be exactly one of INDEX_BUFFER, STORAGE_BUFFER or UNIFORM_BUFFER"
+            )
+        };
+
+        // Take the lock only for the pop, so it is released before any
+        // buffer creation below.
+        let recycled = recycled_buffers.lock().pop_back();
+        if let Some(transient_buffer) = recycled {
+            return transient_buffer;
+        }
+
+        let queue_family_indices = &[self.universal_queue_family_index];
+
+        let create_info = vk::BufferCreateInfo {
+            size,
+            usage,
+            queue_family_indices: queue_family_indices.into(),
+            sharing_mode: vk::SharingMode::Exclusive,
+            ..default()
+        };
+        let mut buffer = vk::Buffer::null();
+        vk_check!(self
+            .device_fn
+            .create_buffer(self.device, &create_info, None, &mut buffer));
+
+        let mut memory_requirements = vk::MemoryRequirements2::default();
+
+        self.device_fn.get_buffer_memory_requirements2(
+            self.device,
+            &vk::BufferMemoryRequirementsInfo2 {
+                buffer,
+                ..default()
+            },
+            &mut memory_requirements,
+        );
+
+        let memory = self.allocate_memory(&VulkanMemoryDesc {
+            requirements: memory_requirements.memory_requirements,
+            memory_location: MemoryLocation::HostMapped,
+            _linear: true,
+        });
+
+        assert!(!memory.mapped_ptr().is_null());
+        // SAFETY: The memory has just been allocated, so as long as the pointer is
+        // non-null, then we can create a slice for it.
+        unsafe {
+            let dst = std::slice::from_raw_parts_mut(memory.mapped_ptr(), memory.size().widen());
+            dst.fill(0);
+        }
+
+        // SAFETY: NOTE(review): `buffer` and `memory` were both created just
+        // above and have not been handed out yet; the exact contract of
+        // `bind_buffer_memory2` is declared outside this view -- confirm.
+        //
+        // NOTE(review): whatever `bind_buffer_memory2` returns is discarded
+        // here, unlike `create_buffer` which goes through `vk_check!`. Confirm
+        // whether it reports a result that should be checked.
+        unsafe {
+            self.device_fn.bind_buffer_memory2(
+                self.device,
+                &[vk::BindBufferMemoryInfo {
+                    buffer,
+                    memory: memory.device_memory(),
+                    offset: memory.offset(),
+                    ..default()
+                }],
+            )
+        };
+
+        VulkanTransientBuffer { buffer, memory }
+    }
+
     fn acquire_swapchain(
         &self,
         frame: &Frame,
@@ -3655,9 +3979,49 @@ impl Drop for VulkanDevice {
                 unsafe {
                     device_fn.destroy_command_pool(device, cmd_buffer_pool.command_pool, None)
                 }
+
+                for &VulkanTransientBuffer { buffer, memory: _ } in
+                    &per_thread.transient_index_allocator.get_mut().used_buffers
+                {
+                    unsafe { device_fn.destroy_buffer(device, buffer, None) }
+                }
+
+                for &VulkanTransientBuffer { buffer, memory: _ } in &per_thread
+                    .transient_storage_allocator
+                    .get_mut()
+                    .used_buffers
+                {
+                    unsafe { device_fn.destroy_buffer(device, buffer, None) }
+                }
+
+                for &VulkanTransientBuffer { buffer, memory: _ } in &per_thread
+                    .transient_uniform_allocator
+                    .get_mut()
+                    .used_buffers
+                {
+                    unsafe { device_fn.destroy_buffer(device, buffer, None) }
+                }
             }
         }
 
+        for VulkanTransientBuffer { buffer, memory: _ } in
+            self.recycled_transient_index_buffers.get_mut()
+        {
+            unsafe { device_fn.destroy_buffer(device, *buffer, None) }
+        }
+
+        for VulkanTransientBuffer { buffer, memory: _ } in
+            self.recycled_transient_storage_buffers.get_mut()
+        {
+            unsafe { device_fn.destroy_buffer(device, *buffer, None) }
+        }
+
+        for VulkanTransientBuffer { buffer, memory: _ } in
+            self.recycled_transient_uniform_buffers.get_mut()
+        {
+            unsafe { device_fn.destroy_buffer(device, *buffer, None) }
+        }
+
         for buffer in self.buffer_pool.get_mut().values() {
             unsafe { device_fn.destroy_buffer(device, buffer.buffer, None) }
         }
index 71051840e2f3d1ffb08b24cef70e1b83c8f55a1c..c72142b7d9b9b836a93453cdc944d9842694e93d 100644 (file)
@@ -1,4 +1,4 @@
-use std::{ffi::CStr, marker::PhantomData};
+use std::{ffi::CStr, marker::PhantomData, ptr::NonNull};
 
 use backend::vulkan;
 use narcissus_core::{
@@ -61,6 +61,21 @@ pub struct BindGroupLayout(Handle);
 #[derive(Clone, Copy, PartialEq, Eq, Hash)]
 pub struct Pipeline(Handle);
 
+/// A range of a backend buffer paired with a CPU-writable pointer to its bytes.
+///
+/// The lifetime `'a` is carried only by the `_phantom` marker.
+pub struct TransientBuffer<'a> {
+    /// Start of the writable bytes backing this buffer.
+    ptr: NonNull<u8>,
+    /// Number of bytes addressable through `ptr`.
+    len: usize,
+    /// Raw backend buffer handle (the Vulkan backend converts it with
+    /// `vk::Buffer::from_raw`).
+    buffer: u64,
+    /// Offset of this range within `buffer`, as used when binding.
+    offset: u64,
+    _phantom: &'a PhantomData<()>,
+}
+
+impl TransientBuffer<'_> {
+    /// Overwrites the whole buffer with `bytes`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `bytes.len()` differs from the buffer's length, as
+    /// `<[u8]>::copy_from_slice` requires equal lengths.
+    pub fn copy_from_slice(&mut self, bytes: &[u8]) {
+        // SAFETY: NOTE(review): relies on whoever constructed this value having
+        // set `ptr`/`len` to a live, writable region; the constructor is not
+        // visible here -- confirm.
+        let dst = unsafe { std::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.len) };
+        dst.copy_from_slice(bytes);
+    }
+}
+
 #[derive(Clone, Copy, PartialEq, Eq, Debug)]
 pub enum MemoryLocation {
     HostMapped,
@@ -464,11 +479,28 @@ pub struct Bind<'a> {
     pub typed: TypedBind<'a>,
 }
 
+/// A buffer that can appear in a buffer bind: either a plain [`Buffer`] handle
+/// or a [`TransientBuffer`] allocation.
+pub enum BufferBind<'a> {
+    /// A [`Buffer`] handle; the Vulkan backend binds it from offset 0 with
+    /// `vk::WHOLE_SIZE`.
+    Unmanaged(Buffer),
+    /// A transient allocation, bound using its own offset and length.
+    Transient(TransientBuffer<'a>),
+}
+
+impl From<Buffer> for BufferBind<'_> {
+    /// Wraps a [`Buffer`] handle as [`BufferBind::Unmanaged`].
+    fn from(buffer: Buffer) -> Self {
+        Self::Unmanaged(buffer)
+    }
+}
+
+impl<'a> From<TransientBuffer<'a>> for BufferBind<'a> {
+    /// Wraps a [`TransientBuffer`] as [`BufferBind::Transient`].
+    fn from(transient: TransientBuffer<'a>) -> Self {
+        Self::Transient(transient)
+    }
+}
+
+/// Typed payload of a [`Bind`]: the slice of resources being bound.
 pub enum TypedBind<'a> {
     Sampler(&'a [Sampler]),
     Image(&'a [(ImageLayout, Image)]),
-    UniformBuffer(&'a [Buffer]),
-    StorageBuffer(&'a [Buffer]),
+    /// Uniform buffers, each either an unmanaged [`Buffer`] or a transient
+    /// allocation (see [`BufferBind`]).
+    UniformBuffer(&'a [BufferBind<'a>]),
+    /// Storage buffers, each either an unmanaged [`Buffer`] or a transient
+    /// allocation (see [`BufferBind`]).
+    StorageBuffer(&'a [BufferBind<'a>]),
 }
 
 #[derive(Clone, Copy, PartialEq, Eq)]
@@ -712,6 +744,33 @@ pub trait Device {
     /// any remaining references derived from that address.
     unsafe fn unmap_buffer(&self, buffer: Buffer);
 
+    /// Requests a transient uniform buffer.
+    ///
+    /// The returned [`TransientBuffer`] shares the lifetime `'a` of `frame` and
+    /// `thread_token`.
+    ///
+    /// NOTE(review): `size` and `align` are presumably in bytes -- confirm
+    /// against the backend allocator.
+    #[must_use]
+    fn request_transient_uniform_buffer<'a>(
+        &self,
+        frame: &'a Frame<'a>,
+        thread_token: &'a ThreadToken,
+        size: usize,
+        align: usize,
+    ) -> TransientBuffer<'a>;
+
+    /// Requests a transient storage buffer.
+    ///
+    /// The returned [`TransientBuffer`] shares the lifetime `'a` of `frame` and
+    /// `thread_token`.
+    ///
+    /// NOTE(review): `size` and `align` are presumably in bytes -- confirm
+    /// against the backend allocator.
+    #[must_use]
+    fn request_transient_storage_buffer<'a>(
+        &self,
+        frame: &'a Frame<'a>,
+        thread_token: &'a ThreadToken,
+        size: usize,
+        align: usize,
+    ) -> TransientBuffer<'a>;
+
+    /// Requests a transient index buffer.
+    ///
+    /// The returned [`TransientBuffer`] shares the lifetime `'a` of `frame` and
+    /// `thread_token`.
+    ///
+    /// NOTE(review): `size` and `align` are presumably in bytes -- confirm
+    /// against the backend allocator.
+    #[must_use]
+    fn request_transient_index_buffer<'a>(
+        &self,
+        frame: &'a Frame<'a>,
+        thread_token: &'a ThreadToken,
+        size: usize,
+        align: usize,
+    ) -> TransientBuffer<'a>;
+
     #[must_use]
     fn create_cmd_buffer<'a, 'thread>(
         &'a self,
@@ -804,4 +863,7 @@ pub trait Device {
     fn begin_frame(&self) -> Frame;
 
     fn end_frame<'device>(&'device self, frame: Frame<'device>);
+
+    /// Debug builds only: dumps the device's allocator state as SVG.
+    ///
+    /// Returns any I/O error encountered while writing. The Vulkan backend
+    /// writes one `target/{i}_bitmap.svg` file per allocator.
+    #[cfg(debug_assertions)]
+    fn debug_allocator_dump_svg(&self) -> Result<(), std::io::Error>;
 }
index 12ec9d0787301503f9aa50c968aefcdc0768996c..9892df2630fecae62c66d92b57a84c9542abaaa0 100644 (file)
@@ -601,7 +601,7 @@ where
         // The mask is a no-op if the alignment is already met, do it unconditionally.
         let offset = (self.blocks[block_index].offset as u64 + align - 1) & !(align - 1);
 
-        debug_assert_eq!(offset & align - 1, 0);
+        debug_assert_eq!(offset & (align - 1), 0);
 
         Some(Allocation {
             block_index,
@@ -647,6 +647,97 @@ where
         // Insert the merged free block.
         self.insert_block(block_index);
     }
+
+    /// Renders the two bitmap levels of this allocator as an SVG document
+    /// written to `w`.
+    ///
+    /// The output is, in order: a column of lower-bound labels (one per bin), a
+    /// column of boxes for `bitmap_0`, and a grid of boxes for `bitmap_1` with
+    /// one row per bin and one box per sub-bin. Each grid box carries a
+    /// `lower-upper` title. A set bit is drawn with the filled style, a clear
+    /// bit with the background style.
+    ///
+    /// # Errors
+    ///
+    /// Returns the first error reported by `w`.
+    #[cfg(debug_assertions)]
+    pub fn debug_bitmap_svg(&self, w: &mut dyn std::io::Write) -> Result<(), std::io::Error> {
+        use narcissus_core::svg::{self, svg_begin, svg_end};
+
+        /// Byte count that prints itself with a `b`, `KiB` or `MiB` suffix.
+        struct Bytes(u32);
+
+        impl std::fmt::Display for Bytes {
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                match self.0 {
+                    bytes if bytes < 1024 => write!(f, "{}b", bytes),
+                    bytes if bytes < 1024 * 1024 => {
+                        write!(f, "{:.2}KiB", bytes as f32 / (1024.0))
+                    }
+                    bytes => write!(f, "{:.2}MiB", bytes as f32 / (1024.0 * 1024.0)),
+                }
+            }
+        }
+
+        const BOX_SIZE: f32 = 15.0;
+        const PAD: f32 = 30.0;
+        // Origins of the three columns drawn below.
+        const LABEL_X: f32 = 0.0;
+        const LABEL_Y: f32 = 28.0;
+        const BITMAP_0_X: f32 = 100.0;
+        const BITMAP_1_X: f32 = 100.0 + PAD * 2.0;
+
+        write!(w, "{}", svg_begin(615.0, 375.0))?;
+
+        let stroke = svg::stroke(svg::black(), 2.0, 1.0);
+        let fg = svg::style(svg::fill(svg::rgb(0xdf, 0x73, 0x1a), 1.0), stroke);
+        let bg = svg::style(svg::fill(svg::rgb(0xfe, 0xfe, 0xfe), 0.0), stroke);
+
+        // Column 1: lower bound of each bin as a text label.
+        let mut label_y = LABEL_Y;
+        for bin_index in 0..BIN_COUNT {
+            let lower_bound = Bytes(Bin::new(bin_index as u32, 0).lower_bound());
+            write!(
+                w,
+                "{}",
+                svg::text(LABEL_X, label_y, 14.0, fg, &lower_bound)
+            )?;
+            label_y += BOX_SIZE;
+        }
+
+        // Column 2: one box per bit of the first-level bitmap.
+        let mut box_y = PAD;
+        for bit in 0..BIN_COUNT {
+            let style = if (self.bitmap_0 & (1 << bit)) == 0 {
+                bg
+            } else {
+                fg
+            };
+            write!(
+                w,
+                "{}",
+                svg::rect(BITMAP_0_X, box_y, BOX_SIZE, BOX_SIZE).style(style)
+            )?;
+            box_y += BOX_SIZE;
+        }
+
+        // Grid: one row per bin, one box per sub-bin of the second-level bitmaps.
+        let mut row_y = PAD;
+        for (bin, bitmap) in self.bitmap_1.iter().enumerate() {
+            let mut box_x = BITMAP_1_X;
+            for sub_bin in 0..SUB_BIN_COUNT {
+                let bounds = Bin::new(bin as u32, sub_bin as u32);
+                let range = format!(
+                    "{}-{}",
+                    Bytes(bounds.lower_bound()),
+                    Bytes(bounds.upper_bound())
+                );
+
+                let style = if (bitmap & (1 << sub_bin)) == 0 {
+                    bg
+                } else {
+                    fg
+                };
+
+                write!(
+                    w,
+                    "{}",
+                    svg::rect(box_x, row_y, BOX_SIZE, BOX_SIZE)
+                        .style(style)
+                        .title(&range)
+                )?;
+                box_x += BOX_SIZE;
+            }
+            row_y += BOX_SIZE;
+        }
+
+        write!(w, "{}", svg_end())
+    }
 }
 
 #[cfg(test)]