From eb4a865424f838c36e0b2f20be14a67b332d35f1 Mon Sep 17 00:00:00 2001 From: Joshua Simmons Date: Wed, 19 Jul 2023 10:34:01 +0200 Subject: [PATCH] narcissus-gpu: Improve allocator behavior Add support for falling back to other memory types after allocation failure. Add support for falling back to dedicated allocations after TLSF super-block allocation failure. --- bins/narcissus/src/main.rs | 3 + .../src/backend/vulkan/allocator.rs | 363 ++++++++++-------- libs/narcissus-gpu/src/backend/vulkan/mod.rs | 87 ++--- libs/narcissus-gpu/src/lib.rs | 1 + 4 files changed, 246 insertions(+), 208 deletions(-) diff --git a/bins/narcissus/src/main.rs b/bins/narcissus/src/main.rs index 640279f..fb6815a 100644 --- a/bins/narcissus/src/main.rs +++ b/bins/narcissus/src/main.rs @@ -84,6 +84,7 @@ pub fn main() { let blåhaj_image = device.create_image(&ImageDesc { memory_location: MemoryLocation::Device, + host_mapped: false, usage: ImageUsageFlags::SAMPLED | ImageUsageFlags::TRANSFER, dimension: ImageDimension::Type2d, format: ImageFormat::RGBA8_SRGB, @@ -116,6 +117,7 @@ pub fn main() { let glyph_atlas = device.create_image(&ImageDesc { memory_location: MemoryLocation::Device, usage: ImageUsageFlags::SAMPLED | ImageUsageFlags::TRANSFER, + host_mapped: false, dimension: ImageDimension::Type2d, format: ImageFormat::R8_UNORM, initial_layout: ImageLayout::Optimal, @@ -294,6 +296,7 @@ pub fn main() { device.destroy_image(&frame, depth_image); depth_image = device.create_image(&ImageDesc { memory_location: MemoryLocation::Device, + host_mapped: false, usage: ImageUsageFlags::DEPTH_STENCIL_ATTACHMENT, dimension: ImageDimension::Type2d, format: ImageFormat::DEPTH_F32, diff --git a/libs/narcissus-gpu/src/backend/vulkan/allocator.rs b/libs/narcissus-gpu/src/backend/vulkan/allocator.rs index db79801..abd7832 100644 --- a/libs/narcissus-gpu/src/backend/vulkan/allocator.rs +++ b/libs/narcissus-gpu/src/backend/vulkan/allocator.rs @@ -1,6 +1,10 @@ -use std::collections::HashSet; +use std::{ + collections::HashSet, + sync::atomic::{AtomicU32, Ordering}, +}; + +use narcissus_core::{default, BitIter, Mutex, Widen}; -use narcissus_core::{default, Mutex, Widen}; use vulkan_sys as vk; use crate::{ @@ -13,8 +17,10 @@ use super::{VulkanDevice, VulkanFrame}; #[derive(Default)] pub struct VulkanAllocator { - tlsf: Mutex>, + tlsf: [Mutex>; vk::MAX_MEMORY_TYPES as usize], dedicated: Mutex>, + + allocation_count: AtomicU32, } #[derive(Clone, Copy)] @@ -23,17 +29,11 @@ pub struct VulkanAllocationInfo { mapped_ptr: *mut u8, } -pub enum VulkanMemoryDedicatedDesc { - Image(vk::Image), - Buffer(vk::Buffer), -} - #[derive(Clone)] pub struct VulkanMemoryDedicated { memory: vk::DeviceMemory, mapped_ptr: *mut u8, size: u64, - memory_type_index: u32, } #[derive(Clone)] @@ -93,206 +93,245 @@ impl VulkanMemory { } impl VulkanDevice { - pub fn find_memory_type_index(&self, filter: u32, flags: vk::MemoryPropertyFlags) -> u32 { - (0..self.physical_device_memory_properties.memory_type_count) - .map(|memory_type_index| { - ( - memory_type_index, - self.physical_device_memory_properties.memory_types[memory_type_index.widen()], - ) - }) - .find(|(i, memory_type)| { - (filter & (1 << i)) != 0 && memory_type.property_flags.contains(flags) - }) - .expect("could not find memory type matching flags") - .0 - } - - pub fn allocate_memory_dedicated( + pub fn allocate_memory( &self, memory_location: MemoryLocation, - requirements: &vk::MemoryRequirements, - dedicated_desc: &VulkanMemoryDedicatedDesc, + host_mapped: bool, + memory_requirements: &vk::MemoryRequirements, + memory_dedicated_requirements: &vk::MemoryDedicatedRequirements, + memory_dedicated_allocate_info: &vk::MemoryDedicatedAllocateInfo, ) -> VulkanMemory { - let memory_property_flags = match memory_location { + let required_memory_property_flags = if host_mapped { + vk::MemoryPropertyFlags::HOST_VISIBLE + } else { + vk::MemoryPropertyFlags::default() + }; + + let mut preferred_memory_property_flags = match memory_location { MemoryLocation::Host => vk::MemoryPropertyFlags::HOST_VISIBLE, MemoryLocation::Device => vk::MemoryPropertyFlags::DEVICE_LOCAL, }; - let size = requirements.size; - - let memory_type_index = - self.find_memory_type_index(requirements.memory_type_bits, memory_property_flags); - - let allocator = self.allocators[memory_type_index.widen()] - .as_ref() - .expect("returned a memory type index that has no associated allocator"); - - let mut allocate_info = vk::MemoryAllocateInfo { - allocation_size: size, - memory_type_index, - ..default() - }; + let size = memory_requirements.size; + let align = memory_requirements.alignment; + + fn allocate( + device: &VulkanDevice, + host_mapped: bool, + allocation_size: u64, + memory_type_index: u32, + memory_dedicated_allocate_info: Option<&vk::MemoryDedicatedAllocateInfo>, + ) -> Option<(vk::DeviceMemory, *mut u8)> { + if device.allocator.allocation_count.load(Ordering::Relaxed) + >= device + .physical_device_properties + .properties + .limits + .max_memory_allocation_count + { + return None; + } - let mut dedicated_allocate_info = vk::MemoryDedicatedAllocateInfo::default(); + let mut allocate_info = vk::MemoryAllocateInfo { + allocation_size, + memory_type_index, + ..default() + }; - match *dedicated_desc { - VulkanMemoryDedicatedDesc::Image(image) => { - dedicated_allocate_info.image = image; + if let Some(memory_dedicated_allocate_info) = memory_dedicated_allocate_info { + allocate_info._next = memory_dedicated_allocate_info + as *const vk::MemoryDedicatedAllocateInfo + as *const _; } - VulkanMemoryDedicatedDesc::Buffer(buffer) => dedicated_allocate_info.buffer = buffer, - } - allocate_info._next = - &dedicated_allocate_info as *const vk::MemoryDedicatedAllocateInfo as *const _; - - let mut memory = vk::DeviceMemory::null(); - vk_check!(self - .device_fn - .allocate_memory(self.device, &allocate_info, None, &mut memory)); - - allocator.dedicated.lock().insert(memory); - - let mapped_ptr = if self.physical_device_memory_properties.memory_types - [memory_type_index.widen()] - .property_flags - .contains(vk::MemoryPropertyFlags::HOST_VISIBLE) - { - let mut data = std::ptr::null_mut(); - vk_check!(self.device_fn.map_memory( - self.device, - memory, - 0, - vk::WHOLE_SIZE, - vk::MemoryMapFlags::default(), - &mut data - )); - data as *mut u8 - } else { - std::ptr::null_mut() - }; - - VulkanMemory::Dedicated(VulkanMemoryDedicated { - memory, - mapped_ptr, - size, - memory_type_index, - }) - } - pub fn allocate_memory( - &self, - memory_location: MemoryLocation, - requirements: &vk::MemoryRequirements, - ) -> VulkanMemory { - let memory_property_flags = match memory_location { - MemoryLocation::Host => vk::MemoryPropertyFlags::HOST_VISIBLE, - MemoryLocation::Device => vk::MemoryPropertyFlags::DEVICE_LOCAL, - }; + let mut memory = vk::DeviceMemory::null(); + let memory = match unsafe { + device + .device_fn + .allocate_memory(device.device, &allocate_info, None, &mut memory) + } { + vk::Result::Success => memory, + vk::Result::ErrorOutOfDeviceMemory | vk::Result::ErrorOutOfHostMemory => { + return None + } + _ => panic!(), + }; + + device + .allocator + .allocation_count + .fetch_add(1, Ordering::AcqRel); + + let mapped_ptr = if host_mapped { + let mut data = std::ptr::null_mut(); + vk_check!(device.device_fn.map_memory( + device.device, + memory, + 0, + vk::WHOLE_SIZE, + vk::MemoryMapFlags::default(), + &mut data + )); + data as *mut u8 + } else { + std::ptr::null_mut() + }; - let size = requirements.size; - let align = requirements.alignment; + Some((memory, mapped_ptr)) + } - let memory_type_index = - self.find_memory_type_index(requirements.memory_type_bits, memory_property_flags); + // Outer loop here so that if we fail the first time around, we can clear the + // preferred memory property flags and try again. + loop { + for memory_type_index in + BitIter::new(std::iter::once(memory_requirements.memory_type_bits)) + { + let memory_type = + &self.physical_device_memory_properties.memory_types[memory_type_index]; + + if !memory_type + .property_flags + .contains(required_memory_property_flags) + { + continue; + } - let allocator = self.allocators[memory_type_index.widen()] - .as_ref() - .expect("returned a memory type index that has no associated allocator"); + if !memory_type + .property_flags + .contains(preferred_memory_property_flags) + { + continue; + } - let mut tlsf = allocator.tlsf.lock(); + // Does the driver want a dedicated allocation? + if memory_dedicated_requirements.requires_dedicated_allocation == vk::Bool32::True + || memory_dedicated_requirements.prefers_dedicated_allocation + == vk::Bool32::True + { + if let Some((memory, mapped_ptr)) = allocate( + self, + host_mapped, + size, + memory_type_index as u32, + Some(memory_dedicated_allocate_info), + ) { + self.allocator.dedicated.lock().insert(memory); + + return VulkanMemory::Dedicated(VulkanMemoryDedicated { + memory, + mapped_ptr, + size, + }); + } + } - let allocation = { - if let Some(allocation) = tlsf.alloc(size, align) { - allocation - } else { - let allocate_info = vk::MemoryAllocateInfo { - allocation_size: VULKAN_CONSTANTS.tlsf_block_size, - memory_type_index, - ..default() - }; - - let mut memory = vk::DeviceMemory::null(); - vk_check!(self.device_fn.allocate_memory( - self.device, - &allocate_info, - None, - &mut memory - )); + let block_size = VULKAN_CONSTANTS.tlsf_maximum_block_size; + + // If the allocation is smaller than the TLSF super-block size for this + // allocation type, we should attempt sub-allocation. + if size <= block_size { + let mut tlsf = self.allocator.tlsf[memory_type_index].lock(); + + if let Some(allocation) = tlsf.alloc(size, align) { + return VulkanMemory::SubAlloc(VulkanMemorySubAlloc { + allocation, + size, + memory_type_index: memory_type_index as u32, + }); + } else { + // When allocating backing storage for TLSF super-blocks, ensure that all memory + // is mapped if the memory type supports host mapping. This ensures we never + // have to map a super-block later if an individual allocation desires it. + if let Some((memory, mapped_ptr)) = allocate( + self, + memory_type + .property_flags + .contains(vk::MemoryPropertyFlags::HOST_VISIBLE), + block_size, + memory_type_index as u32, + None, + ) { + tlsf.insert_super_block( + block_size, + VulkanAllocationInfo { memory, mapped_ptr }, + ); + + // After inserting a new super-block we should always be able to service the + // allocation request since the outer condition checks `size` <= `block_size`. + let allocation = tlsf.alloc(size, align).unwrap(); + + return VulkanMemory::SubAlloc(VulkanMemorySubAlloc { + allocation, + size, + memory_type_index: memory_type_index as u32, + }); + } + } + } - let mapped_ptr = if self.physical_device_memory_properties.memory_types - [memory_type_index.widen()] - .property_flags - .contains(vk::MemoryPropertyFlags::HOST_VISIBLE) + // If sub-allocation failed, and we were unable to allocate a new super-block, + // OR + // If the requested allocation size was too large for the TLSF allocator, + // + // Attempt a dedicated allocation for the exact requested size. + if let Some((memory, mapped_ptr)) = + allocate(self, host_mapped, size, memory_type_index as u32, None) { - let mut data = std::ptr::null_mut(); - vk_check!(self.device_fn.map_memory( - self.device, - memory, - 0, - vk::WHOLE_SIZE, - vk::MemoryMapFlags::default(), - &mut data - )); - data as *mut u8 - } else { - std::ptr::null_mut() - }; - - tlsf.insert_super_block( - VULKAN_CONSTANTS.tlsf_block_size, - VulkanAllocationInfo { memory, mapped_ptr }, - ); + self.allocator.dedicated.lock().insert(memory); - tlsf.alloc(size, align).expect("failed to allocate") + return VulkanMemory::Dedicated(VulkanMemoryDedicated { + memory, + mapped_ptr, + size, + }); + } } - }; - VulkanMemory::SubAlloc(VulkanMemorySubAlloc { - allocation, - size: requirements.size, - memory_type_index, - }) + // If we have any preferred flags, then try clearing those and trying again. + // If there's no preferred flags left, then we couldn't allocate any memory. + if preferred_memory_property_flags == default() { + panic!("allocation failure") + } else { + preferred_memory_property_flags = default() + } + } } pub fn allocator_begin_frame(&self, frame: &mut VulkanFrame) { for allocation in frame.destroyed_allocations.get_mut().drain(..) { match allocation { VulkanMemory::Dedicated(dedicated) => { - let allocator = self.allocators[dedicated.memory_type_index.widen()] - .as_ref() - .unwrap(); - allocator.dedicated.lock().remove(&dedicated.memory); + self.allocator.dedicated.lock().remove(&dedicated.memory); unsafe { self.device_fn .free_memory(self.device, dedicated.memory, None) } } VulkanMemory::SubAlloc(sub_alloc) => { - let allocator = self.allocators[sub_alloc.memory_type_index.widen()] - .as_ref() - .unwrap(); - allocator.tlsf.lock().free(sub_alloc.allocation) + let mut allocator = + self.allocator.tlsf[sub_alloc.memory_type_index.widen()].lock(); + allocator.free(sub_alloc.allocation) } } } } pub fn allocator_drop(&mut self) { - for allocator in self.allocators.iter_mut().flatten() { + for tlsf in self.allocator.tlsf.iter_mut() { // Clear out all memory blocks held by the TLSF allocators. - let tlsf = allocator.tlsf.get_mut(); + let tlsf = tlsf.get_mut(); for super_block in tlsf.super_blocks() { unsafe { self.device_fn .free_memory(self.device, super_block.user_data.memory, None) } } + } - // Clear out all dedicated allocations. - let dedicated = allocator.dedicated.get_mut(); - for memory in dedicated.iter() { - unsafe { self.device_fn.free_memory(self.device, *memory, None) } - } + // Clear out all dedicated allocations. + let dedicated = self.allocator.dedicated.get_mut(); + for memory in dedicated.iter() { + unsafe { self.device_fn.free_memory(self.device, *memory, None) } } } } diff --git a/libs/narcissus-gpu/src/backend/vulkan/mod.rs b/libs/narcissus-gpu/src/backend/vulkan/mod.rs index 30cf005..32a482c 100644 --- a/libs/narcissus-gpu/src/backend/vulkan/mod.rs +++ b/libs/narcissus-gpu/src/backend/vulkan/mod.rs @@ -31,7 +31,7 @@ mod libc; mod wsi; use self::{ - allocator::{VulkanAllocator, VulkanMemory, VulkanMemoryDedicatedDesc}, + allocator::{VulkanAllocator, VulkanMemory}, barrier::{vulkan_image_memory_barrier, vulkan_memory_barrier}, convert::*, wsi::{VulkanWsi, VulkanWsiFrame}, @@ -53,8 +53,8 @@ pub struct VulkanConstants { /// transient allocations. transient_buffer_size: u64, - /// Default size for backing allocations used by the TLSF allocator. - tlsf_block_size: u64, + /// Maximum size for backing allocations used by the TLSF allocator. + tlsf_maximum_block_size: u64, /// The max number of descriptor sets allocatable from each descriptor pool. descriptor_pool_max_sets: u32, @@ -72,7 +72,7 @@ const VULKAN_CONSTANTS: VulkanConstants = VulkanConstants { num_frames: 2, swapchain_destroy_delay: 8, transient_buffer_size: 4 * 1024 * 1024, - tlsf_block_size: 128 * 1024 * 1024, + tlsf_maximum_block_size: 128 * 1024 * 1024, descriptor_pool_max_sets: 500, descriptor_pool_sampler_count: 100, descriptor_pool_uniform_buffer_count: 500, @@ -343,7 +343,7 @@ pub(crate) struct VulkanDevice { recycled_transient_buffers: Mutex>, - allocators: [Option>; vk::MAX_MEMORY_TYPES as usize], + allocator: VulkanAllocator, physical_device_properties: Box, _physical_device_properties_11: Box, @@ -652,14 +652,6 @@ impl VulkanDevice { }) })); - let allocators = std::array::from_fn(|i| { - if i < physical_device_memory_properties.memory_type_count.widen() { - Some(default()) - } else { - None - } - }); - Self { instance, physical_device, @@ -694,7 +686,7 @@ impl VulkanDevice { recycled_descriptor_pools: default(), recycled_transient_buffers: default(), - allocators, + allocator: default(), _global_fn: global_fn, instance_fn, @@ -790,7 +782,7 @@ impl VulkanDevice { semaphore } - fn create_buffer(&self, desc: &BufferDesc, initial_data: Option<&[u8]>) -> Buffer { + fn create_buffer(&self, desc: &BufferDesc, data: Option<&[u8]>) -> Buffer { let queue_family_indices = &[self.universal_queue_family_index]; let create_info = vk::BufferCreateInfo { @@ -821,30 +813,27 @@ impl VulkanDevice { &mut memory_requirements, ); - let memory = if memory_dedicated_requirements.prefers_dedicated_allocation - == vk::Bool32::True - || memory_dedicated_requirements.requires_dedicated_allocation == vk::Bool32::True - { - self.allocate_memory_dedicated( - desc.memory_location, - &memory_requirements.memory_requirements, - &VulkanMemoryDedicatedDesc::Buffer(buffer), - ) - } else { - self.allocate_memory( - desc.memory_location, - &memory_requirements.memory_requirements, - ) + let memory_dedicated_allocate_info = vk::MemoryDedicatedAllocateInfo { + buffer, + ..default() }; - if let Some(initial_data) = initial_data { + let memory = self.allocate_memory( + desc.memory_location, + desc.host_mapped, + &memory_requirements.memory_requirements, + &memory_dedicated_requirements, + &memory_dedicated_allocate_info, + ); + + if let Some(data) = data { assert!(!memory.mapped_ptr().is_null()); // SAFETY: The memory has just been allocated, so as long as the pointer is // non-null, then we can create a slice for it. unsafe { let dst = std::slice::from_raw_parts_mut(memory.mapped_ptr(), memory.size().widen()); - dst[..desc.size].copy_from_slice(initial_data); + dst[..desc.size].copy_from_slice(data); } } @@ -1001,21 +990,15 @@ impl Device for VulkanDevice { &mut memory_requirements, ); - let memory = if memory_dedicated_requirements.prefers_dedicated_allocation - == vk::Bool32::True - || memory_dedicated_requirements.requires_dedicated_allocation == vk::Bool32::True - { - self.allocate_memory_dedicated( - desc.memory_location, - &memory_requirements.memory_requirements, - &VulkanMemoryDedicatedDesc::Image(image), - ) - } else { - self.allocate_memory( - desc.memory_location, - &memory_requirements.memory_requirements, - ) - }; + let memory_dedicated_allocate_info = vk::MemoryDedicatedAllocateInfo { image, ..default() }; + + let memory = self.allocate_memory( + desc.memory_location, + desc.host_mapped, + &memory_requirements.memory_requirements, + &memory_dedicated_requirements, + &memory_dedicated_allocate_info, + ); unsafe { self.device_fn.bind_image_memory2( @@ -2376,9 +2359,15 @@ impl VulkanDevice { &mut memory_requirements, ); + let memory_dedicated_requirements = vk::MemoryDedicatedRequirements::default(); + let memory_dedicated_allocate_info = vk::MemoryDedicatedAllocateInfo::default(); + let memory = self.allocate_memory( MemoryLocation::Host, + true, &memory_requirements.memory_requirements, + &memory_dedicated_requirements, + &memory_dedicated_allocate_info, ); unsafe { @@ -2510,9 +2499,15 @@ impl VulkanDevice { &mut memory_requirements, ); + let memory_dedicated_requirements = vk::MemoryDedicatedRequirements::default(); + let memory_dedicated_allocate_info = vk::MemoryDedicatedAllocateInfo::default(); + let memory = self.allocate_memory( MemoryLocation::Host, + true, &memory_requirements.memory_requirements, + &memory_dedicated_requirements, + &memory_dedicated_allocate_info, ); assert!(!memory.mapped_ptr().is_null()); diff --git a/libs/narcissus-gpu/src/lib.rs b/libs/narcissus-gpu/src/lib.rs index 9a5526e..a41100a 100644 --- a/libs/narcissus-gpu/src/lib.rs +++ b/libs/narcissus-gpu/src/lib.rs @@ -212,6 +212,7 @@ pub struct BufferDesc { pub struct ImageDesc { pub memory_location: MemoryLocation, + pub host_mapped: bool, pub usage: ImageUsageFlags, pub dimension: ImageDimension, pub format: ImageFormat, -- 2.49.0