From: Joshua Simmons Date: Wed, 19 Jul 2023 18:55:21 +0000 (+0200) Subject: narcissus-gpu: Track allocation metrics per-heap X-Git-Url: https://git.nega.tv//gitweb.cgi?a=commitdiff_plain;h=65005aa1c1449ea04941497433075b038e496714;p=josh%2Fnarcissus narcissus-gpu: Track allocation metrics per-heap Add tracking for per-heap memory usage. Calculate per-heap TLSF super block size based on heap size. --- diff --git a/libs/narcissus-gpu/src/backend/vulkan/allocator.rs b/libs/narcissus-gpu/src/backend/vulkan/allocator.rs index abd7832..880bb25 100644 --- a/libs/narcissus-gpu/src/backend/vulkan/allocator.rs +++ b/libs/narcissus-gpu/src/backend/vulkan/allocator.rs @@ -1,6 +1,6 @@ use std::{ collections::HashSet, - sync::atomic::{AtomicU32, Ordering}, + sync::atomic::{AtomicU32, AtomicU64, Ordering}, }; use narcissus_core::{default, BitIter, Mutex, Widen}; @@ -8,25 +8,69 @@ use narcissus_core::{default, BitIter, Mutex, Widen}; use vulkan_sys as vk; use crate::{ - backend::vulkan::VULKAN_CONSTANTS, tlsf::{self, Tlsf}, vk_check, MemoryLocation, }; -use super::{VulkanDevice, VulkanFrame}; +use super::{VulkanDevice, VulkanFrame, VULKAN_CONSTANTS}; + +#[derive(Default, Debug)] +pub struct VulkanMemoryHeap { + /// The calculated Tlsf super-block size for this memory heap. + /// + /// Smaller heaps will require a smaller super block size to prevent excess + /// memory waste. Calculate a suitable super block size using + /// `VULKAN_CONSTANTS.tlsf_default_super_block_size` and + /// `VULKAN_CONSTANTS.tlsf_small_super_block_divisor`. + tlsf_super_block_size: u64, + + /// Total size in bytes we have allocated against this memory heap. 
+ total_allocated_bytes: AtomicU64, +} + +#[derive(Default)] +pub struct VulkanMemoryType { + tlsf: Mutex<Tlsf<VulkanSuperBlockInfo>>, +} #[derive(Default)] pub struct VulkanAllocator { - tlsf: [Mutex<Tlsf<VulkanAllocationInfo>>; vk::MAX_MEMORY_TYPES as usize], + memory_heaps: [VulkanMemoryHeap; vk::MAX_MEMORY_HEAPS as usize], + memory_types: [VulkanMemoryType; vk::MAX_MEMORY_TYPES as usize], dedicated: Mutex<HashSet<vk::DeviceMemory>>, - allocation_count: AtomicU32, } +impl VulkanAllocator { + pub fn new(memory_properties: &vk::PhysicalDeviceMemoryProperties) -> Self { + let memory_heaps = std::array::from_fn(|memory_heap_index| { + let memory_heap_properties = &memory_properties.memory_heaps[memory_heap_index]; + let tlsf_super_block_size = if memory_heap_properties.size + >= VULKAN_CONSTANTS.tlsf_small_super_block_divisor + * VULKAN_CONSTANTS.tlsf_default_super_block_size + { + VULKAN_CONSTANTS.tlsf_default_super_block_size + } else { + memory_heap_properties.size / VULKAN_CONSTANTS.tlsf_small_super_block_divisor + }; + VulkanMemoryHeap { + tlsf_super_block_size, + total_allocated_bytes: default(), + } + }); + + Self { + memory_heaps, + ..default() + } + } +} + #[derive(Clone, Copy)] -pub struct VulkanAllocationInfo { +pub struct VulkanSuperBlockInfo { memory: vk::DeviceMemory, mapped_ptr: *mut u8, + memory_type_index: u32, } #[derive(Clone)] @@ -34,13 +78,13 @@ pub struct VulkanMemoryDedicated { memory: vk::DeviceMemory, mapped_ptr: *mut u8, size: u64, + memory_type_index: u32, } #[derive(Clone)] pub struct VulkanMemorySubAlloc { - allocation: tlsf::Allocation<VulkanAllocationInfo>, + allocation: tlsf::Allocation<VulkanSuperBlockInfo>, size: u64, - memory_type_index: u32, } #[derive(Clone)] @@ -93,6 +137,121 @@ impl VulkanMemory { } impl VulkanDevice { + fn free_memory(&self, memory: VulkanMemory) { + match memory { + VulkanMemory::Dedicated(dedicated) => { + self.allocator.dedicated.lock().remove(&dedicated.memory); + + let memory_heap = &self.allocator.memory_heaps[self + .physical_device_memory_properties + .memory_types[dedicated.memory_type_index.widen()] + .heap_index 
.widen()]; + + memory_heap + .total_allocated_bytes + .fetch_sub(dedicated.size, Ordering::SeqCst); + + self.allocator + .allocation_count + .fetch_sub(1, Ordering::SeqCst); + + unsafe { + self.device_fn + .free_memory(self.device, dedicated.memory, None) + } + } + VulkanMemory::SubAlloc(sub_alloc) => { + let mut allocator = self.allocator.memory_types + [sub_alloc.allocation.user_data().memory_type_index.widen()] + .tlsf + .lock(); + allocator.free(sub_alloc.allocation) + } + } + } + + fn try_allocate_memory( + &self, + host_mapped: bool, + allocation_size: u64, + memory_type_index: u32, + memory_dedicated_allocate_info: Option<&vk::MemoryDedicatedAllocateInfo>, + ) -> Option<(vk::DeviceMemory, *mut u8)> { + // Can't allocate if we would blow the global allocation limit. + if self.allocator.allocation_count.load(Ordering::Relaxed) + >= self + .physical_device_properties + .properties + .limits + .max_memory_allocation_count + { + return None; + } + + let heap_index = self.physical_device_memory_properties.memory_types + [memory_type_index.widen()] + .heap_index; + + let memory_heap_properties = + &self.physical_device_memory_properties.memory_heaps[heap_index.widen()]; + let memory_heap = &self.allocator.memory_heaps[heap_index.widen()]; + + // Can't allocate if we would blow this heap's size. 
+ let current_allocated_bytes = memory_heap.total_allocated_bytes.load(Ordering::Relaxed); + if current_allocated_bytes + allocation_size > memory_heap_properties.size { + return None; + } + + let mut allocate_info = vk::MemoryAllocateInfo { + allocation_size, + memory_type_index, + ..default() + }; + + if let Some(memory_dedicated_allocate_info) = memory_dedicated_allocate_info { + allocate_info._next = memory_dedicated_allocate_info + as *const vk::MemoryDedicatedAllocateInfo + as *const _; + } + + let mut memory = vk::DeviceMemory::null(); + let memory = match unsafe { + self.device_fn + .allocate_memory(self.device, &allocate_info, None, &mut memory) + } { + vk::Result::Success => memory, + vk::Result::ErrorOutOfDeviceMemory | vk::Result::ErrorOutOfHostMemory => return None, + _ => panic!(), + }; + + // Update allocation statistics. + self.allocator + .allocation_count + .fetch_add(1, Ordering::AcqRel); + + memory_heap + .total_allocated_bytes + .fetch_add(allocation_size, Ordering::SeqCst); + + let mapped_ptr = if host_mapped { + let mut data = std::ptr::null_mut(); + vk_check!(self.device_fn.map_memory( + self.device, + memory, + 0, + vk::WHOLE_SIZE, + vk::MemoryMapFlags::default(), + &mut data + )); + data as *mut u8 + } else { + std::ptr::null_mut() + }; + + Some((memory, mapped_ptr)) + } + pub fn allocate_memory( &self, memory_location: MemoryLocation, @@ -115,101 +274,32 @@ impl VulkanDevice { let size = memory_requirements.size; let align = memory_requirements.alignment; - fn allocate( - device: &VulkanDevice, - host_mapped: bool, - allocation_size: u64, - memory_type_index: u32, - memory_dedicated_allocate_info: Option<&vk::MemoryDedicatedAllocateInfo>, - ) -> Option<(vk::DeviceMemory, *mut u8)> { - if device.allocator.allocation_count.load(Ordering::Relaxed) - >= device - .physical_device_properties - .properties - .limits - .max_memory_allocation_count - { - return None; - } - - let mut allocate_info = vk::MemoryAllocateInfo { - allocation_size, - 
memory_type_index, - ..default() - }; - - if let Some(memory_dedicated_allocate_info) = memory_dedicated_allocate_info { - allocate_info._next = memory_dedicated_allocate_info - as *const vk::MemoryDedicatedAllocateInfo - as *const _; - } - - let mut memory = vk::DeviceMemory::null(); - let memory = match unsafe { - device - .device_fn - .allocate_memory(device.device, &allocate_info, None, &mut memory) - } { - vk::Result::Success => memory, - vk::Result::ErrorOutOfDeviceMemory | vk::Result::ErrorOutOfHostMemory => { - return None - } - _ => panic!(), - }; - - device - .allocator - .allocation_count - .fetch_add(1, Ordering::AcqRel); - - let mapped_ptr = if host_mapped { - let mut data = std::ptr::null_mut(); - vk_check!(device.device_fn.map_memory( - device.device, - memory, - 0, - vk::WHOLE_SIZE, - vk::MemoryMapFlags::default(), - &mut data - )); - data as *mut u8 - } else { - std::ptr::null_mut() - }; - - Some((memory, mapped_ptr)) - } - // Outer loop here so that if we fail the first time around, we can clear the // preferred memory property flags and try again. loop { for memory_type_index in BitIter::new(std::iter::once(memory_requirements.memory_type_bits)) { - let memory_type = + let memory_type_properties = &self.physical_device_memory_properties.memory_types[memory_type_index]; + let memory_heap_index = memory_type_properties.heap_index.widen(); - if !memory_type - .property_flags - .contains(required_memory_property_flags) + let memory_type_property_flags = memory_type_properties.property_flags; + if !memory_type_property_flags + .contains(required_memory_property_flags | preferred_memory_property_flags) { continue; } - if !memory_type - .property_flags - .contains(preferred_memory_property_flags) - { - continue; - } + let memory_type = &self.allocator.memory_types[memory_type_index]; + let memory_heap = &self.allocator.memory_heaps[memory_heap_index]; // Does the driver want a dedicated allocation? 
if memory_dedicated_requirements.requires_dedicated_allocation == vk::Bool32::True || memory_dedicated_requirements.prefers_dedicated_allocation == vk::Bool32::True { - if let Some((memory, mapped_ptr)) = allocate( - self, + if let Some((memory, mapped_ptr)) = self.try_allocate_memory( host_mapped, size, memory_type_index as u32, @@ -221,39 +311,36 @@ impl VulkanDevice { memory, mapped_ptr, size, + memory_type_index: memory_type_index as u32, }); } } - let block_size = VULKAN_CONSTANTS.tlsf_maximum_block_size; - - // If the allocation is smaller than the TLSF super-block size for this + // If the allocation is smaller than the Tlsf super-block size for this // allocation type, we should attempt sub-allocation. - if size <= block_size { - let mut tlsf = self.allocator.tlsf[memory_type_index].lock(); + if size <= memory_heap.tlsf_super_block_size { + let mut tlsf = memory_type.tlsf.lock(); if let Some(allocation) = tlsf.alloc(size, align) { - return VulkanMemory::SubAlloc(VulkanMemorySubAlloc { - allocation, - size, - memory_type_index: memory_type_index as u32, - }); + return VulkanMemory::SubAlloc(VulkanMemorySubAlloc { allocation, size }); } else { - // When allocating backing storage for TLSF super-blocks, ensure that all memory + // When allocating backing storage for Tlsf super-blocks, ensure that all memory // is mapped if the memory type supports host mapping. This ensures we never // have to map a super-block later if an individual allocation desires it. 
- if let Some((memory, mapped_ptr)) = allocate( - self, - memory_type - .property_flags + if let Some((memory, mapped_ptr)) = self.try_allocate_memory( + memory_type_property_flags .contains(vk::MemoryPropertyFlags::HOST_VISIBLE), - block_size, + memory_heap.tlsf_super_block_size, memory_type_index as u32, None, ) { tlsf.insert_super_block( - block_size, - VulkanAllocationInfo { memory, mapped_ptr }, + memory_heap.tlsf_super_block_size, + VulkanSuperBlockInfo { + memory, + mapped_ptr, + memory_type_index: memory_type_index as u32, + }, ); // After inserting a new super-block we should always be able to service the @@ -263,7 +350,6 @@ impl VulkanDevice { return VulkanMemory::SubAlloc(VulkanMemorySubAlloc { allocation, size, - memory_type_index: memory_type_index as u32, }); } } @@ -271,11 +357,11 @@ impl VulkanDevice { // If sub-allocation failed, and we were unable to allocate a new super-block, // OR - // If the requested allocation size was too large for the TLSF allocator, + // If the requested allocation size was too large for the Tlsf allocator, // // Attempt a dedicated allocation for the exact requested size. if let Some((memory, mapped_ptr)) = - allocate(self, host_mapped, size, memory_type_index as u32, None) + self.try_allocate_memory(host_mapped, size, memory_type_index as u32, None) { self.allocator.dedicated.lock().insert(memory); @@ -283,6 +369,7 @@ impl VulkanDevice { memory, mapped_ptr, size, + memory_type_index: memory_type_index as u32, }); } } @@ -299,27 +386,27 @@ impl VulkanDevice { pub fn allocator_begin_frame(&self, frame: &mut VulkanFrame) { for allocation in frame.destroyed_allocations.get_mut().drain(..) 
{ - match allocation { - VulkanMemory::Dedicated(dedicated) => { - self.allocator.dedicated.lock().remove(&dedicated.memory); - unsafe { - self.device_fn - .free_memory(self.device, dedicated.memory, None) - } - } - VulkanMemory::SubAlloc(sub_alloc) => { - let mut allocator = - self.allocator.tlsf[sub_alloc.memory_type_index.widen()].lock(); - allocator.free(sub_alloc.allocation) - } - } + self.free_memory(allocation); } } pub fn allocator_drop(&mut self) { - for tlsf in self.allocator.tlsf.iter_mut() { - // Clear out all memory blocks held by the TLSF allocators. - let tlsf = tlsf.get_mut(); + println!( + "{:?}", + &self.allocator.memory_heaps[..self + .physical_device_memory_properties + .memory_heap_count + .widen()] + ); + + println!( + "count: {}", + self.allocator.allocation_count.load(Ordering::Relaxed) + ); + + for memory_type in self.allocator.memory_types.iter_mut() { + // Clear out all memory blocks held by the Tlsf allocators. + let tlsf = memory_type.tlsf.get_mut(); for super_block in tlsf.super_blocks() { unsafe { self.device_fn diff --git a/libs/narcissus-gpu/src/backend/vulkan/mod.rs b/libs/narcissus-gpu/src/backend/vulkan/mod.rs index 32a482c..c8f3ff8 100644 --- a/libs/narcissus-gpu/src/backend/vulkan/mod.rs +++ b/libs/narcissus-gpu/src/backend/vulkan/mod.rs @@ -53,8 +53,13 @@ pub struct VulkanConstants { /// transient allocations. transient_buffer_size: u64, - /// Maximum size for backing allocations used by the TLSF allocator. - tlsf_maximum_block_size: u64, + /// Default size for backing allocations used by the Tlsf allocator. + tlsf_default_super_block_size: u64, + + /// For memory heaps that are smaller than `tlsf_default_super_block_size` * + /// `tlsf_small_super_block_divisor`, use heap size divided by + /// `tlsf_small_super_block_divisor` as the super block size. + tlsf_small_super_block_divisor: u64, /// The max number of descriptor sets allocatable from each descriptor pool. 
descriptor_pool_max_sets: u32, @@ -72,7 +77,8 @@ const VULKAN_CONSTANTS: VulkanConstants = VulkanConstants { num_frames: 2, swapchain_destroy_delay: 8, transient_buffer_size: 4 * 1024 * 1024, - tlsf_maximum_block_size: 128 * 1024 * 1024, + tlsf_default_super_block_size: 128 * 1024 * 1024, + tlsf_small_super_block_divisor: 16, descriptor_pool_max_sets: 500, descriptor_pool_sampler_count: 100, descriptor_pool_uniform_buffer_count: 500, @@ -652,6 +658,8 @@ impl VulkanDevice { }) })); + let allocator = VulkanAllocator::new(physical_device_memory_properties.as_ref()); + Self { instance, physical_device, @@ -686,7 +694,7 @@ impl VulkanDevice { recycled_descriptor_pools: default(), recycled_transient_buffers: default(), - allocator: default(), + allocator, _global_fn: global_fn, instance_fn,