From: Joshua Simmons <josh@nega.tv>
Date: Wed, 19 Jul 2023 18:55:21 +0000 (+0200)
Subject: narcissus-gpu: Track allocation metrics per-heap
X-Git-Url: https://git.nega.tv//gitweb.cgi?a=commitdiff_plain;h=65005aa1c1449ea04941497433075b038e496714;p=josh%2Fnarcissus

narcissus-gpu: Track allocation metrics per-heap

Add tracking for per-heap memory usage.
Calculate per-heap TLSF super block size based on heap size.
---

diff --git a/libs/narcissus-gpu/src/backend/vulkan/allocator.rs b/libs/narcissus-gpu/src/backend/vulkan/allocator.rs
index abd7832..880bb25 100644
--- a/libs/narcissus-gpu/src/backend/vulkan/allocator.rs
+++ b/libs/narcissus-gpu/src/backend/vulkan/allocator.rs
@@ -1,6 +1,6 @@
 use std::{
     collections::HashSet,
-    sync::atomic::{AtomicU32, Ordering},
+    sync::atomic::{AtomicU32, AtomicU64, Ordering},
 };
 
 use narcissus_core::{default, BitIter, Mutex, Widen};
@@ -8,25 +8,69 @@ use narcissus_core::{default, BitIter, Mutex, Widen};
 use vulkan_sys as vk;
 
 use crate::{
-    backend::vulkan::VULKAN_CONSTANTS,
     tlsf::{self, Tlsf},
     vk_check, MemoryLocation,
 };
 
-use super::{VulkanDevice, VulkanFrame};
+use super::{VulkanDevice, VulkanFrame, VULKAN_CONSTANTS};
+
+/// Allocator-side bookkeeping for a single Vulkan memory heap.
+#[derive(Default, Debug)]
+pub struct VulkanMemoryHeap {
+    /// Size in bytes of each Tlsf super-block allocated from this memory heap.
+    ///
+    /// Computed in `VulkanAllocator::new`: large heaps use
+    /// `VULKAN_CONSTANTS.tlsf_default_super_block_size`; smaller heaps use heap size
+    /// divided by `VULKAN_CONSTANTS.tlsf_small_super_block_divisor` to limit waste.
+    tlsf_super_block_size: u64,
+
+    /// Total size in bytes currently allocated from the device against this heap.
+    total_allocated_bytes: AtomicU64,
+}
+
+#[derive(Default)]
+pub struct VulkanMemoryType {
+    tlsf: Mutex<Tlsf<VulkanSuperBlockInfo>>, // Sub-allocator over this type's super-blocks.
+}
 
 #[derive(Default)]
 pub struct VulkanAllocator {
-    tlsf: [Mutex<Tlsf<VulkanAllocationInfo>>; vk::MAX_MEMORY_TYPES as usize],
+    memory_heaps: [VulkanMemoryHeap; vk::MAX_MEMORY_HEAPS as usize],
+    memory_types: [VulkanMemoryType; vk::MAX_MEMORY_TYPES as usize],
     dedicated: Mutex<HashSet<vk::DeviceMemory>>,
-
     allocation_count: AtomicU32,
 }
 
+impl VulkanAllocator {
+    /// Creates an allocator with a Tlsf super-block size chosen for each memory heap.
+    pub fn new(memory_properties: &vk::PhysicalDeviceMemoryProperties) -> Self {
+        let small_heap_threshold = VULKAN_CONSTANTS.tlsf_small_super_block_divisor
+            * VULKAN_CONSTANTS.tlsf_default_super_block_size;
+        let memory_heaps = std::array::from_fn(|heap_index| {
+            let heap_size = memory_properties.memory_heaps[heap_index].size;
+            let tlsf_super_block_size = if heap_size < small_heap_threshold {
+                heap_size / VULKAN_CONSTANTS.tlsf_small_super_block_divisor
+            } else {
+                VULKAN_CONSTANTS.tlsf_default_super_block_size
+            };
+            VulkanMemoryHeap {
+                tlsf_super_block_size,
+                total_allocated_bytes: default(),
+            }
+        });
+
+        Self {
+            memory_heaps,
+            ..default()
+        }
+    }
+}
+
 #[derive(Clone, Copy)]
-pub struct VulkanAllocationInfo {
+pub struct VulkanSuperBlockInfo {
     memory: vk::DeviceMemory,
     mapped_ptr: *mut u8,
+    memory_type_index: u32,
 }
 
 #[derive(Clone)]
@@ -34,13 +78,13 @@ pub struct VulkanMemoryDedicated {
     memory: vk::DeviceMemory,
     mapped_ptr: *mut u8,
     size: u64,
+    memory_type_index: u32,
 }
 
 #[derive(Clone)]
 pub struct VulkanMemorySubAlloc {
-    allocation: tlsf::Allocation<VulkanAllocationInfo>,
+    allocation: tlsf::Allocation<VulkanSuperBlockInfo>,
     size: u64,
-    memory_type_index: u32,
 }
 
 #[derive(Clone)]
@@ -93,6 +137,121 @@ impl VulkanMemory {
 }
 
 impl VulkanDevice {
+    /// Frees `memory`: a dedicated allocation is released to the device and removed
+    /// from the heap statistics; a sub-allocation is handed back to its Tlsf.
+    fn free_memory(&self, memory: VulkanMemory) {
+        match memory {
+            VulkanMemory::Dedicated(dedicated) => {
+                self.allocator.dedicated.lock().remove(&dedicated.memory);
+
+                let heap_index = self.physical_device_memory_properties.memory_types
+                    [dedicated.memory_type_index.widen()]
+                .heap_index;
+                let memory_heap = &self.allocator.memory_heaps[heap_index.widen()];
+
+                // Reverse the statistics recorded when this memory was allocated.
+                memory_heap
+                    .total_allocated_bytes
+                    .fetch_sub(dedicated.size, Ordering::SeqCst);
+                self.allocator
+                    .allocation_count
+                    .fetch_sub(1, Ordering::SeqCst);
+
+                unsafe {
+                    self.device_fn
+                        .free_memory(self.device, dedicated.memory, None)
+                }
+            }
+            VulkanMemory::SubAlloc(sub_alloc) => {
+                let memory_type_index = sub_alloc.allocation.user_data().memory_type_index;
+                let memory_type = &self.allocator.memory_types[memory_type_index.widen()];
+                let mut tlsf = memory_type.tlsf.lock();
+                tlsf.free(sub_alloc.allocation)
+            }
+        }
+    }
+
+    /// Allocates `allocation_size` bytes of device memory from `memory_type_index`,
+    /// mapping the whole range when `host_mapped` is set. Returns `None` if the
+    /// allocation count or heap size limit would be exceeded, or on out-of-memory.
+    fn try_allocate_memory(
+        &self,
+        host_mapped: bool,
+        allocation_size: u64,
+        memory_type_index: u32,
+        memory_dedicated_allocate_info: Option<&vk::MemoryDedicatedAllocateInfo>,
+    ) -> Option<(vk::DeviceMemory, *mut u8)> {
+        // Can't allocate if we would blow the global allocation limit.
+        let limits = &self.physical_device_properties.properties.limits;
+        if self.allocator.allocation_count.load(Ordering::Relaxed)
+            >= limits.max_memory_allocation_count
+        {
+            return None;
+        }
+
+        let heap_index = self.physical_device_memory_properties.memory_types
+            [memory_type_index.widen()]
+        .heap_index;
+
+        let memory_heap_properties =
+            &self.physical_device_memory_properties.memory_heaps[heap_index.widen()];
+        let memory_heap = &self.allocator.memory_heaps[heap_index.widen()];
+
+        // Can't allocate if we would blow this heap's size; saturate so a huge size can't wrap.
+        let current_allocated_bytes = memory_heap.total_allocated_bytes.load(Ordering::Relaxed);
+        if current_allocated_bytes.saturating_add(allocation_size) > memory_heap_properties.size {
+            return None;
+        }
+
+        let mut allocate_info = vk::MemoryAllocateInfo {
+            allocation_size,
+            memory_type_index,
+            ..default()
+        };
+
+        if let Some(memory_dedicated_allocate_info) = memory_dedicated_allocate_info {
+            allocate_info._next = memory_dedicated_allocate_info
+                as *const vk::MemoryDedicatedAllocateInfo
+                as *const _;
+        }
+
+        let mut memory = vk::DeviceMemory::null();
+        let memory = match unsafe {
+            self.device_fn
+                .allocate_memory(self.device, &allocate_info, None, &mut memory)
+        } {
+            vk::Result::Success => memory,
+            vk::Result::ErrorOutOfDeviceMemory | vk::Result::ErrorOutOfHostMemory => return None,
+            _ => panic!(), // Any other result is unexpected and treated as fatal.
+        };
+
+        // Update allocation statistics.
+        self.allocator
+            .allocation_count
+            .fetch_add(1, Ordering::AcqRel);
+
+        memory_heap
+            .total_allocated_bytes
+            .fetch_add(allocation_size, Ordering::SeqCst);
+
+        let mapped_ptr = if host_mapped {
+            let mut data = std::ptr::null_mut();
+            vk_check!(self.device_fn.map_memory(
+                self.device,
+                memory,
+                0,
+                vk::WHOLE_SIZE,
+                vk::MemoryMapFlags::default(),
+                &mut data
+            ));
+            data as *mut u8
+        } else {
+            std::ptr::null_mut()
+        };
+
+        Some((memory, mapped_ptr))
+    }
+
     pub fn allocate_memory(
         &self,
         memory_location: MemoryLocation,
@@ -115,101 +274,32 @@ impl VulkanDevice {
         let size = memory_requirements.size;
         let align = memory_requirements.alignment;
 
-        fn allocate(
-            device: &VulkanDevice,
-            host_mapped: bool,
-            allocation_size: u64,
-            memory_type_index: u32,
-            memory_dedicated_allocate_info: Option<&vk::MemoryDedicatedAllocateInfo>,
-        ) -> Option<(vk::DeviceMemory, *mut u8)> {
-            if device.allocator.allocation_count.load(Ordering::Relaxed)
-                >= device
-                    .physical_device_properties
-                    .properties
-                    .limits
-                    .max_memory_allocation_count
-            {
-                return None;
-            }
-
-            let mut allocate_info = vk::MemoryAllocateInfo {
-                allocation_size,
-                memory_type_index,
-                ..default()
-            };
-
-            if let Some(memory_dedicated_allocate_info) = memory_dedicated_allocate_info {
-                allocate_info._next = memory_dedicated_allocate_info
-                    as *const vk::MemoryDedicatedAllocateInfo
-                    as *const _;
-            }
-
-            let mut memory = vk::DeviceMemory::null();
-            let memory = match unsafe {
-                device
-                    .device_fn
-                    .allocate_memory(device.device, &allocate_info, None, &mut memory)
-            } {
-                vk::Result::Success => memory,
-                vk::Result::ErrorOutOfDeviceMemory | vk::Result::ErrorOutOfHostMemory => {
-                    return None
-                }
-                _ => panic!(),
-            };
-
-            device
-                .allocator
-                .allocation_count
-                .fetch_add(1, Ordering::AcqRel);
-
-            let mapped_ptr = if host_mapped {
-                let mut data = std::ptr::null_mut();
-                vk_check!(device.device_fn.map_memory(
-                    device.device,
-                    memory,
-                    0,
-                    vk::WHOLE_SIZE,
-                    vk::MemoryMapFlags::default(),
-                    &mut data
-                ));
-                data as *mut u8
-            } else {
-                std::ptr::null_mut()
-            };
-
-            Some((memory, mapped_ptr))
-        }
-
         // Outer loop here so that if we fail the first time around, we can clear the
         // preferred memory property flags and try again.
         loop {
             for memory_type_index in
                 BitIter::new(std::iter::once(memory_requirements.memory_type_bits))
             {
-                let memory_type =
+                let memory_type_properties =
                     &self.physical_device_memory_properties.memory_types[memory_type_index];
+                let memory_heap_index = memory_type_properties.heap_index.widen();
 
-                if !memory_type
-                    .property_flags
-                    .contains(required_memory_property_flags)
+                let memory_type_property_flags = memory_type_properties.property_flags;
+                if !memory_type_property_flags
+                    .contains(required_memory_property_flags | preferred_memory_property_flags)
                 {
                     continue;
                 }
 
-                if !memory_type
-                    .property_flags
-                    .contains(preferred_memory_property_flags)
-                {
-                    continue;
-                }
+                let memory_type = &self.allocator.memory_types[memory_type_index];
+                let memory_heap = &self.allocator.memory_heaps[memory_heap_index];
 
                 // Does the driver want a dedicated allocation?
                 if memory_dedicated_requirements.requires_dedicated_allocation == vk::Bool32::True
                     || memory_dedicated_requirements.prefers_dedicated_allocation
                         == vk::Bool32::True
                 {
-                    if let Some((memory, mapped_ptr)) = allocate(
-                        self,
+                    if let Some((memory, mapped_ptr)) = self.try_allocate_memory(
                         host_mapped,
                         size,
                         memory_type_index as u32,
@@ -221,39 +311,36 @@ impl VulkanDevice {
                             memory,
                             mapped_ptr,
                             size,
+                            memory_type_index: memory_type_index as u32,
                         });
                     }
                 }
 
-                let block_size = VULKAN_CONSTANTS.tlsf_maximum_block_size;
-
-                // If the allocation is smaller than the TLSF super-block size for this
+                // If the allocation is smaller than the Tlsf super-block size for this
                 // allocation type, we should attempt sub-allocation.
-                if size <= block_size {
-                    let mut tlsf = self.allocator.tlsf[memory_type_index].lock();
+                if size <= memory_heap.tlsf_super_block_size {
+                    let mut tlsf = memory_type.tlsf.lock();
 
                     if let Some(allocation) = tlsf.alloc(size, align) {
-                        return VulkanMemory::SubAlloc(VulkanMemorySubAlloc {
-                            allocation,
-                            size,
-                            memory_type_index: memory_type_index as u32,
-                        });
+                        return VulkanMemory::SubAlloc(VulkanMemorySubAlloc { allocation, size });
                     } else {
-                        // When allocating backing storage for TLSF super-blocks, ensure that all memory
+                        // When allocating backing storage for Tlsf super-blocks, ensure that all memory
                         // is mapped if the memory type supports host mapping. This ensures we never
                         // have to map a super-block later if an individual allocation desires it.
-                        if let Some((memory, mapped_ptr)) = allocate(
-                            self,
-                            memory_type
-                                .property_flags
+                        if let Some((memory, mapped_ptr)) = self.try_allocate_memory(
+                            memory_type_property_flags
                                 .contains(vk::MemoryPropertyFlags::HOST_VISIBLE),
-                            block_size,
+                            memory_heap.tlsf_super_block_size,
                             memory_type_index as u32,
                             None,
                         ) {
                             tlsf.insert_super_block(
-                                block_size,
-                                VulkanAllocationInfo { memory, mapped_ptr },
+                                memory_heap.tlsf_super_block_size,
+                                VulkanSuperBlockInfo {
+                                    memory,
+                                    mapped_ptr,
+                                    memory_type_index: memory_type_index as u32,
+                                },
                             );
 
                             // After inserting a new super-block we should always be able to service the
@@ -263,7 +350,6 @@ impl VulkanDevice {
                             return VulkanMemory::SubAlloc(VulkanMemorySubAlloc {
                                 allocation,
                                 size,
-                                memory_type_index: memory_type_index as u32,
                             });
                         }
                     }
@@ -271,11 +357,11 @@ impl VulkanDevice {
 
                 // If sub-allocation failed, and we were unable to allocate a new super-block,
                 // OR
-                // If the requested allocation size was too large for the TLSF allocator,
+                // If the requested allocation size was too large for the Tlsf allocator,
                 //
                 // Attempt a dedicated allocation for the exact requested size.
                 if let Some((memory, mapped_ptr)) =
-                    allocate(self, host_mapped, size, memory_type_index as u32, None)
+                    self.try_allocate_memory(host_mapped, size, memory_type_index as u32, None)
                 {
                     self.allocator.dedicated.lock().insert(memory);
 
@@ -283,6 +369,7 @@ impl VulkanDevice {
                         memory,
                         mapped_ptr,
                         size,
+                        memory_type_index: memory_type_index as u32,
                     });
                 }
             }
@@ -299,27 +386,27 @@ impl VulkanDevice {
 
     pub fn allocator_begin_frame(&self, frame: &mut VulkanFrame) {
         for allocation in frame.destroyed_allocations.get_mut().drain(..) {
-            match allocation {
-                VulkanMemory::Dedicated(dedicated) => {
-                    self.allocator.dedicated.lock().remove(&dedicated.memory);
-                    unsafe {
-                        self.device_fn
-                            .free_memory(self.device, dedicated.memory, None)
-                    }
-                }
-                VulkanMemory::SubAlloc(sub_alloc) => {
-                    let mut allocator =
-                        self.allocator.tlsf[sub_alloc.memory_type_index.widen()].lock();
-                    allocator.free(sub_alloc.allocation)
-                }
-            }
+            self.free_memory(allocation);
         }
     }
 
     pub fn allocator_drop(&mut self) {
-        for tlsf in self.allocator.tlsf.iter_mut() {
-            // Clear out all memory blocks held by the TLSF allocators.
-            let tlsf = tlsf.get_mut();
+        println!(
+            "{:?}",
+            &self.allocator.memory_heaps[..self
+                .physical_device_memory_properties
+                .memory_heap_count
+                .widen()]
+        );
+
+        println!(
+            "count: {}",
+            self.allocator.allocation_count.load(Ordering::Relaxed)
+        );
+
+        for memory_type in self.allocator.memory_types.iter_mut() {
+            // Clear out all memory blocks held by the Tlsf allocators.
+            let tlsf = memory_type.tlsf.get_mut();
             for super_block in tlsf.super_blocks() {
                 unsafe {
                     self.device_fn
diff --git a/libs/narcissus-gpu/src/backend/vulkan/mod.rs b/libs/narcissus-gpu/src/backend/vulkan/mod.rs
index 32a482c..c8f3ff8 100644
--- a/libs/narcissus-gpu/src/backend/vulkan/mod.rs
+++ b/libs/narcissus-gpu/src/backend/vulkan/mod.rs
@@ -53,8 +53,13 @@ pub struct VulkanConstants {
     /// transient allocations.
     transient_buffer_size: u64,
 
-    /// Maximum size for backing allocations used by the TLSF allocator.
-    tlsf_maximum_block_size: u64,
+    /// Default size for backing allocations used by the Tlsf allocator.
+    tlsf_default_super_block_size: u64,
+
+    /// For memory heaps that are smaller than `tlsf_default_super_block_size` *
+    /// `tlsf_small_super_block_divisor`, use heap size divided by
+    /// `tlsf_small_super_block_divisor` as the super block size.
+    tlsf_small_super_block_divisor: u64,
 
     /// The max number of descriptor sets allocatable from each descriptor pool.
     descriptor_pool_max_sets: u32,
@@ -72,7 +77,8 @@ const VULKAN_CONSTANTS: VulkanConstants = VulkanConstants {
     num_frames: 2,
     swapchain_destroy_delay: 8,
     transient_buffer_size: 4 * 1024 * 1024,
-    tlsf_maximum_block_size: 128 * 1024 * 1024,
+    tlsf_default_super_block_size: 128 * 1024 * 1024,
+    tlsf_small_super_block_divisor: 16,
     descriptor_pool_max_sets: 500,
     descriptor_pool_sampler_count: 100,
     descriptor_pool_uniform_buffer_count: 500,
@@ -652,6 +658,8 @@ impl VulkanDevice {
             })
         }));
 
+        let allocator = VulkanAllocator::new(physical_device_memory_properties.as_ref());
+
         Self {
             instance,
             physical_device,
@@ -686,7 +694,7 @@ impl VulkanDevice {
             recycled_descriptor_pools: default(),
             recycled_transient_buffers: default(),
 
-            allocator: default(),
+            allocator,
 
             _global_fn: global_fn,
             instance_fn,