git.nega.tv - josh/narcissus/commitdiff
narcissus-gpu: Track allocation metrics per-heap
author: Joshua Simmons <josh@nega.tv>
Wed, 19 Jul 2023 18:55:21 +0000 (20:55 +0200)
committer: Joshua Simmons <josh@nega.tv>
Wed, 19 Jul 2023 18:55:21 +0000 (20:55 +0200)
Add tracking for per-heap memory usage.
Calculate per-heap TLSF super block size based on heap size.

libs/narcissus-gpu/src/backend/vulkan/allocator.rs
libs/narcissus-gpu/src/backend/vulkan/mod.rs

index abd7832b2ffbd57f6cfa54d78a4deac87ef140ee..880bb256872995c7c17cc317378601acdf2ed973 100644 (file)
@@ -1,6 +1,6 @@
 use std::{
     collections::HashSet,
-    sync::atomic::{AtomicU32, Ordering},
+    sync::atomic::{AtomicU32, AtomicU64, Ordering},
 };
 
 use narcissus_core::{default, BitIter, Mutex, Widen};
@@ -8,25 +8,69 @@ use narcissus_core::{default, BitIter, Mutex, Widen};
 use vulkan_sys as vk;
 
 use crate::{
-    backend::vulkan::VULKAN_CONSTANTS,
     tlsf::{self, Tlsf},
     vk_check, MemoryLocation,
 };
 
-use super::{VulkanDevice, VulkanFrame};
+use super::{VulkanDevice, VulkanFrame, VULKAN_CONSTANTS};
+
+#[derive(Default, Debug)]
+pub struct VulkanMemoryHeap {
+    /// The calculated Tlsf super-block size for this memory heap.
+    ///
+    /// Smaller heaps will require a smaller super block size to prevent excess
+    /// memory waste. Calculate a suitable super block size using
+    /// `VULKAN_CONSTANTS.tlsf_default_super_block_size` and
+    /// `VULKAN_CONSTANTS.tlsf_small_super_block_divisor`.
+    tlsf_super_block_size: u64,
+
+    /// Total size in bytes we have allocated against this memory heap.
+    total_allocated_bytes: AtomicU64,
+}
+
+#[derive(Default)]
+pub struct VulkanMemoryType {
+    tlsf: Mutex<Tlsf<VulkanSuperBlockInfo>>,
+}
 
 #[derive(Default)]
 pub struct VulkanAllocator {
-    tlsf: [Mutex<Tlsf<VulkanAllocationInfo>>; vk::MAX_MEMORY_TYPES as usize],
+    memory_heaps: [VulkanMemoryHeap; vk::MAX_MEMORY_HEAPS as usize],
+    memory_types: [VulkanMemoryType; vk::MAX_MEMORY_TYPES as usize],
     dedicated: Mutex<HashSet<vk::DeviceMemory>>,
-
     allocation_count: AtomicU32,
 }
 
+impl VulkanAllocator {
+    pub fn new(memory_properties: &vk::PhysicalDeviceMemoryProperties) -> Self {
+        let memory_heaps = std::array::from_fn(|memory_heap_index| {
+            let memory_heap_properties = &memory_properties.memory_heaps[memory_heap_index];
+            let tlsf_super_block_size = if memory_heap_properties.size
+                >= VULKAN_CONSTANTS.tlsf_small_super_block_divisor
+                    * VULKAN_CONSTANTS.tlsf_default_super_block_size
+            {
+                VULKAN_CONSTANTS.tlsf_default_super_block_size
+            } else {
+                memory_heap_properties.size / VULKAN_CONSTANTS.tlsf_small_super_block_divisor
+            };
+            VulkanMemoryHeap {
+                tlsf_super_block_size,
+                total_allocated_bytes: default(),
+            }
+        });
+
+        Self {
+            memory_heaps,
+            ..default()
+        }
+    }
+}
+
 #[derive(Clone, Copy)]
-pub struct VulkanAllocationInfo {
+pub struct VulkanSuperBlockInfo {
     memory: vk::DeviceMemory,
     mapped_ptr: *mut u8,
+    memory_type_index: u32,
 }
 
 #[derive(Clone)]
@@ -34,13 +78,13 @@ pub struct VulkanMemoryDedicated {
     memory: vk::DeviceMemory,
     mapped_ptr: *mut u8,
     size: u64,
+    memory_type_index: u32,
 }
 
 #[derive(Clone)]
 pub struct VulkanMemorySubAlloc {
-    allocation: tlsf::Allocation<VulkanAllocationInfo>,
+    allocation: tlsf::Allocation<VulkanSuperBlockInfo>,
     size: u64,
-    memory_type_index: u32,
 }
 
 #[derive(Clone)]
@@ -93,6 +137,121 @@ impl VulkanMemory {
 }
 
 impl VulkanDevice {
+    fn free_memory(&self, memory: VulkanMemory) {
+        match memory {
+            VulkanMemory::Dedicated(dedicated) => {
+                self.allocator.dedicated.lock().remove(&dedicated.memory);
+
+                let memory_heap = &self.allocator.memory_heaps[self
+                    .physical_device_memory_properties
+                    .memory_types[dedicated.memory_type_index.widen()]
+                .heap_index
+                .widen()];
+
+                memory_heap
+                    .total_allocated_bytes
+                    .fetch_sub(dedicated.size, Ordering::SeqCst);
+
+                self.allocator
+                    .allocation_count
+                    .fetch_sub(1, Ordering::SeqCst);
+
+                unsafe {
+                    self.device_fn
+                        .free_memory(self.device, dedicated.memory, None)
+                }
+            }
+            VulkanMemory::SubAlloc(sub_alloc) => {
+                let mut allocator = self.allocator.memory_types
+                    [sub_alloc.allocation.user_data().memory_type_index.widen()]
+                .tlsf
+                .lock();
+                allocator.free(sub_alloc.allocation)
+            }
+        }
+    }
+
+    fn try_allocate_memory(
+        &self,
+        host_mapped: bool,
+        allocation_size: u64,
+        memory_type_index: u32,
+        memory_dedicated_allocate_info: Option<&vk::MemoryDedicatedAllocateInfo>,
+    ) -> Option<(vk::DeviceMemory, *mut u8)> {
+        // Can't allocate if we would blow the global allocation limit.
+        if self.allocator.allocation_count.load(Ordering::Relaxed)
+            >= self
+                .physical_device_properties
+                .properties
+                .limits
+                .max_memory_allocation_count
+        {
+            return None;
+        }
+
+        let heap_index = self.physical_device_memory_properties.memory_types
+            [memory_type_index.widen()]
+        .heap_index;
+
+        let memory_heap_properties =
+            &self.physical_device_memory_properties.memory_heaps[heap_index.widen()];
+        let memory_heap = &self.allocator.memory_heaps[heap_index.widen()];
+
+        // Can't allocate if we would blow this heap's size.
+        let current_allocated_bytes = memory_heap.total_allocated_bytes.load(Ordering::Relaxed);
+        if current_allocated_bytes + allocation_size > memory_heap_properties.size {
+            return None;
+        }
+
+        let mut allocate_info = vk::MemoryAllocateInfo {
+            allocation_size,
+            memory_type_index,
+            ..default()
+        };
+
+        if let Some(memory_dedicated_allocate_info) = memory_dedicated_allocate_info {
+            allocate_info._next = memory_dedicated_allocate_info
+                as *const vk::MemoryDedicatedAllocateInfo
+                as *const _;
+        }
+
+        let mut memory = vk::DeviceMemory::null();
+        let memory = match unsafe {
+            self.device_fn
+                .allocate_memory(self.device, &allocate_info, None, &mut memory)
+        } {
+            vk::Result::Success => memory,
+            vk::Result::ErrorOutOfDeviceMemory | vk::Result::ErrorOutOfHostMemory => return None,
+            _ => panic!(),
+        };
+
+        // Update allocation statistics.
+        self.allocator
+            .allocation_count
+            .fetch_add(1, Ordering::AcqRel);
+
+        memory_heap
+            .total_allocated_bytes
+            .fetch_add(allocation_size, Ordering::SeqCst);
+
+        let mapped_ptr = if host_mapped {
+            let mut data = std::ptr::null_mut();
+            vk_check!(self.device_fn.map_memory(
+                self.device,
+                memory,
+                0,
+                vk::WHOLE_SIZE,
+                vk::MemoryMapFlags::default(),
+                &mut data
+            ));
+            data as *mut u8
+        } else {
+            std::ptr::null_mut()
+        };
+
+        Some((memory, mapped_ptr))
+    }
+
     pub fn allocate_memory(
         &self,
         memory_location: MemoryLocation,
@@ -115,101 +274,32 @@ impl VulkanDevice {
         let size = memory_requirements.size;
         let align = memory_requirements.alignment;
 
-        fn allocate(
-            device: &VulkanDevice,
-            host_mapped: bool,
-            allocation_size: u64,
-            memory_type_index: u32,
-            memory_dedicated_allocate_info: Option<&vk::MemoryDedicatedAllocateInfo>,
-        ) -> Option<(vk::DeviceMemory, *mut u8)> {
-            if device.allocator.allocation_count.load(Ordering::Relaxed)
-                >= device
-                    .physical_device_properties
-                    .properties
-                    .limits
-                    .max_memory_allocation_count
-            {
-                return None;
-            }
-
-            let mut allocate_info = vk::MemoryAllocateInfo {
-                allocation_size,
-                memory_type_index,
-                ..default()
-            };
-
-            if let Some(memory_dedicated_allocate_info) = memory_dedicated_allocate_info {
-                allocate_info._next = memory_dedicated_allocate_info
-                    as *const vk::MemoryDedicatedAllocateInfo
-                    as *const _;
-            }
-
-            let mut memory = vk::DeviceMemory::null();
-            let memory = match unsafe {
-                device
-                    .device_fn
-                    .allocate_memory(device.device, &allocate_info, None, &mut memory)
-            } {
-                vk::Result::Success => memory,
-                vk::Result::ErrorOutOfDeviceMemory | vk::Result::ErrorOutOfHostMemory => {
-                    return None
-                }
-                _ => panic!(),
-            };
-
-            device
-                .allocator
-                .allocation_count
-                .fetch_add(1, Ordering::AcqRel);
-
-            let mapped_ptr = if host_mapped {
-                let mut data = std::ptr::null_mut();
-                vk_check!(device.device_fn.map_memory(
-                    device.device,
-                    memory,
-                    0,
-                    vk::WHOLE_SIZE,
-                    vk::MemoryMapFlags::default(),
-                    &mut data
-                ));
-                data as *mut u8
-            } else {
-                std::ptr::null_mut()
-            };
-
-            Some((memory, mapped_ptr))
-        }
-
         // Outer loop here so that if we fail the first time around, we can clear the
         // preferred memory property flags and try again.
         loop {
             for memory_type_index in
                 BitIter::new(std::iter::once(memory_requirements.memory_type_bits))
             {
-                let memory_type =
+                let memory_type_properties =
                     &self.physical_device_memory_properties.memory_types[memory_type_index];
+                let memory_heap_index = memory_type_properties.heap_index.widen();
 
-                if !memory_type
-                    .property_flags
-                    .contains(required_memory_property_flags)
+                let memory_type_property_flags = memory_type_properties.property_flags;
+                if !memory_type_property_flags
+                    .contains(required_memory_property_flags | preferred_memory_property_flags)
                 {
                     continue;
                 }
 
-                if !memory_type
-                    .property_flags
-                    .contains(preferred_memory_property_flags)
-                {
-                    continue;
-                }
+                let memory_type = &self.allocator.memory_types[memory_type_index];
+                let memory_heap = &self.allocator.memory_heaps[memory_heap_index];
 
                 // Does the driver want a dedicated allocation?
                 if memory_dedicated_requirements.requires_dedicated_allocation == vk::Bool32::True
                     || memory_dedicated_requirements.prefers_dedicated_allocation
                         == vk::Bool32::True
                 {
-                    if let Some((memory, mapped_ptr)) = allocate(
-                        self,
+                    if let Some((memory, mapped_ptr)) = self.try_allocate_memory(
                         host_mapped,
                         size,
                         memory_type_index as u32,
@@ -221,39 +311,36 @@ impl VulkanDevice {
                             memory,
                             mapped_ptr,
                             size,
+                            memory_type_index: memory_type_index as u32,
                         });
                     }
                 }
 
-                let block_size = VULKAN_CONSTANTS.tlsf_maximum_block_size;
-
-                // If the allocation is smaller than the TLSF super-block size for this
+                // If the allocation is smaller than the Tlsf super-block size for this
                 // allocation type, we should attempt sub-allocation.
-                if size <= block_size {
-                    let mut tlsf = self.allocator.tlsf[memory_type_index].lock();
+                if size <= memory_heap.tlsf_super_block_size {
+                    let mut tlsf = memory_type.tlsf.lock();
 
                     if let Some(allocation) = tlsf.alloc(size, align) {
-                        return VulkanMemory::SubAlloc(VulkanMemorySubAlloc {
-                            allocation,
-                            size,
-                            memory_type_index: memory_type_index as u32,
-                        });
+                        return VulkanMemory::SubAlloc(VulkanMemorySubAlloc { allocation, size });
                     } else {
-                        // When allocating backing storage for TLSF super-blocks, ensure that all memory
+                        // When allocating backing storage for Tlsf super-blocks, ensure that all memory
                         // is mapped if the memory type supports host mapping. This ensures we never
                         // have to map a super-block later if an individual allocation desires it.
-                        if let Some((memory, mapped_ptr)) = allocate(
-                            self,
-                            memory_type
-                                .property_flags
+                        if let Some((memory, mapped_ptr)) = self.try_allocate_memory(
+                            memory_type_property_flags
                                 .contains(vk::MemoryPropertyFlags::HOST_VISIBLE),
-                            block_size,
+                            memory_heap.tlsf_super_block_size,
                             memory_type_index as u32,
                             None,
                         ) {
                             tlsf.insert_super_block(
-                                block_size,
-                                VulkanAllocationInfo { memory, mapped_ptr },
+                                memory_heap.tlsf_super_block_size,
+                                VulkanSuperBlockInfo {
+                                    memory,
+                                    mapped_ptr,
+                                    memory_type_index: memory_type_index as u32,
+                                },
                             );
 
                             // After inserting a new super-block we should always be able to service the
@@ -263,7 +350,6 @@ impl VulkanDevice {
                             return VulkanMemory::SubAlloc(VulkanMemorySubAlloc {
                                 allocation,
                                 size,
-                                memory_type_index: memory_type_index as u32,
                             });
                         }
                     }
@@ -271,11 +357,11 @@ impl VulkanDevice {
 
                 // If sub-allocation failed, and we were unable to allocate a new super-block,
                 // OR
-                // If the requested allocation size was too large for the TLSF allocator,
+                // If the requested allocation size was too large for the Tlsf allocator,
                 //
                 // Attempt a dedicated allocation for the exact requested size.
                 if let Some((memory, mapped_ptr)) =
-                    allocate(self, host_mapped, size, memory_type_index as u32, None)
+                    self.try_allocate_memory(host_mapped, size, memory_type_index as u32, None)
                 {
                     self.allocator.dedicated.lock().insert(memory);
 
@@ -283,6 +369,7 @@ impl VulkanDevice {
                         memory,
                         mapped_ptr,
                         size,
+                        memory_type_index: memory_type_index as u32,
                     });
                 }
             }
@@ -299,27 +386,27 @@ impl VulkanDevice {
 
     pub fn allocator_begin_frame(&self, frame: &mut VulkanFrame) {
         for allocation in frame.destroyed_allocations.get_mut().drain(..) {
-            match allocation {
-                VulkanMemory::Dedicated(dedicated) => {
-                    self.allocator.dedicated.lock().remove(&dedicated.memory);
-                    unsafe {
-                        self.device_fn
-                            .free_memory(self.device, dedicated.memory, None)
-                    }
-                }
-                VulkanMemory::SubAlloc(sub_alloc) => {
-                    let mut allocator =
-                        self.allocator.tlsf[sub_alloc.memory_type_index.widen()].lock();
-                    allocator.free(sub_alloc.allocation)
-                }
-            }
+            self.free_memory(allocation);
         }
     }
 
     pub fn allocator_drop(&mut self) {
-        for tlsf in self.allocator.tlsf.iter_mut() {
-            // Clear out all memory blocks held by the TLSF allocators.
-            let tlsf = tlsf.get_mut();
+        println!(
+            "{:?}",
+            &self.allocator.memory_heaps[..self
+                .physical_device_memory_properties
+                .memory_heap_count
+                .widen()]
+        );
+
+        println!(
+            "count: {}",
+            self.allocator.allocation_count.load(Ordering::Relaxed)
+        );
+
+        for memory_type in self.allocator.memory_types.iter_mut() {
+            // Clear out all memory blocks held by the Tlsf allocators.
+            let tlsf = memory_type.tlsf.get_mut();
             for super_block in tlsf.super_blocks() {
                 unsafe {
                     self.device_fn
index 32a482c73d85d7e5c215b5f786dcb7c18fb2c152..c8f3ff8969c01b0b3512ee5b3f3475a33d8925f9 100644 (file)
@@ -53,8 +53,13 @@ pub struct VulkanConstants {
     /// transient allocations.
     transient_buffer_size: u64,
 
-    /// Maximum size for backing allocations used by the TLSF allocator.
-    tlsf_maximum_block_size: u64,
+    /// Default size for backing allocations used by the Tlsf allocator.
+    tlsf_default_super_block_size: u64,
+
+    /// For memory heaps that are smaller than `tlsf_default_super_block_size` *
+    /// `tlsf_small_super_block_divisor`, use heap size divided by
+    /// `tlsf_small_super_block_divisor` as the super block size.
+    tlsf_small_super_block_divisor: u64,
 
     /// The max number of descriptor sets allocatable from each descriptor pool.
     descriptor_pool_max_sets: u32,
@@ -72,7 +77,8 @@ const VULKAN_CONSTANTS: VulkanConstants = VulkanConstants {
     num_frames: 2,
     swapchain_destroy_delay: 8,
     transient_buffer_size: 4 * 1024 * 1024,
-    tlsf_maximum_block_size: 128 * 1024 * 1024,
+    tlsf_default_super_block_size: 128 * 1024 * 1024,
+    tlsf_small_super_block_divisor: 16,
     descriptor_pool_max_sets: 500,
     descriptor_pool_sampler_count: 100,
     descriptor_pool_uniform_buffer_count: 500,
@@ -652,6 +658,8 @@ impl VulkanDevice {
             })
         }));
 
+        let allocator = VulkanAllocator::new(physical_device_memory_properties.as_ref());
+
         Self {
             instance,
             physical_device,
@@ -686,7 +694,7 @@ impl VulkanDevice {
             recycled_descriptor_pools: default(),
             recycled_transient_buffers: default(),
 
-            allocator: default(),
+            allocator,
 
             _global_fn: global_fn,
             instance_fn,