From 5bc77e51f16e2ae4de319a816c8cfd0abc544711 Mon Sep 17 00:00:00 2001 From: Joshua Simmons Date: Thu, 20 Jul 2023 13:09:59 +0200 Subject: [PATCH] narcissus-gpu: Fix bufferImageGranularity support bufferImageGranularity applies to linear resources and non-linear resources - not to images and buffers generally. --- bins/narcissus/src/main.rs | 8 ++-- .../src/backend/vulkan/allocator.rs | 48 ++++++++++--------- .../src/backend/vulkan/convert.rs | 10 +++- libs/narcissus-gpu/src/backend/vulkan/mod.rs | 20 +++++--- libs/narcissus-gpu/src/lib.rs | 8 +++- 5 files changed, 58 insertions(+), 36 deletions(-) diff --git a/bins/narcissus/src/main.rs b/bins/narcissus/src/main.rs index fb6815a..4608a8f 100644 --- a/bins/narcissus/src/main.rs +++ b/bins/narcissus/src/main.rs @@ -12,7 +12,7 @@ use narcissus_font::{FontCollection, GlyphCache, HorizontalMetrics, TouchedGlyph use narcissus_gpu::{ create_device, Access, BufferDesc, BufferImageCopy, BufferUsageFlags, ClearValue, Extent2d, Extent3d, ImageAspectFlags, ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout, - ImageUsageFlags, LoadOp, MemoryLocation, Offset2d, Offset3d, RenderingAttachment, + ImageTiling, ImageUsageFlags, LoadOp, MemoryLocation, Offset2d, Offset3d, RenderingAttachment, RenderingDesc, Scissor, StoreOp, ThreadToken, Viewport, }; use narcissus_maths::{sin_cos_pi_f32, vec3, Affine3, HalfTurn, Mat3, Mat4, Point3, Vec3}; @@ -88,7 +88,7 @@ pub fn main() { usage: ImageUsageFlags::SAMPLED | ImageUsageFlags::TRANSFER, dimension: ImageDimension::Type2d, format: ImageFormat::RGBA8_SRGB, - initial_layout: ImageLayout::Optimal, + tiling: ImageTiling::Optimal, width: blåhaj_image_data.width() as u32, height: blåhaj_image_data.height() as u32, depth: 1, @@ -120,7 +120,7 @@ pub fn main() { host_mapped: false, dimension: ImageDimension::Type2d, format: ImageFormat::R8_UNORM, - initial_layout: ImageLayout::Optimal, + tiling: ImageTiling::Optimal, width: glyph_cache.width() as u32, height: glyph_cache.height() as u32, depth: 1, @@ -300,7 +300,7 @@ pub fn main() { usage: ImageUsageFlags::DEPTH_STENCIL_ATTACHMENT, dimension: ImageDimension::Type2d, format: ImageFormat::DEPTH_F32, - initial_layout: ImageLayout::Optimal, + tiling: ImageTiling::Optimal, width, height, depth: 1, diff --git a/libs/narcissus-gpu/src/backend/vulkan/allocator.rs b/libs/narcissus-gpu/src/backend/vulkan/allocator.rs index 77bfae0..565fa19 100644 --- a/libs/narcissus-gpu/src/backend/vulkan/allocator.rs +++ b/libs/narcissus-gpu/src/backend/vulkan/allocator.rs @@ -31,8 +31,10 @@ pub struct VulkanMemoryHeap { pub struct VulkanMemoryType { tlsf: Mutex, - /// Tlsf instance used exclusively for images when the - tlsf_images: Mutex, + /// Tlsf instance used exclusively for non-linear images when the + /// `buffer_image_granularity` limit is greater than the minimum alignment + /// guaranteed by the current Tlsf configuration. + tlsf_non_linear: Mutex, } #[derive(Default)] @@ -40,7 +42,7 @@ pub struct VulkanAllocator { memory_heaps: [VulkanMemoryHeap; vk::MAX_MEMORY_HEAPS as usize], memory_types: [VulkanMemoryType; vk::MAX_MEMORY_TYPES as usize], dedicated: Mutex>, - use_segregated_image_allocator: bool, + use_segregated_non_linear_allocator: bool, allocation_count: AtomicU32, } @@ -74,7 +76,7 @@ impl VulkanAllocator { Self { memory_heaps, - use_segregated_image_allocator, + use_segregated_non_linear_allocator: use_segregated_image_allocator, ..default() } } @@ -84,7 +86,7 @@ impl VulkanAllocator { pub struct VulkanSuperBlockInfo { memory: vk::DeviceMemory, mapped_ptr: *mut u8, - image_allocator: bool, + non_linear: bool, memory_type_index: u32, } @@ -185,8 +187,8 @@ impl VulkanDevice { VulkanMemory::SubAlloc(sub_alloc) => { let user_data = sub_alloc.allocation.user_data(); let memory_type = &self.allocator.memory_types[user_data.memory_type_index.widen()]; - let mut tlsf = if user_data.image_allocator { - memory_type.tlsf_images.lock() + let mut tlsf = if user_data.non_linear { + memory_type.tlsf_non_linear.lock() } else { memory_type.tlsf.lock() }; @@ -279,6 +281,7 @@ impl VulkanDevice { pub fn allocate_memory( &self, memory_location: MemoryLocation, + non_linear: bool, host_mapped: bool, resource: VulkanAllocationResource, ) -> VulkanMemory { @@ -301,7 +304,7 @@ impl VulkanDevice { ..default() }; - let (is_image_allocation, memory_dedicated_allocate_info) = match resource { + let memory_dedicated_allocate_info = match resource { // SAFETY: Safe so long as `_next` on `memory_requirements` is valid. VulkanAllocationResource::Buffer(buffer) => unsafe { self.device_fn.get_buffer_memory_requirements2( @@ -312,13 +315,10 @@ impl VulkanDevice { }, &mut memory_requirements, ); - ( - false, - vk::MemoryDedicatedAllocateInfo { - buffer, - ..default() - }, - ) + vk::MemoryDedicatedAllocateInfo { + buffer, + ..default() + } }, // SAFETY: Safe so long as `_next` on `memory_requirements` is valid. VulkanAllocationResource::Image(image) => unsafe { @@ -327,7 +327,7 @@ impl VulkanDevice { &vk::ImageMemoryRequirementsInfo2 { image, ..default() }, &mut memory_requirements, ); - (true, vk::MemoryDedicatedAllocateInfo { image, ..default() }) + vk::MemoryDedicatedAllocateInfo { image, ..default() } }, }; @@ -381,12 +381,12 @@ impl VulkanDevice { // If the allocation is smaller than the Tlsf super-block size for this // allocation type, we should attempt sub-allocation. if size <= memory_heap.tlsf_super_block_size { - let (image_allocator, mut tlsf) = if (VULKAN_CONSTANTS - .tlsf_force_segregated_image_allocator - || self.allocator.use_segregated_image_allocator) - && is_image_allocation + let (non_linear, mut tlsf) = if (VULKAN_CONSTANTS + .tlsf_force_segregated_non_linear_allocator + || self.allocator.use_segregated_non_linear_allocator) + && non_linear { - (true, memory_type.tlsf_images.lock()) + (true, memory_type.tlsf_non_linear.lock()) } else { (false, memory_type.tlsf.lock()) }; @@ -409,7 +409,9 @@ impl VulkanDevice { VulkanSuperBlockInfo { memory, mapped_ptr, - image_allocator, + // `non_linear` is only true here if we're allocating in the `tlsf_non_linear` + // allocator, *not* if the resource we're allocating for is non-linear. + non_linear, memory_type_index: memory_type_index as u32, }, ); @@ -473,7 +475,7 @@ impl VulkanDevice { .free_memory(self.device, super_block.user_data.memory, None) } } - for super_block in memory_type.tlsf_images.get_mut().super_blocks() { + for super_block in memory_type.tlsf_non_linear.get_mut().super_blocks() { unsafe { self.device_fn .free_memory(self.device, super_block.user_data.memory, None) diff --git a/libs/narcissus-gpu/src/backend/vulkan/convert.rs b/libs/narcissus-gpu/src/backend/vulkan/convert.rs index 129ff98..deb5457 100644 --- a/libs/narcissus-gpu/src/backend/vulkan/convert.rs +++ b/libs/narcissus-gpu/src/backend/vulkan/convert.rs @@ -6,7 +6,8 @@ use vulkan_sys as vk; use crate::{ BindingType, BlendMode, BufferUsageFlags, ClearValue, CompareOp, CullingMode, FrontFace, ImageAspectFlags, ImageDimension, ImageFormat, ImageSubresourceLayers, ImageSubresourceRange, - IndexType, LoadOp, PolygonMode, ShaderStageFlags, StencilOp, StencilOpState, StoreOp, Topology, + ImageTiling, IndexType, LoadOp, PolygonMode, ShaderStageFlags, StencilOp, StencilOpState, + StoreOp, Topology, }; #[must_use] @@ -302,3 +303,10 @@ pub fn vulkan_subresource_range(subresource: &ImageSubresourceRange) -> vk::Imag layer_count: subresource.array_layer_count, } } + +pub fn vulkan_image_tiling(tiling: ImageTiling) -> vk::ImageTiling { + match tiling { + ImageTiling::Linear => vk::ImageTiling::LINEAR, + ImageTiling::Optimal => vk::ImageTiling::OPTIMAL, + } +} diff --git a/libs/narcissus-gpu/src/backend/vulkan/mod.rs b/libs/narcissus-gpu/src/backend/vulkan/mod.rs index be0424a..f430ffb 100644 --- a/libs/narcissus-gpu/src/backend/vulkan/mod.rs +++ b/libs/narcissus-gpu/src/backend/vulkan/mod.rs @@ -19,9 +19,9 @@ use crate::{ BindGroupLayoutDesc, Buffer, BufferArg, BufferDesc, BufferImageCopy, BufferUsageFlags, CmdBuffer, ComputePipelineDesc, Device, Extent2d, Extent3d, Frame, GlobalBarrier, GpuConcurrent, GraphicsPipelineDesc, Image, ImageBarrier, ImageBlit, ImageDesc, ImageDimension, - ImageFormat, ImageLayout, ImageUsageFlags, ImageViewDesc, IndexType, MemoryLocation, Offset2d, - Offset3d, Pipeline, Sampler, SamplerAddressMode, SamplerCompareOp, SamplerDesc, SamplerFilter, - SwapchainOutOfDateError, ThreadToken, TransientBuffer, TypedBind, + ImageFormat, ImageLayout, ImageTiling, ImageUsageFlags, ImageViewDesc, IndexType, + MemoryLocation, Offset2d, Offset3d, Pipeline, Sampler, SamplerAddressMode, SamplerCompareOp, + SamplerDesc, SamplerFilter, SwapchainOutOfDateError, ThreadToken, TransientBuffer, TypedBind, }; mod allocator; @@ -61,8 +61,8 @@ pub struct VulkanConstants { /// `tlsf_small_super_block_divisor` as the super block size. tlsf_small_super_block_divisor: u64, - /// Force use of separate allocators for images and buffers. - tlsf_force_segregated_image_allocator: bool, + /// Force use of separate allocators for optimal tiling images and buffers. + tlsf_force_segregated_non_linear_allocator: bool, /// The max number of descriptor sets allocatable from each descriptor pool. descriptor_pool_max_sets: u32, @@ -82,7 +82,7 @@ const VULKAN_CONSTANTS: VulkanConstants = VulkanConstants { transient_buffer_size: 4 * 1024 * 1024, tlsf_default_super_block_size: 128 * 1024 * 1024, tlsf_small_super_block_divisor: 16, - tlsf_force_segregated_image_allocator: false, + tlsf_force_segregated_non_linear_allocator: false, descriptor_pool_max_sets: 500, descriptor_pool_sampler_count: 100, descriptor_pool_uniform_buffer_count: 500, @@ -817,6 +817,7 @@ impl VulkanDevice { let memory = self.allocate_memory( desc.memory_location, + false, desc.host_mapped, allocator::VulkanAllocationResource::Buffer(buffer), ); @@ -930,6 +931,8 @@ impl Device for VulkanDevice { depth: desc.depth, }; + let tiling = vulkan_image_tiling(desc.tiling); + let mut usage = default(); if desc.usage.contains(ImageUsageFlags::SAMPLED) { usage |= vk::ImageUsageFlags::SAMPLED; @@ -959,7 +962,7 @@ impl Device for VulkanDevice { mip_levels: desc.mip_levels, array_layers: desc.layer_count, samples: vk::SampleCountFlags::SAMPLE_COUNT_1, - tiling: vk::ImageTiling::OPTIMAL, + tiling, usage, sharing_mode: vk::SharingMode::Exclusive, queue_family_indices: queue_family_indices.into(), @@ -974,6 +977,7 @@ impl Device for VulkanDevice { let memory = self.allocate_memory( desc.memory_location, + desc.tiling == ImageTiling::Optimal, desc.host_mapped, allocator::VulkanAllocationResource::Image(image), ); @@ -2329,6 +2333,7 @@ impl VulkanDevice { let memory = self.allocate_memory( MemoryLocation::Host, + false, true, allocator::VulkanAllocationResource::Buffer(buffer), ); @@ -2453,6 +2458,7 @@ impl VulkanDevice { let memory = self.allocate_memory( MemoryLocation::Host, + false, true, allocator::VulkanAllocationResource::Buffer(buffer), ); diff --git a/libs/narcissus-gpu/src/lib.rs b/libs/narcissus-gpu/src/lib.rs index a41100a..41a9b30 100644 --- a/libs/narcissus-gpu/src/lib.rs +++ b/libs/narcissus-gpu/src/lib.rs @@ -216,7 +216,7 @@ pub struct ImageDesc { pub usage: ImageUsageFlags, pub dimension: ImageDimension, pub format: ImageFormat, - pub initial_layout: ImageLayout, + pub tiling: ImageTiling, pub width: u32, pub height: u32, pub depth: u32, @@ -626,6 +626,12 @@ pub enum ImageLayout { General, } +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum ImageTiling { + Linear, + Optimal, +} + pub struct GlobalBarrier<'a> { pub prev_access: &'a [Access], pub next_access: &'a [Access], -- 2.49.0