From 291de6bf315e9caf6ef785e7f95acb34422eb760 Mon Sep 17 00:00:00 2001 From: Joshua Simmons Date: Sat, 6 Apr 2024 09:44:59 +0200 Subject: [PATCH] narcissus-gpu: Use VK_EXT_swapchain_maintenance1 Fixes the broken way we were handling presentation semaphore recycling as well as swapchain destruction. When VK_EXT_swapchain_maintenance1 is unavailable, fallback to a fixed frame delay. Remove the unused delay queue structure. --- .../narcissus-gpu/src/backend/vulkan/mod.rs | 85 +++++-- .../narcissus-gpu/src/backend/vulkan/wsi.rs | 213 ++++++++++++++---- engine/narcissus-gpu/src/delay_queue.rs | 42 ---- engine/narcissus-gpu/src/lib.rs | 1 - 4 files changed, 231 insertions(+), 110 deletions(-) delete mode 100644 engine/narcissus-gpu/src/delay_queue.rs diff --git a/engine/narcissus-gpu/src/backend/vulkan/mod.rs b/engine/narcissus-gpu/src/backend/vulkan/mod.rs index abe3466..ed5bc32 100644 --- a/engine/narcissus-gpu/src/backend/vulkan/mod.rs +++ b/engine/narcissus-gpu/src/backend/vulkan/mod.rs @@ -1,6 +1,7 @@ use std::{ cell::{Cell, RefCell, UnsafeCell}, collections::{HashMap, VecDeque}, + ffi::CStr, marker::PhantomData, os::raw::c_char, ptr::NonNull, @@ -15,14 +16,13 @@ use narcissus_core::{ use vulkan_sys as vk; use crate::{ - delay_queue::DelayQueue, frame_counter::FrameCounter, Bind, BindGroupLayout, - BindGroupLayoutDesc, Buffer, BufferArg, BufferDesc, BufferImageCopy, BufferUsageFlags, - CmdEncoder, ComputePipelineDesc, Device, Extent2d, Extent3d, Frame, GlobalBarrier, - GpuConcurrent, GraphicsPipelineDesc, Image, ImageBarrier, ImageBlit, ImageDesc, ImageDimension, - ImageFormat, ImageLayout, ImageTiling, ImageUsageFlags, ImageViewDesc, IndexType, - MemoryLocation, Offset2d, Offset3d, PersistentBuffer, Pipeline, Sampler, SamplerAddressMode, - SamplerCompareOp, SamplerDesc, SamplerFilter, SwapchainOutOfDateError, ThreadToken, - TransientBuffer, TypedBind, + frame_counter::FrameCounter, Bind, BindGroupLayout, BindGroupLayoutDesc, Buffer, BufferArg, + BufferDesc, BufferImageCopy, BufferUsageFlags, CmdEncoder, ComputePipelineDesc, Device, + Extent2d, Extent3d, Frame, GlobalBarrier, GpuConcurrent, GraphicsPipelineDesc, Image, + ImageBarrier, ImageBlit, ImageDesc, ImageDimension, ImageFormat, ImageLayout, ImageTiling, + ImageUsageFlags, ImageViewDesc, IndexType, MemoryLocation, Offset2d, Offset3d, + PersistentBuffer, Pipeline, Sampler, SamplerAddressMode, SamplerCompareOp, SamplerDesc, + SamplerFilter, SwapchainOutOfDateError, ThreadToken, TransientBuffer, TypedBind, }; mod allocator; @@ -46,9 +46,12 @@ pub struct VulkanConstants { /// recycling of resources. num_frames: usize, - /// How many frames to delay swapchain destruction. There's no correct answer - /// here (spec bug) we're just picking a big number and hoping for the best. - swapchain_destroy_delay: usize, + /// How many frames to delay swapchain semaphore release and swapchain + /// destruction. There's no correct answer here (spec bug) we're just picking a + /// big number and hoping for the best. + /// + /// This will not be used if VK_EXT_swapchain_maintenance1 is available. + swapchain_semaphore_destroy_delay: u64, /// How large should transient buffers be, this will limit the maximum size of /// transient allocations. @@ -79,7 +82,7 @@ pub struct VulkanConstants { const VULKAN_CONSTANTS: VulkanConstants = VulkanConstants { num_frames: 2, - swapchain_destroy_delay: 8, + swapchain_semaphore_destroy_delay: 8, transient_buffer_size: 4 * 1024 * 1024, tlsf_default_super_block_size: 128 * 1024 * 1024, tlsf_small_super_block_divisor: 16, @@ -326,8 +329,6 @@ impl VulkanFrame { } } -type SwapchainDestroyQueue = DelayQueue<(vk::SwapchainKHR, vk::SurfaceKHR, Box<[vk::ImageView]>)>; - pub(crate) struct VulkanDevice { instance: vk::Instance, physical_device: vk::PhysicalDevice, @@ -349,9 +350,9 @@ pub(crate) struct VulkanDevice { bind_group_layout_pool: Mutex>, pipeline_pool: Mutex>, + recycled_fences: Mutex>, recycled_semaphores: Mutex>, recycled_descriptor_pools: Mutex>, - recycled_transient_buffers: Mutex>, allocator: VulkanAllocator, @@ -378,7 +379,7 @@ impl VulkanDevice { cstr!("libvulkan.so.1").as_ptr(), libc::RTLD_NOW | libc::RTLD_LOCAL, ); - libc::dlsym(module, cstr!("vkGetInstanceProcAddr").as_ptr()) + libc::dlsym(module, (c"vkGetInstanceProcAddr").as_ptr()) }; let global_fn = unsafe { vk::GlobalFunctions::new(get_proc_addr) }; @@ -394,7 +395,7 @@ impl VulkanDevice { } #[cfg(debug_assertions)] - let enabled_layers = &[cstr!("VK_LAYER_KHRONOS_validation").as_ptr()]; + let enabled_layers = &[(c"VK_LAYER_KHRONOS_validation").as_ptr()]; #[cfg(not(debug_assertions))] let enabled_layers = &[]; @@ -404,8 +405,24 @@ impl VulkanDevice { let mut enabled_extensions = vec![]; - let wsi_support = - VulkanWsi::check_instance_extensions(&extension_properties, &mut enabled_extensions); + let mut has_get_surface_capabilities2 = false; + for extension in &extension_properties { + let extension_name = CStr::from_bytes_until_nul(&extension.extension_name).unwrap(); + if extension_name.to_str().unwrap() == "VK_KHR_get_surface_capabilities2" { + has_get_surface_capabilities2 = true; + enabled_extensions.push(extension_name); + break; + } + } + + assert!(has_get_surface_capabilities2); + + let mut wsi_support = default(); + VulkanWsi::check_instance_extensions( + &extension_properties, + &mut enabled_extensions, + &mut wsi_support, + ); let enabled_extensions = enabled_extensions .iter() @@ -434,8 +451,6 @@ impl VulkanDevice { let instance_fn = vk::InstanceFunctions::new(&global_fn, instance, vk::VERSION_1_2); - let wsi = Box::new(VulkanWsi::new(&global_fn, instance, &wsi_support)); - let physical_devices = vk_vec(|count, ptr| unsafe { instance_fn.enumerate_physical_devices(instance, count, ptr) }); @@ -555,7 +570,11 @@ impl VulkanDevice { let mut enabled_extensions = vec![]; - VulkanWsi::check_device_extensions(&extension_properties, &mut enabled_extensions); + VulkanWsi::check_device_extensions( + &extension_properties, + &mut enabled_extensions, + &mut wsi_support, + ); let enabled_extensions = enabled_extensions .iter() @@ -596,6 +615,8 @@ impl VulkanDevice { let device_fn = vk::DeviceFunctions::new(&instance_fn, device, vk::VERSION_1_3); + let wsi = Box::new(VulkanWsi::new(&global_fn, instance, wsi_support)); + let universal_queue = unsafe { let mut queue = vk::Queue::default(); device_fn.get_device_queue(device, queue_family_index, 0, &mut queue); @@ -701,6 +722,7 @@ impl VulkanDevice { bind_group_layout_pool: default(), pipeline_pool: default(), + recycled_fences: default(), recycled_semaphores: default(), recycled_descriptor_pools: default(), recycled_transient_buffers: default(), @@ -779,6 +801,21 @@ impl VulkanDevice { } } + fn request_fence(&self) -> vk::Fence { + if let Some(fence) = self.recycled_fences.lock().pop_front() { + let fences = &[fence]; + vk_check!(self.device_fn.reset_fences(self.device, fences)); + fence + } else { + let mut fence = vk::Fence::null(); + let create_info = vk::FenceCreateInfo::default(); + vk_check!(self + .device_fn + .create_fence(self.device, &create_info, None, &mut fence)); + fence + } + } + fn request_semaphore(&self) -> vk::Semaphore { if let Some(semaphore) = self.recycled_semaphores.lock().pop_front() { semaphore @@ -2599,6 +2636,10 @@ impl Drop for VulkanDevice { } } + for fence in self.recycled_fences.get_mut() { + unsafe { self.device_fn.destroy_fence(device, *fence, None) } + } + for semaphore in self .recycled_semaphores .get_mut() diff --git a/engine/narcissus-gpu/src/backend/vulkan/wsi.rs b/engine/narcissus-gpu/src/backend/vulkan/wsi.rs index c8c7585..74949b1 100644 --- a/engine/narcissus-gpu/src/backend/vulkan/wsi.rs +++ b/engine/narcissus-gpu/src/backend/vulkan/wsi.rs @@ -12,11 +12,10 @@ use vulkan_sys as vk; use crate::{ backend::vulkan::{vk_vec, vulkan_format, VulkanImageHolder, VulkanImageSwapchain}, - delay_queue::DelayQueue, vk_check, Frame, Image, ImageFormat, SwapchainOutOfDateError, }; -use super::{SwapchainDestroyQueue, VulkanDevice, VulkanFrame, VULKAN_CONSTANTS}; +use super::{VulkanDevice, VulkanFrame, VULKAN_CONSTANTS}; #[derive(Default)] struct VulkanPresentInfo { @@ -46,19 +45,36 @@ pub struct VulkanSwapchain { capabilities: vk::SurfaceCapabilitiesKHR, } -#[derive(Default)] +#[derive(Default, Clone, Copy, Debug)] pub struct VulkanWsiSupport { wayland: bool, xlib: bool, xcb: bool, + surface_maintenance1: bool, + swapchain_maintenance1: bool, +} + +struct RecycleSwapchainSemaphore { + fence: vk::Fence, + semaphore: vk::Semaphore, + swapchain: vk::SwapchainKHR, +} + +struct DestroySwapchain { + swapchain: vk::SwapchainKHR, + surface: vk::SurfaceKHR, + image_views: Box<[vk::ImageView]>, } pub struct VulkanWsi { + support: VulkanWsiSupport, + surfaces: Mutex>, swapchains: Mutex>, suboptimal_swapchains: Mutex>, - swapchain_destroy_queue: Mutex, + recycle_swapchain_semaphores: Mutex>, + destroy_swapchains: Mutex>, xcb_surface_fn: Option, xlib_surface_fn: Option, @@ -73,13 +89,16 @@ impl VulkanWsi { pub fn check_instance_extensions<'a>( extension_properties: &'a [vk::ExtensionProperties], enabled_extensions: &mut Vec<&'a CStr>, - ) -> VulkanWsiSupport { - let mut wsi_support: VulkanWsiSupport = default(); - + wsi_support: &mut VulkanWsiSupport, + ) { for extension in extension_properties { let extension_name = CStr::from_bytes_until_nul(&extension.extension_name).unwrap(); match extension_name.to_str().unwrap() { + "VK_EXT_surface_maintenance1" => { + wsi_support.surface_maintenance1 = true; + enabled_extensions.push(extension_name); + } "VK_KHR_wayland_surface" => { wsi_support.wayland = true; enabled_extensions.push(extension_name); @@ -101,8 +120,6 @@ impl VulkanWsi { if wsi_support.wayland || wsi_support.xlib || wsi_support.xcb { enabled_extensions.push(cstr!("VK_KHR_surface")); } - - wsi_support } /// Check available WSI device extensions, and append required extensions to @@ -112,36 +129,44 @@ impl VulkanWsi { pub fn check_device_extensions<'a>( extension_properties: &'a [vk::ExtensionProperties], enabled_extensions: &mut Vec<&'a CStr>, + wsi_support: &mut VulkanWsiSupport, ) { + let mut khr_swapchain_support = false; for extension in extension_properties { let extension_name = CStr::from_bytes_until_nul(&extension.extension_name).unwrap(); - if extension_name.to_str().unwrap() == "VK_KHR_swapchain" { - enabled_extensions.push(extension_name); - return; + match extension_name.to_str().unwrap() { + "VK_KHR_swapchain" => { + khr_swapchain_support = true; + enabled_extensions.push(extension_name); + } + "VK_EXT_swapchain_maintenance1" => { + wsi_support.swapchain_maintenance1 = true; + enabled_extensions.push(extension_name); + } + _ => {} } } - - panic!("VK_KHR_swapchain not supported") + assert!(khr_swapchain_support); } pub fn new( global_fn: &vk::GlobalFunctions, instance: vk::Instance, - wsi_support: &VulkanWsiSupport, + support: VulkanWsiSupport, ) -> Self { - let xcb_surface_fn = if wsi_support.xcb { + let xcb_surface_fn = if support.xcb { Some(vk::XcbSurfaceKHRFunctions::new(global_fn, instance)) } else { None }; - let xlib_surface_fn = if wsi_support.xlib { + let xlib_surface_fn = if support.xlib { Some(vk::XlibSurfaceKHRFunctions::new(global_fn, instance)) } else { None }; - let wayland_surface_fn = if wsi_support.wayland { + let wayland_surface_fn = if support.wayland { Some(vk::WaylandSurfaceKHRFunctions::new(global_fn, instance)) } else { None @@ -151,12 +176,15 @@ impl VulkanWsi { let swapchain_fn = vk::SwapchainKHRFunctions::new(global_fn, instance, vk::VERSION_1_1); VulkanWsi { + support, + surfaces: default(), swapchains: default(), suboptimal_swapchains: default(), - swapchain_destroy_queue: Mutex::new(DelayQueue::new( - VULKAN_CONSTANTS.swapchain_destroy_delay, - )), + + recycle_swapchain_semaphores: default(), + destroy_swapchains: default(), + xcb_surface_fn, xlib_surface_fn, wayland_surface_fn, @@ -416,7 +444,7 @@ impl VulkanDevice { swapchain, image_views, } => { - let destroy_image_views = + let detach_image_views = |images: &mut Pool| -> Box<[vk::ImageView]> { let mut vulkan_image_views = Vec::new(); for &image_view in image_views.iter() { @@ -438,13 +466,13 @@ impl VulkanDevice { || height != *current_height || suboptimal_swapchains.remove(&swapchain) { - let image_views = destroy_image_views(&mut image_pool); + let image_views = detach_image_views(&mut image_pool); old_swapchain = swapchain; - self.wsi.swapchain_destroy_queue.lock().push(( - old_swapchain, - vk::SurfaceKHR::null(), + self.wsi.destroy_swapchains.lock().push(DestroySwapchain { + swapchain: old_swapchain, + surface: vk::SurfaceKHR::null(), image_views, - )); + }); vulkan_swapchain.state = VulkanSwapchainState::Vacant; continue; @@ -471,14 +499,15 @@ impl VulkanDevice { suboptimal_swapchains.insert(swapchain); } vk::Result::ErrorOutOfDateKHR => { - let image_views = destroy_image_views(&mut image_pool); + let image_views = detach_image_views(&mut image_pool); old_swapchain = swapchain; - self.wsi.swapchain_destroy_queue.lock().push(( - old_swapchain, - vk::SurfaceKHR::null(), + + self.wsi.destroy_swapchains.lock().push(DestroySwapchain { + swapchain: old_swapchain, + surface: vk::SurfaceKHR::null(), image_views, - )); + }); vulkan_swapchain.state = VulkanSwapchainState::Vacant; return Err(SwapchainOutOfDateError(())); @@ -533,11 +562,11 @@ impl VulkanDevice { } } - self.wsi.swapchain_destroy_queue.lock().push(( + self.wsi.destroy_swapchains.lock().push(DestroySwapchain { swapchain, surface, - vulkan_image_views.into_boxed_slice(), - )); + image_views: vulkan_image_views.into_boxed_slice(), + }); } } } @@ -559,7 +588,7 @@ impl VulkanDevice { !present_swapchain.acquire.is_null(), "acquiring a swapchain image multiple times" ); - present_swapchain.release = self.request_transient_semaphore(frame); + present_swapchain.release = self.request_semaphore(); wait_semaphores.push(vk::SemaphoreSubmitInfo { semaphore: present_swapchain.acquire, @@ -601,12 +630,53 @@ impl VulkanDevice { } pub fn wsi_begin_frame(&self) { - self.wsi - .swapchain_destroy_queue - .lock() - .expire(|(swapchain, surface, image_views)| { - self.destroy_swapchain_deferred(surface, swapchain, &image_views); + let mut recycle_swapchain_semaphores = self.wsi.recycle_swapchain_semaphores.lock(); + + if self.wsi.support.swapchain_maintenance1 { + recycle_swapchain_semaphores.retain(|recycle| { + // With VK_EXT_swapchain_maintenance1 we can just check the fence. + if unsafe { + self.device_fn.get_fence_status(self.device, recycle.fence) + == vk::Result::NotReady + } { + return true; + } + + self.recycled_fences.lock().push_back(recycle.fence); + self.recycled_semaphores.lock().push_back(recycle.semaphore); + + false }); + } else { + recycle_swapchain_semaphores.retain_mut(|recycle| { + // Without VK_EXT_swapchain_maintenance1 we use the fence to store a counter. + // When the counter hits zero we cleanup. + if !recycle.fence.is_null() { + recycle.fence = vk::Fence::from_raw(recycle.fence.as_raw() - 1); + return true; + } + + self.recycled_semaphores.lock().push_back(recycle.semaphore); + + false + }); + } + + self.wsi.destroy_swapchains.lock().retain(|destroy| { + let found_associated_semaphore = recycle_swapchain_semaphores + .iter() + .any(|recycle| destroy.swapchain == recycle.swapchain); + + if !found_associated_semaphore { + self.destroy_swapchain_deferred( + destroy.surface, + destroy.swapchain, + &destroy.image_views, + ); + } + + found_associated_semaphore + }); } pub fn wsi_end_frame(&self, frame: &mut VulkanFrame) { @@ -628,14 +698,41 @@ impl VulkanDevice { ); } + let fences: &[_] = if self.wsi.support.swapchain_maintenance1 { + arena.alloc_slice_fill_with(presents.len(), |_| self.request_fence()) + } else { + arena.alloc_slice_fill_with(presents.len(), |_| { + vk::Fence::from_raw(VULKAN_CONSTANTS.swapchain_semaphore_destroy_delay) + }) + }; let wait_semaphores: &[_] = arena.alloc_slice_fill_iter(presents.iter().map(|x| x.release)); let swapchains: &[_] = arena.alloc_slice_fill_iter(presents.iter().map(|x| x.swapchain)); let image_indices: &[_] = arena.alloc_slice_fill_iter(presents.iter().map(|x| x.image_index)); - let results = arena.alloc_slice_fill_copy(swapchains.len(), vk::Result::Success); + for i in 0..presents.len() { + self.wsi + .recycle_swapchain_semaphores + .lock() + .push(RecycleSwapchainSemaphore { + fence: fences[i], + semaphore: wait_semaphores[i], + swapchain: swapchains[i], + }); + } + + let present_fence_info = vk::SwapchainPresentFenceInfoEXT { + fences: fences.into(), + ..default() + }; + let present_info = vk::PresentInfoKHR { + _next: if self.wsi.support.swapchain_maintenance1 { + &present_fence_info as *const _ as *const core::ffi::c_void + } else { + core::ptr::null() + }, wait_semaphores: wait_semaphores.into(), swapchains: (swapchains, image_indices).into(), results: results.as_mut_ptr(), @@ -662,14 +759,40 @@ impl VulkanDevice { } pub fn wsi_drop(&mut self) { + for recyle in self.wsi.recycle_swapchain_semaphores.get_mut().drain(..) { + if self.wsi.support.swapchain_maintenance1 { + let fences = &[recyle.fence]; + vk_check!(self.device_fn.wait_for_fences( + self.device, + fences, + vk::Bool32::True, + !0 + )); + unsafe { + self.device_fn + .destroy_fence(self.device, recyle.fence, None) + }; + } + + unsafe { + self.device_fn + .destroy_semaphore(self.device, recyle.semaphore, None); + } + } + let destroyed_swapchains = self .wsi - .swapchain_destroy_queue + .destroy_swapchains .get_mut() .drain(..) .collect::>(); - for (_, (swapchain, surface, image_views)) in destroyed_swapchains { - self.destroy_swapchain_deferred(surface, swapchain, &image_views); + + for destroy in destroyed_swapchains { + self.destroy_swapchain_deferred( + destroy.surface, + destroy.swapchain, + &destroy.image_views, + ); } for (&surface, swapchain) in self.wsi.swapchains.get_mut().iter() { diff --git a/engine/narcissus-gpu/src/delay_queue.rs b/engine/narcissus-gpu/src/delay_queue.rs deleted file mode 100644 index 44efaeb..0000000 --- a/engine/narcissus-gpu/src/delay_queue.rs +++ /dev/null @@ -1,42 +0,0 @@ -use std::collections::VecDeque; - -pub struct DelayQueue { - delay: usize, - counter: usize, - values: VecDeque<(usize, T)>, -} - -impl DelayQueue { - pub fn new(delay: usize) -> Self { - Self { - delay, - counter: 0, - values: VecDeque::new(), - } - } - - pub fn push(&mut self, value: T) { - self.values.push_back((self.counter + self.delay, value)) - } - - pub fn expire(&mut self, mut f: F) { - self.counter += 1; - - let to_remove = self - .values - .iter() - .take_while(|(expiry, _)| *expiry == self.counter) - .count(); - - for _ in 0..to_remove { - f(self.values.pop_front().unwrap().1); - } - } - - pub fn drain(&mut self, range: R) -> std::collections::vec_deque::Drain<'_, (usize, T)> - where - R: std::ops::RangeBounds, - { - self.values.drain(range) - } -} diff --git a/engine/narcissus-gpu/src/lib.rs b/engine/narcissus-gpu/src/lib.rs index ce07042..fc8e284 100644 --- a/engine/narcissus-gpu/src/lib.rs +++ b/engine/narcissus-gpu/src/lib.rs @@ -6,7 +6,6 @@ use narcissus_core::{ }; mod backend; -mod delay_queue; mod frame_counter; mod mapped_buffer; mod tlsf; -- 2.49.0