From: Joshua Simmons Date: Sun, 6 Aug 2023 13:05:16 +0000 (+0200) Subject: narcissus-gpu: Add new API for mapped buffers X-Git-Url: https://git.nega.tv//gitweb.cgi?a=commitdiff_plain;h=1dda8d5aa3bcc7bb922d0ad36d7ce11095c9e889;p=josh%2Fnarcissus narcissus-gpu: Add new API for mapped buffers Add new functionality for persistent and transient buffers that allows easy copying from Rust data types. Replace code in narcissus with the new APIs. Fix some GPU read - CPU write synchronisation issues by switching some persistent buffers into automatically managed transient buffers. --- diff --git a/bins/narcissus/src/helpers.rs b/bins/narcissus/src/helpers.rs index 8ab78df..f64afea 100644 --- a/bins/narcissus/src/helpers.rs +++ b/bins/narcissus/src/helpers.rs @@ -1,11 +1,10 @@ use std::path::Path; use narcissus_core::{obj, Widen}; -use narcissus_gpu::{Buffer, BufferDesc, BufferUsageFlags, Device, MemoryLocation}; use narcissus_image as image; use narcissus_maths::{vec2, vec3, vec4, Vec2, Vec3}; -use crate::{pipelines::Vertex, Blit}; +use crate::pipelines::Vertex; pub fn load_obj>(path: P) -> (Vec, Vec) { #[derive(Default)] @@ -88,27 +87,3 @@ pub fn load_image>(path: P) -> image::Image { ); texture } - -pub fn create_host_buffer_with_data( - device: &dyn Device, - usage: BufferUsageFlags, - data: &[T], -) -> Buffer -where - T: Blit, -{ - // SAFETY: T: Blittable which implies it's freely convertable to a byte slice. - unsafe { - let len = std::mem::size_of_val(data); - let initial_data = std::slice::from_raw_parts(data.as_ptr() as *const u8, len); - device.create_buffer_with_data( - &BufferDesc { - memory_location: MemoryLocation::Host, - host_mapped: true, - usage, - size: len, - }, - initial_data, - ) - } -} diff --git a/bins/narcissus/src/main.rs b/bins/narcissus/src/main.rs index 31a07d7..79cf71c 100644 --- a/bins/narcissus/src/main.rs +++ b/bins/narcissus/src/main.rs @@ -4,72 +4,25 @@ use crate::{ fonts::{FontFamily, Fonts}, pipelines::{BasicPipeline, TextPipeline}, }; -use helpers::{create_host_buffer_with_data, load_image, load_obj}; -use mapped_buffer::MappedBuffer; +use helpers::{load_image, load_obj}; use narcissus_app::{create_app, Event, Key, PressedState, WindowDesc}; use narcissus_core::{default, rand::Pcg64, slice::array_windows}; -use narcissus_font::{FontCollection, GlyphCache, HorizontalMetrics, TouchedGlyph}; +use narcissus_font::{FontCollection, GlyphCache, HorizontalMetrics}; use narcissus_gpu::{ - create_device, Access, BufferDesc, BufferImageCopy, BufferUsageFlags, ClearValue, Extent2d, - Extent3d, ImageAspectFlags, ImageBarrier, ImageDesc, ImageDimension, ImageFormat, ImageLayout, - ImageTiling, ImageUsageFlags, LoadOp, MemoryLocation, Offset2d, Offset3d, RenderingAttachment, - RenderingDesc, Scissor, StoreOp, ThreadToken, Viewport, + create_device, Access, BufferDesc, BufferImageCopy, BufferUsageFlags, ClearValue, DeviceExt, + Extent2d, Extent3d, ImageAspectFlags, ImageBarrier, ImageDesc, ImageDimension, ImageFormat, + ImageLayout, ImageTiling, ImageUsageFlags, LoadOp, MemoryLocation, Offset2d, Offset3d, + RenderingAttachment, RenderingDesc, Scissor, StoreOp, ThreadToken, Viewport, }; use narcissus_maths::{sin_cos_pi_f32, vec3, Affine3, HalfTurn, Mat3, Mat4, Point3, Vec3}; -use pipelines::{BasicUniforms, GlyphInstance, PrimitiveVertex, TextUniforms}; +use pipelines::{BasicUniforms, PrimitiveInstance, PrimitiveVertex, TextUniforms}; mod fonts; mod helpers; -mod mapped_buffer; mod pipelines; -const MAX_SHARKS: usize = 262_144; const NUM_SHARKS: usize = 50; - const GLYPH_CACHE_SIZE: usize = 1024; -const MAX_GLYPH_INSTANCES: usize = 262_144; -const MAX_GLYPHS: usize = 8192; - -/// Marker trait indicates it's safe to convert a given type directly to an -/// array of bytes. -/// -/// # Safety -/// -/// Must not be applied to any types with padding -pub unsafe trait Blit {} - -unsafe impl Blit for u8 {} -unsafe impl Blit for u16 {} -unsafe impl Blit for Affine3 {} -unsafe impl Blit for TouchedGlyph {} - -trait AsBytes { - fn as_bytes(&self) -> &[u8]; -} - -impl AsBytes for T -where - T: Blit, -{ - fn as_bytes(&self) -> &[u8] { - // SAFETY: Safe while `Blit` trait is correctly applied. - unsafe { - std::slice::from_raw_parts(self as *const _ as *const u8, std::mem::size_of_val(self)) - } - } -} - -impl AsBytes for [T] -where - T: Blit, -{ - fn as_bytes(&self) -> &[u8] { - // SAFETY: Safe while `Blit` trait is correctly applied. - unsafe { - std::slice::from_raw_parts(self as *const _ as *const u8, std::mem::size_of_val(self)) - } - } -} pub fn main() { let app = create_app(); @@ -91,14 +44,14 @@ pub fn main() { let blåhaj_image_data = load_image("bins/narcissus/data/blåhaj.png"); let (blåhaj_vertices, blåhaj_indices) = load_obj("bins/narcissus/data/blåhaj.obj"); - let blåhaj_vertex_buffer = create_host_buffer_with_data( - device.as_ref(), + let blåhaj_vertex_buffer = device.create_mapped_buffer_with_data( + MemoryLocation::Device, BufferUsageFlags::STORAGE, blåhaj_vertices.as_slice(), ); - let blåhaj_index_buffer = create_host_buffer_with_data( - device.as_ref(), + let blåhaj_index_buffer = device.create_mapped_buffer_with_data( + MemoryLocation::Device, BufferUsageFlags::INDEX, blåhaj_indices.as_slice(), ); @@ -117,24 +70,6 @@ pub fn main() { mip_levels: 1, }); - let mut basic_transform_buffer = MappedBuffer::new( - device.as_ref(), - BufferUsageFlags::STORAGE, - std::mem::size_of::() * MAX_SHARKS, - ); - - let mut glyph_instance_buffer = MappedBuffer::new( - device.as_ref(), - BufferUsageFlags::STORAGE, - std::mem::size_of::() * MAX_GLYPH_INSTANCES, - ); - - let mut glyph_buffer = MappedBuffer::new( - device.as_ref(), - BufferUsageFlags::STORAGE, - std::mem::size_of::() * MAX_GLYPHS, - ); - let glyph_atlas = device.create_image(&ImageDesc { memory_location: MemoryLocation::Device, usage: ImageUsageFlags::SAMPLED | ImageUsageFlags::TRANSFER, @@ -206,7 +141,7 @@ pub fn main() { device.cmd_copy_buffer_to_image( &mut cmd_buffer, - blåhaj_buffer.into(), + blåhaj_buffer.to_arg(), blåhaj_image, ImageLayout::Optimal, &[BufferImageCopy { @@ -349,13 +284,6 @@ pub fn main() { depth_height = height; } - let _buffer = device.request_transient_buffer( - &frame, - &thread_token, - BufferUsageFlags::UNIFORM, - 16 * 1024 * 1024, - ); - let frame_start = Instant::now() - start_time; let frame_start = frame_start.as_secs_f32() * 0.125; @@ -366,8 +294,6 @@ pub fn main() { transform.matrix *= Mat3::from_axis_rotation(Vec3::Y, HalfTurn::new(0.002 * direction)) } - basic_transform_buffer.write_slice(&shark_transforms); - let (s, c) = sin_cos_pi_f32(frame_start * 0.2); let camera_height = c * 8.0; let camera_radius = 20.0; @@ -455,7 +381,7 @@ pub fn main() { *rng.array_select(&[0xfffac228, 0xfff57d15, 0xffd44842, 0xff9f2a63]); let instance_index = primitive_instances.len() as u32; - primitive_instances.push(GlyphInstance { + primitive_instances.push(PrimitiveInstance { x, y, touched_glyph_index, @@ -479,13 +405,8 @@ pub fn main() { let atlas_width = glyph_cache.width() as u32; let atlas_height = glyph_cache.height() as u32; - glyph_instance_buffer.write_slice(&primitive_instances); - let (touched_glyphs, texture) = glyph_cache.update_atlas(); - // Update information for the glyphs we need this frame. - glyph_buffer.write_slice(touched_glyphs); - // If the atlas has been updated, we need to upload it to the GPU. if let Some(texture) = texture { let width = atlas_width; @@ -512,7 +433,26 @@ pub fn main() { device.cmd_copy_buffer_to_image( &mut cmd_buffer, - buffer.into(), + buffer.to_arg(), + image, + ImageLayout::Optimal, + &[BufferImageCopy { + buffer_offset: 0, + buffer_row_length: 0, + buffer_image_height: 0, + image_subresource: default(), + image_offset: Offset3d { x: 0, y: 0, z: 0 }, + image_extent: Extent3d { + width, + height, + depth: 1, + }, + }], + ); + + device.cmd_copy_buffer_to_image( + &mut cmd_buffer, + buffer.to_arg(), image, ImageLayout::Optimal, &[BufferImageCopy { @@ -592,9 +532,9 @@ pub fn main() { &thread_token, &mut cmd_buffer, &BasicUniforms { clip_from_model }, - blåhaj_vertex_buffer, - blåhaj_index_buffer, - basic_transform_buffer.buffer(), + &blåhaj_vertex_buffer, + &blåhaj_index_buffer, + shark_transforms.as_slice(), blåhaj_image, ); @@ -620,8 +560,8 @@ pub fn main() { atlas_height, }, primitive_vertices.as_slice(), - glyph_buffer.buffer(), - glyph_instance_buffer.buffer(), + touched_glyphs, + primitive_instances.as_slice(), glyph_atlas, ); diff --git a/bins/narcissus/src/mapped_buffer.rs b/bins/narcissus/src/mapped_buffer.rs deleted file mode 100644 index fedfcf6..0000000 --- a/bins/narcissus/src/mapped_buffer.rs +++ /dev/null @@ -1,53 +0,0 @@ -use narcissus_gpu::{Buffer, BufferDesc, BufferUsageFlags, Device, MemoryLocation}; - -use crate::Blit; - -pub struct MappedBuffer<'a> { - device: &'a dyn Device, - buffer: Buffer, - slice: &'a mut [u8], -} - -impl<'a> MappedBuffer<'a> { - pub fn new(device: &'a dyn Device, usage: BufferUsageFlags, len: usize) -> Self { - let buffer = device.create_buffer(&BufferDesc { - memory_location: MemoryLocation::Host, - host_mapped: true, - usage, - size: len, - }); - unsafe { - let ptr = device.map_buffer(buffer); - let slice = std::slice::from_raw_parts_mut(ptr, len); - Self { - device, - buffer, - slice, - } - } - } - - pub fn buffer(&self) -> Buffer { - self.buffer - } - - pub fn write_slice(&mut self, values: &[T]) - where - T: Blit, - { - unsafe { - let len = std::mem::size_of_val(values); - let src = std::slice::from_raw_parts(values.as_ptr() as *const u8, len); - self.slice[..len].copy_from_slice(src) - } - } -} - -impl<'a> Drop for MappedBuffer<'a> { - fn drop(&mut self) { - // SAFETY: Make sure we don't have the slice outlive the mapping. - unsafe { - self.device.unmap_buffer(self.buffer); - } - } -} diff --git a/bins/narcissus/src/pipelines/basic.rs b/bins/narcissus/src/pipelines/basic.rs index 41fa7d9..7c75e3a 100644 --- a/bins/narcissus/src/pipelines/basic.rs +++ b/bins/narcissus/src/pipelines/basic.rs @@ -1,14 +1,12 @@ use narcissus_core::{cstr, default, include_bytes_align}; use narcissus_gpu::{ Bind, BindGroupLayout, BindGroupLayoutDesc, BindGroupLayoutEntryDesc, BindingType, BlendMode, - Buffer, BufferUsageFlags, CmdBuffer, CompareOp, CullingMode, Device, Frame, FrontFace, + BufferUsageFlags, CmdBuffer, CompareOp, CullingMode, Device, DeviceExt, Frame, FrontFace, GraphicsPipelineDesc, GraphicsPipelineLayout, Image, ImageFormat, ImageLayout, IndexType, - Pipeline, PolygonMode, Sampler, SamplerAddressMode, SamplerDesc, SamplerFilter, ShaderDesc, - ShaderStageFlags, ThreadToken, Topology, TypedBind, + MappedBuffer, Pipeline, PolygonMode, Sampler, SamplerAddressMode, SamplerDesc, SamplerFilter, + ShaderDesc, ShaderStageFlags, ThreadToken, Topology, TypedBind, }; -use narcissus_maths::Mat4; - -use crate::{AsBytes, Blit}; +use narcissus_maths::{Affine3, Mat4}; const VERT_SPV: &[u8] = include_bytes_align!(4, "../shaders/basic.vert.spv"); const FRAG_SPV: &[u8] = include_bytes_align!(4, "../shaders/basic.frag.spv"); @@ -27,9 +25,6 @@ pub struct Vertex { pub texcoord: [f32; 4], } -unsafe impl Blit for BasicUniforms {} -unsafe impl Blit for Vertex {} - pub struct BasicPipeline { pub uniforms_bind_group_layout: BindGroupLayout, pub storage_bind_group_layout: BindGroupLayout, @@ -126,24 +121,29 @@ impl BasicPipeline { pub fn bind( &self, - device: &dyn Device, + device: &(dyn Device + 'static), frame: &Frame, thread_token: &ThreadToken, cmd_buffer: &mut CmdBuffer, basic_uniforms: &BasicUniforms, - vertex_buffer: Buffer, - index_buffer: Buffer, - transform_buffer: Buffer, + vertex_buffer: &MappedBuffer, + index_buffer: &MappedBuffer, + transforms: &[Affine3], texture: Image, ) { - let mut uniform_buffer = device.request_transient_buffer( + let uniform_buffer = device.request_transient_buffer_with_data( frame, thread_token, BufferUsageFlags::UNIFORM, - std::mem::size_of::(), + basic_uniforms, ); - uniform_buffer.copy_from_slice(basic_uniforms.as_bytes()); + let transform_buffer = device.request_transient_buffer_with_data( + frame, + thread_token, + BufferUsageFlags::STORAGE, + transforms, + ); device.cmd_set_pipeline(cmd_buffer, self.pipeline); @@ -155,7 +155,7 @@ impl BasicPipeline { &[Bind { binding: 0, array_element: 0, - typed: TypedBind::UniformBuffer(&[uniform_buffer.into()]), + typed: TypedBind::UniformBuffer(&[uniform_buffer.to_arg()]), }], ); @@ -168,12 +168,12 @@ impl BasicPipeline { Bind { binding: 0, array_element: 0, - typed: TypedBind::StorageBuffer(&[vertex_buffer.into()]), + typed: TypedBind::StorageBuffer(&[vertex_buffer.to_arg()]), }, Bind { binding: 1, array_element: 0, - typed: TypedBind::StorageBuffer(&[transform_buffer.into()]), + typed: TypedBind::StorageBuffer(&[transform_buffer.to_arg()]), }, Bind { binding: 2, @@ -188,6 +188,6 @@ impl BasicPipeline { ], ); - device.cmd_set_index_buffer(cmd_buffer, index_buffer.into(), 0, IndexType::U16); + device.cmd_set_index_buffer(cmd_buffer, index_buffer.to_arg(), 0, IndexType::U16); } } diff --git a/bins/narcissus/src/pipelines/mod.rs b/bins/narcissus/src/pipelines/mod.rs index f5241c0..303a68c 100644 --- a/bins/narcissus/src/pipelines/mod.rs +++ b/bins/narcissus/src/pipelines/mod.rs @@ -3,4 +3,4 @@ mod text; pub use basic::{BasicPipeline, BasicUniforms, Vertex}; -pub use text::{GlyphInstance, PrimitiveVertex, TextPipeline, TextUniforms}; +pub use text::{PrimitiveInstance, PrimitiveVertex, TextPipeline, TextUniforms}; diff --git a/bins/narcissus/src/pipelines/text.rs b/bins/narcissus/src/pipelines/text.rs index f4ed102..8ca76a6 100644 --- a/bins/narcissus/src/pipelines/text.rs +++ b/bins/narcissus/src/pipelines/text.rs @@ -1,15 +1,13 @@ use narcissus_core::{cstr, default, include_bytes_align}; -use narcissus_font::TouchedGlyphIndex; +use narcissus_font::{TouchedGlyph, TouchedGlyphIndex}; use narcissus_gpu::{ Bind, BindGroupLayout, BindGroupLayoutDesc, BindGroupLayoutEntryDesc, BindingType, BlendMode, - Buffer, BufferUsageFlags, CmdBuffer, CompareOp, CullingMode, Device, Frame, FrontFace, + BufferUsageFlags, CmdBuffer, CompareOp, CullingMode, Device, DeviceExt, Frame, FrontFace, GraphicsPipelineDesc, GraphicsPipelineLayout, Image, ImageFormat, ImageLayout, Pipeline, PolygonMode, Sampler, SamplerAddressMode, SamplerDesc, SamplerFilter, ShaderDesc, ShaderStageFlags, ThreadToken, Topology, TypedBind, }; -use crate::{AsBytes, Blit}; - const VERT_SPV: &[u8] = include_bytes_align!(4, "../shaders/text.vert.spv"); const FRAG_SPV: &[u8] = include_bytes_align!(4, "../shaders/text.frag.spv"); @@ -41,17 +39,13 @@ impl PrimitiveVertex { #[allow(unused)] #[repr(C)] -pub struct GlyphInstance { +pub struct PrimitiveInstance { pub x: f32, pub y: f32, pub touched_glyph_index: TouchedGlyphIndex, pub color: u32, } -unsafe impl Blit for TextUniforms {} -unsafe impl Blit for PrimitiveVertex {} -unsafe impl Blit for GlyphInstance {} - pub struct TextPipeline { bind_group_layout: BindGroupLayout, sampler: Sampler, @@ -149,28 +143,41 @@ impl TextPipeline { pub fn bind( &self, - device: &dyn Device, + device: &(dyn Device + 'static), frame: &Frame, thread_token: &ThreadToken, cmd_buffer: &mut CmdBuffer, text_uniforms: &TextUniforms, primitive_vertices: &[PrimitiveVertex], - cached_glyphs: Buffer, - glyph_instances: Buffer, + touched_glyphs: &[TouchedGlyph], + primitive_instances: &[PrimitiveInstance], atlas: Image, ) { let uniforms_buffer = device.request_transient_buffer_with_data( frame, thread_token, BufferUsageFlags::UNIFORM, - text_uniforms.as_bytes(), + text_uniforms, ); let primitive_vertex_buffer = device.request_transient_buffer_with_data( frame, thread_token, BufferUsageFlags::STORAGE, - primitive_vertices.as_bytes(), + primitive_vertices, + ); + + let cached_glyphs_buffer = device.request_transient_buffer_with_data( + frame, + thread_token, + BufferUsageFlags::STORAGE, + touched_glyphs, + ); + let glyph_instance_buffer = device.request_transient_buffer_with_data( + frame, + thread_token, + BufferUsageFlags::STORAGE, + primitive_instances, ); device.cmd_set_pipeline(cmd_buffer, self.pipeline); @@ -183,22 +190,22 @@ impl TextPipeline { Bind { binding: 0, array_element: 0, - typed: TypedBind::UniformBuffer(&[uniforms_buffer.into()]), + typed: TypedBind::UniformBuffer(&[uniforms_buffer.to_arg()]), }, Bind { binding: 1, array_element: 0, - typed: TypedBind::StorageBuffer(&[primitive_vertex_buffer.into()]), + typed: TypedBind::StorageBuffer(&[primitive_vertex_buffer.to_arg()]), }, Bind { binding: 2, array_element: 0, - typed: TypedBind::StorageBuffer(&[cached_glyphs.into()]), + typed: TypedBind::StorageBuffer(&[cached_glyphs_buffer.to_arg()]), }, Bind { binding: 3, array_element: 0, - typed: TypedBind::StorageBuffer(&[glyph_instances.into()]), + typed: TypedBind::StorageBuffer(&[glyph_instance_buffer.to_arg()]), }, Bind { binding: 4, diff --git a/libs/narcissus-gpu/src/backend/vulkan/mod.rs b/libs/narcissus-gpu/src/backend/vulkan/mod.rs index 0c83b3b..35a9b3b 100644 --- a/libs/narcissus-gpu/src/backend/vulkan/mod.rs +++ b/libs/narcissus-gpu/src/backend/vulkan/mod.rs @@ -19,7 +19,7 @@ use crate::{ BindGroupLayoutDesc, Buffer, BufferArg, BufferDesc, BufferImageCopy, BufferUsageFlags, CmdBuffer, ComputePipelineDesc, Device, Extent2d, Extent3d, Frame, GlobalBarrier, GpuConcurrent, GraphicsPipelineDesc, Image, ImageBarrier, ImageBlit, ImageDesc, ImageDimension, - ImageFormat, ImageLayout, ImageTiling, ImageUsageFlags, ImageViewDesc, IndexType, + ImageFormat, ImageLayout, ImageTiling, ImageUsageFlags, ImageViewDesc, IndexType, MappedBuffer, MemoryLocation, Offset2d, Offset3d, Pipeline, Sampler, SamplerAddressMode, SamplerCompareOp, SamplerDesc, SamplerFilter, SwapchainOutOfDateError, ThreadToken, TransientBuffer, TypedBind, }; @@ -800,7 +800,40 @@ impl VulkanDevice { semaphore } - fn create_buffer(&self, desc: &BufferDesc, data: Option<&[u8]>) -> Buffer { + fn destroy_deferred( + device_fn: &vk::DeviceFunctions, + device: vk::Device, + frame: &mut VulkanFrame, + ) { + for pipeline_layout in frame.destroyed_pipeline_layouts.get_mut().drain(..) { + unsafe { device_fn.destroy_pipeline_layout(device, pipeline_layout, None) } + } + for pipeline in frame.destroyed_pipelines.get_mut().drain(..) { + unsafe { device_fn.destroy_pipeline(device, pipeline, None) } + } + for descriptor_set_layout in frame.destroyed_descriptor_set_layouts.get_mut().drain(..) { + unsafe { device_fn.destroy_descriptor_set_layout(device, descriptor_set_layout, None) } + } + for sampler in frame.destroyed_samplers.get_mut().drain(..) { + unsafe { device_fn.destroy_sampler(device, sampler, None) } + } + for image_view in frame.destroyed_image_views.get_mut().drain(..) { + unsafe { device_fn.destroy_image_view(device, image_view, None) } + } + for image in frame.destroyed_images.get_mut().drain(..) { + unsafe { device_fn.destroy_image(device, image, None) } + } + for buffer_view in frame.destroyed_buffer_views.get_mut().drain(..) { + unsafe { device_fn.destroy_buffer_view(device, buffer_view, None) } + } + for buffer in frame.destroyed_buffers.get_mut().drain(..) { + unsafe { device_fn.destroy_buffer(device, buffer, None) } + } + } +} + +impl Device for VulkanDevice { + fn create_buffer(&self, desc: &BufferDesc) -> Buffer { let queue_family_indices = &[self.universal_queue_family_index]; let create_info = vk::BufferCreateInfo { @@ -822,17 +855,6 @@ impl VulkanDevice { allocator::VulkanAllocationResource::Buffer(buffer), ); - if let Some(data) = data { - assert!(!memory.mapped_ptr().is_null()); - // SAFETY: The memory has just been allocated, so as long as the pointer is - // non-null, then we can create a slice for it. - unsafe { - let dst = - std::slice::from_raw_parts_mut(memory.mapped_ptr(), memory.size().widen()); - dst[..desc.size].copy_from_slice(data); - } - } - unsafe { self.device_fn.bind_buffer_memory2( self.device, @@ -854,47 +876,6 @@ impl VulkanDevice { Buffer(handle) } - fn destroy_deferred( - device_fn: &vk::DeviceFunctions, - device: vk::Device, - frame: &mut VulkanFrame, - ) { - for pipeline_layout in frame.destroyed_pipeline_layouts.get_mut().drain(..) { - unsafe { device_fn.destroy_pipeline_layout(device, pipeline_layout, None) } - } - for pipeline in frame.destroyed_pipelines.get_mut().drain(..) { - unsafe { device_fn.destroy_pipeline(device, pipeline, None) } - } - for descriptor_set_layout in frame.destroyed_descriptor_set_layouts.get_mut().drain(..) { - unsafe { device_fn.destroy_descriptor_set_layout(device, descriptor_set_layout, None) } - } - for sampler in frame.destroyed_samplers.get_mut().drain(..) { - unsafe { device_fn.destroy_sampler(device, sampler, None) } - } - for image_view in frame.destroyed_image_views.get_mut().drain(..) { - unsafe { device_fn.destroy_image_view(device, image_view, None) } - } - for image in frame.destroyed_images.get_mut().drain(..) { - unsafe { device_fn.destroy_image(device, image, None) } - } - for buffer_view in frame.destroyed_buffer_views.get_mut().drain(..) { - unsafe { device_fn.destroy_buffer_view(device, buffer_view, None) } - } - for buffer in frame.destroyed_buffers.get_mut().drain(..) { - unsafe { device_fn.destroy_buffer(device, buffer, None) } - } - } -} - -impl Device for VulkanDevice { - fn create_buffer(&self, desc: &BufferDesc) -> Buffer { - self.create_buffer(desc, None) - } - - fn create_buffer_with_data(&self, desc: &BufferDesc, initial_data: &[u8]) -> Buffer { - self.create_buffer(desc, Some(initial_data)) - } - fn create_image(&self, desc: &ImageDesc) -> Image { debug_assert_ne!(desc.layer_count, 0, "layers must be at least one"); debug_assert_ne!(desc.width, 0, "width must be at least one"); @@ -1601,12 +1582,7 @@ impl Device for VulkanDevice { ) { let arena = HybridArena::<4096>::new(); - let (src_buffer, base_offset) = match src_buffer { - BufferArg::Unmanaged(buffer) => { - (self.buffer_pool.lock().get(buffer.0).unwrap().buffer, 0) - } - BufferArg::Transient(buffer) => (vk::Buffer::from_raw(buffer.buffer), buffer.offset), - }; + let (src_buffer, base_offset, _range) = self.unwrap_buffer_arg(&src_buffer); let regions = arena.alloc_slice_fill_iter(copies.iter().map(|copy| vk::BufferImageCopy { buffer_offset: copy.buffer_offset + base_offset, @@ -1791,21 +1767,13 @@ impl Device for VulkanDevice { } } TypedBind::UniformBuffer(buffers) => { - let buffer_pool = self.buffer_pool.lock(); - let buffer_infos_iter = buffers.iter().map(|buffer| match buffer { - BufferArg::Unmanaged(buffer) => { - let buffer = buffer_pool.get(buffer.0).unwrap().buffer; - vk::DescriptorBufferInfo { - buffer, - offset: 0, - range: vk::WHOLE_SIZE, - } + let buffer_infos_iter = buffers.iter().map(|buffer_arg| { + let (buffer, offset, range) = self.unwrap_buffer_arg(buffer_arg); + vk::DescriptorBufferInfo { + buffer, + offset, + range, } - BufferArg::Transient(transient) => vk::DescriptorBufferInfo { - buffer: vk::Buffer::from_raw(transient.buffer), - offset: transient.offset, - range: transient.len as u64, - }, }); let buffer_infos = arena.alloc_slice_fill_iter(buffer_infos_iter); vk::WriteDescriptorSet { @@ -1819,21 +1787,13 @@ impl Device for VulkanDevice { } } TypedBind::StorageBuffer(buffers) => { - let buffer_pool = self.buffer_pool.lock(); - let buffer_infos_iter = buffers.iter().map(|buffer| match buffer { - BufferArg::Unmanaged(buffer) => { - let buffer = buffer_pool.get(buffer.0).unwrap().buffer; - vk::DescriptorBufferInfo { - buffer, - offset: 0, - range: vk::WHOLE_SIZE, - } + let buffer_infos_iter = buffers.iter().map(|buffer_arg| { + let (buffer, offset, range) = self.unwrap_buffer_arg(buffer_arg); + vk::DescriptorBufferInfo { + buffer, + offset, + range, } - BufferArg::Transient(transient) => vk::DescriptorBufferInfo { - buffer: vk::Buffer::from_raw(transient.buffer), - offset: transient.offset, - range: transient.len as u64, - }, }); let buffer_infos = arena.alloc_slice_fill_iter(buffer_infos_iter); vk::WriteDescriptorSet { @@ -1885,12 +1845,7 @@ impl Device for VulkanDevice { offset: u64, index_type: IndexType, ) { - let (buffer, base_offset) = match buffer { - BufferArg::Unmanaged(buffer) => { - (self.buffer_pool.lock().get(buffer.0).unwrap().buffer, 0) - } - BufferArg::Transient(buffer) => (vk::Buffer::from_raw(buffer.buffer), buffer.offset), - }; + let (buffer, base_offset, _range) = self.unwrap_buffer_arg(&buffer); let command_buffer = self.cmd_buffer_mut(cmd_buffer).command_buffer; let index_type = vulkan_index_type(index_type); @@ -2305,6 +2260,28 @@ impl Device for VulkanDevice { fn destroy_swapchain(&self, window: &dyn AsRawWindow) { self.destroy_swapchain(window) } + + fn create_mapped_buffer<'device>(&'device self, desc: &BufferDesc) -> MappedBuffer<'device> { + assert!(desc.host_mapped); + + let buffer = self.create_buffer(desc); + unsafe { + let ptr = std::ptr::NonNull::new(self.map_buffer(buffer)) + .expect("failed to map buffer memory"); + + MappedBuffer { + ptr, + len: desc.size, + buffer, + phantom: PhantomData, + } + } + } + + fn destroy_mapped_buffer(&self, frame: &Frame, buffer: MappedBuffer) { + unsafe { self.unmap_buffer(buffer.buffer) } + self.destroy_buffer(frame, buffer.buffer) + } } impl VulkanDevice { @@ -2487,6 +2464,26 @@ impl VulkanDevice { VulkanTransientBuffer { buffer, memory } } + + fn unwrap_buffer_arg(&self, buffer_arg: &BufferArg) -> (vk::Buffer, u64, u64) { + match buffer_arg { + BufferArg::Unmanaged(buffer) => ( + self.buffer_pool.lock().get(buffer.0).unwrap().buffer, + 0, + vk::WHOLE_SIZE, + ), + BufferArg::Transient(transient) => ( + vk::Buffer::from_raw(transient.buffer), + transient.offset, + transient.len as u64, + ), + BufferArg::Mapped(buffer) => ( + self.buffer_pool.lock().get(buffer.buffer.0).unwrap().buffer, + 0, + vk::WHOLE_SIZE, + ), + } + } } impl Drop for VulkanDevice { diff --git a/libs/narcissus-gpu/src/lib.rs b/libs/narcissus-gpu/src/lib.rs index 071fb8a..3b13abd 100644 --- a/libs/narcissus-gpu/src/lib.rs +++ b/libs/narcissus-gpu/src/lib.rs @@ -1,4 +1,4 @@ -use std::{ffi::CStr, marker::PhantomData, ptr::NonNull}; +use std::{ffi::CStr, marker::PhantomData}; use backend::vulkan; use narcissus_core::{ @@ -8,8 +8,11 @@ use narcissus_core::{ mod backend; mod delay_queue; mod frame_counter; +mod mapped_memory; pub mod tlsf; +pub use mapped_memory::{MappedBuffer, TransientBuffer}; + pub enum DeviceBackend { Vulkan, } @@ -52,6 +55,12 @@ pub struct Image(Handle); #[derive(Clone, Copy, PartialEq, Eq, Hash)] pub struct Buffer(Handle); +impl Buffer { + pub fn to_arg(self) -> BufferArg<'static> { + BufferArg::Unmanaged(self) + } +} + #[derive(Clone, Copy, PartialEq, Eq, Hash)] pub struct Sampler(Handle); @@ -61,21 +70,6 @@ pub struct BindGroupLayout(Handle); #[derive(Clone, Copy, PartialEq, Eq, Hash)] pub struct Pipeline(Handle); -pub struct TransientBuffer<'a> { - ptr: NonNull, - len: usize, - buffer: u64, - offset: u64, - phantom: PhantomData<&'a u8>, -} - -impl<'a> TransientBuffer<'a> { - pub fn copy_from_slice(&mut self, bytes: &[u8]) { - unsafe { std::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.len) } - .copy_from_slice(bytes) - } -} - #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum MemoryLocation { Host, @@ -482,19 +476,8 @@ pub struct Bind<'a> { pub enum BufferArg<'a> { Unmanaged(Buffer), - Transient(TransientBuffer<'a>), -} - -impl<'a> From for BufferArg<'a> { - fn from(value: Buffer) -> Self { - BufferArg::Unmanaged(value) - } -} - -impl<'a> From> for BufferArg<'a> { - fn from(value: TransientBuffer<'a>) -> Self { - BufferArg::Transient(value) - } + Mapped(&'a MappedBuffer<'a>), + Transient(&'a TransientBuffer<'a>), } pub enum TypedBind<'a> { @@ -711,7 +694,7 @@ impl std::error::Error for SwapchainOutOfDateError {} pub trait Device { fn create_buffer(&self, desc: &BufferDesc) -> Buffer; - fn create_buffer_with_data(&self, desc: &BufferDesc, inital_data: &[u8]) -> Buffer; + fn create_mapped_buffer<'device>(&'device self, desc: &BufferDesc) -> MappedBuffer<'device>; fn create_image(&self, desc: &ImageDesc) -> Image; fn create_image_view(&self, desc: &ImageViewDesc) -> Image; fn create_sampler(&self, desc: &SamplerDesc) -> Sampler; @@ -720,6 +703,7 @@ pub trait Device { fn create_compute_pipeline(&self, desc: &ComputePipelineDesc) -> Pipeline; fn destroy_buffer(&self, frame: &Frame, buffer: Buffer); + fn destroy_mapped_buffer(&self, frame: &Frame, buffer: MappedBuffer); fn destroy_image(&self, frame: &Frame, image: Image); fn destroy_sampler(&self, frame: &Frame, sampler: Sampler); fn destroy_bind_group_layout(&self, frame: &Frame, bind_group_layout: BindGroupLayout); @@ -760,20 +744,6 @@ pub trait Device { size: usize, ) -> TransientBuffer<'a>; - #[must_use] - fn request_transient_buffer_with_data<'a>( - &self, - frame: &'a Frame<'a>, - thread_token: &'a ThreadToken, - usage: BufferUsageFlags, - data: &[u8], - ) -> TransientBuffer<'a> { - let mut transient_buffer = - self.request_transient_buffer(frame, thread_token, usage, data.len()); - transient_buffer.copy_from_slice(data); - transient_buffer - } - #[must_use] fn create_cmd_buffer<'a, 'thread>( &'a self, @@ -867,3 +837,52 @@ pub trait Device { fn end_frame<'device>(&'device self, frame: Frame<'device>); } + +pub trait DeviceExt: Device { + fn create_mapped_buffer_with_data<'a, T: ?Sized>( + &'a self, + memory_location: MemoryLocation, + usage: BufferUsageFlags, + data: &T, + ) -> MappedBuffer<'a>; + + fn request_transient_buffer_with_data<'a, T: ?Sized>( + &'a self, + frame: &'a Frame<'a>, + thread_token: &'a ThreadToken, + usage: BufferUsageFlags, + data: &T, + ) -> TransientBuffer<'a>; +} + +impl DeviceExt for dyn Device { + fn create_mapped_buffer_with_data<'a, T: ?Sized>( + &'a self, + memory_location: MemoryLocation, + usage: BufferUsageFlags, + data: &T, + ) -> MappedBuffer<'a> { + let size = std::mem::size_of_val(data); + let mut mapped_buffer = self.create_mapped_buffer(&BufferDesc { + memory_location, + host_mapped: true, + usage, + size, + }); + mapped_buffer.copy_with_offset(0, data); + mapped_buffer + } + + fn request_transient_buffer_with_data<'a, T: ?Sized>( + &'a self, + frame: &'a Frame<'a>, + thread_token: &'a ThreadToken, + usage: BufferUsageFlags, + data: &T, + ) -> TransientBuffer<'a> { + let mut transient_buffer = + self.request_transient_buffer(frame, thread_token, usage, std::mem::size_of_val(data)); + transient_buffer.copy_with_offset(0, data); + transient_buffer + } +} diff --git a/libs/narcissus-gpu/src/mapped_memory.rs b/libs/narcissus-gpu/src/mapped_memory.rs new file mode 100644 index 0000000..8511d33 --- /dev/null +++ b/libs/narcissus-gpu/src/mapped_memory.rs @@ -0,0 +1,101 @@ +use std::{marker::PhantomData, ptr::NonNull}; + +use crate::{Buffer, BufferArg}; + +#[cold] +fn overflow() -> ! { + panic!("overflow") +} + +/// Copies the byte representation of T into the given pointer. +/// +/// # Panics +/// +/// Panics if `len` is insufficient for the object `src` to be placed at the given +/// `offset` +/// +/// # Safety +/// +/// The memory region from `ptr` through `ptr` + `len` must be valid. +/// +/// This function will propagate undefined values from T, for example, padding +/// bytes, so it's vital that no Rust reference to the written memory exists +/// after writing a `T` which contains undefined values. +unsafe fn copy_from_with_offset(ptr: NonNull, len: usize, offset: usize, src: &T) { + let size = std::mem::size_of_val(src); + + let Some(end) = offset.checked_add(size) else { + overflow() + }; + + if end > len { + overflow() + } + + // SAFETY: + // * Taking a pointer of `T` as bytes is always valid, even when it contains + // padding. So long as we never materialize a reference to those undef bytes + // and directly copy through the pointer instead. + // + // * The number of bytes we're reading from src is directly derived from its + // size in bytes. + // + // * We check the length of the buffer is sufficient for `size` plus `offset` + // bytes above. + // + // * `src` and `dst` cannot overlap because it's not possible to make a + // reference to the bytes from the transient buffer. + let count = size; + let src = src as *const _ as *const u8; + let src = src.add(offset); + let dst = ptr.as_ptr(); + std::ptr::copy_nonoverlapping(src, dst, count) +} + +/// A mapped buffer is a GPU memory buffer that is persistently mapped into CPU +/// address space and can be written to at any time. +/// +/// Making sure the buffer is not updated while it is concurrently in use by the +/// GPU is the responsibility of the caller. +pub struct MappedBuffer<'a> { + pub(crate) ptr: NonNull, + pub(crate) len: usize, + pub(crate) buffer: Buffer, + pub(crate) phantom: PhantomData<&'a u8>, +} + +impl<'a> MappedBuffer<'a> { + pub fn to_arg(&self) -> BufferArg { + BufferArg::Mapped(self) + } + + pub fn copy_from(&mut self, src: &T) { + unsafe { copy_from_with_offset(self.ptr, self.len, 0, src) } + } + + pub fn copy_with_offset(&mut self, offset: usize, src: &T) { + unsafe { copy_from_with_offset(self.ptr, self.len, offset, src) } + } +} + +pub struct TransientBuffer<'a> { + pub(crate) ptr: NonNull, + pub(crate) offset: u64, + pub(crate) len: usize, + pub(crate) buffer: u64, + pub(crate) phantom: PhantomData<&'a u8>, +} + +impl<'a> TransientBuffer<'a> { + pub fn to_arg(&self) -> BufferArg { + BufferArg::Transient(self) + } + + pub fn copy_from(&mut self, src: &T) { + unsafe { copy_from_with_offset(self.ptr, self.len, 0, src) } + } + + pub fn copy_with_offset(&mut self, offset: usize, src: &T) { + unsafe { copy_from_with_offset(self.ptr, self.len, offset, src) } + } +}