git.nega.tv - josh/narcissus/commitdiff
shark: Render only front-facing block faces
author: Joshua Simmons <josh@nega.tv>
Sat, 25 Oct 2025 14:12:57 +0000 (16:12 +0200)
committer: Joshua Simmons <josh@nega.tv>
Mon, 27 Oct 2025 22:13:02 +0000 (23:13 +0100)
title/shark-shaders/shaders/block.slang
title/shark-shaders/src/pipelines.rs
title/shark/src/draw.rs

index 017b705570c24858c802725be7e57121d95a58bb..e19cabd6a87df77487a30ebfa98896cfea091d6d 100644 (file)
@@ -1,5 +1,26 @@
+import color_map;
+
+// Size of a block.
+static const float BLOCK_SIZE = 2.0;
+
+struct Block { // One block, stored as packed grid coordinates.
+    uint value; // x in bits 0-4, y in bits 5-9, z in bits 10-14 (5 bits per axis).
+
+    float3 position() { // Unpacks the three 5-bit coordinates and scales them by BLOCK_SIZE.
+        return float3(value & 31, (value >> 5) & 31, (value >> 10) & 31) * BLOCK_SIZE;
+    }
+}
+
+struct BlockBuffer { // Block list read by the mesh shader through `BlockConstants.block_buffer`.
+    float3 position; // Offset added to every block's unpacked position.
+    uint count; // Number of valid entries in `blocks`; the mesh shader never iterates past it.
+    Block blocks[32 * 32 * 32]; // Capacity of one 32x32x32 grid.
+}
+
 struct BlockConstants { // Constants passed to the `mesh` entry point.
-    float4x4 clip_from_camera;
+    float4x4 clip_from_model; // Applied to each block vertex position to produce the clip-space output.
+    float4 camera_position; // Only `.xyz` is read, to decide which faces point toward the camera.
+    BlockBuffer *block_buffer; // Blocks to draw.
 }
 
 struct VertexAttributes {
@@ -11,30 +32,90 @@ struct PrimitiveAttributes {
     float3 color;
 }
 
+// Max blocks per-group.
+static const uint MAX_BLOCK = 4;
+
+// Max vertices per-group.
+static const uint V = MAX_BLOCK * 8;
+
+// Max triangles per-group.
+static const uint T = MAX_BLOCK * 6;
+
+// Mesh shader entry point: each 32-thread group emits the camera-facing half
+// of up to MAX_BLOCK cubes (8 vertices and 6 triangles per block).
+//
+// constants:  clip transform, camera position and the block buffer pointer.
+// vertices:   per-vertex outputs; lane `i` writes vertex `i & 7` of block `i / 8`.
+// primitives: per-triangle outputs; lane `i` writes triangle `i % 6` of block `i / 6`.
+// indices:    triangle index triples, parallel to `primitives`.
+// group_id:   selects the run of MAX_BLOCK blocks handled by this group.
+// local_id:   lane within the group, 0..31.
+//
+// The lane shuffle below assumes the whole group is a single 32-wide subgroup.
 [shader("mesh")]
 [outputtopology("triangle")]
+[require(spvMeshShadingEXT, spvGroupNonUniformShuffle)]
 [numthreads(32, 1, 1)]
-void mesh(uniform BlockConstants constants, OutputVertices<VertexAttributes, 32> vertices, OutputPrimitives<PrimitiveAttributes, 16> primitives, OutputIndices<uint3, 16> indices, uint thread_id_in_group: SV_GroupThreadID) {
-    SetMeshOutputCounts(4, 2);
-
-    static const float4 positions[4] = {
-        float4(-0.5, 0.0, -0.5, 1.0),
-        float4(-0.5, 0.0, 0.5, 1.0),
-        float4(0.5, 0.0, 0.5, 1.0),
-        float4(0.5, 0.0, -0.5, 1.0),
-    };
+void mesh(uniform BlockConstants constants,
+          out OutputVertices<VertexAttributes, V> vertices,
+          out OutputPrimitives<PrimitiveAttributes, T> primitives,
+          out OutputIndices<uint3, T> indices,
+          uint group_id: SV_GroupID,
+          uint local_id: SV_GroupThreadID) {
+    // Range of blocks handled by this group; the last group may be partial.
+    let start = group_id.x * MAX_BLOCK;
+    let end = min(constants.block_buffer.count, start + MAX_BLOCK);
+    let count = end - start;
 
-    if (thread_id_in_group < 4) {
-        vertices[thread_id_in_group].position = mul(constants.clip_from_camera, positions[thread_id_in_group]);
-    }
+    SetMeshOutputCounts(count * 8, count * 6);
 
-    if (thread_id_in_group < 2) {
-        primitives[thread_id_in_group].color = float3(0x9b / 255.0, 0x61 / 255.0, 0x56 / 255.0);
-        primitives[thread_id_in_group].normal = float3(0.0, 1.0, 0.0);
+    // Only lanes that map onto a live block may read the block buffer or write
+    // a vertex: in a partial group the remaining block slots lie past `count`,
+    // i.e. past the valid block data and past the declared vertex count. Those
+    // lanes keep a defined (all-false) `face_flip` so the shuffle below never
+    // reads an uninitialised value.
+    bool3 face_flip = bool3(false, false, false);
+    let block_in_group = local_id / 8;
+    if (block_in_group < count) {
+        let block = constants.block_buffer.blocks[start + block_in_group];
+        let block_position = constants.block_buffer.position + block.position();
+        let local_camera_position = constants.camera_position.xyz - block_position;
+        face_flip = local_camera_position > float3(0.0);
+
+        let vertex = uint3(local_id & 0x1, (local_id & 0x2) >> 1, (local_id & 0x4) >> 2) * BLOCK_SIZE;
+        vertices[local_id].position = mul(constants.clip_from_model, float4(block_position + vertex, 1.0));
     }
 
-    indices[0] = uint3(0, 1, 2);
-    indices[1] = uint3(2, 3, 0);
+    let primitive_index = local_id % 6;
+    let face_index = primitive_index / 2; // x = 0, y = 1, z = 2
+
+    // We calculated the face flip values while generating vertices, so we must
+    // shuffle those values down to the correct lanes.
+    //
+    // The group has 32 lanes but only T = 24 primitive slots. Lanes 24..31
+    // would otherwise ask for lane 32 or 40, which is outside the group, so the
+    // source lane is clamped to the last block's first vertex lane. The shuffle
+    // itself stays outside any branch so that every lane participates.
+    let primitive_vertex_offset = (local_id / 6) * 8;
+    let source_lane = min(primitive_vertex_offset, V - 8);
+    let flip = WaveReadLaneAt(face_flip, source_lane)[face_index];
+
+    // Indices for the face on the opposite side are given by a power-of-two
+    // offset. That is, on the x-axis if we look at triangle 0-4-2, then we flip
+    // by adding 1 to each index, producing 1-5-3. Similarly, to flip 0-5-4
+    // around y, we must add 2, leaving us with 2-7-6. This means the offset is
+    // always a power of two, `1 << face_index`.
+    //
+    // Additionally, we need to flip the winding to support backface culling in
+    // the rasterizer.
+    //
+    //      7 *------* 6          +y
+    //       /|     /|            ^  +z
+    //    3 / |  2 / |            |/
+    //     *------*  |      +x <--*
+    //     |5 *---|--* 4
+    //     | /    | /       flip x = +1
+    //     |/     |/        flip y = +2
+    //     *------*         flip z = +4
+    //    1       0
+    //
+    // `CUBE_INDICES` contains the indices for -x, -y, and -z faces.
+    static const uint3 CUBE_INDICES[6] = {
+        uint3(2, 0, 4),
+        uint3(2, 4, 6),
+        uint3(0, 1, 5),
+        uint3(0, 5, 4),
+        uint3(0, 2, 3),
+        uint3(0, 3, 1),
+    };
+
+    // Only the first `count * 6` lanes own a primitive. This keeps writes
+    // inside the T-element output arrays and inside the declared primitive
+    // count for partial groups.
+    if (local_id < count * 6) {
+        let flip_offset = uint(flip) << face_index;
+        let index = primitive_vertex_offset + CUBE_INDICES[primitive_index] + flip_offset;
+
+        let flip_sign = flip ? 1.0 : -1.0;
+        primitives[local_id].normal = float3(face_index == 0 ? flip_sign : 0,
+                                             face_index == 1 ? flip_sign : 0,
+                                             face_index == 2 ? flip_sign : 0);
+        primitives[local_id].color = color_map::plasma(primitive_index / 6.0);
+
+        // Flip winding for backface culling.
+        indices[local_id] = flip ? index.yxz : index;
+    }
 }
 
 struct Fragment {
@@ -42,7 +123,7 @@ struct Fragment {
 }
 
 [shader("fragment")]
-Fragment fragment(PrimitiveAttributes primitive, VertexAttributes vertex) {
+Fragment fragment(in perprimitive PrimitiveAttributes primitive, in VertexAttributes vertex) {
     let n_dot_l = max(dot(primitive.normal, float3(0.0, 1.0, 0.0)), 0.1);
 
     Fragment output;
index ff16084384a4809b394dd8f9f65947d427ca8301..ffc07a8fa5275cb6531c7757e5adfaead65d8192 100644 (file)
@@ -7,7 +7,7 @@ use narcissus_gpu::{
     Sampler, SamplerAddressMode, SamplerDesc, SamplerFilter, ShaderDesc, ShaderStageFlags,
     SpecConstant, Topology, VertexOrMeshShader,
 };
-use narcissus_maths::{Mat4, Vec2};
+use narcissus_maths::{Mat4, Point3, Vec2};
 
 pub const DRAW_2D_TILE_SIZE: u32 = 32;
 
@@ -175,8 +175,11 @@ pub struct BasicConstants<'a> {
 }
 
 #[repr(C)]
-pub struct BlockConstants {
+pub struct BlockConstants<'a> { // Push-constant payload for the block mesh shader.
     pub clip_from_model: Mat4,
+    pub camera_position: Point3,
+    pub _pad: f32, // Presumably pads `camera_position` to 16 bytes to match `float4 camera_position` in block.slang; confirm `Point3` is 12 bytes.
+    pub block_buffer_address: BufferAddress<'a>, // Address of the uploaded block packet; the shader side declares it as `BlockBuffer *block_buffer`.
 }
 
 #[repr(C)]
@@ -376,6 +379,8 @@ impl Pipelines {
             vertex_or_mesh_shader: VertexOrMeshShader::Mesh(ShaderDesc {
                 code: crate::BLOCK_SPV,
                 entry: c"mesh",
+                require_full_subgroups: true,
+                required_subgroup_size: Some(32),
                 ..default()
             }),
             fragment_shader: ShaderDesc {
index bca0e4101976026df8b67d6074ebff5808220c3a..343892253ae2225f9ed139408c0ceea5fc98b1f8 100644 (file)
@@ -1,3 +1,4 @@
+use std::mem::MaybeUninit;
 use std::ops::Index;
 use std::path::Path;
 
@@ -22,7 +23,7 @@ use narcissus_gpu::{
     RenderingDesc, Scissor, ShaderStageFlags, StoreOp, ThreadToken, TypedBind, Viewport,
 };
 use narcissus_image as image;
-use narcissus_maths::{Affine3, HalfTurn, Mat3, Mat4, Vec3, vec3};
+use narcissus_maths::{Affine3, HalfTurn, Mat3, Mat4, Vec3, sin_cos_pi_f32, vec3};
 
 pub struct Model<'a> {
     indices: u32,
@@ -420,6 +421,7 @@ impl<'gpu> DrawState<'gpu> {
             }
         }
 
+        let camera_position = game_state.camera.position();
         let camera_from_model = game_state.camera.camera_from_model();
         let clip_from_camera = Mat4::perspective_rev_inf_zo(
             HalfTurn::new(1.0 / 3.0),
@@ -684,16 +686,79 @@ impl<'gpu> DrawState<'gpu> {
                     }],
                 );
 
+                #[repr(C)]
+                struct BlockMeshPacket { // CPU-side image of the buffer handed to the block mesh shader; presumably mirrors `BlockBuffer` in block.slang.
+                    x: f32, // x, y, z: origin of the packet (the shader's `float3 position`).
+                    y: f32,
+                    z: f32,
+                    count: u32, // Number of entries of `blocks` filled so far.
+                    blocks: [u32; 32 * 32 * 32], // Packed block coordinates, one `u32` per block.
+                }
+
+                impl BlockMeshPacket {
+                    fn push(&mut self, x: usize, y: usize, z: usize) { // Appends one block; the call is silently ignored once the array is full.
+                        let index = self.count as usize;
+                        if index >= 32 * 32 * 32 {
+                            return;
+                        }
+
+                        self.blocks[index] = ((z & 31) << 10 | (y & 31) << 5 | x & 31) as u32; // 5 bits per axis: x in bits 0-4, y in bits 5-9, z in bits 10-14.
+                        self.count += 1;
+                    }
+
+                    fn as_slice(&self) -> &[u8] { // Byte view of the header plus only the `count` entries that were pushed.
+                        let size = 4 * 4 + self.count as usize * 4; // 16-byte header (x, y, z, count) + 4 bytes per block.
+                        unsafe { // SAFETY: the struct is `repr(C)` with only 4-byte fields (no padding), the slice borrows `self`, and `size` stays within the struct as long as `count` is only advanced by `push`, which caps it at the array length.
+                            core::slice::from_raw_parts(
+                                self as *const BlockMeshPacket as *const u8,
+                                size,
+                            )
+                        }
+                    }
+                }
+
+                let mut block_packet = BlockMeshPacket {
+                    x: 0.0,
+                    y: -64.0,
+                    z: 0.0,
+                    count: 0,
+                    blocks: unsafe { MaybeUninit::zeroed().assume_init() },
+                };
+
+                for y in (0..32).rev() {
+                    for z in 0..32 {
+                        for x in 0..32 {
+                            let freq = 2.0;
+                            let (s, _) = sin_cos_pi_f32(x as f32 / 32.0 * freq);
+                            if (y as f32) < ((s + 1.0) * 16.0) + 16.0 {
+                                block_packet.push(x, y, z);
+                            }
+                        }
+                    }
+                }
+
                 // Render blocks.
                 gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.block_pipeline);
                 gpu.cmd_set_bind_group(cmd_encoder, 0, &graphics_bind_group);
+
+                let block_buffer = gpu.request_transient_buffer_with_data(
+                    frame,
+                    thread_token,
+                    BufferUsageFlags::STORAGE,
+                    block_packet.as_slice(),
+                );
                 gpu.cmd_push_constants_with_data(
                     cmd_encoder,
                     ShaderStageFlags::MESH,
                     0,
-                    &BlockConstants { clip_from_model },
+                    &BlockConstants {
+                        clip_from_model,
+                        camera_position,
+                        _pad: 0.0,
+                        block_buffer_address: gpu.get_buffer_address(block_buffer.to_arg()),
+                    },
                 );
-                gpu.cmd_draw_mesh_tasks(cmd_encoder, 1, 1, 1);
+                gpu.cmd_draw_mesh_tasks(cmd_encoder, block_packet.count.div_ceil(4), 1, 1);
 
                 gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.basic_pipeline);
                 gpu.cmd_set_bind_group(cmd_encoder, 0, &graphics_bind_group);