+import color_map;
+
+// Edge length of a single cubic block; block grid coordinates and cube corner offsets are scaled by this.
+static const float BLOCK_SIZE = 2.0;
+
+// A single block, packed into one `uint`: x occupies bits 0-4, y bits 5-9 and
+// z bits 10-14, so each coordinate lies in the range 0..31.
+struct Block {
+    uint value;
+
+    // Unpacks the grid coordinate and scales it by `BLOCK_SIZE`, giving the
+    // block's offset relative to the position of the buffer that holds it.
+    float3 position() {
+        let x = value & 31;
+        let y = (value >> 5) & 31;
+        let z = (value >> 10) & 31;
+        return float3(x, y, z) * BLOCK_SIZE;
+    }
+}
+
+// Block data the mesh shader reads through `BlockConstants.block_buffer`.
+struct BlockBuffer {
+ // Added to every block's own offset before the position is transformed by `clip_from_model`.
+ float3 position;
+ // Number of valid entries at the front of `blocks`; the mesh shader never emits geometry past it.
+ uint count;
+ // Packed blocks; the array is sized for a full 32x32x32 grid.
+ Block blocks[32 * 32 * 32];
+}
+
+// Per-draw constants passed to the block mesh shader as its `uniform` parameter.
struct BlockConstants {
- float4x4 clip_from_camera;
+ // Transform applied to block vertex positions to produce clip-space positions.
+ float4x4 clip_from_model;
+ // Camera position, compared against block positions to pick visible faces; only `.xyz` is read by the mesh shader.
+ float4 camera_position;
+ // Pointer to the blocks to draw.
+ BlockBuffer *block_buffer;
}
struct VertexAttributes {
float3 color;
}
+// Max blocks per-group. The host divides the block count by this same value (rounding up) to get the number of groups to dispatch.
+static const uint MAX_BLOCK = 4;
+
+// Max vertices per-group: every block contributes its 8 cube corners.
+static const uint V = MAX_BLOCK * 8;
+
+// Max triangles per-group: every block contributes 6 triangles, two for each of the three faces it emits.
+static const uint T = MAX_BLOCK * 6;
+
+
[shader("mesh")]
[outputtopology("triangle")]
+[require(spvMeshShadingEXT, spvGroupNonUniformShuffle)]
[numthreads(32, 1, 1)]
-void mesh(uniform BlockConstants constants, OutputVertices<VertexAttributes, 32> vertices, OutputPrimitives<PrimitiveAttributes, 16> primitives, OutputIndices<uint3, 16> indices, uint thread_id_in_group: SV_GroupThreadID) {
- SetMeshOutputCounts(4, 2);
-
- static const float4 positions[4] = {
- float4(-0.5, 0.0, -0.5, 1.0),
- float4(-0.5, 0.0, 0.5, 1.0),
- float4(0.5, 0.0, 0.5, 1.0),
- float4(0.5, 0.0, -0.5, 1.0),
- };
+// Mesh shader entry point. Each 32-lane group emits the camera-facing faces of
+// up to `MAX_BLOCK` consecutive blocks from `constants.block_buffer`.
+//
+// Lane `i` produces vertex `i` (8 cube corners per block) while
+// `i < count * 8`, and triangle `i` (6 triangles per block) while
+// `i < count * 6`. Lanes outside those ranges produce no output.
+//
+// NOTE(review): the cross-lane read below assumes all 32 lanes of the group
+// belong to one wave - confirm the subgroup size on the target hardware.
+void mesh(uniform BlockConstants constants,
+          out OutputVertices<VertexAttributes, V> vertices,
+          out OutputPrimitives<PrimitiveAttributes, T> primitives,
+          out OutputIndices<uint3, T> indices,
+          uint group_id: SV_GroupID,
+          uint local_id: SV_GroupThreadID) {
+    // Range of blocks owned by this group. The last group may be partial, so
+    // `count` can be less than `MAX_BLOCK`. This relies on the host never
+    // dispatching a group whose `start` is at or past the block count.
+    let start = group_id.x * MAX_BLOCK;
+    let end = min(constants.block_buffer.count, start + MAX_BLOCK);
+    let count = end - start;
- if (thread_id_in_group < 4) {
- vertices[thread_id_in_group].position = mul(constants.clip_from_camera, positions[thread_id_in_group]);
- }
+    SetMeshOutputCounts(count * 8, count * 6);
- if (thread_id_in_group < 2) {
- primitives[thread_id_in_group].color = float3(0x9b / 255.0, 0x61 / 255.0, 0x56 / 255.0);
- primitives[thread_id_in_group].normal = float3(0.0, 1.0, 0.0);
+    // Only lanes that own a live vertex touch the block buffer: the host
+    // uploads just the first `count` blocks, so reading further would leave the
+    // uploaded range. Every other lane keeps a defined (all false) value so the
+    // shuffle below never observes garbage.
+    bool3 face_flip = bool3(false);
+    if (local_id < count * 8) {
+        let block = constants.block_buffer.blocks[start + local_id / 8];
+        let block_position = constants.block_buffer.position + block.position();
+        let local_camera_position = constants.camera_position.xyz - block_position;
+        face_flip = local_camera_position > float3(0.0);
+
+        // Bits 0, 1 and 2 of the lane index select the x, y and z side of the cube corner.
+        let vertex = uint3(local_id & 0x1, (local_id & 0x2) >> 1, (local_id & 0x4) >> 2) * BLOCK_SIZE;
+        vertices[local_id].position = mul(constants.clip_from_model, float4(block_position + vertex, 1.0));
     }
- indices[0] = uint3(0, 1, 2);
- indices[1] = uint3(2, 3, 0);
+    let primitive_index = local_id % 6;
+    let face_index = primitive_index / 2; // x = 0, y = 1, z = 2
+
+    // We calculated the face flip values while generating vertices, so we must
+    // shuffle those values down to the correct lanes.
+    //
+    // The shuffle runs on every lane (outside any branch) so its source lanes
+    // are always active. Lanes 24-31 own no triangle; their block slot is
+    // clamped so the source lane stays inside the 32-lane group.
+    let primitive_vertex_offset = min(local_id / 6, MAX_BLOCK - 1) * 8;
+    let flip = WaveReadLaneAt(face_flip, primitive_vertex_offset)[face_index];
+
+    // Indices for the face on the opposite side are given by a power-of-two
+    // offset. That is, on the x-axis if we look at triangle 0-4-2, then we flip
+    // by adding 1 to each index, producing 1-5-3. Similarly, to flip 0-5-4
+    // around y, we must add 2, leaving us with 2-7-6. This means the offset is
+    // always a power of two, `1 << face_index`.
+    //
+    // Additionally, we need to flip the winding to support backface culling in
+    // the rasterizer.
+    //
+    //     7 *------* 6       +y
+    //      /|     /|          ^  +z
+    //   3 / |  2 / |          |/
+    //    *------*  |    +x <--*
+    //    |5 *---|--* 4
+    //    | /    | /     flip x = +1
+    //    |/     |/      flip y = +2
+    //    *------*       flip z = +4
+    //   1        0
+    //
+    // `CUBE_INDICES` contains the indices for -x, -y, and -z faces.
+    static const uint3 CUBE_INDICES[6] = {
+        uint3(2, 0, 4),
+        uint3(2, 4, 6),
+        uint3(0, 1, 5),
+        uint3(0, 5, 4),
+        uint3(0, 2, 3),
+        uint3(0, 3, 1),
+    };
+
+    let flip_offset = uint(flip) << face_index;
+    let index = primitive_vertex_offset + CUBE_INDICES[primitive_index] + flip_offset;
+
+    // Only lanes that own a live triangle write primitive outputs. The output
+    // arrays hold `T` (24) entries, fewer than the 32 lanes in the group, and a
+    // partial group declares fewer still.
+    if (local_id < count * 6) {
+        let flip_sign = flip ? 1.0 : -1.0;
+        primitives[local_id].normal = float3(face_index == 0 ? flip_sign : 0,
+                                             face_index == 1 ? flip_sign : 0,
+                                             face_index == 2 ? flip_sign : 0);
+        primitives[local_id].color = color_map::plasma(primitive_index / 6.0);
+
+        // Flip winding for backface culling.
+        indices[local_id] = flip ? index.yxz : index;
+    }
}
struct Fragment {
}
[shader("fragment")]
-Fragment fragment(PrimitiveAttributes primitive, VertexAttributes vertex) {
+Fragment fragment(in perprimitive PrimitiveAttributes primitive, in VertexAttributes vertex) {
let n_dot_l = max(dot(primitive.normal, float3(0.0, 1.0, 0.0)), 0.1);
Fragment output;
+use std::mem::MaybeUninit;
use std::ops::Index;
use std::path::Path;
RenderingDesc, Scissor, ShaderStageFlags, StoreOp, ThreadToken, TypedBind, Viewport,
};
use narcissus_image as image;
-use narcissus_maths::{Affine3, HalfTurn, Mat3, Mat4, Vec3, vec3};
+use narcissus_maths::{Affine3, HalfTurn, Mat3, Mat4, Vec3, sin_cos_pi_f32, vec3};
pub struct Model<'a> {
indices: u32,
}
}
+ let camera_position = game_state.camera.position();
let camera_from_model = game_state.camera.camera_from_model();
let clip_from_camera = Mat4::perspective_rev_inf_zo(
HalfTurn::new(1.0 / 3.0),
}],
);
+        /// CPU-side image of the buffer handed to the block mesh shader: a
+        /// 16-byte header (`x`, `y`, `z`, `count`) followed by the packed blocks.
+        #[repr(C)]
+        struct BlockMeshPacket {
+            x: f32,
+            y: f32,
+            z: f32,
+            count: u32,
+            blocks: [u32; 32 * 32 * 32],
+        }
+
+        impl BlockMeshPacket {
+            /// Appends the block at grid cell (`x`, `y`, `z`), keeping only the
+            /// low five bits of each coordinate. Does nothing once every slot
+            /// in `blocks` is in use.
+            fn push(&mut self, x: usize, y: usize, z: usize) {
+                let Some(slot) = self.blocks.get_mut(self.count as usize) else {
+                    return;
+                };
+
+                // x in bits 0-4, y in bits 5-9, z in bits 10-14.
+                let packed = (x & 31) | ((y & 31) << 5) | ((z & 31) << 10);
+                *slot = packed as u32;
+                self.count += 1;
+            }
+
+            /// Views the header plus the first `count` blocks as raw bytes,
+            /// leaving out the unused tail of `blocks`.
+            fn as_slice(&self) -> &[u8] {
+                // Header: four 4-byte fields (x, y, z, count).
+                let header_len = 4 * 4;
+                let blocks_len = self.count as usize * 4;
+                let base = (self as *const BlockMeshPacket).cast::<u8>();
+                // SAFETY: `base` comes from a live `&self`, and the `#[repr(C)]`
+                // struct holds only 4-byte `f32`/`u32` fields, so every byte
+                // in range is initialised. `push` stops incrementing `count` at
+                // `blocks.len()`, which keeps the length within the struct.
+                unsafe { core::slice::from_raw_parts(base, header_len + blocks_len) }
+            }
+        }
+
+ let mut block_packet = BlockMeshPacket {
+ x: 0.0,
+ y: -64.0,
+ z: 0.0,
+ count: 0,
+ blocks: unsafe { MaybeUninit::zeroed().assume_init() },
+ };
+
+ for y in (0..32).rev() {
+ for z in 0..32 {
+ for x in 0..32 {
+ let freq = 2.0;
+ let (s, _) = sin_cos_pi_f32(x as f32 / 32.0 * freq);
+ if (y as f32) < ((s + 1.0) * 16.0) + 16.0 {
+ block_packet.push(x, y, z);
+ }
+ }
+ }
+ }
+
// Render blocks.
gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.block_pipeline);
gpu.cmd_set_bind_group(cmd_encoder, 0, &graphics_bind_group);
+
+ let block_buffer = gpu.request_transient_buffer_with_data(
+ frame,
+ thread_token,
+ BufferUsageFlags::STORAGE,
+ block_packet.as_slice(),
+ );
gpu.cmd_push_constants_with_data(
cmd_encoder,
ShaderStageFlags::MESH,
0,
- &BlockConstants { clip_from_model },
+ &BlockConstants {
+ clip_from_model,
+ camera_position,
+ _pad: 0.0,
+ block_buffer_address: gpu.get_buffer_address(block_buffer.to_arg()),
+ },
);
- gpu.cmd_draw_mesh_tasks(cmd_encoder, 1, 1, 1);
+ gpu.cmd_draw_mesh_tasks(cmd_encoder, block_packet.count.div_ceil(4), 1, 1);
gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.basic_pipeline);
gpu.cmd_set_bind_group(cmd_encoder, 0, &graphics_bind_group);