From: Joshua Simmons Date: Sun, 12 Oct 2025 10:33:36 +0000 (+0200) Subject: shark-shaders: Migrate radix_sort pipeline to slang X-Git-Url: https://git.nega.tv//gitweb.cgi?a=commitdiff_plain;h=69bb53f35d049763d3e1b64f8006299d1f384f01;p=josh%2Fnarcissus shark-shaders: Migrate radix_sort pipeline to slang --- diff --git a/title/shark-shaders/build.rs b/title/shark-shaders/build.rs index 7411248..0e5ecec 100644 --- a/title/shark-shaders/build.rs +++ b/title/shark-shaders/build.rs @@ -80,18 +80,10 @@ const SLANG_SHADERS: &[SlangShader] = &[ SlangShader { name: "basic" }, SlangShader { name: "draw_2d" }, SlangShader { name: "composite" }, + SlangShader { name: "radix_sort" }, ]; -const SHADERS: &[Shader] = &[ - Shader { - stage: "comp", - name: "radix_sort_0_upsweep", - }, - Shader { - stage: "comp", - name: "radix_sort_1_downsweep", - }, -]; +const SHADERS: &[Shader] = &[]; fn main() { let out_dir = std::env::var("OUT_DIR").unwrap(); diff --git a/title/shark-shaders/shaders/bindings_compute.h b/title/shark-shaders/shaders/bindings_compute.h deleted file mode 100644 index 1eb1fd3..0000000 --- a/title/shark-shaders/shaders/bindings_compute.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef COMPUTE_BINDINGS_INCLUDE -#define COMPUTE_BINDINGS_INCLUDE - -const uint SAMPLER_BILINEAR = 0; -const uint SAMPLER_BILINEAR_UNNORMALIZED = 1; -const uint SAMPLER_COUNT = 2; - -layout (set = 0, binding = 0) uniform sampler samplers[SAMPLER_COUNT]; -layout (set = 0, binding = 1) uniform texture3D tony_mc_mapface_lut; -layout (set = 0, binding = 2) uniform texture2D glyph_atlas; -layout (set = 0, binding = 3) uniform writeonly image2D ui_layer_write; -layout (set = 0, binding = 3) uniform readonly image2D ui_layer_read; -layout (set = 0, binding = 4) uniform readonly image2D color_layer; -layout (set = 0, binding = 5) uniform writeonly image2D composited_output; - -#endif diff --git a/title/shark-shaders/shaders/indirect.h b/title/shark-shaders/shaders/indirect.h deleted file mode 100644 index d409cf2..0000000 --- a/title/shark-shaders/shaders/indirect.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef INDIRECT_H -#define INDIRECT_H - -struct VkDispatchIndirectCommand { - uint x; - uint y; - uint z; -}; - -#endif \ No newline at end of file diff --git a/title/shark-shaders/shaders/radix_sort.h b/title/shark-shaders/shaders/radix_sort.h deleted file mode 100644 index edf5a6c..0000000 --- a/title/shark-shaders/shaders/radix_sort.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef RADIX_SORT_H -#define RADIX_SORT_H - -const uint RADIX_BITS = 8; -const uint RADIX_DIGITS = 1 << RADIX_BITS; -const uint RADIX_MASK = RADIX_DIGITS - 1; - -const uint RADIX_WGP_SIZE = 256; -const uint RADIX_ITEMS_PER_INVOCATION = 16; -const uint RADIX_ITEMS_PER_WGP = RADIX_WGP_SIZE * RADIX_ITEMS_PER_INVOCATION; - -layout(buffer_reference, std430, buffer_reference_align = 4) coherent buffer FinishedRef { - coherent uint value; -}; - -layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer CountRef { - uint value; -}; - -#endif diff --git a/title/shark-shaders/shaders/radix_sort.slang b/title/shark-shaders/shaders/radix_sort.slang new file mode 100644 index 0000000..120569e --- /dev/null +++ b/title/shark-shaders/shaders/radix_sort.slang @@ -0,0 +1,256 @@ +module radix_sort; + +uint4 WaveLtMask() { + if (WaveGetLaneIndex() < 32) + return uint4((1u << WaveGetLaneIndex()) - 1, 0, 0, 0); + + if (WaveGetLaneIndex() < 64) + return uint4(~0, (1u << WaveGetLaneIndex()) - 1, 0, 0); + + if (WaveGetLaneIndex() < 96) + return uint4(~0, ~0, (1u << 
WaveGetLaneIndex()) - 1, 0); + + return uint4(~0, ~0, ~0, (1u << WaveGetLaneIndex()) - 1); +} + +layout(constant_id = 0) const uint WAVE_SIZE = 64; + +namespace radix_sort { +public func CalculateGroupCountForItemCount(uint item_count)->uint { + return (item_count + (RADIX_ITEMS_PER_GROUP - 1)) / RADIX_ITEMS_PER_GROUP; +} +} + +static const uint RADIX_BITS = 8; +static const uint RADIX_DIGITS = 1 << RADIX_BITS; +static const uint RADIX_MASK = RADIX_DIGITS - 1; + +static const uint RADIX_GROUP_SIZE = RADIX_DIGITS; +static const uint RADIX_ITEMS_PER_THREAD = 16; +static const uint RADIX_ITEMS_PER_GROUP = RADIX_GROUP_SIZE * RADIX_ITEMS_PER_THREAD; + +static const uint WAVE_COUNT = RADIX_GROUP_SIZE / WAVE_SIZE; + +struct UpsweepConstants { + uint shift; + uint _pad; + uint *finished_buffer; + uint *count_buffer; + uint *src_buffer; + uint *spine_buffer; +}; + +groupshared uint histogram[RADIX_DIGITS]; + +groupshared bool is_last_group_dynamic; +groupshared uint carry; +groupshared uint sums[WAVE_COUNT]; + +[shader("compute")] +[require(spvGroupNonUniformBallot, spvGroupNonUniformArithmetic)] +[numthreads(RADIX_GROUP_SIZE, 1, 1)] +void upsweep(uniform UpsweepConstants constants, uint3 thread_id: SV_DispatchThreadID, uint3 group_id: SV_GroupID, uint3 thread_id_in_group: SV_GroupThreadID) { + let shift = constants.shift; + let count = constants.count_buffer[0]; + + let dispatch_group_count = radix_sort::CalculateGroupCountForItemCount(count); + let is_last_group_in_dispatch = group_id.x == dispatch_group_count - 1; + + // Clear local histogram. + // Assumes RADIX_GROUP_SIZE == RADIX_DIGITS + histogram[thread_id_in_group.x] = 0; + + GroupMemoryBarrierWithGroupSync(); + + if (is_last_group_in_dispatch) { + for (uint i = 0; i < RADIX_ITEMS_PER_THREAD; i++) { + const uint src_index = group_id.x * WorkgroupSize().x * RADIX_ITEMS_PER_THREAD + i * RADIX_DIGITS + thread_id_in_group.x; + if (src_index < count) { + const uint value = constants.src_buffer[src_index]; + const uint digit = (value >> shift) & RADIX_MASK; + InterlockedAdd(histogram[digit], 1); + } + } + } else { + for (uint i = 0; i < RADIX_ITEMS_PER_THREAD; i++) { + const uint src_index = group_id.x * WorkgroupSize().x * RADIX_ITEMS_PER_THREAD + i * RADIX_DIGITS + thread_id_in_group.x; + const uint value = constants.src_buffer[src_index]; + const uint digit = (value >> shift) & RADIX_MASK; + InterlockedAdd(histogram[digit], 1); + } + } + + GroupMemoryBarrierWithGroupSync(); + + // Scatter to the spine, this is a striped layout so we can efficiently + // calculate the prefix sum. Re-calculate how many workgroups we dispatched + // to determine the stride we need to write at. + constants.spine_buffer[(thread_id_in_group.x * dispatch_group_count) + group_id.x] = histogram[thread_id_in_group.x]; + + DeviceMemoryBarrierWithGroupSync(); + + // Store whether we're the last-executing group in LDS. This contrasts with + // the 'static' `is_last_group_in_dispatch`, which represents the group with + // the largest group_id in the dispatch. + if (thread_id_in_group.x == 0) { + var old_value = 0; + InterlockedAdd(*constants.finished_buffer, 1, old_value); + is_last_group_dynamic = old_value == dispatch_group_count - 1; + } + + GroupMemoryBarrierWithGroupSync(); + + // Only the last-executing group needs to continue, it will mop up the spine + // prefix sum. + if (!is_last_group_dynamic) { + return; + } + + // Reset for the next pass. 
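+    // Exactly one group survives the early-out above, so every other group
+    // has already performed its InterlockedAdd on the counter and it can
+    // safely be zeroed before the next radix pass reads it.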
+ InterlockedExchange(*constants.finished_buffer, 0); + + let wave_id = thread_id_in_group.x / WaveGetLaneCount(); + + carry = 0; + for (uint i = 0; i < dispatch_group_count; i++) { + // Load values and calculate partial sums. + let value = constants.spine_buffer[i * RADIX_DIGITS + thread_id_in_group.x]; + let sum = WaveActiveSum(value); + let scan = WavePrefixSum(value); + + if (WaveIsFirstLane()) { + sums[wave_id] = sum; + } + + // Even though we read and write from the spine, this can be a group + // barrier only as we read and write from disjoint locations each + // iteration. + // + // Note the write at the end of the loop body has no barrier against the + // spine load at the start of the loop body, across iterations. + GroupMemoryBarrierWithGroupSync(); + + // Load the carry value out of LDS here so we can borrow the barrier + // below. + let carry_in = carry; + + // Scan partials. + if (thread_id_in_group.x < WAVE_COUNT) { + sums[thread_id_in_group.x] = WavePrefixSum(sums[thread_id_in_group.x]); + } + + GroupMemoryBarrierWithGroupSync(); + + // Write out the final prefix sum, combining the carry-in, wave sums, + // and local scan. + constants.spine_buffer[i * RADIX_DIGITS + thread_id_in_group.x] = carry_in + sums[wave_id] + scan; + + // `sums` in LDS now contains partials, so we need to also add the wave + // sum to get an inclusive prefix sum for the next iteration. + if (wave_id == WAVE_COUNT - 1 && WaveIsFirstLane()) { + InterlockedAdd(carry, sums[WAVE_COUNT - 1] + sum); + } + } +} + +struct DownsweepConstants { + uint shift; + uint _pad; + uint *count_buffer; + uint *spine_buffer; + uint *src_buffer; + uint *dst_buffer; +} + +groupshared uint spine[RADIX_DIGITS]; +groupshared uint match_masks[WAVE_COUNT][RADIX_DIGITS]; + +[shader("compute")] +[require(spvGroupNonUniformBallot)] +[numthreads(RADIX_GROUP_SIZE, 1, 1)] +void downsweep(uniform DownsweepConstants constants, uint3 thread_id: SV_DispatchThreadID, uint3 group_id: SV_GroupID, uint3 thread_id_in_group: SV_GroupThreadID) { + let shift = constants.shift; + let count = constants.count_buffer[0]; + + let dispatch_group_count = radix_sort::CalculateGroupCountForItemCount(count); + let is_last_group_in_dispatch = group_id.x == dispatch_group_count - 1; + + let wave_id = thread_id_in_group.x / WaveGetLaneCount(); + + // Gather from spine buffer into LDS. + spine[thread_id_in_group.x] = constants.spine_buffer[thread_id_in_group.x * dispatch_group_count + group_id.x]; + + if (is_last_group_in_dispatch) { + for (uint i = 0; i < RADIX_ITEMS_PER_THREAD; i++) { + // Clear shared memory and load values from src buffer. + for (uint j = 0; j < WAVE_COUNT; j++) { + match_masks[j][thread_id_in_group.x] = 0; + } + + GroupMemoryBarrierWithGroupSync(); + + let index = group_id.x * RADIX_ITEMS_PER_GROUP + i * RADIX_DIGITS + thread_id_in_group.x; + let value = index < count ? constants.src_buffer[index] : 0xffffffff; + let digit = (value >> shift) & RADIX_MASK; + InterlockedOr(match_masks[wave_id][digit], 1 << WaveGetLaneIndex()); + + GroupMemoryBarrierWithGroupSync(); + + uint peer_scan = 0; + for (uint j = 0; j < WAVE_COUNT; j++) { + if (j < wave_id) { + peer_scan += countbits(match_masks[j][digit]); + } + } + peer_scan += countbits(match_masks[wave_id][digit] & WaveLtMask().x); + + if (index < count) { + constants.dst_buffer[spine[digit] + peer_scan] = value; + } + + GroupMemoryBarrierWithGroupSync(); + + // Increment the spine with the counts for the workgroup we just + // wrote out. 
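+            // Each thread owns the digit equal to its group-local index and
+            // folds in this iteration's per-wave counts for that digit, so
+            // the next iteration's scatter lands past the values already
+            // written.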
+ for (uint i = 0; i < WAVE_COUNT; i++) { + InterlockedAdd(spine[thread_id_in_group.x], countbits(match_masks[i][thread_id_in_group.x])); + } + } + } else { + for (uint i = 0; i < RADIX_ITEMS_PER_THREAD; i++) { + // Clear shared memory and load values from src buffer. + for (uint j = 0; j < WAVE_COUNT; j++) { + match_masks[j][thread_id_in_group.x] = 0; + } + + GroupMemoryBarrierWithGroupSync(); + + let index = group_id.x * RADIX_ITEMS_PER_GROUP + i * RADIX_DIGITS + thread_id_in_group.x; + let value = constants.src_buffer[index]; + let digit = (value >> shift) & RADIX_MASK; + InterlockedOr(match_masks[wave_id][digit], 1 << WaveGetLaneIndex()); + + GroupMemoryBarrierWithGroupSync(); + + uint peer_scan = 0; + for (uint j = 0; j < WAVE_COUNT; j++) { + if (j < wave_id) { + peer_scan += countbits(match_masks[j][digit]); + } + } + peer_scan += countbits(match_masks[wave_id][digit] & WaveLtMask().x); + + constants.dst_buffer[spine[digit] + peer_scan] = value; + + if (i != RADIX_ITEMS_PER_THREAD - 1) { + GroupMemoryBarrierWithGroupSync(); + + // Increment the spine with the counts for the workgroup we just + // wrote out. + for (uint i = 0; i < WAVE_COUNT; i++) { + InterlockedAdd(spine[thread_id_in_group.x], countbits(match_masks[i][thread_id_in_group.x])); + } + } + } + } +} diff --git a/title/shark-shaders/shaders/radix_sort_0_upsweep.comp b/title/shark-shaders/shaders/radix_sort_0_upsweep.comp deleted file mode 100644 index 1e9e571..0000000 --- a/title/shark-shaders/shaders/radix_sort_0_upsweep.comp +++ /dev/null @@ -1,135 +0,0 @@ -#version 460 - -#extension GL_GOOGLE_include_directive : require - -#extension GL_EXT_buffer_reference : require -#extension GL_EXT_buffer_reference2 : require -#extension GL_EXT_scalar_block_layout : require -#extension GL_EXT_control_flow_attributes : require - -#extension GL_KHR_shader_subgroup_arithmetic : require -#extension GL_KHR_shader_subgroup_ballot : require -#extension GL_KHR_shader_subgroup_shuffle_relative: enable -#extension GL_KHR_shader_subgroup_vote : require - -#include "radix_sort.h" - -#include "indirect.h" - -layout (constant_id = 0) const uint SUBGROUP_SIZE = 64; - -const uint NUM_SUBGROUPS = RADIX_WGP_SIZE / SUBGROUP_SIZE; - -layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer ValuesRef { - uint values[]; -}; - -layout(buffer_reference, std430, buffer_reference_align = 4) buffer SpineRef { - uint values[]; -}; - -struct RadixSortUpsweepConstants { - uint shift; - uint _pad; - FinishedRef finished_buffer; - CountRef count_buffer; - ValuesRef src_buffer; - SpineRef spine_buffer; -}; - -layout(std430, push_constant) uniform RadixSortUpsweepConstantsBlock { - RadixSortUpsweepConstants constants; -}; - -shared uint histogram[RADIX_DIGITS]; - -shared bool finished; -shared uint carry; -shared uint sums[NUM_SUBGROUPS]; - -layout (local_size_x = RADIX_DIGITS, local_size_y = 1, local_size_z = 1) in; - -void main() { - const uint shift = constants.shift; - const uint count = constants.count_buffer.value; - const uint workgroup_count = (count + (RADIX_ITEMS_PER_WGP - 1)) / RADIX_ITEMS_PER_WGP; - - const bool needs_bounds_check = gl_WorkGroupID.x == workgroup_count - 1; - - // Clear local histogram - histogram[gl_LocalInvocationID.x] = 0; - - barrier(); - - if (needs_bounds_check) { - for (uint i = 0; i < RADIX_ITEMS_PER_INVOCATION; i++) { - const uint global_id = gl_WorkGroupID.x * gl_WorkGroupSize.x * RADIX_ITEMS_PER_INVOCATION + i * RADIX_DIGITS + gl_LocalInvocationID.x; - if (global_id < count) { - const uint value = 
constants.src_buffer.values[global_id]; - const uint digit = (value >> shift) & RADIX_MASK; - atomicAdd(histogram[digit], 1); - } - } - } else { - for (uint i = 0; i < RADIX_ITEMS_PER_INVOCATION; i++) { - const uint global_id = gl_WorkGroupID.x * gl_WorkGroupSize.x * RADIX_ITEMS_PER_INVOCATION + i * RADIX_DIGITS + gl_LocalInvocationID.x; - const uint value = constants.src_buffer.values[global_id]; - const uint digit = (value >> shift) & RADIX_MASK; - atomicAdd(histogram[digit], 1); - } - } - - barrier(); - - // Scatter to the spine, this is a striped layout so we can efficiently - // calculate the prefix sum. Re-calculate how many workgroups we dispatched - // to determine the stride we need to write at. - constants.spine_buffer.values[(gl_LocalInvocationID.x * workgroup_count) + gl_WorkGroupID.x] = histogram[gl_LocalInvocationID.x]; - - barrier(); - - if (gl_SubgroupID == 0 && subgroupElect()) { - finished = atomicAdd(constants.finished_buffer.value, 1) < workgroup_count - 1; - } - - barrier(); - - if (finished) { - return; - } - - // reset for the next pass - constants.finished_buffer.value = 0; - - const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; - - carry = 0; - for (uint i = 0; i < workgroup_count; i++) { - // Load values and calculate partial sums - const uint value = constants.spine_buffer.values[i * RADIX_DIGITS + local_id]; - const uint sum = subgroupAdd(value); - const uint scan = subgroupExclusiveAdd(value); - - if (subgroupElect()) { - sums[gl_SubgroupID] = sum; - } - - barrier(); - - const uint carry_in = carry; - - // Scan partials - if (local_id < NUM_SUBGROUPS) { - sums[local_id] = subgroupExclusiveAdd(sums[local_id]); - } - - barrier(); - - // Write out the final prefix sum, combining the carry-in, subgroup sums, and local scan - constants.spine_buffer.values[i * RADIX_DIGITS + local_id] = carry_in + sums[gl_SubgroupID] + scan; - - if (gl_SubgroupID == gl_NumSubgroups - 1 && subgroupElect()) { - atomicAdd(carry, sums[gl_NumSubgroups - 1] + sum); - } - } -} \ No newline at end of file diff --git a/title/shark-shaders/shaders/radix_sort_1_downsweep.comp b/title/shark-shaders/shaders/radix_sort_1_downsweep.comp deleted file mode 100644 index 23ff1c3..0000000 --- a/title/shark-shaders/shaders/radix_sort_1_downsweep.comp +++ /dev/null @@ -1,137 +0,0 @@ -#version 460 - -#extension GL_GOOGLE_include_directive : require - -#extension GL_EXT_buffer_reference : require -#extension GL_EXT_buffer_reference2 : require -#extension GL_EXT_scalar_block_layout : require -#extension GL_EXT_control_flow_attributes : require - -#extension GL_KHR_shader_subgroup_arithmetic : require -#extension GL_KHR_shader_subgroup_ballot : require -#extension GL_KHR_shader_subgroup_shuffle_relative: enable -#extension GL_KHR_shader_subgroup_vote : require - -#include "radix_sort.h" - -#include "indirect.h" - -layout (constant_id = 0) const uint SUBGROUP_SIZE = 64; - -const uint NUM_SUBGROUPS = RADIX_WGP_SIZE / SUBGROUP_SIZE; - -layout(buffer_reference, std430, buffer_reference_align = 4) readonly buffer SpineRef { - uint values[]; -}; - -layout(buffer_reference, std430, buffer_reference_align = 4) buffer ValuesRef { - uint values[]; -}; - -struct RadixSortDownsweepConstants { - uint shift; - uint _pad; - CountRef count_buffer; - SpineRef spine_buffer; - ValuesRef src_buffer; - ValuesRef dst_buffer; -}; - -layout(std430, push_constant) uniform RadixSortDownsweepConstantsBlock { - RadixSortDownsweepConstants constants; -}; - -shared uint spine[RADIX_DIGITS]; -shared uint 
match_masks[NUM_SUBGROUPS][RADIX_DIGITS]; - -layout (local_size_x = RADIX_WGP_SIZE, local_size_y = 1, local_size_z = 1) in; - -void main() { - const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; - - const uint shift = constants.shift; - const uint count = constants.count_buffer.value; - const uint wgp_count = (count + (RADIX_ITEMS_PER_WGP - 1)) / RADIX_ITEMS_PER_WGP; - - // Gather from spine. - spine[local_id] = constants.spine_buffer.values[(local_id * wgp_count) + gl_WorkGroupID.x]; - - const bool needs_bounds_check = gl_WorkGroupID.x == wgp_count - 1; - - if (needs_bounds_check) { - for (uint i = 0; i < RADIX_ITEMS_PER_INVOCATION; i++) { - const uint base = gl_WorkGroupID.x * RADIX_ITEMS_PER_WGP + i * RADIX_DIGITS; - - if (base >= count) - break; - - // Clear shared memory and load values from src buffer. - for (uint j = 0; j < NUM_SUBGROUPS; j++) { - match_masks[j][local_id] = 0; - } - - barrier(); - - const uint global_id = base + local_id; - const uint value = global_id < count ? constants.src_buffer.values[global_id] : 0xffffffff; - const uint digit = (value >> shift) & RADIX_MASK; - atomicOr(match_masks[gl_SubgroupID][digit], 1 << gl_SubgroupInvocationID); - - barrier(); - - uint peer_scan = 0; - for (uint i = 0; i < gl_NumSubgroups; i++) { - if (i < gl_SubgroupID) { - peer_scan += bitCount(match_masks[i][digit]); - } - } - peer_scan += bitCount(match_masks[gl_SubgroupID][digit] & gl_SubgroupLtMask.x); - - if (global_id < count) { - constants.dst_buffer.values[spine[digit] + peer_scan] = value; - } - - barrier(); - - // Increment the spine with the counts for the workgroup we just wrote out. - for (uint i = 0; i < NUM_SUBGROUPS; i++) { - atomicAdd(spine[local_id], bitCount(match_masks[i][local_id])); - } - } - } else { - for (uint i = 0; i < RADIX_ITEMS_PER_INVOCATION; i++) { - // Clear shared memory and load values from src buffer. - for (uint j = 0; j < NUM_SUBGROUPS; j++) { - match_masks[j][local_id] = 0; - } - - barrier(); - - const uint global_id = gl_WorkGroupID.x * RADIX_ITEMS_PER_WGP + i * RADIX_DIGITS + local_id; - const uint value = constants.src_buffer.values[global_id]; - const uint digit = (value >> shift) & RADIX_MASK; - atomicOr(match_masks[gl_SubgroupID][digit], 1 << gl_SubgroupInvocationID); - - barrier(); - - uint peer_scan = 0; - for (uint i = 0; i < gl_NumSubgroups; i++) { - if (i < gl_SubgroupID) { - peer_scan += bitCount(match_masks[i][digit]); - } - } - peer_scan += bitCount(match_masks[gl_SubgroupID][digit] & gl_SubgroupLtMask.x); - - constants.dst_buffer.values[spine[digit] + peer_scan] = value; - - if (i != RADIX_ITEMS_PER_INVOCATION - 1) { - barrier(); - - // Increment the spine with the counts for the workgroup we just wrote out. 
-            for (uint i = 0; i < NUM_SUBGROUPS; i++) {
-                atomicAdd(spine[local_id], bitCount(match_masks[i][local_id]));
-            }
-        }
-    }
-}
diff --git a/title/shark-shaders/src/pipelines.rs b/title/shark-shaders/src/pipelines.rs
index 19abe02..6c3fef1 100644
--- a/title/shark-shaders/src/pipelines.rs
+++ b/title/shark-shaders/src/pipelines.rs
@@ -464,16 +464,18 @@ impl Pipelines {
             std::mem::size_of::(),
         );

-        let radix_sort_0_upsweep_pipeline = create_compute_pipeline(
-            crate::RADIX_SORT_0_UPSWEEP_COMP_SPV,
-            "radix_sort_upsweep",
+        let radix_sort_0_upsweep_pipeline = create_compute_pipeline_with_entry(
+            crate::RADIX_SORT_SPV,
+            c"upsweep",
+            "radix sort upsweep",
             32,
             true,
             std::mem::size_of::<RadixSortUpsweepConstants>(),
         );

-        let radix_sort_1_downsweep_pipeline = create_compute_pipeline(
-            crate::RADIX_SORT_1_DOWNSWEEP_COMP_SPV,
+        let radix_sort_1_downsweep_pipeline = create_compute_pipeline_with_entry(
+            crate::RADIX_SORT_SPV,
+            c"downsweep",
             "radix_sort_downsweep",
             32,
             true,