#extension GL_EXT_scalar_block_layout : require
#include "compute_bindings.h"
+#include "draw_2d.h"
float srgb_oetf(float a) {
return (.0031308f >= a) ? 12.92f * a : 1.055f * pow(a, .4166666666666667f) - .055f;
return textureLod(sampler3D(tony_mc_mapface_lut, bilinear_sampler), uv, 0.0).rgb;
}
+struct CompositeConstants {
+ uvec2 tile_resolution;
+ TileRef tile_buffer;
+};
+
+layout(std430, push_constant) uniform CompositeConstantsBlock {
+ CompositeConstants constants;
+};
+
layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
void main() {
- // const uvec2 tile_coord = gl_WorkGroupID.xy / 4;
- // const uint tile_index = tile_coord.y * uniforms.tile_resolution.x + tile_coord.x;
-
- // TilesRead tiles_read = TilesRead(uniforms.tiles);
- // const uint lo = tiles_read.values[tile_base + TILE_BITMAP_RANGE_LO_OFFSET];
- // const uint hi = tiles_read.values[tile_base + TILE_BITMAP_RANGE_HI_OFFSET];
+ const uvec2 tile_coord = gl_WorkGroupID.xy / (TILE_SIZE / gl_WorkGroupSize.xy);
+ const uint tile_index = tile_coord.y * constants.tile_resolution.x + tile_coord.x;
+
+ const uint lo = constants.tile_buffer.values[tile_index].min_index;
+ const uint hi = constants.tile_buffer.values[tile_index].max_index;
// Display transform
const vec3 stimulus = imageLoad(color_layer, ivec2(gl_GlobalInvocationID.xy)).rgb;
vec3 composited = srgb_oetf(transformed);
// UI Composite
- // if (lo <= hi) {
- // const vec4 ui = imageLoad(ui_layer_read, ivec2(gl_GlobalInvocationID.xy)).rgba;
- // composited = ui.rgb + (composited * (1.0 - ui.a));
- // }
+ if (lo != hi) {
+ const vec4 ui = imageLoad(ui_layer_read, ivec2(gl_GlobalInvocationID.xy)).rgba;
+ composited = ui.rgb + (composited * (1.0 - ui.a));
+ }
imageStore(composited_output, ivec2(gl_GlobalInvocationID.xy), vec4(composited, 1.0));
}
const uint DRAW_2D_CMD_RECT = 0;
const uint DRAW_2D_CMD_GLYPH = 1;
+struct Tile {
+ uint min_index;
+ uint max_index;
+};
+
struct Glyph {
ivec2 atlas_min;
ivec2 atlas_max;
};
struct Draw2dCmdRect {
- uint border_width;
+ float border_width;
vec2 position;
vec2 half_extent;
uint background_color;
Draw2dCmdRect decode_rect(Draw2dCmd cmd) {
return Draw2dCmdRect(
- cmd.words[0],
+ uintBitsToFloat(cmd.words[0]),
vec2(uintBitsToFloat(cmd.words[1]), uintBitsToFloat(cmd.words[2])),
vec2(uintBitsToFloat(cmd.words[3]), uintBitsToFloat(cmd.words[4])),
cmd.words[5],
uint values[];
};
+layout(buffer_reference, std430, buffer_reference_align = 4) buffer FineRef {
+ uint values[];
+};
+
+layout(buffer_reference, std430, buffer_reference_align = 4) buffer TileRef {
+ Tile values[];
+};
+
#endif
\ No newline at end of file
void main() {
const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
- const uint draw_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + local_id;
+ const uint draw_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + local_id;
// Bounds for this command, any tiles which intersect this AABB will be written.
vec2 cmd_min = vec2(99999.9);
const bool cmd_absolute_max = cmd_max_tile == cmds_max_tile;
const bool use_individual_bounds = !any(notEqual(subgroupBallot(cmd_absolute_min) & subgroupBallot(cmd_absolute_max), uvec4(0)));
- if (false && use_individual_bounds) {
+ if (use_individual_bounds) {
+ uint count = 0;
+ for (uint y = cmds_min_tile.y; y <= cmds_max_tile.y; y++) {
+ for (uint x = cmds_min_tile.x; x <= cmds_max_tile.x; x++) {
+ const vec2 tile_min = uvec2(x, y) * TILE_SIZE;
+ const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution);
+
+ const bool intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max)));
+ const uvec4 ballot = subgroupBallot(intersects);
+
+ if (subgroupElect()) {
+ count += uint(ballot.x != 0);
+ }
+ }
+ }
+
+ uint offset;
+ if (subgroupElect()) {
+ offset = atomicAdd(constants.coarse_buffer.values[0], count) + 1;
+ }
+ offset = subgroupBroadcastFirst(offset);
+
+ if (offset >= constants.coarse_buffer_len) {
+ return;
+ }
+
+ for (uint y = cmds_min_tile.y; y <= cmds_max_tile.y; y++) {
+ for (uint x = cmds_min_tile.x; x <= cmds_max_tile.x; x++) {
+ const vec2 tile_min = uvec2(x, y) * TILE_SIZE;
+ const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution);
+
+ const bool intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max)));
+ const uvec4 ballot = subgroupBallot(intersects);
+
+ if (subgroupElect() && ballot.x != 0 && offset < constants.coarse_buffer_len) {
+ const uint packed = ((y & 0xff) << 24) | ((x & 0xff) << 16) | (gl_WorkGroupID.x & 0xffff);
+ constants.coarse_buffer.values[offset++] = packed;
+ }
+ }
+ }
} else {
const uvec2 tile_count = cmds_max_tile - cmds_min_tile + uvec2(1);
const uint count = tile_count.x * tile_count.y;
offset += tile_count.x;
}
}
-
- // if (gl_SubgroupSize == 32 && fullscreen_ballot.x != 0) {
- // uint offset;
- // if (subgroupElect()) {
- // const uint count = constants.tile_resolution.x * constants.tile_resolution.y;
- // offset = atomicAdd(constants.coarse_buffer.values[0], count) + 1;
- // }
- // offset = subgroupBroadcastFirst(offset);
-
- // if (offset >= constants.coarse_buffer_len) {
- // return;
- // }
-
- // const uint word_index = gl_WorkGroupID.x;
-
- // for (uint y = 0; y < constants.tile_resolution.y; y++) {
- // for (uint x = 0; x < constants.tile_resolution.x; x++) {
- // const uint tile_index = y * constants.tile_resolution.x + x;
- // const uint packed = (tile_index << 16) | word_index;
- // if (subgroupElect() && offset + tile_index < constants.coarse_buffer_len) {
- // constants.coarse_buffer.values[offset + tile_index] = packed;
- // }
- // }
- // }
-
- // return;
- // }
-
- // uint count = 0;
-
- // for (uint y = cmds_min_tile.y; y <= cmds_max_tile.y; y++) {
- // for (uint x = cmds_min_tile.x; x <= cmds_max_tile.x; x++) {
- // const vec2 tile_min = uvec2(x, y) * TILE_SIZE;
- // const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution);
-
- // const bool intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max)));
- // const uvec4 ballot = subgroupBallot(intersects);
-
- // if (subgroupElect()) {
- // count += uint(ballot.x != 0);
- // }
- // }
- // }
-
- // if (count == 0) {
- // return;
- // }
-
- // uint offset;
- // if (subgroupElect()) {
- // offset = atomicAdd(constants.coarse_buffer.values[0], count) + 1;
- // }
- // offset = subgroupBroadcastFirst(offset);
-
- // if (offset >= constants.coarse_buffer_len) {
- // return;
- // }
-
- // for (uint y = cmds_min_tile.y; y <= cmds_max_tile.y; y++) {
- // for (uint x = cmds_min_tile.x; x <= cmds_max_tile.x; x++) {
- // const vec2 tile_min = uvec2(x, y) * TILE_SIZE;
- // const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution);
- // const uint tile_index = y * constants.tile_resolution.x + x;
-
- // const bool intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max)));
- // const uvec4 ballot = subgroupBallot(intersects);
-
- // if (subgroupElect() && ballot.x != 0 && offset < constants.coarse_buffer_len) {
- // const uint word_index = gl_WorkGroupID.x;
- // const uint packed = (tile_index << 16) | word_index;
- // constants.coarse_buffer.values[offset++] = packed;
- // }
- // }
- // }
}
#extension GL_EXT_buffer_reference2 : require
#extension GL_EXT_scalar_block_layout : require
#extension GL_EXT_control_flow_attributes : require
+#extension GL_EXT_control_flow_attributes2 : require
#extension GL_KHR_shader_subgroup_arithmetic : require
#extension GL_KHR_shader_subgroup_ballot : require
#extension GL_KHR_shader_subgroup_vote : require
-#include "compute_bindings.h"
+#include "draw_2d.h"
-const uint SUBGROUP_SIZE = 64;
+struct Draw2dResolveConstants {
+ uvec2 screen_resolution;
+ uvec2 tile_resolution;
-// TODO: Spec constant support for different subgroup sizes.
-layout (local_size_x = SUBGROUP_SIZE, local_size_y = 1, local_size_z = 1) in;
+ uint draw_buffer_len;
+ uint _pad;
+
+ Draw2dCommandRef draw_buffer;
+ GlyphRef glyph_buffer;
+ CoarseRef coarse_buffer;
+ FineRef fine_buffer;
+ TileRef tile_buffer;
+};
+
+layout(std430, push_constant) uniform Draw2dResolveConstantsBlock {
+ Draw2dResolveConstants constants;
+};
+
+layout (local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in;
void main() {
+ const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
+ const uint x = gl_GlobalInvocationID.y;
+ const uint y = gl_GlobalInvocationID.z;
+ const uint tile_offset = (constants.tile_resolution.x * y + x);
+ const uint search = ((y & 0xff) << 24) | ((x & 0xff) << 16);
+ const uint count = constants.coarse_buffer.values[0];
+
+ if (count == 0) {
+ constants.tile_buffer.values[tile_offset].min_index = 0;
+ constants.tile_buffer.values[tile_offset].max_index = 0;
+ return;
+ }
+
+ // Binary search for the upper bound of the tile.
+ uint base = 0;
+ {
+ uint n = count;
+ uint mid;
+ uint max_iters = 32;
+ while (max_iters --> 0 && (mid = n / 2) > 0) {
+ const uint value = constants.coarse_buffer.values[1 + base + mid] & 0xffff0000;
+ base = value > search ? base : base + mid;
+ n -= mid;
+ }
+ }
+
+ const vec2 tile_min = uvec2(x, y) * TILE_SIZE;
+ const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution);
+
+ bool hit_opaque = false;
+ uint lo = base + 1;
+ const uint hi = base + 1;
+ for(; !hit_opaque && lo > 0; lo--) {
+ const uint i = lo;
+ const uint packed = constants.coarse_buffer.values[i];
+
+ if ((packed & 0xffff0000) != (search & 0xffff0000)) {
+ break;
+ }
+
+ const uint draw_offset = packed & 0xffff;
+ const uint draw_index = draw_offset * gl_WorkGroupSize.x + local_id;
+
+ bool intersects = false;
+ bool opaque_tile = false;
+
+ if (draw_index < constants.draw_buffer_len) {
+ vec2 cmd_min = vec2(99999.9);
+ vec2 cmd_max = vec2(-99999.9);
+
+ const Draw2dCmd cmd = constants.draw_buffer.values[draw_index];
+ for (;;) {
+ const uint scalar_type = subgroupBroadcastFirst(cmd.type);
+ [[branch]]
+ if (scalar_type == cmd.type) {
+ switch (scalar_type) {
+ case DRAW_2D_CMD_RECT:
+ const Draw2dCmdRect cmd_rect = decode_rect(cmd);
+ cmd_min = cmd_rect.position - cmd_rect.half_extent - cmd_rect.border_width;
+ cmd_max = cmd_rect.position + cmd_rect.half_extent + cmd_rect.border_width;
+ opaque_tile = all(greaterThanEqual(tile_min, cmd_min)) && all(lessThanEqual(tile_max, cmd_max));
+ opaque_tile = opaque_tile && ((cmd_rect.background_color & 0xff000000) == 0xff000000);
+ break;
+ case DRAW_2D_CMD_GLYPH:
+ const Draw2dCmdGlyph cmd_glyph = decode_glyph(cmd);
+ const Glyph glyph = constants.glyph_buffer.values[cmd_glyph.index];
+ cmd_min = cmd_glyph.position + glyph.offset_min;
+ cmd_max = cmd_glyph.position + glyph.offset_max;
+ break;
+ }
+ break;
+ }
+ }
+
+ intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max)));
+ }
+
+ uint intersects_mask = subgroupBallot(intersects).x;
+
+ if (subgroupAny(opaque_tile)) {
+ const uvec4 opaque_tile_ballot = subgroupBallot(opaque_tile);
+ const uint first_opaque_tile = subgroupBallotFindMSB(opaque_tile_ballot);
+ const uint opaque_mask = ~((1 << first_opaque_tile) - 1);
+ intersects_mask &= opaque_mask;
+ constants.fine_buffer.values[i] = intersects_mask;
+ hit_opaque = true;
+ } else {
+ constants.fine_buffer.values[i] = intersects_mask;
+ }
+ }
+
+ constants.tile_buffer.values[tile_offset].min_index = lo + 1;
+ constants.tile_buffer.values[tile_offset].max_index = hi + 1;
}
#extension GL_KHR_shader_subgroup_ballot : require
#include "compute_bindings.h"
+#include "draw_2d.h"
-layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+struct Draw2dRasterizeConstants {
+ uvec2 screen_resolution;
+ uvec2 tile_resolution;
+ uvec2 atlas_resolution;
+
+ Draw2dCommandRef draw_buffer;
+ GlyphRef glyph_buffer;
+ CoarseRef coarse_buffer;
+ FineRef fine_buffer;
+ TileRef tile_buffer;
+};
+
+layout(std430, push_constant) uniform Draw2dRasterizeConstantsBlock {
+ Draw2dRasterizeConstants constants;
+};
/// x = (((index >> 2) & 0x0007) & 0xFFFE) | index & 0x0001
/// y = ((index >> 1) & 0x0003) | (((index >> 3) & 0x0007) & 0xFFFC)
#define DEBUG_SHOW_TILES 0
-#if DEBUG_SHOW_TILES != 0
-
-vec3 plasma_quintic(float x)
+vec3 plasma_quintic(in float x)
{
x = clamp(x, 0.0, 1.0);
vec4 x1 = vec4(1.0, x, x * x, x * x * x); // 1 x x2 x3
);
}
-#endif
+layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
void main() {
- // const uvec2 tile_coord = gl_WorkGroupID.xy / (TILE_SIZE / gl_WorkGroupSize.xy);
- // const uint tile_index = tile_coord.y * uniforms.tile_resolution.x + tile_coord.x;
-
- // TilesRead tiles_read = TilesRead(uniforms.tiles);
-
- // const uint lo = tiles_read.values[tile_base + TILE_BITMAP_RANGE_LO_OFFSET];
- // const uint hi = tiles_read.values[tile_base + TILE_BITMAP_RANGE_HI_OFFSET];
-
- // if (hi < lo) {
- // return;
- // }
-
-// #if DEBUG_SHOW_TILES == 1
-
-// uint count = 0;
-// // For each tile, iterate over all words in the L1 bitmap.
-// for (uint index_l1 = lo; index_l1 <= hi; index_l1++) {
-// // For each word, iterate all set bits.
-// uint bitmap_l1 = tiles_read.values[tile_base + TILE_BITMAP_L1_OFFSET + index_l1];
-
-// while (bitmap_l1 != 0) {
-// const uint i = findLSB(bitmap_l1);
-// bitmap_l1 ^= bitmap_l1 & -bitmap_l1;
-
-// // For each set bit in the L1 bitmap, iterate the set bits in the
-// // corresponding L0 bitmap.
-// const uint index_l0 = index_l1 * 32 + i;
-// uint bitmap_l0 = tiles_read.values[tile_base + TILE_BITMAP_L0_OFFSET + index_l0];
-
-// count += bitCount(bitmap_l0);
-// }
-// }
-
-// const vec3 color = plasma_quintic(float(count) / 100.0);
-// imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
-
-// #elif DEBUG_SHOW_TILES == 2
-
-// uint count = hi - lo;
-// const vec3 color = plasma_quintic(float(count) / 100.0);
-// imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
-
-// #else
-
-// const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5);
-
-// vec4 accum = vec4(0.0);
-
-// // For each tile, iterate over all words in the L1 bitmap.
-// for (uint index_l1 = lo; index_l1 <= hi; index_l1++) {
-// // For each word, iterate all set bits.
-// uint bitmap_l1 = tiles_read.values[tile_base + TILE_BITMAP_L1_OFFSET + index_l1];
-
-// while (bitmap_l1 != 0) {
-// const uint i = findLSB(bitmap_l1);
-// bitmap_l1 ^= bitmap_l1 & -bitmap_l1;
-
-// // For each set bit in the L1 bitmap, iterate the set bits in the
-// // corresponding L0 bitmap.
-// const uint index_l0 = index_l1 * 32 + i;
-// uint bitmap_l0 = tiles_read.values[tile_base + TILE_BITMAP_L0_OFFSET + index_l0];
-// while (bitmap_l0 != 0) {
-// const uint j = findLSB(bitmap_l0);
-// bitmap_l0 ^= bitmap_l0 & -bitmap_l0;
-
-// // Set bits in the L0 bitmap indicate binned primitives for this tile.
-// const uint primitive_index = index_l0 * 32 + j;
-// const Primitive2d primitive = uniforms.primitives.values[primitive_index];
-// const uint type = primitive.type;
-
-// vec4 primitive_color = vec4(0.0);
-
-// switch (type) {
-// case PRIMITIVE_TYPE_RECT: {
-// // const Rect rect = uniforms.rects.values[offset];
-// // const vec2 rect_min = primitive_instance.position - rect.half_extent - rect.border_width;
-// // const vec2 rect_max = primitive_instance.position + rect.half_extent + rect.border_width;
-// if (all(greaterThanEqual(sample_center, rect_min)) && all(lessThanEqual(sample_center, rect_max))) {
-// primitive_color = unpackUnorm4x8(primitive_instance.color).bgra;
-// }
-// break;
-// }
-// case PRIMITIVE_TYPE_GLYPH: {
-// const Glyph glyph = uniforms.glyphs.values[offset];
-// const vec2 glyph_min = primitive_instance.position + glyph.offset_min;
-// const vec2 glyph_max = primitive_instance.position + glyph.offset_max;
-// if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) {
-// const vec2 glyph_size = glyph.offset_max - glyph.offset_min;
-// const vec2 uv = mix(glyph.atlas_min, glyph.atlas_max, (sample_center - glyph_min) / glyph_size) / uniforms.atlas_resolution;
-// const vec4 color = unpackUnorm4x8(primitive_instance.color).bgra;
-// const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r * color.a;
-// primitive_color = color * coverage;
-// }
-// break;
-// }
-// }
-
-// // does it blend?
-// accum.rgba = primitive_color.rgba + accum.rgba * (1.0 - primitive_color.a);
-// }
-// }
-// }
-
-// imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), accum);
-
-// #endif
+ const uvec2 tile_coord = gl_WorkGroupID.xy / (TILE_SIZE / gl_WorkGroupSize.xy);
+ const uint tile_index = tile_coord.y * constants.tile_resolution.x + tile_coord.x;
+
+ const uint lo = constants.tile_buffer.values[tile_index].min_index;
+ const uint hi = constants.tile_buffer.values[tile_index].max_index;
+
+ if (lo == hi) {
+ return;
+ }
+
+#if DEBUG_SHOW_TILES == 1
+
+ uint count = hi - lo;
+ const vec3 color = plasma_quintic(float(count) / 50.0);
+ imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
+
+#elif DEBUG_SHOW_TILES == 2
+
+ uint count = 0;
+ for (uint i = lo; i < hi; i++) {
+ count += bitCount(constants.fine_buffer.values[i]);
+ }
+ const vec3 color = plasma_quintic(float(count) / 600.0);
+ imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0));
+
+#else
+
+ const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5);
+
+ vec4 accum = vec4(0.0);
+
+ for (uint i = lo; i < hi; i++) {
+ uint bitmap = constants.fine_buffer.values[i];
+ while (bitmap != 0) {
+ const uint index = findLSB(bitmap);
+ bitmap ^= bitmap & -bitmap;
+
+ const uint base_index = (constants.coarse_buffer.values[i] & 0xffff) * 32;
+ const Draw2dCmd cmd = constants.draw_buffer.values[base_index + index];
+
+ vec4 primitive_color = vec4(0.0);
+ switch (cmd.type) {
+ case DRAW_2D_CMD_RECT:
+ const Draw2dCmdRect cmd_rect = decode_rect(cmd);
+ const vec2 rect_min = cmd_rect.position - cmd_rect.half_extent - cmd_rect.border_width;
+ const vec2 rect_max = cmd_rect.position + cmd_rect.half_extent + cmd_rect.border_width;
+ if (all(greaterThanEqual(sample_center, rect_min)) && all(lessThanEqual(sample_center, rect_max))) {
+ primitive_color = unpackUnorm4x8(cmd_rect.background_color).bgra;
+ }
+ break;
+ case DRAW_2D_CMD_GLYPH:
+ const Draw2dCmdGlyph cmd_glyph = decode_glyph(cmd);
+ const Glyph glyph = constants.glyph_buffer.values[cmd_glyph.index];
+ const vec2 glyph_min = cmd_glyph.position + glyph.offset_min;
+ const vec2 glyph_max = cmd_glyph.position + glyph.offset_max;
+ if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) {
+ const vec2 glyph_size = glyph.offset_max - glyph.offset_min;
+ const vec2 uv = mix(glyph.atlas_min, glyph.atlas_max, (sample_center - glyph_min) / glyph_size) / constants.atlas_resolution;
+ const vec4 color = unpackUnorm4x8(cmd_glyph.color).bgra;
+ const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r * color.a;
+ primitive_color = color * coverage;
+ }
+ break;
+ }
+
+ // does it blend?
+ accum.rgba = primitive_color.rgba + accum.rgba * (1.0 - primitive_color.a);
+ }
+ }
+
+ imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), accum);
+
+#endif
+
}
#extension GL_KHR_shader_subgroup_shuffle_relative: enable
#extension GL_KHR_shader_subgroup_vote : require
-//#extension GL_EXT_debug_printf : enable
-
#include "compute_bindings.h"
#include "radix_sort.h"
pub coarse_buffer_address: BufferAddress<'a>,
}
+#[repr(C)]
+pub struct Draw2dResolveConstants<'a> {
+ pub screen_resolution_x: u32,
+ pub screen_resolution_y: u32,
+ pub tile_resolution_x: u32,
+ pub tile_resolution_y: u32,
+
+ pub draw_buffer_len: u32,
+ pub _pad: u32,
+
+ pub draw_buffer_address: BufferAddress<'a>,
+ pub glyph_buffer_address: BufferAddress<'a>,
+ pub coarse_buffer_address: BufferAddress<'a>,
+ pub fine_buffer_address: BufferAddress<'a>,
+ pub tile_buffer_address: BufferAddress<'a>,
+}
+
+#[repr(C)]
+pub struct Draw2dRasterizeConstants<'a> {
+ pub screen_resolution_x: u32,
+ pub screen_resolution_y: u32,
+ pub tile_resolution_x: u32,
+ pub tile_resolution_y: u32,
+ pub atlas_resolution_x: u32,
+ pub atlas_resolution_y: u32,
+
+ pub draw_buffer_address: BufferAddress<'a>,
+ pub glyph_buffer_address: BufferAddress<'a>,
+ pub coarse_buffer_address: BufferAddress<'a>,
+ pub fine_buffer_address: BufferAddress<'a>,
+ pub tile_buffer_address: BufferAddress<'a>,
+}
+
+#[repr(C)]
+pub struct CompositeConstants<'a> {
+ pub tile_resolution_x: u32,
+ pub tile_resolution_y: u32,
+ pub tile_buffer_address: BufferAddress<'a>,
+}
+
#[repr(C)]
pub struct RadixSortUpsweepConstants<'a> {
pub shift: u32,
gpu.debug_name_pipeline(basic_pipeline, "basic");
- let create_compute_pipeline = |code, name, workgroup_size, push_constant_size| {
- let push_constant_range = PushConstantRange {
- stage_flags: ShaderStageFlags::COMPUTE,
- offset: 0,
- size: push_constant_size as u32,
- };
-
- let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
- shader: ShaderDesc {
- code,
- require_full_subgroups: workgroup_size != 0,
- required_subgroup_size: if workgroup_size != 0 {
- Some(workgroup_size)
- } else {
- None
+ let create_compute_pipeline =
+ |code, name, workgroup_size, require_full_subgroups, push_constant_size| {
+ let push_constant_range = PushConstantRange {
+ stage_flags: ShaderStageFlags::COMPUTE,
+ offset: 0,
+ size: push_constant_size as u32,
+ };
+
+ let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc {
+ shader: ShaderDesc {
+ code,
+ require_full_subgroups,
+ required_subgroup_size: if workgroup_size != 0 {
+ Some(workgroup_size)
+ } else {
+ None
+ },
+ spec_constants: &[SpecConstant::U32 {
+ id: 0,
+ value: workgroup_size,
+ }],
+ ..default()
},
- spec_constants: &[SpecConstant::U32 {
- id: 0,
- value: workgroup_size,
- }],
- ..default()
- },
- layout: PipelineLayout {
- bind_group_layouts: &[compute_bind_group_layout],
- // Validation cries about push constant ranges with zero size.
- push_constant_ranges: if push_constant_range.size != 0 {
- std::slice::from_ref(&push_constant_range)
- } else {
- &[]
+ layout: PipelineLayout {
+ bind_group_layouts: &[compute_bind_group_layout],
+ // Validation cries about push constant ranges with zero size.
+ push_constant_ranges: if push_constant_range.size != 0 {
+ std::slice::from_ref(&push_constant_range)
+ } else {
+ &[]
+ },
},
- },
- });
+ });
- gpu.debug_name_pipeline(pipeline, name);
+ gpu.debug_name_pipeline(pipeline, name);
- pipeline
- };
+ pipeline
+ };
let draw_2d_bin_0_clear_pipeline = create_compute_pipeline(
crate::DRAW_2D_BIN_0_CLEAR_COMP_SPV,
"draw2d_bin_clear",
0,
+ false,
std::mem::size_of::<Draw2dClearConstants>(),
);
crate::DRAW_2D_BIN_1_SCATTER_COMP_SPV,
"draw2d_bin_scatter",
draw_2d_bin_1_scatter_pipeline_workgroup_size,
+ true,
std::mem::size_of::<Draw2dScatterConstants>(),
);
crate::DRAW_2D_BIN_2_SORT_COMP_SPV,
"draw2d_bin_sort",
0,
+ false,
std::mem::size_of::<Draw2dSortConstants>(),
);
let draw_2d_bin_3_resolve_pipeline = create_compute_pipeline(
crate::DRAW_2D_BIN_3_RESOLVE_COMP_SPV,
"draw2d_bin_resolve",
- 0,
- 0,
+ 32,
+ true,
+ std::mem::size_of::<Draw2dResolveConstants>(),
);
- let draw_2d_rasterize_pipeline =
- create_compute_pipeline(crate::DRAW_2D_RASTERIZE_COMP_SPV, "draw2d_rasterize", 0, 0);
+ let draw_2d_rasterize_pipeline = create_compute_pipeline(
+ crate::DRAW_2D_RASTERIZE_COMP_SPV,
+ "draw2d_rasterize",
+ 0,
+ false,
+ std::mem::size_of::<Draw2dRasterizeConstants>(),
+ );
let radix_sort_0_upsweep_pipeline = create_compute_pipeline(
crate::RADIX_SORT_0_UPSWEEP_COMP_SPV,
"radix_sort_upsweep",
32,
+ true,
std::mem::size_of::<RadixSortUpsweepConstants>(),
);
crate::RADIX_SORT_1_SPINE_COMP_SPV,
"radix_sort_spine",
32,
+ true,
std::mem::size_of::<RadixSortSpineConstants>(),
);
crate::RADIX_SORT_2_DOWNSWEEP_COMP_SPV,
"radix_sort_downsweep",
32,
+ true,
std::mem::size_of::<RadixSortDownsweepConstants>(),
);
- let composite_pipeline =
- create_compute_pipeline(crate::COMPOSITE_COMP_SPV, "composite", 0, 0);
+ let composite_pipeline = create_compute_pipeline(
+ crate::COMPOSITE_COMP_SPV,
+ "composite",
+ 0,
+ false,
+ std::mem::size_of::<CompositeConstants>(),
+ );
Self {
_samplers: samplers,
use narcissus_core::dds;
use shark_shaders::pipelines::{
- calculate_spine_size, BasicConstants, ComputeBinds, Draw2dClearConstants, Draw2dCmd,
- Draw2dScatterConstants, Draw2dSortConstants, GraphicsBinds, Pipelines,
- RadixSortDownsweepConstants, RadixSortSpineConstants, RadixSortUpsweepConstants,
- DRAW_2D_TILE_SIZE,
+ calculate_spine_size, BasicConstants, CompositeConstants, ComputeBinds, Draw2dClearConstants,
+ Draw2dCmd, Draw2dRasterizeConstants, Draw2dResolveConstants, Draw2dScatterConstants,
+ Draw2dSortConstants, GraphicsBinds, Pipelines, RadixSortDownsweepConstants,
+ RadixSortSpineConstants, RadixSortUpsweepConstants, DRAW_2D_TILE_SIZE,
};
use renderdoc_sys as rdoc;
}
}
- fn rect(&mut self, x: f32, y: f32, width: f32, height: f32) {
+ fn rect(&mut self, x: f32, y: f32, width: f32, height: f32, background_color: u32) {
let half_extent_x = width / 2.0;
let half_extent_y = height / 2.0;
let center_x = x + half_extent_x;
center_y,
half_extent_x,
half_extent_y,
- 1.0,
- 0x4400ff00,
+ 5.0,
+ background_color,
0xffff0000,
))
}
x += advance * scale;
self.draw_cmds
- .push(Draw2dCmd::glyph(touched_glyph_index, 0x880000ff, x, y));
+ .push(Draw2dCmd::glyph(touched_glyph_index, 0xff0000ff, x, y));
x += advance_width * scale;
}
],
);
+ let tile_buffer = gpu.request_transient_buffer(
+ frame,
+ thread_token,
+ BufferUsageFlags::STORAGE,
+ self.tile_resolution_x as usize
+ * self.tile_resolution_y as usize
+ * std::mem::size_of::<u32>()
+ * 2,
+ );
+ let tile_buffer_address = gpu.get_buffer_address(tile_buffer.to_arg());
+
// Render UI
{
gpu.cmd_begin_debug_marker(
gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.draw_2d_bin_3_resolve_pipeline);
gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
+ gpu.cmd_push_constants(
+ cmd_encoder,
+ ShaderStageFlags::COMPUTE,
+ 0,
+ &Draw2dResolveConstants {
+ screen_resolution_x: self.width,
+ screen_resolution_y: self.height,
+ tile_resolution_x: self.tile_resolution_x,
+ tile_resolution_y: self.tile_resolution_y,
+ draw_buffer_len,
+ _pad: 0,
+ draw_buffer_address,
+ glyph_buffer_address,
+ coarse_buffer_address,
+ fine_buffer_address: tmp_buffer_address,
+ tile_buffer_address,
+ },
+ );
gpu.cmd_dispatch(
cmd_encoder,
+ 1,
self.tile_resolution_x,
self.tile_resolution_y,
- 1,
);
gpu.cmd_barrier(
gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.draw_2d_rasterize_pipeline);
gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
+ gpu.cmd_push_constants(
+ cmd_encoder,
+ ShaderStageFlags::COMPUTE,
+ 0,
+ &Draw2dRasterizeConstants {
+ screen_resolution_x: self.width,
+ screen_resolution_y: self.height,
+ tile_resolution_x: self.tile_resolution_x,
+ tile_resolution_y: self.tile_resolution_y,
+ atlas_resolution_x: atlas_width,
+ atlas_resolution_y: atlas_height,
+ draw_buffer_address,
+ glyph_buffer_address,
+ coarse_buffer_address,
+ fine_buffer_address: tmp_buffer_address,
+ tile_buffer_address,
+ },
+ );
gpu.cmd_dispatch(cmd_encoder, (self.width + 7) / 8, (self.height + 7) / 8, 1);
gpu.cmd_end_debug_marker(cmd_encoder);
{
gpu.cmd_begin_debug_marker(
cmd_encoder,
- "display transform",
+ "composite",
microshades::GREEN_RGBA_F32[3],
);
gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.composite_pipeline);
gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group);
-
+ gpu.cmd_push_constants(
+ cmd_encoder,
+ ShaderStageFlags::COMPUTE,
+ 0,
+ &CompositeConstants {
+ tile_resolution_x: self.tile_resolution_x,
+ tile_resolution_y: self.tile_resolution_y,
+ tile_buffer_address,
+ },
+ );
gpu.cmd_dispatch(cmd_encoder, (self.width + 7) / 8, (self.height + 7) / 8, 1);
gpu.cmd_end_debug_marker(cmd_encoder);
let base_y = (base_y + 1.0) * 0.5;
for _ in 0..100 {
- ui_state.rect(0.0, 0.0, width as f32, height as f32);
+ ui_state.rect(
+ 100.0,
+ 100.0,
+ width as f32 - 200.0,
+ height as f32 - 200.0,
+ 0x88008800,
+ );
}
- for i in 0..80 {
+ for i in 0..1 {
let i = i as f32;
ui_state.text_fmt(
base_x * 100.0 * scale - 5.0,
);
}
- ui_state.rect(base_x * 60.0, base_y * 60.0, 120.0, 120.0);
- ui_state.rect(base_x * 500.0, base_y * 100.0, 120.0, 120.0);
- ui_state.rect(base_x * 90.0, base_y * 290.0, 140.0, 120.0);
- ui_state.rect(base_x * 800.0, base_y * 320.0, 120.0, 120.0);
- ui_state.rect(base_x * 200.0, base_y * 200.0, 120.0, 120.0);
- ui_state.rect(base_x * 300.0, base_y * 120.0, 120.0, 170.0);
- ui_state.rect(base_x * 1000.0, base_y * 30.0, 50.0, 120.0);
- ui_state.rect(base_x * 340.0, base_y * 400.0, 120.0, 110.0);
- ui_state.rect(base_x * 290.0, base_y * 80.0, 120.0, 10.0);
- ui_state.rect(base_x * 310.0, base_y * 190.0, 10.0, 120.0);
+ for _ in 0..500 {
+ ui_state.rect(base_x * 60.0, base_y * 60.0, 2400.0, 900.0, 0xff00ff00);
+ }
+ ui_state.rect(base_x * 60.0, base_y * 60.0, 2400.0, 900.0, 0xffff0000);
draw_state.draw(
thread_token,