From: Josh Simmons Date: Sat, 9 Nov 2024 12:38:33 +0000 (+0100) Subject: shark: Restore 2d drawing with new binning X-Git-Url: https://git.nega.tv//gitweb.cgi?a=commitdiff_plain;h=9179db7087354c35ef5bb581aa70d14e62fe9110;p=josh%2Fnarcissus shark: Restore 2d drawing with new binning --- diff --git a/title/shark-shaders/shaders/composite.comp b/title/shark-shaders/shaders/composite.comp index 0e6211d..26be838 100644 --- a/title/shark-shaders/shaders/composite.comp +++ b/title/shark-shaders/shaders/composite.comp @@ -7,6 +7,7 @@ #extension GL_EXT_scalar_block_layout : require #include "compute_bindings.h" +#include "draw_2d.h" float srgb_oetf(float a) { return (.0031308f >= a) ? 12.92f * a : 1.055f * pow(a, .4166666666666667f) - .055f; @@ -23,15 +24,23 @@ vec3 tony_mc_mapface(vec3 stimulus) { return textureLod(sampler3D(tony_mc_mapface_lut, bilinear_sampler), uv, 0.0).rgb; } +struct CompositeConstants { + uvec2 tile_resolution; + TileRef tile_buffer; +}; + +layout(std430, push_constant) uniform CompositeConstantsBlock { + CompositeConstants constants; +}; + layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; void main() { - // const uvec2 tile_coord = gl_WorkGroupID.xy / 4; - // const uint tile_index = tile_coord.y * uniforms.tile_resolution.x + tile_coord.x; - - // TilesRead tiles_read = TilesRead(uniforms.tiles); - // const uint lo = tiles_read.values[tile_base + TILE_BITMAP_RANGE_LO_OFFSET]; - // const uint hi = tiles_read.values[tile_base + TILE_BITMAP_RANGE_HI_OFFSET]; + const uvec2 tile_coord = gl_WorkGroupID.xy / (TILE_SIZE / gl_WorkGroupSize.xy); + const uint tile_index = tile_coord.y * constants.tile_resolution.x + tile_coord.x; + + const uint lo = constants.tile_buffer.values[tile_index].min_index; + const uint hi = constants.tile_buffer.values[tile_index].max_index; // Display transform const vec3 stimulus = imageLoad(color_layer, ivec2(gl_GlobalInvocationID.xy)).rgb; @@ -39,10 +48,10 @@ void main() { vec3 composited = srgb_oetf(transformed); // UI Composite - // if (lo <= hi) { - // const vec4 ui = imageLoad(ui_layer_read, ivec2(gl_GlobalInvocationID.xy)).rgba; - // composited = ui.rgb + (composited * (1.0 - ui.a)); - // } + if (lo != hi) { + const vec4 ui = imageLoad(ui_layer_read, ivec2(gl_GlobalInvocationID.xy)).rgba; + composited = ui.rgb + (composited * (1.0 - ui.a)); + } imageStore(composited_output, ivec2(gl_GlobalInvocationID.xy), vec4(composited, 1.0)); } diff --git a/title/shark-shaders/shaders/draw_2d.h b/title/shark-shaders/shaders/draw_2d.h index 43181eb..b39368e 100644 --- a/title/shark-shaders/shaders/draw_2d.h +++ b/title/shark-shaders/shaders/draw_2d.h @@ -6,6 +6,11 @@ const uint TILE_SIZE = 32; const uint DRAW_2D_CMD_RECT = 0; const uint DRAW_2D_CMD_GLYPH = 1; +struct Tile { + uint min_index; + uint max_index; +}; + struct Glyph { ivec2 atlas_min; ivec2 atlas_max; @@ -20,7 +25,7 @@ struct Draw2dCmd { }; struct Draw2dCmdRect { - uint border_width; + float border_width; vec2 position; vec2 half_extent; uint background_color; @@ -35,7 +40,7 @@ struct Draw2dCmdGlyph { Draw2dCmdRect decode_rect(Draw2dCmd cmd) { return Draw2dCmdRect( - cmd.words[0], + uintBitsToFloat(cmd.words[0]), vec2(uintBitsToFloat(cmd.words[1]), uintBitsToFloat(cmd.words[2])), vec2(uintBitsToFloat(cmd.words[3]), uintBitsToFloat(cmd.words[4])), cmd.words[5], @@ -66,4 +71,12 @@ layout(buffer_reference, std430, buffer_reference_align = 4) buffer CoarseRef uint values[]; }; +layout(buffer_reference, std430, buffer_reference_align = 4) buffer FineRef { + uint values[]; +}; + +layout(buffer_reference, std430, buffer_reference_align = 4) buffer TileRef { + Tile values[]; +}; + #endif \ No newline at end of file diff --git a/title/shark-shaders/shaders/draw_2d_bin_1_scatter.comp b/title/shark-shaders/shaders/draw_2d_bin_1_scatter.comp index d991eb1..df750ed 100644 --- a/title/shark-shaders/shaders/draw_2d_bin_1_scatter.comp +++ b/title/shark-shaders/shaders/draw_2d_bin_1_scatter.comp @@ -33,7 +33,7 @@ layout (local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; void main() { const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; - const uint draw_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + local_id; + const uint draw_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + local_id; // Bounds for this command, any tiles which intersect this AABB will be written. vec2 cmd_min = vec2(99999.9); @@ -82,8 +82,47 @@ void main() { const bool cmd_absolute_max = cmd_max_tile == cmds_max_tile; const bool use_individual_bounds = !any(notEqual(subgroupBallot(cmd_absolute_min) & subgroupBallot(cmd_absolute_max), uvec4(0))); - if (false && use_individual_bounds) { + if (use_individual_bounds) { + uint count = 0; + for (uint y = cmds_min_tile.y; y <= cmds_max_tile.y; y++) { + for (uint x = cmds_min_tile.x; x <= cmds_max_tile.x; x++) { + const vec2 tile_min = uvec2(x, y) * TILE_SIZE; + const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution); + + const bool intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max))); + const uvec4 ballot = subgroupBallot(intersects); + + if (subgroupElect()) { + count += uint(ballot.x != 0); + } + } + } + + uint offset; + if (subgroupElect()) { + offset = atomicAdd(constants.coarse_buffer.values[0], count) + 1; + } + offset = subgroupBroadcastFirst(offset); + + if (offset >= constants.coarse_buffer_len) { + return; + } + + for (uint y = cmds_min_tile.y; y <= cmds_max_tile.y; y++) { + for (uint x = cmds_min_tile.x; x <= cmds_max_tile.x; x++) { + const vec2 tile_min = uvec2(x, y) * TILE_SIZE; + const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution); + + const bool intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max))); + const uvec4 ballot = subgroupBallot(intersects); + + if (subgroupElect() && ballot.x != 0 && offset < constants.coarse_buffer_len) { + const uint packed = ((y & 0xff) << 24) | ((x & 0xff) << 16) | (gl_WorkGroupID.x & 0xffff); + constants.coarse_buffer.values[offset++] = packed; + } + } + } } else { const uvec2 tile_count = cmds_max_tile - cmds_min_tile + uvec2(1); const uint count = tile_count.x * tile_count.y; @@ -112,78 +151,4 @@ void main() { offset += tile_count.x; } } - - // if (gl_SubgroupSize == 32 && fullscreen_ballot.x != 0) { - // uint offset; - // if (subgroupElect()) { - // const uint count = constants.tile_resolution.x * constants.tile_resolution.y; - // offset = atomicAdd(constants.coarse_buffer.values[0], count) + 1; - // } - // offset = subgroupBroadcastFirst(offset); - - // if (offset >= constants.coarse_buffer_len) { - // return; - // } - - // const uint word_index = gl_WorkGroupID.x; - - // for (uint y = 0; y < constants.tile_resolution.y; y++) { - // for (uint x = 0; x < constants.tile_resolution.x; x++) { - // const uint tile_index = y * constants.tile_resolution.x + x; - // const uint packed = (tile_index << 16) | word_index; - // if (subgroupElect() && offset + tile_index < constants.coarse_buffer_len) { - // constants.coarse_buffer.values[offset + tile_index] = packed; - // } - // } - // } - - // return; - // } - - // uint count = 0; - - // for (uint y = cmds_min_tile.y; y <= cmds_max_tile.y; y++) { - // for (uint x = cmds_min_tile.x; x <= cmds_max_tile.x; x++) { - // const vec2 tile_min = uvec2(x, y) * TILE_SIZE; - // const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution); - - // const bool intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max))); - // const uvec4 ballot = subgroupBallot(intersects); - - // if (subgroupElect()) { - // count += uint(ballot.x != 0); - // } - // } - // } - - // if (count == 0) { - // return; - // } - - // uint offset; - // if (subgroupElect()) { - // offset = atomicAdd(constants.coarse_buffer.values[0], count) + 1; - // } - // offset = subgroupBroadcastFirst(offset); - - // if (offset >= constants.coarse_buffer_len) { - // return; - // } - - // for (uint y = cmds_min_tile.y; y <= cmds_max_tile.y; y++) { - // for (uint x = cmds_min_tile.x; x <= cmds_max_tile.x; x++) { - // const vec2 tile_min = uvec2(x, y) * TILE_SIZE; - // const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution); - // const uint tile_index = y * constants.tile_resolution.x + x; - - // const bool intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max))); - // const uvec4 ballot = subgroupBallot(intersects); - - // if (subgroupElect() && ballot.x != 0 && offset < constants.coarse_buffer_len) { - // const uint word_index = gl_WorkGroupID.x; - // const uint packed = (tile_index << 16) | word_index; - // constants.coarse_buffer.values[offset++] = packed; - // } - // } - // } } diff --git a/title/shark-shaders/shaders/draw_2d_bin_3_resolve.comp b/title/shark-shaders/shaders/draw_2d_bin_3_resolve.comp index 718563d..aeb6f33 100644 --- a/title/shark-shaders/shaders/draw_2d_bin_3_resolve.comp +++ b/title/shark-shaders/shaders/draw_2d_bin_3_resolve.comp @@ -6,17 +6,126 @@ #extension GL_EXT_buffer_reference2 : require #extension GL_EXT_scalar_block_layout : require #extension GL_EXT_control_flow_attributes : require +#extension GL_EXT_control_flow_attributes2 : require #extension GL_KHR_shader_subgroup_arithmetic : require #extension GL_KHR_shader_subgroup_ballot : require #extension GL_KHR_shader_subgroup_vote : require -#include "compute_bindings.h" +#include "draw_2d.h" -const uint SUBGROUP_SIZE = 64; +struct Draw2dResolveConstants { + uvec2 screen_resolution; + uvec2 tile_resolution; -// TODO: Spec constant support for different subgroup sizes. -layout (local_size_x = SUBGROUP_SIZE, local_size_y = 1, local_size_z = 1) in; + uint draw_buffer_len; + uint _pad; + + Draw2dCommandRef draw_buffer; + GlyphRef glyph_buffer; + CoarseRef coarse_buffer; + FineRef fine_buffer; + TileRef tile_buffer; +}; + +layout(std430, push_constant) uniform Draw2dResolveConstantsBlock { + Draw2dResolveConstants constants; +}; + +layout (local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; void main() { + const uint local_id = gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID; + const uint x = gl_GlobalInvocationID.y; + const uint y = gl_GlobalInvocationID.z; + const uint tile_offset = (constants.tile_resolution.x * y + x); + const uint search = ((y & 0xff) << 24) | ((x & 0xff) << 16); + const uint count = constants.coarse_buffer.values[0]; + + if (count == 0) { + constants.tile_buffer.values[tile_offset].min_index = 0; + constants.tile_buffer.values[tile_offset].max_index = 0; + return; + } + + // Binary search for the upper bound of the tile. + uint base = 0; + { + uint n = count; + uint mid; + uint max_iters = 32; + while (max_iters --> 0 && (mid = n / 2) > 0) { + const uint value = constants.coarse_buffer.values[1 + base + mid] & 0xffff0000; + base = value > search ? base : base + mid; + n -= mid; + } + } + + const vec2 tile_min = uvec2(x, y) * TILE_SIZE; + const vec2 tile_max = min(tile_min + TILE_SIZE, constants.screen_resolution); + + bool hit_opaque = false; + uint lo = base + 1; + const uint hi = base + 1; + for(; !hit_opaque && lo > 0; lo--) { + const uint i = lo; + const uint packed = constants.coarse_buffer.values[i]; + + if ((packed & 0xffff0000) != (search & 0xffff0000)) { + break; + } + + const uint draw_offset = packed & 0xffff; + const uint draw_index = draw_offset * gl_WorkGroupSize.x + local_id; + + bool intersects = false; + bool opaque_tile = false; + + if (draw_index < constants.draw_buffer_len) { + vec2 cmd_min = vec2(99999.9); + vec2 cmd_max = vec2(-99999.9); + + const Draw2dCmd cmd = constants.draw_buffer.values[draw_index]; + for (;;) { + const uint scalar_type = subgroupBroadcastFirst(cmd.type); + [[branch]] + if (scalar_type == cmd.type) { + switch (scalar_type) { + case DRAW_2D_CMD_RECT: + const Draw2dCmdRect cmd_rect = decode_rect(cmd); + cmd_min = cmd_rect.position - cmd_rect.half_extent - cmd_rect.border_width; + cmd_max = cmd_rect.position + cmd_rect.half_extent + cmd_rect.border_width; + opaque_tile = all(greaterThanEqual(tile_min, cmd_min)) && all(lessThanEqual(tile_max, cmd_max)); + opaque_tile = opaque_tile && ((cmd_rect.background_color & 0xff000000) == 0xff000000); + break; + case DRAW_2D_CMD_GLYPH: + const Draw2dCmdGlyph cmd_glyph = decode_glyph(cmd); + const Glyph glyph = constants.glyph_buffer.values[cmd_glyph.index]; + cmd_min = cmd_glyph.position + glyph.offset_min; + cmd_max = cmd_glyph.position + glyph.offset_max; + break; + } + break; + } + } + + intersects = !(any(lessThan(tile_max, cmd_min)) || any(greaterThan(tile_min, cmd_max))); + } + + uint intersects_mask = subgroupBallot(intersects).x; + + if (subgroupAny(opaque_tile)) { + const uvec4 opaque_tile_ballot = subgroupBallot(opaque_tile); + const uint first_opaque_tile = subgroupBallotFindMSB(opaque_tile_ballot); + const uint opaque_mask = ~((1 << first_opaque_tile) - 1); + intersects_mask &= opaque_mask; + constants.fine_buffer.values[i] = intersects_mask; + hit_opaque = true; + } else { + constants.fine_buffer.values[i] = intersects_mask; + } + } + + constants.tile_buffer.values[tile_offset].min_index = lo + 1; + constants.tile_buffer.values[tile_offset].max_index = hi + 1; } diff --git a/title/shark-shaders/shaders/draw_2d_rasterize.comp b/title/shark-shaders/shaders/draw_2d_rasterize.comp index dfa9b7d..bdcc6f1 100644 --- a/title/shark-shaders/shaders/draw_2d_rasterize.comp +++ b/title/shark-shaders/shaders/draw_2d_rasterize.comp @@ -11,17 +11,30 @@ #extension GL_KHR_shader_subgroup_ballot : require #include "compute_bindings.h" +#include "draw_2d.h" -layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; +struct Draw2dRasterizeConstants { + uvec2 screen_resolution; + uvec2 tile_resolution; + uvec2 atlas_resolution; + + Draw2dCommandRef draw_buffer; + GlyphRef glyph_buffer; + CoarseRef coarse_buffer; + FineRef fine_buffer; + TileRef tile_buffer; +}; + +layout(std430, push_constant) uniform Draw2dRasterizeConstantsBlock { + Draw2dRasterizeConstants constants; +}; /// x = (((index >> 2) & 0x0007) & 0xFFFE) | index & 0x0001 /// y = ((index >> 1) & 0x0003) | (((index >> 3) & 0x0007) & 0xFFFC) #define DEBUG_SHOW_TILES 0 -#if DEBUG_SHOW_TILES != 0 - -vec3 plasma_quintic(float x) +vec3 plasma_quintic(in float x) { x = clamp(x, 0.0, 1.0); vec4 x1 = vec4(1.0, x, x * x, x * x * x); // 1 x x2 x3 @@ -33,113 +46,81 @@ vec3 plasma_quintic(float x) ); } -#endif +layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; void main() { - // const uvec2 tile_coord = gl_WorkGroupID.xy / (TILE_SIZE / gl_WorkGroupSize.xy); - // const uint tile_index = tile_coord.y * uniforms.tile_resolution.x + tile_coord.x; - - // TilesRead tiles_read = TilesRead(uniforms.tiles); - - // const uint lo = tiles_read.values[tile_base + TILE_BITMAP_RANGE_LO_OFFSET]; - // const uint hi = tiles_read.values[tile_base + TILE_BITMAP_RANGE_HI_OFFSET]; - - // if (hi < lo) { - // return; - // } - -// #if DEBUG_SHOW_TILES == 1 - -// uint count = 0; -// // For each tile, iterate over all words in the L1 bitmap. -// for (uint index_l1 = lo; index_l1 <= hi; index_l1++) { -// // For each word, iterate all set bits. -// uint bitmap_l1 = tiles_read.values[tile_base + TILE_BITMAP_L1_OFFSET + index_l1]; - -// while (bitmap_l1 != 0) { -// const uint i = findLSB(bitmap_l1); -// bitmap_l1 ^= bitmap_l1 & -bitmap_l1; - -// // For each set bit in the L1 bitmap, iterate the set bits in the -// // corresponding L0 bitmap. -// const uint index_l0 = index_l1 * 32 + i; -// uint bitmap_l0 = tiles_read.values[tile_base + TILE_BITMAP_L0_OFFSET + index_l0]; - -// count += bitCount(bitmap_l0); -// } -// } - -// const vec3 color = plasma_quintic(float(count) / 100.0); -// imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0)); - -// #elif DEBUG_SHOW_TILES == 2 - -// uint count = hi - lo; -// const vec3 color = plasma_quintic(float(count) / 100.0); -// imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0)); - -// #else - -// const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5); - -// vec4 accum = vec4(0.0); - -// // For each tile, iterate over all words in the L1 bitmap. -// for (uint index_l1 = lo; index_l1 <= hi; index_l1++) { -// // For each word, iterate all set bits. -// uint bitmap_l1 = tiles_read.values[tile_base + TILE_BITMAP_L1_OFFSET + index_l1]; - -// while (bitmap_l1 != 0) { -// const uint i = findLSB(bitmap_l1); -// bitmap_l1 ^= bitmap_l1 & -bitmap_l1; - -// // For each set bit in the L1 bitmap, iterate the set bits in the -// // corresponding L0 bitmap. -// const uint index_l0 = index_l1 * 32 + i; -// uint bitmap_l0 = tiles_read.values[tile_base + TILE_BITMAP_L0_OFFSET + index_l0]; -// while (bitmap_l0 != 0) { -// const uint j = findLSB(bitmap_l0); -// bitmap_l0 ^= bitmap_l0 & -bitmap_l0; - -// // Set bits in the L0 bitmap indicate binned primitives for this tile. -// const uint primitive_index = index_l0 * 32 + j; -// const Primitive2d primitive = uniforms.primitives.values[primitive_index]; -// const uint type = primitive.type; - -// vec4 primitive_color = vec4(0.0); - -// switch (type) { -// case PRIMITIVE_TYPE_RECT: { -// // const Rect rect = uniforms.rects.values[offset]; -// // const vec2 rect_min = primitive_instance.position - rect.half_extent - rect.border_width; -// // const vec2 rect_max = primitive_instance.position + rect.half_extent + rect.border_width; -// if (all(greaterThanEqual(sample_center, rect_min)) && all(lessThanEqual(sample_center, rect_max))) { -// primitive_color = unpackUnorm4x8(primitive_instance.color).bgra; -// } -// break; -// } -// case PRIMITIVE_TYPE_GLYPH: { -// const Glyph glyph = uniforms.glyphs.values[offset]; -// const vec2 glyph_min = primitive_instance.position + glyph.offset_min; -// const vec2 glyph_max = primitive_instance.position + glyph.offset_max; -// if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) { -// const vec2 glyph_size = glyph.offset_max - glyph.offset_min; -// const vec2 uv = mix(glyph.atlas_min, glyph.atlas_max, (sample_center - glyph_min) / glyph_size) / uniforms.atlas_resolution; -// const vec4 color = unpackUnorm4x8(primitive_instance.color).bgra; -// const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r * color.a; -// primitive_color = color * coverage; -// } -// break; -// } -// } - -// // does it blend? -// accum.rgba = primitive_color.rgba + accum.rgba * (1.0 - primitive_color.a); -// } -// } -// } - -// imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), accum); - -// #endif + const uvec2 tile_coord = gl_WorkGroupID.xy / (TILE_SIZE / gl_WorkGroupSize.xy); + const uint tile_index = tile_coord.y * constants.tile_resolution.x + tile_coord.x; + + const uint lo = constants.tile_buffer.values[tile_index].min_index; + const uint hi = constants.tile_buffer.values[tile_index].max_index; + + if (lo == hi) { + return; + } + +#if DEBUG_SHOW_TILES == 1 + + uint count = hi - lo; + const vec3 color = plasma_quintic(float(count) / 50.0); + imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0)); + +#elif DEBUG_SHOW_TILES == 2 + + uint count = 0; + for (uint i = lo; i < hi; i++) { + count += bitCount(constants.fine_buffer.values[i]); + } + const vec3 color = plasma_quintic(float(count) / 600.0); + imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), vec4(color, 1.0)); + +#else + + const vec2 sample_center = gl_GlobalInvocationID.xy + vec2(0.5); + + vec4 accum = vec4(0.0); + + for (uint i = lo; i < hi; i++) { + uint bitmap = constants.fine_buffer.values[i]; + while (bitmap != 0) { + const uint index = findLSB(bitmap); + bitmap ^= bitmap & -bitmap; + + const uint base_index = (constants.coarse_buffer.values[i] & 0xffff) * 32; + const Draw2dCmd cmd = constants.draw_buffer.values[base_index + index]; + + vec4 primitive_color = vec4(0.0); + switch (cmd.type) { + case DRAW_2D_CMD_RECT: + const Draw2dCmdRect cmd_rect = decode_rect(cmd); + const vec2 rect_min = cmd_rect.position - cmd_rect.half_extent - cmd_rect.border_width; + const vec2 rect_max = cmd_rect.position + cmd_rect.half_extent + cmd_rect.border_width; + if (all(greaterThanEqual(sample_center, rect_min)) && all(lessThanEqual(sample_center, rect_max))) { + primitive_color = unpackUnorm4x8(cmd_rect.background_color).bgra; + } + break; + case DRAW_2D_CMD_GLYPH: + const Draw2dCmdGlyph cmd_glyph = decode_glyph(cmd); + const Glyph glyph = constants.glyph_buffer.values[cmd_glyph.index]; + const vec2 glyph_min = cmd_glyph.position + glyph.offset_min; + const vec2 glyph_max = cmd_glyph.position + glyph.offset_max; + if (all(greaterThanEqual(sample_center, glyph_min)) && all(lessThanEqual(sample_center, glyph_max))) { + const vec2 glyph_size = glyph.offset_max - glyph.offset_min; + const vec2 uv = mix(glyph.atlas_min, glyph.atlas_max, (sample_center - glyph_min) / glyph_size) / constants.atlas_resolution; + const vec4 color = unpackUnorm4x8(cmd_glyph.color).bgra; + const float coverage = textureLod(sampler2D(glyph_atlas, bilinear_sampler), uv, 0.0).r * color.a; + primitive_color = color * coverage; + } + break; + } + + // does it blend? + accum.rgba = primitive_color.rgba + accum.rgba * (1.0 - primitive_color.a); + } + } + + imageStore(ui_layer_write, ivec2(gl_GlobalInvocationID.xy), accum); + +#endif + } diff --git a/title/shark-shaders/shaders/radix_sort_1_spine.comp b/title/shark-shaders/shaders/radix_sort_1_spine.comp index c9459bd..a02949e 100644 --- a/title/shark-shaders/shaders/radix_sort_1_spine.comp +++ b/title/shark-shaders/shaders/radix_sort_1_spine.comp @@ -12,8 +12,6 @@ #extension GL_KHR_shader_subgroup_shuffle_relative: enable #extension GL_KHR_shader_subgroup_vote : require -//#extension GL_EXT_debug_printf : enable - #include "compute_bindings.h" #include "radix_sort.h" diff --git a/title/shark-shaders/src/pipelines.rs b/title/shark-shaders/src/pipelines.rs index 9bc69e3..9eb8155 100644 --- a/title/shark-shaders/src/pipelines.rs +++ b/title/shark-shaders/src/pipelines.rs @@ -169,6 +169,46 @@ pub struct Draw2dSortConstants<'a> { pub coarse_buffer_address: BufferAddress<'a>, } +#[repr(C)] +pub struct Draw2dResolveConstants<'a> { + pub screen_resolution_x: u32, + pub screen_resolution_y: u32, + pub tile_resolution_x: u32, + pub tile_resolution_y: u32, + + pub draw_buffer_len: u32, + pub _pad: u32, + + pub draw_buffer_address: BufferAddress<'a>, + pub glyph_buffer_address: BufferAddress<'a>, + pub coarse_buffer_address: BufferAddress<'a>, + pub fine_buffer_address: BufferAddress<'a>, + pub tile_buffer_address: BufferAddress<'a>, +} + +#[repr(C)] +pub struct Draw2dRasterizeConstants<'a> { + pub screen_resolution_x: u32, + pub screen_resolution_y: u32, + pub tile_resolution_x: u32, + pub tile_resolution_y: u32, + pub atlas_resolution_x: u32, + pub atlas_resolution_y: u32, + + pub draw_buffer_address: BufferAddress<'a>, + pub glyph_buffer_address: BufferAddress<'a>, + pub coarse_buffer_address: BufferAddress<'a>, + pub fine_buffer_address: BufferAddress<'a>, + pub tile_buffer_address: BufferAddress<'a>, +} + +#[repr(C)] +pub struct CompositeConstants<'a> { + pub tile_resolution_x: u32, + pub tile_resolution_y: u32, + pub tile_buffer_address: BufferAddress<'a>, +} + #[repr(C)] pub struct RadixSortUpsweepConstants<'a> { pub shift: u32, @@ -298,48 +338,50 @@ impl Pipelines { gpu.debug_name_pipeline(basic_pipeline, "basic"); - let create_compute_pipeline = |code, name, workgroup_size, push_constant_size| { - let push_constant_range = PushConstantRange { - stage_flags: ShaderStageFlags::COMPUTE, - offset: 0, - size: push_constant_size as u32, - }; - - let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { - shader: ShaderDesc { - code, - require_full_subgroups: workgroup_size != 0, - required_subgroup_size: if workgroup_size != 0 { - Some(workgroup_size) - } else { - None + let create_compute_pipeline = + |code, name, workgroup_size, require_full_subgroups, push_constant_size| { + let push_constant_range = PushConstantRange { + stage_flags: ShaderStageFlags::COMPUTE, + offset: 0, + size: push_constant_size as u32, + }; + + let pipeline = gpu.create_compute_pipeline(&ComputePipelineDesc { + shader: ShaderDesc { + code, + require_full_subgroups, + required_subgroup_size: if workgroup_size != 0 { + Some(workgroup_size) + } else { + None + }, + spec_constants: &[SpecConstant::U32 { + id: 0, + value: workgroup_size, + }], + ..default() }, - spec_constants: &[SpecConstant::U32 { - id: 0, - value: workgroup_size, - }], - ..default() - }, - layout: PipelineLayout { - bind_group_layouts: &[compute_bind_group_layout], - // Validation cries about push constant ranges with zero size. - push_constant_ranges: if push_constant_range.size != 0 { - std::slice::from_ref(&push_constant_range) - } else { - &[] + layout: PipelineLayout { + bind_group_layouts: &[compute_bind_group_layout], + // Validation cries about push constant ranges with zero size. + push_constant_ranges: if push_constant_range.size != 0 { + std::slice::from_ref(&push_constant_range) + } else { + &[] + }, }, - }, - }); + }); - gpu.debug_name_pipeline(pipeline, name); + gpu.debug_name_pipeline(pipeline, name); - pipeline - }; + pipeline + }; let draw_2d_bin_0_clear_pipeline = create_compute_pipeline( crate::DRAW_2D_BIN_0_CLEAR_COMP_SPV, "draw2d_bin_clear", 0, + false, std::mem::size_of::(), ); @@ -348,6 +390,7 @@ impl Pipelines { crate::DRAW_2D_BIN_1_SCATTER_COMP_SPV, "draw2d_bin_scatter", draw_2d_bin_1_scatter_pipeline_workgroup_size, + true, std::mem::size_of::(), ); @@ -355,23 +398,31 @@ impl Pipelines { crate::DRAW_2D_BIN_2_SORT_COMP_SPV, "draw2d_bin_sort", 0, + false, std::mem::size_of::(), ); let draw_2d_bin_3_resolve_pipeline = create_compute_pipeline( crate::DRAW_2D_BIN_3_RESOLVE_COMP_SPV, "draw2d_bin_resolve", - 0, - 0, + 32, + true, + std::mem::size_of::(), ); - let draw_2d_rasterize_pipeline = - create_compute_pipeline(crate::DRAW_2D_RASTERIZE_COMP_SPV, "draw2d_rasterize", 0, 0); + let draw_2d_rasterize_pipeline = create_compute_pipeline( + crate::DRAW_2D_RASTERIZE_COMP_SPV, + "draw2d_rasterize", + 0, + false, + std::mem::size_of::(), + ); let radix_sort_0_upsweep_pipeline = create_compute_pipeline( crate::RADIX_SORT_0_UPSWEEP_COMP_SPV, "radix_sort_upsweep", 32, + true, std::mem::size_of::(), ); @@ -379,6 +430,7 @@ impl Pipelines { crate::RADIX_SORT_1_SPINE_COMP_SPV, "radix_sort_spine", 32, + true, std::mem::size_of::(), ); @@ -386,11 +438,17 @@ impl Pipelines { crate::RADIX_SORT_2_DOWNSWEEP_COMP_SPV, "radix_sort_downsweep", 32, + true, std::mem::size_of::(), ); - let composite_pipeline = - create_compute_pipeline(crate::COMPOSITE_COMP_SPV, "composite", 0, 0); + let composite_pipeline = create_compute_pipeline( + crate::COMPOSITE_COMP_SPV, + "composite", + 0, + false, + std::mem::size_of::(), + ); Self { _samplers: samplers, diff --git a/title/shark/src/main.rs b/title/shark/src/main.rs index f1a368f..6ffb3cb 100644 --- a/title/shark/src/main.rs +++ b/title/shark/src/main.rs @@ -6,10 +6,10 @@ use std::time::{Duration, Instant}; use narcissus_core::dds; use shark_shaders::pipelines::{ - calculate_spine_size, BasicConstants, ComputeBinds, Draw2dClearConstants, Draw2dCmd, - Draw2dScatterConstants, Draw2dSortConstants, GraphicsBinds, Pipelines, - RadixSortDownsweepConstants, RadixSortSpineConstants, RadixSortUpsweepConstants, - DRAW_2D_TILE_SIZE, + calculate_spine_size, BasicConstants, CompositeConstants, ComputeBinds, Draw2dClearConstants, + Draw2dCmd, Draw2dRasterizeConstants, Draw2dResolveConstants, Draw2dScatterConstants, + Draw2dSortConstants, GraphicsBinds, Pipelines, RadixSortDownsweepConstants, + RadixSortSpineConstants, RadixSortUpsweepConstants, DRAW_2D_TILE_SIZE, }; use renderdoc_sys as rdoc; @@ -469,7 +469,7 @@ impl<'a> UiState<'a> { } } - fn rect(&mut self, x: f32, y: f32, width: f32, height: f32) { + fn rect(&mut self, x: f32, y: f32, width: f32, height: f32, background_color: u32) { let half_extent_x = width / 2.0; let half_extent_y = height / 2.0; let center_x = x + half_extent_x; @@ -480,8 +480,8 @@ impl<'a> UiState<'a> { center_y, half_extent_x, half_extent_y, - 1.0, - 0x4400ff00, + 5.0, + background_color, 0xffff0000, )) } @@ -527,7 +527,7 @@ impl<'a> UiState<'a> { x += advance * scale; self.draw_cmds - .push(Draw2dCmd::glyph(touched_glyph_index, 0x880000ff, x, y)); + .push(Draw2dCmd::glyph(touched_glyph_index, 0xff0000ff, x, y)); x += advance_width * scale; } @@ -1269,6 +1269,17 @@ impl<'gpu> DrawState<'gpu> { ], ); + let tile_buffer = gpu.request_transient_buffer( + frame, + thread_token, + BufferUsageFlags::STORAGE, + self.tile_resolution_x as usize + * self.tile_resolution_y as usize + * std::mem::size_of::() + * 2, + ); + let tile_buffer_address = gpu.get_buffer_address(tile_buffer.to_arg()); + // Render UI { gpu.cmd_begin_debug_marker( @@ -1515,11 +1526,29 @@ impl<'gpu> DrawState<'gpu> { gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.draw_2d_bin_3_resolve_pipeline); gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group); + gpu.cmd_push_constants( + cmd_encoder, + ShaderStageFlags::COMPUTE, + 0, + &Draw2dResolveConstants { + screen_resolution_x: self.width, + screen_resolution_y: self.height, + tile_resolution_x: self.tile_resolution_x, + tile_resolution_y: self.tile_resolution_y, + draw_buffer_len, + _pad: 0, + draw_buffer_address, + glyph_buffer_address, + coarse_buffer_address, + fine_buffer_address: tmp_buffer_address, + tile_buffer_address, + }, + ); gpu.cmd_dispatch( cmd_encoder, + 1, self.tile_resolution_x, self.tile_resolution_y, - 1, ); gpu.cmd_barrier( @@ -1533,6 +1562,24 @@ impl<'gpu> DrawState<'gpu> { gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.draw_2d_rasterize_pipeline); gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group); + gpu.cmd_push_constants( + cmd_encoder, + ShaderStageFlags::COMPUTE, + 0, + &Draw2dRasterizeConstants { + screen_resolution_x: self.width, + screen_resolution_y: self.height, + tile_resolution_x: self.tile_resolution_x, + tile_resolution_y: self.tile_resolution_y, + atlas_resolution_x: atlas_width, + atlas_resolution_y: atlas_height, + draw_buffer_address, + glyph_buffer_address, + coarse_buffer_address, + fine_buffer_address: tmp_buffer_address, + tile_buffer_address, + }, + ); gpu.cmd_dispatch(cmd_encoder, (self.width + 7) / 8, (self.height + 7) / 8, 1); gpu.cmd_end_debug_marker(cmd_encoder); @@ -1542,7 +1589,7 @@ impl<'gpu> DrawState<'gpu> { { gpu.cmd_begin_debug_marker( cmd_encoder, - "display transform", + "composite", microshades::GREEN_RGBA_F32[3], ); @@ -1573,7 +1620,16 @@ impl<'gpu> DrawState<'gpu> { gpu.cmd_set_pipeline(cmd_encoder, self.pipelines.composite_pipeline); gpu.cmd_set_bind_group(cmd_encoder, 0, &compute_bind_group); - + gpu.cmd_push_constants( + cmd_encoder, + ShaderStageFlags::COMPUTE, + 0, + &CompositeConstants { + tile_resolution_x: self.tile_resolution_x, + tile_resolution_y: self.tile_resolution_y, + tile_buffer_address, + }, + ); gpu.cmd_dispatch(cmd_encoder, (self.width + 7) / 8, (self.height + 7) / 8, 1); gpu.cmd_end_debug_marker(cmd_encoder); @@ -1753,10 +1809,16 @@ pub fn main() { let base_y = (base_y + 1.0) * 0.5; for _ in 0..100 { - ui_state.rect(0.0, 0.0, width as f32, height as f32); + ui_state.rect( + 100.0, + 100.0, + width as f32 - 200.0, + height as f32 - 200.0, + 0x88008800, + ); } - for i in 0..80 { + for i in 0..1 { let i = i as f32; ui_state.text_fmt( base_x * 100.0 * scale - 5.0, @@ -1780,16 +1842,10 @@ pub fn main() { ); } - ui_state.rect(base_x * 60.0, base_y * 60.0, 120.0, 120.0); - ui_state.rect(base_x * 500.0, base_y * 100.0, 120.0, 120.0); - ui_state.rect(base_x * 90.0, base_y * 290.0, 140.0, 120.0); - ui_state.rect(base_x * 800.0, base_y * 320.0, 120.0, 120.0); - ui_state.rect(base_x * 200.0, base_y * 200.0, 120.0, 120.0); - ui_state.rect(base_x * 300.0, base_y * 120.0, 120.0, 170.0); - ui_state.rect(base_x * 1000.0, base_y * 30.0, 50.0, 120.0); - ui_state.rect(base_x * 340.0, base_y * 400.0, 120.0, 110.0); - ui_state.rect(base_x * 290.0, base_y * 80.0, 120.0, 10.0); - ui_state.rect(base_x * 310.0, base_y * 190.0, 10.0, 120.0); + for _ in 0..500 { + ui_state.rect(base_x * 60.0, base_y * 60.0, 2400.0, 900.0, 0xff00ff00); + } + ui_state.rect(base_x * 60.0, base_y * 60.0, 2400.0, 900.0, 0xffff0000); draw_state.draw( thread_token,