// Tile sizes for the coarse and fine binning levels (presumably pixels per
// tile edge -- confirm against the dispatch code). TILE_SIZE_SHIFT is the
// right-shift that turns a fine tile coordinate into the coordinate of the
// coarse tile containing it (128 / 16 == 1 << 3).
#define TILE_SIZE_COARSE 128
#define TILE_SIZE_FINE 16
#define TILE_SIZE_SHIFT 3
-#define TILE_BITMAP_WORDS_L1 (MAX_PRIMS / 32 / 32)
-#define TILE_BITMAP_WORDS_L0 (MAX_PRIMS / 32)
-#define TILE_STRIDE_COARSE TILE_BITMAP_WORDS_L0
-#define TILE_STRIDE_FINE (TILE_BITMAP_WORDS_L0 + TILE_BITMAP_WORDS_L1)
// Per-tile bitmap sizes, counted in 32-bit words. The L0 bitmap holds one bit
// per primitive; the L1 bitmap holds one bit per L0 word, set when that L0
// word is non-zero. A coarse tile stores only an L0-sized bitmap, while a fine
// tile stores both levels, hence the two different strides.
+#define TILE_BITMAP_L1_WORDS (MAX_PRIMS / 32 / 32)
+#define TILE_BITMAP_L0_WORDS (MAX_PRIMS / 32)
+#define TILE_STRIDE_COARSE TILE_BITMAP_L0_WORDS
+#define TILE_STRIDE_FINE (TILE_BITMAP_L0_WORDS + TILE_BITMAP_L1_WORDS)
// Word offsets of each bitmap relative to the start of a tile's slice of the
// bitmap buffer. Within a fine tile the L1 words come first, followed
// immediately by the L0 words.
+#define TILE_BITMAP_OFFSET_COARSE 0
+#define TILE_BITMAP_L1_OFFSET_FINE 0
+#define TILE_BITMAP_L0_OFFSET_FINE TILE_BITMAP_L1_WORDS
struct PrimitiveUniforms {
uvec2 screen_resolution;
uvec4 ballot_result = subgroupBallot(intersects);
if (subgroupElect()) { // managed democracy wins again
const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord.x;
- const uint bitmap_offset = tile_index * TILE_STRIDE_COARSE;
- coarse_bitmap_wo[bitmap_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
- coarse_bitmap_wo[bitmap_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
+ const uint tile_offset = tile_index * TILE_STRIDE_COARSE;
+ coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
+ coarse_bitmap_wo[tile_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
}
}
const uvec2 tile_max = min(tile_min + tile_size, primitive_uniforms.screen_resolution);
const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x;
- const uint bitmap_index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
+ const uint index = gl_WorkGroupID.x * gl_WorkGroupSize.x + gl_SubgroupID * gl_SubgroupSize + gl_SubgroupInvocationID;
uint bitmap_l0 = 0;
- if (bitmap_index < primitive_uniforms.num_primitives_32) {
+ if (index < primitive_uniforms.num_primitives_32) {
const uvec2 tile_coord_coarse = tile_coord >> TILE_SIZE_SHIFT;
const uint tile_index_coarse = tile_coord_coarse.y * primitive_uniforms.tile_resolution_coarse.x + tile_coord_coarse.x;
- const uint bitmap_offset_coarse = tile_index_coarse * TILE_STRIDE_COARSE + bitmap_index;
+ const uint tile_base_coarse = tile_index_coarse * TILE_STRIDE_COARSE;
+ const uint tile_bitmap_base_coarse = tile_base_coarse + TILE_BITMAP_OFFSET_COARSE;
- uint bitmap_coarse = coarse_bitmap_ro[bitmap_offset_coarse];
+ uint bitmap_coarse = coarse_bitmap_ro[tile_bitmap_base_coarse + index];
while (bitmap_coarse != 0) {
const uint i = findLSB(bitmap_coarse);
- const uint primitive_index = bitmap_index * 32 + i;
bitmap_coarse ^= bitmap_coarse & -bitmap_coarse;
+ const uint primitive_index = index * 32 + i;
if (test_glyph(primitive_index, tile_min, tile_max)) {
bitmap_l0 |= 1 << i;
}
}
}
- const uint fine_bitmap_l0_offset = tile_index * TILE_STRIDE_FINE + TILE_BITMAP_WORDS_L1 + bitmap_index;
- fine_bitmap_wo[fine_bitmap_l0_offset] = bitmap_l0;
+ const uint tile_base_fine = tile_index * TILE_STRIDE_FINE;
- const bool bit_l1 = bitmap_l0 != 0;
- uvec4 ballot_result = subgroupBallot(bit_l1);
+ // Write the L0 per-primitive bitmap.
+ const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE;
+ fine_bitmap_wo[tile_bitmap_l0_base_fine + index] = bitmap_l0;
+
+ // Write the L1 per-bitmap-word bitmap.
+ uvec4 ballot_result = subgroupBallot(bitmap_l0 != 0);
if (subgroupElect()) {
- const uint fine_bitmap_l1_offset = tile_index * TILE_STRIDE_FINE;
- fine_bitmap_wo[fine_bitmap_l1_offset + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
- fine_bitmap_wo[fine_bitmap_l1_offset + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
+ const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE;
+ fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 0] = ballot_result.x;
+ fine_bitmap_wo[tile_bitmap_l1_base_fine + 2 * gl_WorkGroupID.x + 1] = ballot_result.y;
}
}
layout (local_size_x = TILE_SIZE_FINE, local_size_y = TILE_SIZE_FINE, local_size_z = 1) in;
void main() {
- vec4 accum = vec4(0.0);
-
const uvec2 tile_coord = gl_WorkGroupID.xy;
const uint tile_index = tile_coord.y * primitive_uniforms.tile_resolution_fine.x + tile_coord.x;
- const uint bitmap_offset = tile_index * TILE_STRIDE_FINE;
+ const uint tile_base_fine = tile_index * TILE_STRIDE_FINE;
+ const uint tile_bitmap_l1_base_fine = tile_base_fine + TILE_BITMAP_L1_OFFSET_FINE;
+ const uint tile_bitmap_l0_base_fine = tile_base_fine + TILE_BITMAP_L0_OFFSET_FINE;
+
+ vec4 accum = vec4(0.0);
+ // For each tile, iterate over all words in the L1 bitmap.
+ //
+ // TODO: Count the non-zero words in the tile with atomics, so we can early out on empty tiles?
for (int index_l1 = 0; index_l1 < primitive_uniforms.num_primitives_1024; index_l1++) {
- uint bitmap_l1 = fine_bitmap_ro[bitmap_offset + index_l1];
+ // For each word, iterate all set bits.
+ uint bitmap_l1 = fine_bitmap_ro[tile_bitmap_l1_base_fine + index_l1];
while (bitmap_l1 != 0) {
- const uint index_l0 = index_l1 * 32 + findLSB(bitmap_l1);
- uint bitmap_l0 = fine_bitmap_ro[bitmap_offset + TILE_BITMAP_WORDS_L1 + index_l0];
+ const uint i = findLSB(bitmap_l1);
bitmap_l1 ^= bitmap_l1 & -bitmap_l1;
+
+ // For each set bit in the L1 bitmap, iterate the set bits in the
+ // corresponding L0 bitmap.
+ const uint index_l0 = index_l1 * 32 + i;
+ uint bitmap_l0 = fine_bitmap_ro[tile_bitmap_l0_base_fine + index_l0];
while (bitmap_l0 != 0) {
- const uint primitive_index = index_l0 * 32 + findLSB(bitmap_l0);
+ const uint j = findLSB(bitmap_l0);
bitmap_l0 ^= bitmap_l0 & -bitmap_l0;
+ // Set bits in the L0 bitmap indicate binned primitives for this tile.
+ const uint primitive_index = index_l0 * 32 + j;
+
const GlyphInstance gi = glyph_instances[primitive_index];
const Glyph gl = glyphs[gi.index];
const vec2 glyph_min = gi.position + gl.offset_min;