// prav_core/decoder/growth/stride32.rs

use crate::decoder::state::DecodingState;
use crate::intrinsics::{prefetch_l1, tzcnt};
use crate::topology::Topology;

/// Smears the set bits of `b` sideways inside one 32-bit row, admitting new bits only at
/// positions that are set in `m`.
///
/// Two independent sweeps are run from the same starting value, one towards higher bit
/// positions and one towards lower ones, each with shift distances 1, 2, 4, 8 and 16. The
/// result is the union of both sweeps.
///
/// Properties worth knowing before changing this:
/// - every bit of `b` is kept in the result, even where `m` is clear;
/// - `m` is applied only at the destination of each shift, so a doubled shift can land on
///   the far side of a cleared mask bit: `saturate_row(0b0001, 0b1011) == 0b1011`, although
///   bit 2 is not in the mask. This is therefore not a strict connected flood fill.
#[inline(always)]
fn saturate_row(b: u32, m: u32) -> u32 {
    const STEPS: [u32; 5] = [1, 2, 4, 8, 16];

    let mut towards_high = b;
    let mut towards_low = b;
    // The two sweeps never read each other's state, so they can share one loop.
    for &step in &STEPS {
        towards_high |= (towards_high << step) & m;
        towards_low |= (towards_low >> step) & m;
    }

    towards_high | towards_low
}

24#[inline(always)]
25fn spread_syndrome_2x32(boundary: u64, mask: u64) -> u64 {
26    let mut b0 = boundary as u32;
27    let mut b1 = (boundary >> 32) as u32;
28
29    let m0 = mask as u32;
30    let m1 = (mask >> 32) as u32;
31
32    // Pass 1
33    b0 = saturate_row(b0, m0);
34    b1 = saturate_row(b1, m1);
35
36    let v_down = b0 & m1;
37    let v_up = b1 & m0;
38    b1 |= v_down;
39    b0 |= v_up;
40
41    // Pass 2
42    b0 = saturate_row(b0, m0);
43    b1 = saturate_row(b1, m1);
44
45    let v_down = b0 & m1;
46    let v_up = b1 & m0;
47    b1 |= v_down;
48    b0 |= v_up;
49
50    (b0 as u64) | ((b1 as u64) << 32)
51}
52
/// Sets the bit for block `blk_idx` in the `block_dirty_mask` bitset (64 blocks per word).
///
/// A block index whose word lies beyond the end of the slice is silently ignored.
///
/// # Safety
///
/// The access is bounds-checked, so this function has no preconditions of its own. It stays
/// `unsafe fn` only so that its signature and existing call sites are unchanged.
#[inline(always)]
unsafe fn mark_block_dirty_slice(blk_idx: usize, block_dirty_mask: &mut [u64]) {
    if let Some(word) = block_dirty_mask.get_mut(blk_idx >> 6) {
        *word |= 1u64 << (blk_idx & 63);
    }
}

62// O(1) fast path for find - at p=0.001, ~95% of nodes are self-rooted
63#[inline(always)]
64unsafe fn find_in_slice(parents: &mut [u32], i: u32, block_dirty_mask: &mut [u64]) -> u32 {
65    let p = *parents.get_unchecked(i as usize);
66    if p == i {
67        return i; // Fast path: self-rooted (most common case)
68    }
69    find_in_slice_slow(parents, i, p, block_dirty_mask)
70}
71
72// Cold path: actual path compression
73#[inline(never)]
74#[cold]
75unsafe fn find_in_slice_slow(
76    parents: &mut [u32],
77    mut i: u32,
78    mut p: u32,
79    block_dirty_mask: &mut [u64],
80) -> u32 {
81    loop {
82        let grandparent = *parents.get_unchecked(p as usize);
83        if p == grandparent {
84            return p; // Found root
85        }
86        // Path halving: point i to grandparent
87        *parents.get_unchecked_mut(i as usize) = grandparent;
88        mark_block_dirty_slice(i as usize >> 6, block_dirty_mask);
89        i = grandparent;
90        p = *parents.get_unchecked(i as usize);
91    }
92}
93
94#[inline(always)]
95unsafe fn union_roots_in_slice(
96    parents: &mut [u32],
97    root_u: u32,
98    root_v: u32,
99    blocks_state: &mut [crate::decoder::state::BlockStateHot],
100    block_dirty_mask: &mut [u64],
101    block_offset: usize,
102) -> bool {
103    if root_u == root_v {
104        return false;
105    }
106
107    if root_u < root_v {
108        // u joins v - only invalidate u's block cache
109        let blk_u = (root_u as usize) >> 6;
110        // Check if blk_u is within our local range
111        if blk_u >= block_offset && blk_u < block_offset + blocks_state.len() {
112            blocks_state.get_unchecked_mut(blk_u - block_offset).root = u32::MAX;
113        }
114        mark_block_dirty_slice(blk_u, block_dirty_mask);
115        *parents.get_unchecked_mut(root_u as usize) = root_v;
116    } else {
117        // v joins u - only invalidate v's block cache
118        let blk_v = (root_v as usize) >> 6;
119        if blk_v >= block_offset && blk_v < block_offset + blocks_state.len() {
120            blocks_state.get_unchecked_mut(blk_v - block_offset).root = u32::MAX;
121        }
122        mark_block_dirty_slice(blk_v, block_dirty_mask);
123        *parents.get_unchecked_mut(root_v as usize) = root_u;
124    }
125    true
126}
127
128#[inline(always)]
129unsafe fn union_in_slice(
130    parents: &mut [u32],
131    u: u32,
132    v: u32,
133    blocks_state: &mut [crate::decoder::state::BlockStateHot],
134    block_dirty_mask: &mut [u64],
135    block_offset: usize,
136) -> bool {
137    let root_u = find_in_slice(parents, u, block_dirty_mask);
138    let root_v = find_in_slice(parents, v, block_dirty_mask);
139    union_roots_in_slice(parents, root_u, root_v, blocks_state, block_dirty_mask, block_offset)
140}
141
/// Sets the bit for block `blk_idx` in the `queued_mask` bitset (64 blocks per word).
///
/// A block index whose word lies beyond the end of the slice is silently ignored.
/// `_is_small_grid` is accepted for signature compatibility and is not used.
///
/// # Safety
///
/// The access is bounds-checked, so this function has no preconditions of its own. It stays
/// `unsafe fn` only so that its signature and existing call sites are unchanged.
#[inline(always)]
unsafe fn push_next_slice(blk_idx: usize, queued_mask: &mut [u64], _is_small_grid: bool) {
    if let Some(word) = queued_mask.get_mut(blk_idx >> 6) {
        *word |= 1u64 << (blk_idx & 63);
    }
}

151#[allow(clippy::too_many_arguments)]
152#[inline(always)]
153unsafe fn fast_grow_block_slice<const SILENT: bool>(
154    blk_idx: usize,
155    grow_mask: u64,
156    blocks_state: &mut [crate::decoder::state::BlockStateHot],
157    _defect_mask: &mut [u64],
158    block_dirty_mask: &mut [u64],
159    queued_mask: &mut [u64],
160    is_small_grid: bool,
161    block_offset: usize,
162) {
163    if grow_mask == 0 {
164        return;
165    }
166
167    let block = blocks_state.get_unchecked_mut(blk_idx);
168    block.occupied |= grow_mask;
169    block.boundary |= grow_mask;
170    mark_block_dirty_slice(blk_idx + block_offset, block_dirty_mask);
171
172    if !SILENT {
173        push_next_slice(blk_idx + block_offset, queued_mask, is_small_grid);
174    }
175}
176
177#[allow(clippy::too_many_arguments)]
178#[inline(always)]
179unsafe fn merge_shifted_portable_slice(
180    parents: &mut [u32],
181    mut mask: u64,
182    base_src: usize,
183    shift: isize,
184    base_target: usize,
185    blocks_state: &mut [crate::decoder::state::BlockStateHot],
186    block_dirty_mask: &mut [u64],
187    block_offset: usize,
188) -> bool {
189    let mut expanded = false;
190    let parents_ptr = parents.as_mut_ptr();
191
192    while mask != 0 {
193        let start_bit = tzcnt(mask) as usize;
194        let shifted_mask = mask >> start_bit;
195        let run_len = tzcnt(!shifted_mask) as usize;
196
197        if run_len == 64 {
198            mask = 0;
199        } else {
200            let clear_mask = !(((1u64 << run_len) - 1) << start_bit);
201            mask &= clear_mask;
202        }
203
204        let u_start = (base_src as isize + start_bit as isize + shift) as usize;
205        let v_start = base_target + start_bit;
206
207        for k in 0..run_len {
208            let u = (u_start + k) as u32;
209            let v = (v_start + k) as u32;
210
211            let pu = *parents_ptr.add(u as usize);
212            let pv = *parents_ptr.add(v as usize);
213
214            if pu == pv {
215                continue;
216            }
217
218            if pu == u && pv == v {
219                if u != v {
220                    // Both self-rooted: direct union
221                    if u < v {
222                        // u joins v
223                        let blk_u = (u as usize) >> 6;
224                        if blk_u >= block_offset && blk_u < block_offset + blocks_state.len() {
225                            blocks_state.get_unchecked_mut(blk_u - block_offset).root = u32::MAX;
226                        }
227                        *parents_ptr.add(u as usize) = v;
228                        mark_block_dirty_slice(blk_u, block_dirty_mask);
229                    } else {
230                        // v joins u
231                        let blk_v = (v as usize) >> 6;
232                        if blk_v >= block_offset && blk_v < block_offset + blocks_state.len() {
233                            blocks_state.get_unchecked_mut(blk_v - block_offset).root = u32::MAX;
234                        }
235                        *parents_ptr.add(v as usize) = u;
236                        mark_block_dirty_slice(blk_v, block_dirty_mask);
237                    }
238                    expanded = true;
239                }
240            } else if union_in_slice(parents, u, v, blocks_state, block_dirty_mask, block_offset) {
241                expanded = true;
242            }
243        }
244    }
245    expanded
246}
247
248#[allow(clippy::too_many_arguments)]
249#[inline(always)]
250unsafe fn union_mask_to_boundary_slice(
251    parents: &mut [u32],
252    mut mask: u64,
253    base_global: usize,
254    offset: isize,
255    boundary_node: u32,
256    blocks_state: &mut [crate::decoder::state::BlockStateHot],
257    block_dirty_mask: &mut [u64],
258    block_offset: usize,
259) -> bool {
260    let mut expanded = false;
261    while mask != 0 {
262        let start_bit = tzcnt(mask) as usize;
263        let shifted_mask = mask >> start_bit;
264        let run_len = tzcnt(!shifted_mask) as usize;
265
266        if run_len == 64 {
267            mask = 0;
268        } else {
269            let clear_mask = !(((1u64 << run_len) - 1) << start_bit);
270            mask &= clear_mask;
271        }
272
273        let u_start = (base_global as isize + offset + start_bit as isize) as usize;
274        for k in 0..run_len {
275            let u = (u_start + k) as u32;
276            let root_u = find_in_slice(parents, u, block_dirty_mask);
277            if root_u == boundary_node {
278                continue;
279            }
280            if union_roots_in_slice(
281                parents,
282                root_u,
283                boundary_node,
284                blocks_state,
285                block_dirty_mask,
286                block_offset,
287            ) {
288                expanded = true;
289            }
290        }
291    }
292    expanded
293}
294
295#[inline(always)]
296unsafe fn write_parents_mono_32_slice(
297    parents: &mut [u32],
298    mut mask: u64,
299    base_target: usize,
300    root_r: u32,
301    block_dirty_mask: &mut [u64],
302    _block_offset: usize,
303) {
304    let parents_ptr = parents.as_mut_ptr();
305
306    if mask != 0 {
307        mark_block_dirty_slice(base_target >> 6, block_dirty_mask);
308    }
309
310    while mask != 0 {
311        let start_bit = tzcnt(mask) as usize;
312        let shifted_mask = mask >> start_bit;
313        let run_len = tzcnt(!shifted_mask) as usize;
314
315        if run_len == 64 {
316            mask = 0;
317        } else {
318            let clear_mask = !(((1u64 << run_len) - 1) << start_bit);
319            mask &= clear_mask;
320        }
321
322        let ptr = parents_ptr.add(base_target + start_bit);
323        for k in 0..run_len {
324            *ptr.add(k) = root_r;
325        }
326    }
327}
328
329#[inline(always)]
330unsafe fn merge_mono_32_slice(
331    parents: &mut [u32],
332    mut mask: u64,
333    base_target: usize,
334    root_r: u32,
335    blocks_state: &mut [crate::decoder::state::BlockStateHot],
336    block_dirty_mask: &mut [u64],
337    block_offset: usize,
338) -> bool {
339    let mut expanded = false;
340    while mask != 0 {
341        let bit = tzcnt(mask) as usize;
342        mask &= mask - 1;
343        let target = (base_target + bit) as u32;
344
345        if *parents.get_unchecked(target as usize) == root_r {
346            continue;
347        }
348
349        if union_in_slice(parents, target, root_r, blocks_state, block_dirty_mask, block_offset) {
350            expanded = true;
351        }
352    }
353    expanded
354}
355
356impl<'a, T: Topology, const STRIDE_Y: usize> DecodingState<'a, T, STRIDE_Y> {
357    /// # Safety
358    ///
359    /// Caller must ensure:
360    /// - `blk_idx` is within bounds of `blocks_state`
361    /// - `parents`, `blocks_state`, `defect_mask`, `block_dirty_mask`, `queued_mask` are valid
362    /// - `block_offset + blk_idx` corresponds to valid parent array indices
363    #[allow(clippy::too_many_arguments)]
364    #[inline(always)]
365    pub unsafe fn process_block_small_stride_32<const SILENT: bool>(
366        blk_idx: usize,
367        parents: &mut [u32],
368        blocks_state: &mut [crate::decoder::state::BlockStateHot],
369        defect_mask: &mut [u64],
370        block_dirty_mask: &mut [u64],
371        queued_mask: &mut [u64],
372        is_small_grid: bool,
373        block_offset: usize,
374    ) -> bool {
375        let mut expanded = false;
376        let base_global = (blk_idx + block_offset) * 64;
377
378        let p_ptr = parents.as_ptr().add(base_global) as *const u8;
379        prefetch_l1(p_ptr);
380        prefetch_l1(p_ptr.add(64));
381        prefetch_l1(p_ptr.add(128));
382        prefetch_l1(p_ptr.add(192));
383
384        // Speculative prefetch for neighbor parent arrays
385        if blk_idx > 0 {
386            let up_ptr = parents.as_ptr().add((blk_idx + block_offset - 1) * 64) as *const u8;
387            prefetch_l1(up_ptr);
388            prefetch_l1(up_ptr.add(128));
389        }
390        if blk_idx + 1 < blocks_state.len() {
391            let down_ptr = parents.as_ptr().add((blk_idx + block_offset + 1) * 64) as *const u8;
392            prefetch_l1(down_ptr);
393            prefetch_l1(down_ptr.add(128));
394        }
395
396        let boundary_node = (parents.len() - 1) as u32;
397
398        let mut boundary_val = blocks_state.get_unchecked(blk_idx).boundary;
399
400        if boundary_val == 0 {
401            return false;
402        }
403
404        let occupied_val = blocks_state.get_unchecked(blk_idx).occupied;
405        let initial_boundary = boundary_val;
406
407        let valid_mask = blocks_state.get_unchecked(blk_idx).valid_mask;
408        let erasure_mask_val = !blocks_state.get_unchecked(blk_idx).erasure_mask;
409        let mask = valid_mask & erasure_mask_val;
410
411        let mut spread_boundary = spread_syndrome_2x32(boundary_val, mask);
412        let up = (spread_boundary << 32) & mask;
413        let down = (spread_boundary >> 32) & mask;
414        spread_boundary |= up | down;
415
416        // --- O(1) Monochromatic Check using cached root ---
417        // At p=0.001, ~95% of nodes are self-rooted, and the cache
418        // is invalidated on any union, so trusting it avoids O(popcount) find() calls
419        let mut is_monochromatic;
420        let root_r;
421        {
422            let cached_root = blocks_state.get_unchecked(blk_idx).root;
423            if cached_root != u32::MAX {
424                // Cache hit - validate with single O(1) check
425                let p = *parents.get_unchecked(cached_root as usize);
426                root_r = if p == cached_root {
427                    cached_root // Root unchanged (most common)
428                } else {
429                    // Root was updated by a union; find the current root
430                    find_in_slice(parents, cached_root, block_dirty_mask)
431                };
432                is_monochromatic = true;
433            } else {
434                // Cache miss - must verify monochromatic status
435                let first_bit = tzcnt(boundary_val) as usize;
436                root_r = find_in_slice(parents, (base_global + first_bit) as u32, block_dirty_mask);
437                is_monochromatic = true;
438
439                // Check remaining boundary bits for different roots
440                let mut temp = boundary_val & !(1u64 << first_bit);
441                while temp != 0 {
442                    let bit = tzcnt(temp) as usize;
443                    temp &= temp - 1;
444
445                    let node = (base_global + bit) as u32;
446                    // Fast path: direct parent check
447                    if *parents.get_unchecked(node as usize) == root_r {
448                        continue;
449                    }
450                    // Slow path: full find
451                    if find_in_slice(parents, node, block_dirty_mask) != root_r {
452                        is_monochromatic = false;
453                        break;
454                    }
455                }
456
457                // If monochromatic, cache the root for next iteration
458                if is_monochromatic {
459                    blocks_state.get_unchecked_mut(blk_idx).root = root_r;
460                }
461            }
462        }
463
464        if is_monochromatic {
465            // --- Fast Path: Monochromatic ---
466
467            let newly_occupied = spread_boundary & !occupied_val;
468            if newly_occupied != 0 {
469                write_parents_mono_32_slice(
470                    parents,
471                    newly_occupied,
472                    base_global,
473                    root_r,
474                    block_dirty_mask,
475                    block_offset,
476                );
477            }
478
479            let _boundary_down_internal =
480                (spread_boundary << 32) & !valid_mask & 0xFFFFFFFF00000000;
481            // Up Neighbor (Block - 1)
482            if blk_idx > 0 {
483                let blk_up = blk_idx - 1;
484                let valid_up = blocks_state.get_unchecked(blk_up).valid_mask;
485
486                let spread_to_up = (spread_boundary & 0xFFFFFFFF) << 32;
487
488                let hits = (spread_boundary & 0xFFFFFFFF) & (!valid_up >> 32);
489                if hits != 0
490                    && union_roots_in_slice(
491                        parents,
492                        root_r,
493                        boundary_node,
494                        blocks_state,
495                        block_dirty_mask,
496                        block_offset,
497                    )
498                {
499                    expanded = true;
500                }
501
502                if valid_up != 0 {
503                    let occupied_up = blocks_state.get_unchecked(blk_up).occupied;
504                    let erasure_up = !blocks_state.get_unchecked(blk_up).erasure_mask;
505
506                    let grow_up = spread_to_up & !occupied_up & valid_up & erasure_up;
507                    if grow_up != 0 {
508                        fast_grow_block_slice::<SILENT>(
509                            blk_up,
510                            grow_up,
511                            blocks_state,
512                            defect_mask,
513                            block_dirty_mask,
514                            queued_mask,
515                            is_small_grid,
516                            block_offset,
517                        );
518                        write_parents_mono_32_slice(
519                            parents,
520                            grow_up,
521                            (blk_up + block_offset) * 64,
522                            root_r,
523                            block_dirty_mask,
524                            block_offset,
525                        );
526                        expanded = true;
527                    }
528
529                    let merge_up = spread_to_up & occupied_up;
530                    if merge_up != 0
531                        && merge_mono_32_slice(
532                            parents,
533                            merge_up,
534                            (blk_up + block_offset) * 64,
535                            root_r,
536                            blocks_state,
537                            block_dirty_mask,
538                            block_offset,
539                        )
540                    {
541                        expanded = true;
542                    }
543                }
544            } else if (spread_boundary & 0xFFFFFFFF) != 0
545                && union_roots_in_slice(
546                    parents,
547                    root_r,
548                    boundary_node,
549                    blocks_state,
550                    block_dirty_mask,
551                    block_offset,
552                )
553            {
554                expanded = true;
555            }
556
557            // Down Neighbor (Block + 1)
558            if blk_idx + 1 < blocks_state.len() {
559                let blk_down = blk_idx + 1;
560                let valid_down = blocks_state.get_unchecked(blk_down).valid_mask;
561                let spread_to_down = spread_boundary >> 32;
562
563                let hits = spread_to_down & !valid_down;
564                if hits != 0
565                    && union_roots_in_slice(
566                        parents,
567                        root_r,
568                        boundary_node,
569                        blocks_state,
570                        block_dirty_mask,
571                        block_offset,
572                    )
573                {
574                    expanded = true;
575                }
576
577                if valid_down != 0 {
578                    let occupied_down = blocks_state.get_unchecked(blk_down).occupied;
579                    let erasure_down = !blocks_state.get_unchecked(blk_down).erasure_mask;
580
581                    let grow_down = spread_to_down & !occupied_down & valid_down & erasure_down;
582                    if grow_down != 0 {
583                        fast_grow_block_slice::<SILENT>(
584                            blk_down,
585                            grow_down,
586                            blocks_state,
587                            defect_mask,
588                            block_dirty_mask,
589                            queued_mask,
590                            is_small_grid,
591                            block_offset,
592                        );
593                        write_parents_mono_32_slice(
594                            parents,
595                            grow_down,
596                            (blk_down + block_offset) * 64,
597                            root_r,
598                            block_dirty_mask,
599                            block_offset,
600                        );
601                        expanded = true;
602                    }
603
604                    let merge_down = spread_to_down & occupied_down;
605                    if merge_down != 0
606                        && merge_mono_32_slice(
607                            parents,
608                            merge_down,
609                            (blk_down + block_offset) * 64,
610                            root_r,
611                            blocks_state,
612                            block_dirty_mask,
613                            block_offset,
614                        )
615                    {
616                        expanded = true;
617                    }
618                }
619            } else if (spread_boundary & 0xFFFFFFFF00000000) != 0
620                && union_roots_in_slice(
621                    parents,
622                    root_r,
623                    boundary_node,
624                    blocks_state,
625                    block_dirty_mask,
626                    block_offset,
627                )
628            {
629                expanded = true;
630            }
631
632            if (spread_boundary & 0x0000000100000001) != 0
633                && union_roots_in_slice(
634                    parents,
635                    root_r,
636                    boundary_node,
637                    blocks_state,
638                    block_dirty_mask,
639                    block_offset,
640                )
641            {
642                expanded = true;
643            }
644
645            if (spread_boundary & 0x8000000080000000) != 0
646                && union_roots_in_slice(
647                    parents,
648                    root_r,
649                    boundary_node,
650                    blocks_state,
651                    block_dirty_mask,
652                    block_offset,
653                )
654            {
655                expanded = true;
656            }
657        } else {
658            // --- Slow Path: Polychromatic ---
659
660            let vertical_pairs = spread_boundary & (spread_boundary >> 32) & 0xFFFFFFFF;
661            if vertical_pairs != 0 {
662                let mut temp = vertical_pairs;
663                while temp != 0 {
664                    let bit = tzcnt(temp) as usize;
665                    temp &= temp - 1;
666                    let u = (base_global + bit) as u32;
667                    let v = (base_global + bit + 32) as u32;
668                    if union_in_slice(parents, u, v, blocks_state, block_dirty_mask, block_offset) {
669                        expanded = true;
670                    }
671                }
672            }
673
674            let horizontal_pairs = spread_boundary & (spread_boundary >> 1) & 0x7FFFFFFF7FFFFFFF;
675            if horizontal_pairs != 0
676                && merge_shifted_portable_slice(
677                    parents,
678                    horizontal_pairs,
679                    base_global,
680                    1,
681                    base_global,
682                    blocks_state,
683                    block_dirty_mask,
684                    block_offset,
685                )
686            {
687                expanded = true;
688            }
689
690            let boundary_down_internal = (spread_boundary << 32) & !valid_mask & 0xFFFFFFFF00000000;
691            if boundary_down_internal != 0
692                && union_mask_to_boundary_slice(
693                    parents,
694                    boundary_down_internal,
695                    base_global,
696                    -32,
697                    boundary_node,
698                    blocks_state,
699                    block_dirty_mask,
700                    block_offset,
701                )
702            {
703                expanded = true;
704            }
705
706            let boundary_up_internal = (spread_boundary >> 32) & !valid_mask & 0x00000000FFFFFFFF;
707            if boundary_up_internal != 0
708                && union_mask_to_boundary_slice(
709                    parents,
710                    boundary_up_internal,
711                    base_global,
712                    32,
713                    boundary_node,
714                    blocks_state,
715                    block_dirty_mask,
716                    block_offset,
717                )
718            {
719                expanded = true;
720            }
721
722            // Inter-Block Connections
723
724            // Up Neighbor (Block - 1)
725            if blk_idx > 0 {
726                let blk_up = blk_idx - 1;
727                let valid_up = blocks_state.get_unchecked(blk_up).valid_mask;
728
729                let spread_to_up = (spread_boundary & 0xFFFFFFFF) << 32;
730
731                let hits = (spread_boundary & 0xFFFFFFFF) & (!valid_up >> 32);
732                if hits != 0
733                    && union_mask_to_boundary_slice(
734                        parents,
735                        hits,
736                        base_global,
737                        0,
738                        boundary_node,
739                        blocks_state,
740                        block_dirty_mask,
741                        block_offset,
742                    )
743                {
744                    expanded = true;
745                }
746
747                if valid_up != 0 {
748                    let occupied_up = blocks_state.get_unchecked(blk_up).occupied;
749                    let erasure_up = !blocks_state.get_unchecked(blk_up).erasure_mask;
750
751                    let grow_up = spread_to_up & !occupied_up & valid_up & erasure_up;
752                    if grow_up != 0 {
753                        fast_grow_block_slice::<SILENT>(
754                            blk_up,
755                            grow_up,
756                            blocks_state,
757                            defect_mask,
758                            block_dirty_mask,
759                            queued_mask,
760                            is_small_grid,
761                            block_offset,
762                        );
763                        expanded = true;
764                    }
765
766                    let merge_up = spread_to_up & occupied_up;
767                    if merge_up != 0
768                        && merge_shifted_portable_slice(
769                            parents,
770                            merge_up,
771                            base_global,
772                            -32,
773                            (blk_up + block_offset) * 64,
774                            blocks_state,
775                            block_dirty_mask,
776                            block_offset,
777                        )
778                    {
779                        expanded = true;
780                    }
781                }
782            } else {
783                let hits = spread_boundary & 0xFFFFFFFF;
784                if hits != 0
785                    && union_mask_to_boundary_slice(
786                        parents,
787                        hits,
788                        base_global,
789                        0,
790                        boundary_node,
791                        blocks_state,
792                        block_dirty_mask,
793                        block_offset,
794                    )
795                {
796                    expanded = true;
797                }
798            }
799
800            // Down Neighbor (Block + 1)
801            if blk_idx + 1 < blocks_state.len() {
802                let blk_down = blk_idx + 1;
803                let valid_down = blocks_state.get_unchecked(blk_down).valid_mask;
804                let spread_to_down = spread_boundary >> 32;
805
806                let hits = spread_to_down & !valid_down;
807                if hits != 0
808                    && union_mask_to_boundary_slice(
809                        parents,
810                        hits << 32,
811                        base_global,
812                        0,
813                        boundary_node,
814                        blocks_state,
815                        block_dirty_mask,
816                        block_offset,
817                    )
818                {
819                    expanded = true;
820                }
821
822                if valid_down != 0 {
823                    let occupied_down = blocks_state.get_unchecked(blk_down).occupied;
824                    let erasure_down = !blocks_state.get_unchecked(blk_down).erasure_mask;
825
826                    let grow_down = spread_to_down & !occupied_down & valid_down & erasure_down;
827                    if grow_down != 0 {
828                        fast_grow_block_slice::<SILENT>(
829                            blk_down,
830                            grow_down,
831                            blocks_state,
832                            defect_mask,
833                            block_dirty_mask,
834                            queued_mask,
835                            is_small_grid,
836                            block_offset,
837                        );
838                        expanded = true;
839                    }
840
841                    let merge_down = spread_to_down & occupied_down;
842                    if merge_down != 0
843                        && merge_shifted_portable_slice(
844                            parents,
845                            merge_down,
846                            base_global,
847                            32,
848                            (blk_down + block_offset) * 64,
849                            blocks_state,
850                            block_dirty_mask,
851                            block_offset,
852                        )
853                    {
854                        expanded = true;
855                    }
856                }
857            } else {
858                let hits = spread_boundary & 0xFFFFFFFF00000000;
859                if hits != 0
860                    && union_mask_to_boundary_slice(
861                        parents,
862                        hits,
863                        base_global,
864                        0,
865                        boundary_node,
866                        blocks_state,
867                        block_dirty_mask,
868                        block_offset,
869                    )
870                {
871                    expanded = true;
872                }
873            }
874
875            let left_edge_mask = 0x0000000100000001;
876            let left_hits = spread_boundary & left_edge_mask;
877            if left_hits != 0
878                && union_mask_to_boundary_slice(
879                    parents,
880                    left_hits,
881                    base_global,
882                    0,
883                    boundary_node,
884                    blocks_state,
885                    block_dirty_mask,
886                    block_offset,
887                )
888            {
889                expanded = true;
890            }
891
892            let right_edge_mask = 0x8000000080000000;
893            let right_hits = spread_boundary & right_edge_mask;
894            if right_hits != 0
895                && union_mask_to_boundary_slice(
896                    parents,
897                    right_hits,
898                    base_global,
899                    0,
900                    boundary_node,
901                    blocks_state,
902                    block_dirty_mask,
903                    block_offset,
904                )
905            {
906                expanded = true;
907            }
908        }
909
910        let new_occupied = occupied_val | spread_boundary;
911        boundary_val &= !spread_boundary;
912
913        if occupied_val != new_occupied || boundary_val != initial_boundary {
914            let block = blocks_state.get_unchecked_mut(blk_idx);
915            block.occupied = new_occupied;
916            block.boundary = boundary_val;
917            mark_block_dirty_slice(blk_idx + block_offset, block_dirty_mask);
918            expanded = true;
919            if !SILENT {
920                push_next_slice(blk_idx + block_offset, queued_mask, is_small_grid);
921            }
922        }
923
924        expanded
925    }
926}