//! prav_core/decoder/growth/small_grid.rs
//!
//! Block-local cluster-growth routines for small grids (one `u64` bitboard
//! per 64-node block), with fast monochromatic and slow polychromatic paths.

1use crate::decoder::state::DecodingState;
2use crate::decoder::union_find::UnionFind;
3use crate::topology::Topology;
4
5impl<'a, T: Topology, const STRIDE_Y: usize> DecodingState<'a, T, STRIDE_Y> {
    /// Bulk-unions the target nodes selected by `mask` into the cluster
    /// rooted at `root_source`.
    ///
    /// `mask` selects bit offsets relative to `base_target`; each set bit
    /// names one node index in `self.parents`. Returns `true` if at least
    /// one union actually merged two distinct clusters ("expanded").
    ///
    /// The work is split into three phases so a run of targets sharing a
    /// root costs one `union_roots` call instead of one `union` per bit:
    ///   1. gather up to 8 *unique* target roots (stack buffer, linear dedup),
    ///   2. union each unique root with `root_source` once,
    ///   3. fall back to per-node `union` for any bits left over when the
    ///      8-slot buffer filled up (rare overflow path).
    ///
    /// # Safety
    /// `root_source` and every `base_target + bit` (for each set bit of
    /// `mask`) must be in-bounds indices into `self.parents`; unchecked
    /// indexing is used throughout.
    #[allow(clippy::needless_range_loop)]
    #[inline(always)]
    pub(crate) unsafe fn merge_mono(
        &mut self,
        mut mask: u64,
        base_target: usize,
        root_source: u32,
    ) -> bool {
        if mask == 0 {
            return false;
        }

        let mut expanded = false;
        let mut roots = [0u32; 8]; // Stack-allocated buffer for unique roots
        let mut root_count = 0usize;

        // Phase 1: Gather unique roots from target nodes
        while mask != 0 && root_count < 8 {
            let bit = mask.trailing_zeros();
            mask &= mask - 1; // clear lowest set bit
            let target_node = (base_target + bit as usize) as u32;

            let p = *self.parents.get_unchecked(target_node as usize);

            // Skip if already connected to root_source
            if p == root_source {
                continue;
            }

            // Fast path: node is its own root (avoids a find() call)
            let target_root = if p == target_node {
                p
            } else {
                self.find(target_node)
            };

            // Skip if find() resolved to root_source
            if target_root == root_source {
                continue;
            }

            // Check if already collected (linear scan - small array)
            let mut found = false;
            for i in 0..root_count {
                if roots[i] == target_root {
                    found = true;
                    break;
                }
            }

            if !found {
                roots[root_count] = target_root;
                root_count += 1;
            }
        }

        // Phase 2: Bulk union unique roots with root_source.
        // NOTE(review): after the first successful union here, later entries
        // of `roots` (and `root_source` itself) may no longer be roots; this
        // relies on `union_roots` tolerating stale root ids — confirm
        // against union_find's contract.
        for i in 0..root_count {
            if self.union_roots(roots[i], root_source) {
                expanded = true;
            }
        }

        // Phase 3: Handle overflow (remaining bits if buffer was full)
        while mask != 0 {
            let bit = mask.trailing_zeros();
            mask &= mask - 1;
            let target_node = (base_target + bit as usize) as u32;
            if self.union(target_node, root_source) {
                expanded = true;
            }
        }

        expanded
    }
81
    /// Optimized path for monochromatic blocks (>95% of blocks at p=0.001-0.06).
    ///
    /// Key optimization: When neighbor blocks share the same root (same cluster),
    /// we skip ALL union logic and use pure bitwise spreading.
    ///
    /// `canonical_root` is the caller's cached root id for this block's
    /// single cluster; it is revalidated once (O(1)) and kept current in a
    /// local as unions occur, then written back once at the end.
    /// Returns `true` if this block or a neighbor it spread into changed.
    ///
    /// # Safety
    /// `blk_idx` must index `self.blocks_state`, and the 64-entry parent
    /// ranges for this block and its existing up/down neighbors must be
    /// in-bounds for `self.parents`; unchecked indexing and raw-pointer
    /// prefetches are used throughout.
    ///
    /// NOTE(review): the polychromatic sibling computes
    /// `base_global = self.parent_offset + blk_idx * 64`, while this path
    /// omits `parent_offset` (here and in the neighbor prefetches) —
    /// confirm this path only runs when the offset is zero.
    #[inline(always)]
    unsafe fn process_mono<const SILENT: bool>(
        &mut self,
        blk_idx: usize,
        mut canonical_root: u32,
    ) -> bool {
        let mut expanded = false;
        let base_global = blk_idx * 64;
        // Last parent slot doubles as the virtual boundary node.
        let boundary_node = (self.parents.len() - 1) as u32;

        // Prefetch current block parent array (4 x 64-byte cache lines)
        let p_ptr = self.parents.as_ptr().add(base_global) as *const u8;
        crate::intrinsics::prefetch_l1(p_ptr);
        crate::intrinsics::prefetch_l1(p_ptr.add(64));
        crate::intrinsics::prefetch_l1(p_ptr.add(128));
        crate::intrinsics::prefetch_l1(p_ptr.add(192));

        // Prefetch neighbor block states and parent arrays
        if blk_idx > 0 {
            let ptr = self.blocks_state.as_ptr().add(blk_idx - 1) as *const u8;
            crate::intrinsics::prefetch_l1(ptr);
            let p_ptr = self.parents.as_ptr().add((blk_idx - 1) * 64) as *const u8;
            crate::intrinsics::prefetch_l1(p_ptr);
            crate::intrinsics::prefetch_l1(p_ptr.add(128));
        }
        if blk_idx + 1 < self.blocks_state.len() {
            let ptr = self.blocks_state.as_ptr().add(blk_idx + 1) as *const u8;
            crate::intrinsics::prefetch_l1(ptr);
            let p_ptr = self.parents.as_ptr().add((blk_idx + 1) * 64) as *const u8;
            crate::intrinsics::prefetch_l1(p_ptr);
            crate::intrinsics::prefetch_l1(p_ptr.add(128));
        }

        // Load block state into local variables to minimize memory traffic
        // and allow single-writeback optimization.
        let (mut occupied, mut boundary, effective_mask, valid_mask, _flags, cached_root_in_memory) = {
            let b = self.blocks_state.get_unchecked(blk_idx);
            (
                b.occupied,
                b.boundary,
                b.effective_mask,
                b.valid_mask,
                b.flags,
                b.root,
            )
        };

        // Nothing to spread from.
        if boundary == 0 {
            return false;
        }

        // Snapshots used to decide whether a writeback is needed at the end.
        let initial_occupied = occupied;
        let initial_boundary = boundary;

        // Validate cached root (O(1) check)
        let parent_of_root = *self.parents.get_unchecked(canonical_root as usize);
        if parent_of_root != canonical_root {
            // Root was updated by a union; find actual root
            canonical_root = self.find(canonical_root);
            // Intermediate write removed - will be handled by single writeback
        }

        // Calculate spread (horizontal)
        let mut spread_boundary = if STRIDE_Y == 32 {
            // Stride 32: row-end/row-start masks are compile-time constants.
            crate::intrinsics::spread_syndrome_masked(
                boundary,
                effective_mask,
                0x8000000080000000,
                0x0000000100000001,
            )
        } else {
            crate::intrinsics::spread_syndrome_masked(
                boundary,
                effective_mask,
                self.row_end_mask,
                self.row_start_mask,
            )
        };

        // Intra-block vertical spread (logarithmic): doubling shift
        // distances (1,2,4,8,16,32 rows) saturate the reachable set in
        // at most 6 steps instead of one shift per row.
        if STRIDE_Y < 64 {
            let mask = effective_mask;
            let mut current = spread_boundary;

            {
                let up = (current << STRIDE_Y) & mask;
                let down = (current >> STRIDE_Y) & mask;
                current |= up | down;
            }
            let shift_2 = STRIDE_Y * 2;
            if shift_2 < 64 {
                let up = (current << shift_2) & mask;
                let down = (current >> shift_2) & mask;
                current |= up | down;
            }
            let shift_4 = STRIDE_Y * 4;
            if shift_4 < 64 {
                let up = (current << shift_4) & mask;
                let down = (current >> shift_4) & mask;
                current |= up | down;
            }
            let shift_8 = STRIDE_Y * 8;
            if shift_8 < 64 {
                let up = (current << shift_8) & mask;
                let down = (current >> shift_8) & mask;
                current |= up | down;
            }
            let shift_16 = STRIDE_Y * 16;
            if shift_16 < 64 {
                let up = (current << shift_16) & mask;
                let down = (current >> shift_16) & mask;
                current |= up | down;
            }
            let shift_32 = STRIDE_Y * 32;
            if shift_32 < 64 {
                let up = (current << shift_32) & mask;
                let down = (current >> shift_32) & mask;
                current |= up | down;
            }

            spread_boundary = current;
        }

        // Direct parent assignment for new nodes (no union needed!)
        // Mono block: every freshly reached node joins canonical_root
        // directly. Loop is manually 4x unrolled for ILP.
        let newly_occupied = spread_boundary & !occupied;
        let mut temp_new = newly_occupied;
        while temp_new != 0 {
            let bit1 = temp_new.trailing_zeros();
            temp_new &= temp_new - 1;
            *self.parents.get_unchecked_mut(base_global + bit1 as usize) = canonical_root;

            if temp_new == 0 {
                break;
            }
            let bit2 = temp_new.trailing_zeros();
            temp_new &= temp_new - 1;
            *self.parents.get_unchecked_mut(base_global + bit2 as usize) = canonical_root;

            if temp_new == 0 {
                break;
            }
            let bit3 = temp_new.trailing_zeros();
            temp_new &= temp_new - 1;
            *self.parents.get_unchecked_mut(base_global + bit3 as usize) = canonical_root;

            if temp_new == 0 {
                break;
            }
            let bit4 = temp_new.trailing_zeros();
            temp_new &= temp_new - 1;
            *self.parents.get_unchecked_mut(base_global + bit4 as usize) = canonical_root;
        }

        // Internal boundary connections (holes): spread bits vertically
        // adjacent to invalid bits inside this block touch a hole, which
        // connects the cluster to the virtual boundary node.
        if STRIDE_Y < 64 {
            let internal_bottom_edge = spread_boundary & (!valid_mask >> STRIDE_Y);
            let internal_top_edge = spread_boundary & (!valid_mask << STRIDE_Y);
            let internal_hits = internal_bottom_edge | internal_top_edge;

            if internal_hits != 0 && self.union_roots(canonical_root, boundary_node) {
                expanded = true;
                // Keep the local root cache pointing at the union winner.
                // NOTE(review): this assumes the larger id survives
                // union_roots (union by index) — confirm; the same pattern
                // recurs below.
                if canonical_root < boundary_node {
                    canonical_root = boundary_node;
                }
            }
        }

        let new_occupied = occupied | spread_boundary;
        if new_occupied != occupied {
            occupied = new_occupied;
            expanded = true;
            if !SILENT {
                // Re-queue this block for the next growth round.
                self.push_next(blk_idx);
            }
        }

        // ============== NEIGHBOR PROCESSING ==============
        // Up neighbor (blk_idx - 1)
        if blk_idx > 0 {
            let blk_up = blk_idx - 1;
            let (valid_up, occupied_up, effective_up, r_up_val) = {
                let b = self.blocks_state.get_unchecked(blk_up);
                (b.valid_mask, b.occupied, b.effective_mask, b.root)
            };
            // Shift the bottom rows of the up block into view: our top row
            // lands on the up block's bottom row.
            let shift_amt = 64 - STRIDE_Y;
            let spread_to_up = spread_boundary << shift_amt;

            // Boundary connection check: bits invalid in the up block are
            // holes or grid edges; spreading into them connects this
            // cluster to the virtual boundary node.
            let boundary_connect_mask = spread_to_up & !valid_up;
            if boundary_connect_mask != 0 {
                if self.union_roots(canonical_root, boundary_node) {
                    expanded = true;
                    if canonical_root < boundary_node {
                        canonical_root = boundary_node;
                    }
                }
            }

            if valid_up != 0 {
                let grow_up = spread_to_up & !occupied_up & effective_up;
                let merge_up = spread_to_up & occupied_up;

                // CRITICAL OPTIMIZATION: Check if neighbor has same root (same cluster)
                // r_up_val == u32::MAX marks a polychromatic neighbor.
                if r_up_val != u32::MAX {
                    // Fast path: check if cached roots match (common case at p=0.001)
                    if r_up_val == canonical_root {
                        // Same cluster - skip all union logic
                        if grow_up != 0 {
                            self.fast_grow_block::<SILENT>(blk_up, grow_up);
                            expanded = true;
                        }
                        // merge_up handled implicitly (self-loop)
                    } else {
                        // Different clusters OR need to verify roots
                        let neighbor_root = unsafe {
                            let p = *self.parents.get_unchecked(r_up_val as usize);
                            if p == r_up_val {
                                r_up_val
                            } else {
                                self.find(r_up_val)
                            }
                        };
                        if neighbor_root == canonical_root {
                            // Same cluster after root resolution
                            if grow_up != 0 {
                                self.fast_grow_block::<SILENT>(blk_up, grow_up);
                                expanded = true;
                            }
                        } else {
                            // Different clusters - perform union
                            if (merge_up != 0 || grow_up != 0)
                                && self.union_roots(canonical_root, neighbor_root)
                            {
                                expanded = true;
                                if canonical_root < neighbor_root {
                                    canonical_root = neighbor_root;
                                }
                            }
                            if grow_up != 0 {
                                self.fast_grow_block::<SILENT>(blk_up, grow_up);
                                expanded = true;
                            }
                        }
                    }
                } else {
                    // Neighbor is polychromatic - use merge_mono
                    if grow_up != 0 {
                        self.fast_grow_block::<SILENT>(blk_up, grow_up);
                        expanded = true;
                    }
                    if merge_up != 0 {
                        if self.merge_mono(merge_up, blk_up * 64, canonical_root) {
                            expanded = true;
                        }
                        // merge_mono may have re-rooted us; refresh the cache.
                        canonical_root = self.find(canonical_root);
                    }
                }
            }
        } else {
            // Top block - check boundary
            if self.boundary_config.check_top {
                if STRIDE_Y < 64 {
                    // Only the first row (lowest STRIDE_Y bits) touches the
                    // top edge of the grid.
                    let mask_low = (1u64 << STRIDE_Y) - 1;
                    let spread_boundary_masked = spread_boundary & mask_low;
                    if spread_boundary_masked != 0
                        && self.union_roots(canonical_root, boundary_node)
                    {
                        expanded = true;
                        if canonical_root < boundary_node {
                            canonical_root = boundary_node;
                        }
                    }
                } else if STRIDE_Y == 64 {
                    // Stride 64: All active nodes in block 0 are on top boundary
                    if self.union_roots(canonical_root, boundary_node) {
                        expanded = true;
                        if canonical_root < boundary_node {
                            canonical_root = boundary_node;
                        }
                    }
                }
            }
        }

        // Down neighbor (blk_idx + 1) — mirror of the up-neighbor logic
        // with the shift direction reversed.
        if blk_idx + 1 < self.blocks_state.len() {
            let blk_down = blk_idx + 1;
            let (valid_down, occupied_down, effective_down, r_down_val) = {
                let b = self.blocks_state.get_unchecked(blk_down);
                (b.valid_mask, b.occupied, b.effective_mask, b.root)
            };
            let shift_amt = 64 - STRIDE_Y;
            let spread_to_down = spread_boundary >> shift_amt;

            // Boundary connection check: invalid bits in the down block are
            // holes (or the grid edge) and connect us to the boundary node.
            let boundary_connect_mask = spread_to_down & !valid_down;
            if boundary_connect_mask != 0 {
                if self.union_roots(canonical_root, boundary_node) {
                    expanded = true;
                    if canonical_root < boundary_node {
                        canonical_root = boundary_node;
                    }
                }
            }

            if valid_down != 0 {
                let grow_down = spread_to_down & !occupied_down & effective_down;
                let merge_down = spread_to_down & occupied_down;

                // CRITICAL OPTIMIZATION: Check if neighbor has same root
                if r_down_val != u32::MAX {
                    // Fast path: check if cached roots match
                    if r_down_val == canonical_root {
                        // Same cluster - just spread
                        if grow_down != 0 {
                            self.fast_grow_block::<SILENT>(blk_down, grow_down);
                            expanded = true;
                        }
                        // merge_down is a self-loop, skip entirely
                    } else {
                        // Different clusters OR need to verify roots
                        let neighbor_root = unsafe {
                            let p = *self.parents.get_unchecked(r_down_val as usize);
                            if p == r_down_val {
                                r_down_val
                            } else {
                                self.find(r_down_val)
                            }
                        };
                        if neighbor_root == canonical_root {
                            // Same cluster after root resolution
                            if grow_down != 0 {
                                self.fast_grow_block::<SILENT>(blk_down, grow_down);
                                expanded = true;
                            }
                        } else {
                            // Different clusters - perform union
                            if (merge_down != 0 || grow_down != 0)
                                && self.union_roots(canonical_root, neighbor_root)
                            {
                                expanded = true;
                                if canonical_root < neighbor_root {
                                    canonical_root = neighbor_root;
                                }
                            }
                            if grow_down != 0 {
                                self.fast_grow_block::<SILENT>(blk_down, grow_down);
                                expanded = true;
                            }
                        }
                    }
                } else {
                    // Neighbor is polychromatic - use merge_mono
                    if grow_down != 0 {
                        self.fast_grow_block::<SILENT>(blk_down, grow_down);
                        expanded = true;
                    }
                    if merge_down != 0 {
                        if self.merge_mono(merge_down, blk_down * 64, canonical_root) {
                            expanded = true;
                        }
                        // merge_mono may have re-rooted us; refresh the cache.
                        canonical_root = self.find(canonical_root);
                    }
                }
            }
        } else {
            // Last block - bottom boundary
            if self.boundary_config.check_bottom {
                if STRIDE_Y < 64 {
                    // Only the final row touches the bottom edge.
                    let shift_amt = 64 - STRIDE_Y;
                    let spread_boundary_masked = spread_boundary >> shift_amt;
                    if spread_boundary_masked != 0
                        && self.union_roots(canonical_root, boundary_node)
                    {
                        expanded = true;
                        if canonical_root < boundary_node {
                            canonical_root = boundary_node;
                        }
                    }
                } else {
                    // Stride 64: All active nodes connect to boundary
                    if self.union_roots(canonical_root, boundary_node) {
                        expanded = true;
                        if canonical_root < boundary_node {
                            canonical_root = boundary_node;
                        }
                    }
                }
            }
        }

        // Horizontal edges: spread bits landing on the first/last column
        // connect to the boundary when that side is enabled.
        let left_edge = spread_boundary & self.row_start_mask;
        if left_edge != 0
            && self.boundary_config.check_left
            && self.union_roots(canonical_root, boundary_node)
        {
            expanded = true;
            if canonical_root < boundary_node {
                canonical_root = boundary_node;
            }
        }

        let right_edge = spread_boundary & self.row_end_mask;
        if right_edge != 0
            && self.boundary_config.check_right
            && self.union_roots(canonical_root, boundary_node)
        {
            expanded = true;
            if canonical_root < boundary_node {
                canonical_root = boundary_node;
            }
        }

        // Clean up boundary: everything we just spread from is now interior.
        boundary &= !spread_boundary;

        // Single writeback of all state changes
        if occupied != initial_occupied
            || boundary != initial_boundary
            || expanded
            || canonical_root != cached_root_in_memory
        {
            let block = self.blocks_state.get_unchecked_mut(blk_idx);
            block.occupied = occupied;
            block.boundary = boundary;
            block.root = canonical_root;
            self.mark_block_dirty(blk_idx);
        }

        expanded
    }
537
538    /// Polychromatic path - handles blocks with multiple roots.
539    /// This is the slow path, used when block.root == u32::MAX.
540    #[inline(always)]
541    unsafe fn process_poly<const SILENT: bool>(&mut self, blk_idx: usize) -> bool {
542        let mut expanded = false;
543        let base_global = self.parent_offset + blk_idx * 64;
544        let boundary_node = (self.parents.len() - 1) as u32;
545
546        // Prefetch current block parent array
547        let p_ptr = self.parents.as_ptr().add(base_global) as *const u8;
548        crate::intrinsics::prefetch_l1(p_ptr);
549        crate::intrinsics::prefetch_l1(p_ptr.add(64));
550        crate::intrinsics::prefetch_l1(p_ptr.add(128));
551        crate::intrinsics::prefetch_l1(p_ptr.add(192));
552
553        // Prefetch neighbor block states and parent arrays
554        if blk_idx > 0 {
555            let ptr = self.blocks_state.as_ptr().add(blk_idx - 1) as *const u8;
556            crate::intrinsics::prefetch_l1(ptr);
557            let p_ptr = self.parents.as_ptr().add((blk_idx - 1) * 64) as *const u8;
558            crate::intrinsics::prefetch_l1(p_ptr);
559            crate::intrinsics::prefetch_l1(p_ptr.add(128));
560        }
561        if blk_idx + 1 < self.blocks_state.len() {
562            let ptr = self.blocks_state.as_ptr().add(blk_idx + 1) as *const u8;
563            crate::intrinsics::prefetch_l1(ptr);
564            let p_ptr = self.parents.as_ptr().add((blk_idx + 1) * 64) as *const u8;
565            crate::intrinsics::prefetch_l1(p_ptr);
566            crate::intrinsics::prefetch_l1(p_ptr.add(128));
567        }
568
569        let block_state_ptr = self.blocks_state.get_unchecked_mut(blk_idx);
570        let _flags = block_state_ptr.flags;
571        let mut boundary = block_state_ptr.boundary;
572
573        if boundary == 0 {
574            return false;
575        }
576
577        let mut occupied = block_state_ptr.occupied;
578        let initial_occupied = occupied;
579        let initial_boundary = boundary;
580        let effective_mask = block_state_ptr.effective_mask;
581        let valid_mask = block_state_ptr.valid_mask;
582
583        // Calculate spread (horizontal)
584        let mut spread_boundary = if STRIDE_Y == 32 {
585            crate::intrinsics::spread_syndrome_masked(
586                boundary,
587                effective_mask,
588                0x8000000080000000,
589                0x0000000100000001,
590            )
591        } else {
592            crate::intrinsics::spread_syndrome_masked(
593                boundary,
594                effective_mask,
595                self.row_end_mask,
596                self.row_start_mask,
597            )
598        };
599
600        // Intra-block vertical spread (logarithmic)
601        if STRIDE_Y < 64 {
602            let mask = effective_mask;
603            let mut current = spread_boundary;
604
605            {
606                let up = (current << STRIDE_Y) & mask;
607                let down = (current >> STRIDE_Y) & mask;
608                current |= up | down;
609            }
610            let shift_2 = STRIDE_Y * 2;
611            if shift_2 < 64 {
612                let up = (current << shift_2) & mask;
613                let down = (current >> shift_2) & mask;
614                current |= up | down;
615            }
616            let shift_4 = STRIDE_Y * 4;
617            if shift_4 < 64 {
618                let up = (current << shift_4) & mask;
619                let down = (current >> shift_4) & mask;
620                current |= up | down;
621            }
622            let shift_8 = STRIDE_Y * 8;
623            if shift_8 < 64 {
624                let up = (current << shift_8) & mask;
625                let down = (current >> shift_8) & mask;
626                current |= up | down;
627            }
628            let shift_16 = STRIDE_Y * 16;
629            if shift_16 < 64 {
630                let up = (current << shift_16) & mask;
631                let down = (current >> shift_16) & mask;
632                current |= up | down;
633            }
634            let shift_32 = STRIDE_Y * 32;
635            if shift_32 < 64 {
636                let up = (current << shift_32) & mask;
637                let down = (current >> shift_32) & mask;
638                current |= up | down;
639            }
640
641            spread_boundary = current;
642        }
643
644        // Intra-block vertical unions (polychromatic)
645        if STRIDE_Y < 64 {
646            let vertical_pairs = spread_boundary & (spread_boundary >> STRIDE_Y);
647            if vertical_pairs != 0
648                && self.merge_shifted(vertical_pairs, base_global, STRIDE_Y as isize, base_global)
649            {
650                expanded = true;
651            }
652        }
653
654        // Horizontal unions
655        let horizontal = (spread_boundary & (spread_boundary << 1)) & !self.row_start_mask;
656        if horizontal != 0
657            && self.merge_shifted(horizontal, base_global, -1, base_global)
658        {
659            expanded = true;
660        }
661
662        // Internal boundary connections (holes)
663        if STRIDE_Y < 64 {
664            let internal_bottom_edge = spread_boundary & (!valid_mask >> STRIDE_Y);
665            let internal_top_edge = spread_boundary & (!valid_mask << STRIDE_Y);
666
667            if self.connect_boundary_4way_ilp(
668                internal_bottom_edge | internal_top_edge,
669                base_global,
670                0,
671                boundary_node,
672            ) {
673                expanded = true;
674            }
675        }
676
677        let new_occupied = occupied | spread_boundary;
678        if new_occupied != occupied {
679            occupied = new_occupied;
680            expanded = true;
681            if !SILENT {
682                self.push_next(blk_idx);
683            }
684        }
685
686        // ============== NEIGHBOR PROCESSING (POLYCHROMATIC) ==============
687        // Up neighbor
688        if blk_idx > 0 {
689            let blk_up = blk_idx - 1;
690            let (valid_up, occupied_up, effective_up) = {
691                let b = self.blocks_state.get_unchecked(blk_up);
692                (b.valid_mask, b.occupied, b.effective_mask)
693            };
694            let shift_amt = 64 - STRIDE_Y;
695            let spread_to_up = spread_boundary << shift_amt;
696
697            let boundary_connect_mask = spread_to_up & !valid_up;
698            if boundary_connect_mask != 0
699                && self.connect_boundary_4way_ilp(
700                    boundary_connect_mask,
701                    base_global,
702                    -(shift_amt as isize),
703                    boundary_node,
704                )
705            {
706                expanded = true;
707            }
708
709            if valid_up != 0 {
710                let grow_up = spread_to_up & !occupied_up & effective_up;
711                if grow_up != 0 {
712                    self.fast_grow_block::<SILENT>(blk_up, grow_up);
713                    expanded = true;
714                }
715
716                let merge_up = spread_to_up & occupied_up;
717                if self.merge_shifted(merge_up, base_global, -(shift_amt as isize), blk_up * 64) {
718                    expanded = true;
719                }
720            }
721        } else {
722            // Top block - check boundary
723            if STRIDE_Y < 64 {
724                let mask_low = (1u64 << STRIDE_Y) - 1;
725                let spread_boundary_masked = spread_boundary & mask_low;
726                if self.connect_boundary_4way_ilp(
727                    spread_boundary_masked,
728                    base_global,
729                    0,
730                    boundary_node,
731                ) {
732                    expanded = true;
733                }
734            } else if STRIDE_Y == 64
735                && self.connect_boundary_4way_ilp(spread_boundary, base_global, 0, boundary_node)
736            {
737                expanded = true;
738            }
739        }
740
741        // Down neighbor
742        if blk_idx + 1 < self.blocks_state.len() {
743            let blk_down = blk_idx + 1;
744            let (valid_down, occupied_down, effective_down) = {
745                let b = self.blocks_state.get_unchecked(blk_down);
746                (b.valid_mask, b.occupied, b.effective_mask)
747            };
748            let shift_amt = 64 - STRIDE_Y;
749            let spread_to_down = spread_boundary >> shift_amt;
750
751            let boundary_connect_mask = spread_to_down & !valid_down;
752            if boundary_connect_mask != 0
753                && self.connect_boundary_4way_ilp(
754                    boundary_connect_mask,
755                    base_global,
756                    shift_amt as isize,
757                    boundary_node,
758                )
759            {
760                expanded = true;
761            }
762
763            if valid_down != 0 {
764                let grow_down = spread_to_down & !occupied_down & effective_down;
765                if grow_down != 0 {
766                    self.fast_grow_block::<SILENT>(blk_down, grow_down);
767                    expanded = true;
768                }
769
770                let merge_down = spread_to_down & occupied_down;
771                if self.merge_shifted(merge_down, base_global, shift_amt as isize, blk_down * 64) {
772                    expanded = true;
773                }
774            }
775        } else {
776            // Last block - bottom boundary
777            if STRIDE_Y < 64 {
778                let shift_amt = 64 - STRIDE_Y;
779                let spread_boundary_masked = spread_boundary >> shift_amt;
780                if self.connect_boundary_4way_ilp(
781                    spread_boundary_masked,
782                    base_global,
783                    shift_amt as isize,
784                    boundary_node,
785                ) {
786                    expanded = true;
787                }
788            } else if self.connect_boundary_4way_ilp(spread_boundary, base_global, 0, boundary_node)
789            {
790                expanded = true;
791            }
792        }
793
794        // Horizontal edges
795        let left_edge = spread_boundary & self.row_start_mask;
796        if left_edge != 0
797            && self.connect_boundary_4way_ilp(left_edge, base_global, 0, boundary_node)
798        {
799            expanded = true;
800        }
801
802        let right_edge = spread_boundary & self.row_end_mask;
803        if right_edge != 0
804            && self.connect_boundary_4way_ilp(right_edge, base_global, 0, boundary_node)
805        {
806            expanded = true;
807        }
808
809        // Clean up boundary
810        boundary &= !spread_boundary;
811
812        // Write back
813        if occupied != initial_occupied || boundary != initial_boundary {
814            let block = self.blocks_state.get_unchecked_mut(blk_idx);
815            block.occupied = occupied;
816            block.boundary = boundary;
817            self.mark_block_dirty(blk_idx);
818        }
819
820        expanded
821    }
822
    /// Processes one 64-bit block for strides up to 64 columns, growing the
    /// occupied/boundary region and merging clusters through union-find.
    ///
    /// Dispatch order:
    /// 1. `STRIDE_Y == 32` is routed to a dedicated specialization.
    /// 2. A block with a cached root (`block.root != u32::MAX`) takes the
    ///    monochromatic fast path (`process_mono`).
    /// 3. Otherwise the boundary bits are scanned; if they all resolve to a
    ///    single union-find root, that root is cached and `process_mono` runs.
    /// 4. Blocks with mixed roots fall back to `process_poly`.
    ///
    /// Returns the `bool` reported by the chosen sub-routine (whether the
    /// block expanded — see `process_mono` / `process_poly`).
    ///
    /// # Safety
    ///
    /// Caller must ensure:
    /// - `blk_idx` is within bounds of `self.blocks_state`
    /// - All internal state arrays are properly initialized
    #[inline(always)]
    pub unsafe fn process_block_small_stride<const SILENT: bool>(
        &mut self,
        blk_idx: usize,
    ) -> bool {
        // Assert supported stride (large stride module removed)
        debug_assert!(STRIDE_Y <= 64);

        // STRIDE_Y == 32 specialization (unchanged)
        if STRIDE_Y == 32 {
            let is_small = self.is_small_grid();
            let block_offset = self.parent_offset / 64;
            return Self::process_block_small_stride_32::<SILENT>(
                blk_idx,
                self.parents,
                self.blocks_state,
                self.defect_mask,
                self.block_dirty_mask,
                self.queued_mask,
                is_small,
                block_offset,
            );
        }

        let block = self.blocks_state.get_unchecked(blk_idx);
        // An empty boundary means there is nothing to grow from: inert block.
        if block.boundary == 0 {
            return false;
        }

        // Fast dispatch based on cached root
        let block_root = block.root;
        if block_root != u32::MAX {
            // Monochromatic fast path (>95% of blocks)
            return self.process_mono::<SILENT>(blk_idx, block_root);
        }

        // Check if we can determine monochromatic status
        let boundary = block.boundary;
        if boundary.count_ones() == 1 {
            // Single boundary bit - trivially monochromatic
            let base_global = self.parent_offset + blk_idx * 64;
            let first_bit = boundary.trailing_zeros() as usize;
            let root = self.find((base_global + first_bit) as u32);
            // Cache the root so subsequent passes skip straight to the
            // monochromatic fast path.
            self.blocks_state.get_unchecked_mut(blk_idx).root = root;
            return self.process_mono::<SILENT>(blk_idx, root);
        }

        // Multiple boundary bits without cached root - check if monochromatic
        if boundary != 0 {
            let base_global = self.parent_offset + blk_idx * 64;
            let first_bit = boundary.trailing_zeros() as usize;
            // Root of the first boundary bit; every remaining bit is
            // compared against it.
            let root_r = self.find((base_global + first_bit) as u32);
            let mut is_mono = true;

            // Iterate the remaining boundary bits (first bit cleared).
            let mut temp = boundary & !(1u64 << first_bit);
            while temp != 0 {
                let bit = temp.trailing_zeros() as usize;
                temp &= temp - 1;

                let node = (base_global + bit) as u32;
                // Fast path: direct parent check
                if *self.parents.get_unchecked(node as usize) == root_r {
                    continue;
                }
                // Slow path: full find
                if self.find(node) != root_r {
                    is_mono = false;
                    break;
                }
            }

            if is_mono {
                // All boundary bits share one root: cache it and take the
                // monochromatic fast path.
                self.blocks_state.get_unchecked_mut(blk_idx).root = root_r;
                return self.process_mono::<SILENT>(blk_idx, root_r);
            }
        }

        // Polychromatic slow path
        self.process_poly::<SILENT>(blk_idx)
    }
908}
909
910#[cfg(test)]
911mod tests {
912    use crate::arena::Arena;
913    use crate::decoder::state::{BoundaryConfig, DecodingState};
914    use crate::topology::SquareGrid;
915
916    #[test]
917    fn test_small_stride_intra_block_parents() {
918        extern crate std;
919        let mut memory = std::vec![0u8; 1024 * 1024];
920        let mut arena = Arena::new(&mut memory);
921        // Grid 32x32 (Stride 32). 1024 nodes. 16 blocks.
922        let mut decoder = DecodingState::<SquareGrid, 32>::new(&mut arena, 32, 32, 1);
923
924        unsafe {
925            let block = decoder.blocks_state.get_unchecked_mut(0);
926            block.boundary = 1 << 32;
927            block.occupied = 1 << 32;
928            block.valid_mask = !0;
929            block.erasure_mask = 0;
930        }
931
932        unsafe {
933            decoder.process_block_small_stride::<false>(0);
934        }
935
936        let root_32 = decoder.find(32);
937        let root_0 = decoder.find(0);
938
939        assert_eq!(root_32, root_0, "Node 32 should be connected to Node 0");
940
941        let boundary = (decoder.parents.len() - 1) as u32;
942        assert_eq!(root_0, boundary, "Node 0 should be connected to Boundary");
943    }
944
945    #[test]
946    fn test_small_stride_horizontal_connectivity() {
947        extern crate std;
948        let mut memory = std::vec![0u8; 1024 * 1024];
949        let mut arena = Arena::new(&mut memory);
950        // 32x32 grid. Stride 32.
951        let mut decoder = DecodingState::<SquareGrid, 32>::new(&mut arena, 32, 32, 1);
952
953        // Activate bit 0. It should spread to bit 1 if erasure mask allows.
954        unsafe {
955            let block = decoder.blocks_state.get_unchecked_mut(0);
956            block.boundary = 1;
957            block.occupied = 1;
958            block.valid_mask = !0;
959            block.erasure_mask = 0; // Allow spreading (0 = conductive)
960        }
961
962        unsafe {
963            decoder.process_block_small_stride::<false>(0);
964        }
965
966        // Bit 0 should have spread to Bit 1, 2, ... 31
967        let occupied = unsafe { decoder.blocks_state.get_unchecked(0).occupied };
968        assert_ne!(occupied & 2, 0, "Bit 1 should be occupied");
969
970        // Check connectivity
971        let root0 = decoder.find(0);
972        let root1 = decoder.find(1);
973
974        assert_eq!(
975            root0, root1,
976            "Horizontal neighbors (0 and 1) should be connected"
977        );
978    }
979
980    #[test]
981    fn test_merge_mono_optimization_behavior() {
982        extern crate std;
983        let mut memory = std::vec![0u8; 1024 * 1024];
984        let mut arena = Arena::new(&mut memory);
985        let mut decoder = DecodingState::<SquareGrid, 32>::new(&mut arena, 32, 32, 1);
986
987        // Root source: Node 0
988        let root_source = 0;
989
990        unsafe {
991            // Connect Node 2 to Root manually (Direct Parent)
992            *decoder.parents.get_unchecked_mut(2) = root_source;
993
994            // Verify initial state
995            assert_ne!(decoder.find(1), decoder.find(root_source));
996            assert_eq!(decoder.find(2), decoder.find(root_source));
997
998            // merge_mono targeting Node 2 (should be no-op/not expanded for that bit)
999            // Mask: bit 2
1000            let mask_only_connected = 1 << 2;
1001            let expanded = decoder.merge_mono(mask_only_connected, 0, root_source);
1002            assert!(!expanded, "Should not expand if already connected directly");
1003
1004            // merge_mono targeting Node 1 (should expand)
1005            let mask_new = 1 << 1;
1006            let expanded_new = decoder.merge_mono(mask_new, 0, root_source);
1007            assert!(expanded_new, "Should expand for new node");
1008            assert_eq!(decoder.find(1), decoder.find(root_source));
1009        }
1010    }
1011
1012    #[test]
1013    fn test_merge_mono_bulk_deduplication() {
1014        // Test that merge_mono deduplicates roots when multiple bits share the same root
1015        extern crate std;
1016        let mut memory = std::vec![0u8; 1024 * 1024];
1017        let mut arena = Arena::new(&mut memory);
1018        let mut decoder = DecodingState::<SquareGrid, 32>::new(&mut arena, 32, 32, 1);
1019
1020        let root_source = 100u32;
1021
1022        unsafe {
1023            // Pre-connect nodes 0, 1, 2 to node 10 (so they share a root)
1024            *decoder.parents.get_unchecked_mut(0) = 10;
1025            *decoder.parents.get_unchecked_mut(1) = 10;
1026            *decoder.parents.get_unchecked_mut(2) = 10;
1027            // Node 10 is its own root
1028            *decoder.parents.get_unchecked_mut(10) = 10;
1029
1030            // Ensure root_source is its own root
1031            *decoder.parents.get_unchecked_mut(root_source as usize) = root_source;
1032
1033            // Merge nodes 0, 1, 2 (bits 0, 1, 2) with root_source
1034            // All three should deduplicate to a single union_roots(10, root_source)
1035            let mask = 0b111; // bits 0, 1, 2
1036            let expanded = decoder.merge_mono(mask, 0, root_source);
1037
1038            assert!(expanded, "Should expand when merging");
1039
1040            // All nodes should now be connected
1041            let final_root = decoder.find(0);
1042            assert_eq!(decoder.find(1), final_root);
1043            assert_eq!(decoder.find(2), final_root);
1044            assert_eq!(decoder.find(10), final_root);
1045            assert_eq!(decoder.find(root_source), final_root);
1046        }
1047    }
1048
1049    #[test]
1050    fn test_merge_mono_skip_already_connected() {
1051        // Test that merge_mono skips nodes already connected to root_source
1052        extern crate std;
1053        let mut memory = std::vec![0u8; 1024 * 1024];
1054        let mut arena = Arena::new(&mut memory);
1055        let mut decoder = DecodingState::<SquareGrid, 32>::new(&mut arena, 32, 32, 1);
1056
1057        let root_source = 50u32;
1058
1059        unsafe {
1060            // Make root_source its own root
1061            *decoder.parents.get_unchecked_mut(root_source as usize) = root_source;
1062
1063            // Pre-connect nodes 0, 1 to root_source
1064            *decoder.parents.get_unchecked_mut(0) = root_source;
1065            *decoder.parents.get_unchecked_mut(1) = root_source;
1066
1067            // merge_mono on bits 0, 1, 2 - only bit 2 should cause expansion
1068            let mask = 0b111;
1069            let expanded = decoder.merge_mono(mask, 0, root_source);
1070
1071            assert!(expanded, "Should expand for node 2");
1072            assert_eq!(decoder.find(2), decoder.find(root_source));
1073        }
1074    }
1075
1076    #[test]
1077    fn test_merge_mono_overflow_handling() {
1078        // Test that merge_mono handles more than 8 unique roots
1079        extern crate std;
1080        let mut memory = std::vec![0u8; 1024 * 1024];
1081        let mut arena = Arena::new(&mut memory);
1082        let mut decoder = DecodingState::<SquareGrid, 32>::new(&mut arena, 32, 32, 1);
1083
1084        let root_source = 200u32;
1085
1086        unsafe {
1087            // Make 10 separate roots (nodes 0-9 are their own roots)
1088            for i in 0..10 {
1089                *decoder.parents.get_unchecked_mut(i) = i as u32;
1090            }
1091            *decoder.parents.get_unchecked_mut(root_source as usize) = root_source;
1092
1093            // Merge all 10 nodes
1094            let mask = 0b1111111111; // bits 0-9
1095            let expanded = decoder.merge_mono(mask, 0, root_source);
1096
1097            assert!(expanded);
1098
1099            // All should be connected to root_source
1100            let final_root = decoder.find(root_source);
1101            for i in 0..10 {
1102                assert_eq!(
1103                    decoder.find(i as u32),
1104                    final_root,
1105                    "Node {} should be connected",
1106                    i
1107                );
1108            }
1109        }
1110    }
1111
1112    #[test]
1113    fn test_vertical_neighbor_mono_to_mono_merge() {
1114        // Test the optimization: when both blocks are monochromatic, use union_roots
1115        extern crate std;
1116        let mut memory = std::vec![0u8; 1024 * 1024];
1117        let mut arena = Arena::new(&mut memory);
1118        // Use stride 16 to test the generic small stride path
1119        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);
1120
1121        unsafe {
1122            // For stride 16, inter-block vertical: block N's top row spreads to block N-1's bottom row
1123            // Block layout: each block has 64 bits = 4 rows of 16 bits each
1124            // Row 0: bits 0-15, Row 1: bits 16-31, Row 2: bits 32-47, Row 3: bits 48-63
1125            // Spreading to Up block: boundary << (64-16) = boundary << 48
1126            // So for block 1 to spread to block 0, block 1 needs bits in its Row 0 (bits 0-15)
1127
1128            // Block 1 setup - monochromatic with root 64, has boundary in Row 0
1129            let block1 = decoder.blocks_state.get_unchecked_mut(1);
1130            block1.boundary = 0xFFFF; // Row 0 set (bits 0-15 in block)
1131            block1.occupied = 0xFFFF;
1132            block1.root = 64; // Monochromatic
1133            block1.effective_mask = !0;
1134            block1.valid_mask = !0;
1135            block1.erasure_mask = 0;
1136
1137            // Connect nodes 64-79 to root 64
1138            *decoder.parents.get_unchecked_mut(64) = 64;
1139            for i in 65..80 {
1140                *decoder.parents.get_unchecked_mut(i) = 64;
1141            }
1142
1143            // Block 0 setup - monochromatic with root 0, has occupied in Row 3 (where Up spread lands)
1144            let block0 = decoder.blocks_state.get_unchecked_mut(0);
1145            block0.boundary = 0;
1146            block0.occupied = 0xFFFF_0000_0000_0000u64; // Row 3 set (bits 48-63 in block)
1147            block0.root = 0; // Monochromatic
1148            block0.effective_mask = !0;
1149            block0.valid_mask = !0;
1150            block0.erasure_mask = 0;
1151
1152            // Connect nodes 48-63 to root 0
1153            *decoder.parents.get_unchecked_mut(0) = 0;
1154            for i in 48..64 {
1155                *decoder.parents.get_unchecked_mut(i) = 0;
1156            }
1157
1158            // Process block 1 - should spread up to block 0 and merge using mono-to-mono path
1159            decoder.process_block_small_stride::<false>(1);
1160
1161            // Both blocks should be connected through their roots
1162            let root0 = decoder.find(0);
1163            let root64 = decoder.find(64);
1164            assert_eq!(
1165                root0, root64,
1166                "Mono-to-mono merge should connect the blocks"
1167            );
1168        }
1169    }
1170
1171    #[test]
1172    fn test_merge_mono_empty_mask() {
1173        // Test merge_mono returns false when mask == 0 (line 14)
1174        extern crate std;
1175        let mut memory = std::vec![0u8; 1024 * 1024];
1176        let mut arena = Arena::new(&mut memory);
1177        let mut decoder = DecodingState::<SquareGrid, 32>::new(&mut arena, 32, 32, 1);
1178
1179        unsafe {
1180            let expanded = decoder.merge_mono(0, 0, 0);
1181            assert!(!expanded, "merge_mono with empty mask should return false");
1182        }
1183    }
1184
1185    #[test]
1186    fn test_process_mono_zero_boundary() {
1187        // Test process_mono returns false when boundary == 0 (line 133)
1188        extern crate std;
1189        let mut memory = std::vec![0u8; 1024 * 1024];
1190        let mut arena = Arena::new(&mut memory);
1191        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);
1192
1193        unsafe {
1194            let block = decoder.blocks_state.get_unchecked_mut(0);
1195            block.boundary = 0; // Zero boundary
1196            block.occupied = 1;
1197            block.root = 0;
1198            block.valid_mask = !0;
1199            block.effective_mask = !0;
1200
1201            let expanded = decoder.process_mono::<false>(0, 0);
1202            assert!(!expanded, "process_mono with zero boundary should return false");
1203        }
1204    }
1205
1206    #[test]
1207    fn test_process_poly_zero_boundary() {
1208        // Test process_poly returns false when boundary == 0 (line 577)
1209        extern crate std;
1210        let mut memory = std::vec![0u8; 1024 * 1024];
1211        let mut arena = Arena::new(&mut memory);
1212        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);
1213
1214        unsafe {
1215            let block = decoder.blocks_state.get_unchecked_mut(0);
1216            block.boundary = 0; // Zero boundary
1217            block.occupied = 1;
1218            block.root = u32::MAX; // Polychromatic
1219            block.valid_mask = !0;
1220            block.effective_mask = !0;
1221
1222            let expanded = decoder.process_poly::<false>(0);
1223            assert!(!expanded, "process_poly with zero boundary should return false");
1224        }
1225    }
1226
1227    #[test]
1228    fn test_process_mono_internal_hole_connection() {
1229        // Test internal boundary/hole connection (line 244-251)
1230        extern crate std;
1231        let mut memory = std::vec![0u8; 1024 * 1024];
1232        let mut arena = Arena::new(&mut memory);
1233        let mut decoder = DecodingState::<SquareGrid, 8>::new(&mut arena, 8, 8, 1);
1234
1235        unsafe {
1236            // Create a block with a hole in the valid_mask
1237            let block = decoder.blocks_state.get_unchecked_mut(0);
1238            block.boundary = 1 << 8; // Bit 8 (row 1)
1239            block.occupied = 1 << 8;
1240            block.root = 8;
1241            // Create a hole: valid_mask has bit 0 cleared (position 0 is invalid)
1242            block.valid_mask = !1u64; // All valid except bit 0
1243            block.effective_mask = !1u64;
1244
1245            *decoder.parents.get_unchecked_mut(8) = 8;
1246
1247            // Process - should hit internal hole connection
1248            let expanded = decoder.process_mono::<false>(0, 8);
1249
1250            // Should have connected to boundary through the hole
1251            let boundary_node = (decoder.parents.len() - 1) as u32;
1252            let root8 = decoder.find(8);
1253            assert_eq!(root8, boundary_node, "Should connect to boundary via hole");
1254            assert!(expanded);
1255        }
1256    }
1257
1258    #[test]
1259    fn test_process_mono_up_neighbor_boundary_connection() {
1260        // Test up neighbor boundary connection when hitting invalid bits (lines 286-291)
1261        extern crate std;
1262        let mut memory = std::vec![0u8; 1024 * 1024];
1263        let mut arena = Arena::new(&mut memory);
1264        // Use larger grid with matching stride (16x16 -> STRIDE_Y=16, 4 rows/block -> 4 blocks)
1265        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);
1266
1267        unsafe {
1268            // Block 1 spreads up to block 0
1269            let block1 = decoder.blocks_state.get_unchecked_mut(1);
1270            block1.boundary = 0xFFFF; // Row 0 (bits 0-15)
1271            block1.occupied = 0xFFFF;
1272            block1.root = 64;
1273            block1.valid_mask = !0;
1274            block1.effective_mask = !0;
1275
1276            *decoder.parents.get_unchecked_mut(64) = 64;
1277            for i in 64..80 {
1278                *decoder.parents.get_unchecked_mut(i) = 64;
1279            }
1280
1281            // Block 0 has invalid bits where spread lands
1282            let block0 = decoder.blocks_state.get_unchecked_mut(0);
1283            block0.boundary = 0;
1284            block0.occupied = 0;
1285            block0.root = u32::MAX;
1286            block0.valid_mask = 0x0000_FFFF_FFFF_FFFFu64; // Top row invalid
1287            block0.effective_mask = 0x0000_FFFF_FFFF_FFFFu64;
1288
1289            decoder.process_mono::<false>(1, 64);
1290
1291            let boundary_node = (decoder.parents.len() - 1) as u32;
1292            let root64 = decoder.find(64);
1293            assert_eq!(root64, boundary_node, "Should connect to boundary via invalid up neighbor");
1294        }
1295    }
1296
1297    #[test]
1298    fn test_process_mono_down_neighbor_different_clusters() {
1299        // Test down neighbor with different clusters (lines 438-453)
1300        extern crate std;
1301        let mut memory = std::vec![0u8; 1024 * 1024];
1302        let mut arena = Arena::new(&mut memory);
1303        // 16x16 grid with STRIDE_Y=16 gives 4 blocks
1304        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);
1305
1306        unsafe {
1307            // Block 0 spreads down to block 1
1308            // For stride 16: 4 rows per block, row 3 = bits 48-63
1309            let block0 = decoder.blocks_state.get_unchecked_mut(0);
1310            block0.boundary = 0xFFFF_0000_0000_0000u64; // Row 3 (bits 48-63)
1311            block0.occupied = 0xFFFF_0000_0000_0000u64;
1312            block0.root = 48;
1313            block0.valid_mask = !0;
1314            block0.effective_mask = !0;
1315
1316            *decoder.parents.get_unchecked_mut(48) = 48;
1317            for i in 49..64 {
1318                *decoder.parents.get_unchecked_mut(i) = 48;
1319            }
1320
1321            // Block 1 has different root
1322            let block1 = decoder.blocks_state.get_unchecked_mut(1);
1323            block1.boundary = 0;
1324            block1.occupied = 0xFFFF; // Row 0
1325            block1.root = 64;
1326            block1.valid_mask = !0;
1327            block1.effective_mask = !0;
1328
1329            *decoder.parents.get_unchecked_mut(64) = 64;
1330            for i in 65..80 {
1331                *decoder.parents.get_unchecked_mut(i) = 64;
1332            }
1333
1334            decoder.process_mono::<false>(0, 48);
1335
1336            // Both clusters should be merged
1337            let root48 = decoder.find(48);
1338            let root64 = decoder.find(64);
1339            assert_eq!(root48, root64, "Different clusters should merge");
1340        }
1341    }
1342
1343    #[test]
1344    fn test_process_mono_top_block_boundary() {
1345        // Test top block boundary check (lines 356-377)
1346        extern crate std;
1347        let mut memory = std::vec![0u8; 1024 * 1024];
1348        let mut arena = Arena::new(&mut memory);
1349        let mut decoder = DecodingState::<SquareGrid, 8>::new(&mut arena, 8, 8, 1);
1350
1351        unsafe {
1352            // Block 0 is the top block
1353            let block0 = decoder.blocks_state.get_unchecked_mut(0);
1354            block0.boundary = 0xFF; // Row 0 at global top
1355            block0.occupied = 0xFF;
1356            block0.root = 0;
1357            block0.valid_mask = !0;
1358            block0.effective_mask = !0;
1359
1360            *decoder.parents.get_unchecked_mut(0) = 0;
1361            for i in 1..8 {
1362                *decoder.parents.get_unchecked_mut(i) = 0;
1363            }
1364
1365            decoder.process_mono::<false>(0, 0);
1366
1367            let boundary_node = (decoder.parents.len() - 1) as u32;
1368            let root0 = decoder.find(0);
1369            assert_eq!(root0, boundary_node, "Top row should connect to boundary");
1370        }
1371    }
1372
1373    #[test]
1374    fn test_process_mono_last_block_bottom_boundary() {
1375        // Test last block bottom boundary (lines 478-494)
1376        extern crate std;
1377        let mut memory = std::vec![0u8; 1024 * 1024];
1378        let mut arena = Arena::new(&mut memory);
1379        // Use 16x16 grid with STRIDE_Y=16, which gives 4 blocks
1380        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);
1381
1382        let num_blocks = decoder.blocks_state.len();
1383        assert!(num_blocks >= 3, "Need at least 3 blocks for this test");
1384        // Use second-to-last block (block 3 for 16x16) which has actual data nodes
1385        // Block 4 only contains the sentinel node
1386        let last_data_blk = num_blocks - 2;
1387        let base = last_data_blk * 64;
1388
1389        // Verify indices are within bounds
1390        let parents_len = decoder.parents.len();
1391        assert!(base + 63 < parents_len, "Last node of last data block must fit in parents array");
1392
1393        unsafe {
1394            // Last data block has boundary at bottom row
1395            // For stride 16: 4 rows per block, so row 3 = bits 48-63
1396            let block = decoder.blocks_state.get_unchecked_mut(last_data_blk);
1397            block.boundary = 0xFFFF_0000_0000_0000u64; // Row 3
1398            block.occupied = 0xFFFF_0000_0000_0000u64;
1399            block.root = (base + 48) as u32;
1400            block.valid_mask = !0;
1401            block.effective_mask = !0;
1402
1403            *decoder.parents.get_unchecked_mut(base + 48) = (base + 48) as u32;
1404            for i in 49..64 {
1405                *decoder.parents.get_unchecked_mut(base + i) = (base + 48) as u32;
1406            }
1407
1408            decoder.process_mono::<false>(last_data_blk, (base + 48) as u32);
1409
1410            let boundary_node = (decoder.parents.len() - 1) as u32;
1411            let root = decoder.find((base + 48) as u32);
1412            assert_eq!(root, boundary_node, "Bottom row of last data block should connect to boundary");
1413        }
1414    }
1415
1416    #[test]
1417    fn test_process_mono_left_edge_boundary() {
1418        // Test left edge boundary check (lines 498-508)
1419        extern crate std;
1420        let mut memory = std::vec![0u8; 1024 * 1024];
1421        let mut arena = Arena::new(&mut memory);
1422        let mut decoder = DecodingState::<SquareGrid, 8>::new(&mut arena, 8, 8, 1);
1423
1424        unsafe {
1425            // Block with left edge active
1426            let block = decoder.blocks_state.get_unchecked_mut(0);
1427            // row_start_mask for stride 8 includes bits 0, 8, 16, 24, 32, 40, 48, 56
1428            block.boundary = 0x0101_0101_0101_0101u64; // Left column
1429            block.occupied = 0x0101_0101_0101_0101u64;
1430            block.root = 0;
1431            block.valid_mask = !0;
1432            block.effective_mask = !0;
1433
1434            *decoder.parents.get_unchecked_mut(0) = 0;
1435
1436            decoder.process_mono::<false>(0, 0);
1437
1438            let boundary_node = (decoder.parents.len() - 1) as u32;
1439            let root0 = decoder.find(0);
1440            assert_eq!(root0, boundary_node, "Left edge should connect to boundary");
1441        }
1442    }
1443
1444    #[test]
1445    fn test_process_mono_right_edge_boundary() {
1446        // Test right edge boundary check (lines 510-520)
1447        extern crate std;
1448        let mut memory = std::vec![0u8; 1024 * 1024];
1449        let mut arena = Arena::new(&mut memory);
1450        let mut decoder = DecodingState::<SquareGrid, 8>::new(&mut arena, 8, 8, 1);
1451
1452        unsafe {
1453            // Block with right edge active
1454            let block = decoder.blocks_state.get_unchecked_mut(0);
1455            // row_end_mask for stride 8 includes bits 7, 15, 23, 31, 39, 47, 55, 63
1456            block.boundary = 0x8080_8080_8080_8080u64; // Right column
1457            block.occupied = 0x8080_8080_8080_8080u64;
1458            block.root = 7;
1459            block.valid_mask = !0;
1460            block.effective_mask = !0;
1461
1462            *decoder.parents.get_unchecked_mut(7) = 7;
1463
1464            decoder.process_mono::<false>(0, 7);
1465
1466            let boundary_node = (decoder.parents.len() - 1) as u32;
1467            let root7 = decoder.find(7);
1468            assert_eq!(root7, boundary_node, "Right edge should connect to boundary");
1469        }
1470    }
1471
1472    #[test]
1473    fn test_process_mono_up_neighbor_poly() {
1474        // Test up neighbor polychromatic path (line 341-352)
1475        extern crate std;
1476        let mut memory = std::vec![0u8; 1024 * 1024];
1477        let mut arena = Arena::new(&mut memory);
1478        // 16x16 grid with STRIDE_Y=16 gives 4 blocks
1479        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);
1480
1481        unsafe {
1482            // Block 1 is mono, spreads up to poly block 0
1483            let block1 = decoder.blocks_state.get_unchecked_mut(1);
1484            block1.boundary = 0xFFFF; // Row 0
1485            block1.occupied = 0xFFFF;
1486            block1.root = 64;
1487            block1.valid_mask = !0;
1488            block1.effective_mask = !0;
1489
1490            *decoder.parents.get_unchecked_mut(64) = 64;
1491            for i in 65..80 {
1492                *decoder.parents.get_unchecked_mut(i) = 64;
1493            }
1494
1495            // Block 0 is polychromatic (root = MAX)
1496            let block0 = decoder.blocks_state.get_unchecked_mut(0);
1497            block0.boundary = 0;
1498            block0.occupied = 0xFFFF_0000_0000_0000u64; // Row 3 (bits 48-63)
1499            block0.root = u32::MAX; // Polychromatic
1500            block0.valid_mask = !0;
1501            block0.effective_mask = !0;
1502
1503            // Make nodes in row 3 have different roots
1504            for i in 48..64 {
1505                *decoder.parents.get_unchecked_mut(i) = i as u32;
1506            }
1507
1508            decoder.process_mono::<false>(1, 64);
1509
1510            // Block 1's cluster should merge with block 0's nodes
1511            let root64 = decoder.find(64);
1512            let root48 = decoder.find(48);
1513            assert_eq!(root64, root48, "Mono should merge with poly neighbor");
1514        }
1515    }
1516
    #[test]
    fn test_process_mono_down_neighbor_poly() {
        // Mirror of the up-neighbor test: mono block 0 spreads down into a
        // polychromatic block 1 and must union with its facing-row clusters.
        // Test down neighbor polychromatic path (lines 457-469)
        extern crate std;
        let mut memory = std::vec![0u8; 1024 * 1024];
        let mut arena = Arena::new(&mut memory);
        // 16x16 grid with STRIDE_Y=16 gives 4 blocks
        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);

        unsafe {
            // Block 0 is mono, spreads down to poly block 1
            // (block 0 covers nodes 0-63; its row 3 = nodes 48-63)
            let block0 = decoder.blocks_state.get_unchecked_mut(0);
            block0.boundary = 0xFFFF_0000_0000_0000u64; // Row 3 (bits 48-63)
            block0.occupied = 0xFFFF_0000_0000_0000u64;
            block0.root = 48;
            block0.valid_mask = !0;
            block0.effective_mask = !0;

            // All of row 3 belongs to the cluster rooted at node 48.
            *decoder.parents.get_unchecked_mut(48) = 48;
            for i in 49..64 {
                *decoder.parents.get_unchecked_mut(i) = 48;
            }

            // Block 1 is polychromatic
            let block1 = decoder.blocks_state.get_unchecked_mut(1);
            block1.boundary = 0;
            block1.occupied = 0xFFFF; // Row 0
            block1.root = u32::MAX; // Polychromatic
            block1.valid_mask = !0;
            block1.effective_mask = !0;

            // Make nodes in row 0 of block 1 have different roots
            // (each of nodes 64-79 is its own singleton cluster)
            for i in 64..80 {
                *decoder.parents.get_unchecked_mut(i) = i as u32;
            }

            decoder.process_mono::<false>(0, 48);

            // Block 0's cluster should merge with block 1's nodes
            let root48 = decoder.find(48);
            let root64 = decoder.find(64);
            assert_eq!(root48, root64, "Mono should merge with poly down neighbor");
        }
    }
1561
1562    #[test]
1563    fn test_process_poly_horizontal_unions() {
1564        // Test horizontal unions in process_poly (line 658-663)
1565        extern crate std;
1566        let mut memory = std::vec![0u8; 1024 * 1024];
1567        let mut arena = Arena::new(&mut memory);
1568        let mut decoder = DecodingState::<SquareGrid, 8>::new(&mut arena, 8, 8, 1);
1569
1570        unsafe {
1571            // Block 0 is polychromatic with horizontal neighbors
1572            let block0 = decoder.blocks_state.get_unchecked_mut(0);
1573            block0.boundary = 0b11; // Bits 0 and 1 adjacent horizontally
1574            block0.occupied = 0b11;
1575            block0.root = u32::MAX; // Polychromatic
1576            block0.valid_mask = !0;
1577            block0.effective_mask = !0;
1578
1579            // Different roots for adjacent nodes
1580            *decoder.parents.get_unchecked_mut(0) = 0;
1581            *decoder.parents.get_unchecked_mut(1) = 1;
1582
1583            decoder.process_poly::<false>(0);
1584
1585            // Should have merged horizontally
1586            let root0 = decoder.find(0);
1587            let root1 = decoder.find(1);
1588            assert_eq!(root0, root1, "Horizontal neighbors should merge in poly path");
1589        }
1590    }
1591
    #[test]
    fn test_process_poly_up_neighbor_boundary() {
        // A poly block whose upward spread lands on invalid bits in the block
        // above must be unioned with the virtual boundary node instead.
        // Test poly up neighbor boundary connection (lines 700-710)
        extern crate std;
        let mut memory = std::vec![0u8; 1024 * 1024];
        let mut arena = Arena::new(&mut memory);
        // 16x16 grid with STRIDE_Y=16 gives 4 blocks
        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);

        unsafe {
            // Block 1 is poly, spreads up to invalid region
            let block1 = decoder.blocks_state.get_unchecked_mut(1);
            block1.boundary = 0xFFFF; // Row 0
            block1.occupied = 0xFFFF;
            block1.root = u32::MAX; // Polychromatic
            block1.valid_mask = !0;
            block1.effective_mask = !0;

            // Each of nodes 64-79 is its own singleton cluster.
            for i in 64..80 {
                *decoder.parents.get_unchecked_mut(i) = i as u32;
            }

            // Block 0 has invalid bits where spread lands.
            // Bits 48-63 cleared: row 3 (block 0's bottom row, adjacent to
            // block 1's row 0) is invalid, so the upward spread hits boundary.
            let block0 = decoder.blocks_state.get_unchecked_mut(0);
            block0.valid_mask = 0x0000_FFFF_FFFF_FFFFu64;
            block0.effective_mask = 0x0000_FFFF_FFFF_FFFFu64;
            block0.occupied = 0;
            block0.boundary = 0;
            block0.root = u32::MAX;

            decoder.process_poly::<false>(1);

            // The virtual boundary node is the last slot of the parents array.
            let boundary_node = (decoder.parents.len() - 1) as u32;
            let root64 = decoder.find(64);
            assert_eq!(root64, boundary_node, "Poly should connect to boundary via invalid up");
        }
    }
1629
    #[test]
    fn test_process_poly_down_neighbor_boundary() {
        // A poly block whose downward spread lands on invalid bits in the
        // block below must be unioned with the virtual boundary node.
        // Test poly down neighbor boundary connection (lines 754-764)
        extern crate std;
        let mut memory = std::vec![0u8; 1024 * 1024];
        let mut arena = Arena::new(&mut memory);
        // 16x16 grid with STRIDE_Y=16 gives 4 blocks
        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);

        unsafe {
            // Block 0 is poly, spreads down to invalid region
            let block0 = decoder.blocks_state.get_unchecked_mut(0);
            block0.boundary = 0xFFFF_0000_0000_0000u64; // Row 3 (bits 48-63)
            block0.occupied = 0xFFFF_0000_0000_0000u64;
            block0.root = u32::MAX; // Polychromatic
            block0.valid_mask = !0;
            block0.effective_mask = !0;

            // Each of nodes 48-63 is its own singleton cluster.
            for i in 48..64 {
                *decoder.parents.get_unchecked_mut(i) = i as u32;
            }

            // Block 1 has invalid bits where spread lands (row 0 = bits 0-15).
            // Bits 0-15 cleared: row 0 (block 1's top row, adjacent to
            // block 0's row 3) is invalid, so the downward spread hits boundary.
            let block1 = decoder.blocks_state.get_unchecked_mut(1);
            block1.valid_mask = 0xFFFF_FFFF_FFFF_0000u64;
            block1.effective_mask = 0xFFFF_FFFF_FFFF_0000u64;
            block1.occupied = 0;
            block1.boundary = 0;
            block1.root = u32::MAX;

            decoder.process_poly::<false>(0);

            // The virtual boundary node is the last slot of the parents array.
            let boundary_node = (decoder.parents.len() - 1) as u32;
            let root48 = decoder.find(48);
            assert_eq!(root48, boundary_node, "Poly should connect to boundary via invalid down");
        }
    }
1667
1668    #[test]
1669    fn test_process_poly_left_right_edges() {
1670        // Test poly left/right edge boundary (lines 798-811)
1671        extern crate std;
1672        let mut memory = std::vec![0u8; 1024 * 1024];
1673        let mut arena = Arena::new(&mut memory);
1674        let mut decoder = DecodingState::<SquareGrid, 8>::new(&mut arena, 8, 8, 1);
1675
1676        unsafe {
1677            // Block with left and right edges active
1678            let block = decoder.blocks_state.get_unchecked_mut(0);
1679            // Left column: bits 0, 8, 16, 24, 32, 40, 48, 56
1680            // Right column: bits 7, 15, 23, 31, 39, 47, 55, 63
1681            block.boundary = 0x8181_8181_8181_8181u64; // Left and right columns
1682            block.occupied = 0x8181_8181_8181_8181u64;
1683            block.root = u32::MAX; // Polychromatic
1684            block.valid_mask = !0;
1685            block.effective_mask = !0;
1686
1687            // Make each edge node its own root
1688            for bit in [0, 7, 8, 15, 16, 23, 24, 31, 32, 39, 40, 47, 48, 55, 56, 63] {
1689                *decoder.parents.get_unchecked_mut(bit) = bit as u32;
1690            }
1691
1692            decoder.process_poly::<false>(0);
1693
1694            let boundary_node = (decoder.parents.len() - 1) as u32;
1695            let root0 = decoder.find(0); // Left edge
1696            let root7 = decoder.find(7); // Right edge
1697            assert_eq!(root0, boundary_node, "Left edge should connect to boundary in poly");
1698            assert_eq!(root7, boundary_node, "Right edge should connect to boundary in poly");
1699        }
1700    }
1701
1702    #[test]
1703    fn test_process_poly_top_block_boundary() {
1704        // Test poly top block boundary (lines 724-742)
1705        extern crate std;
1706        let mut memory = std::vec![0u8; 1024 * 1024];
1707        let mut arena = Arena::new(&mut memory);
1708        let mut decoder = DecodingState::<SquareGrid, 8>::new(&mut arena, 8, 8, 1);
1709
1710        unsafe {
1711            // Block 0 is the top block, polychromatic
1712            let block0 = decoder.blocks_state.get_unchecked_mut(0);
1713            block0.boundary = 0xFF; // Row 0 at global top
1714            block0.occupied = 0xFF;
1715            block0.root = u32::MAX; // Polychromatic
1716            block0.valid_mask = !0;
1717            block0.effective_mask = !0;
1718
1719            for i in 0..8 {
1720                *decoder.parents.get_unchecked_mut(i) = i as u32;
1721            }
1722
1723            decoder.process_poly::<false>(0);
1724
1725            let boundary_node = (decoder.parents.len() - 1) as u32;
1726            let root0 = decoder.find(0);
1727            assert_eq!(root0, boundary_node, "Top row should connect to boundary in poly");
1728        }
1729    }
1730
    #[test]
    fn test_process_poly_bottom_block_boundary() {
        // Bottom row of the last data block touches the global bottom edge, so
        // a poly block there must union its bottom-row nodes with the boundary.
        // Test poly bottom block boundary (lines 778-796)
        extern crate std;
        let mut memory = std::vec![0u8; 1024 * 1024];
        let mut arena = Arena::new(&mut memory);
        // 16x16 grid with STRIDE_Y=16 gives 4 blocks
        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);

        let num_blocks = decoder.blocks_state.len();
        assert!(num_blocks >= 3, "Need at least 3 blocks");
        // Use second-to-last block (block 3 for 16x16) which has actual data nodes
        // Block 4 only contains the sentinel node
        let last_data_blk = num_blocks - 2;
        let base = last_data_blk * 64; // first node index of that block

        // Verify indices are within bounds
        let parents_len = decoder.parents.len();
        assert!(base + 63 < parents_len, "Last node of last data block must fit in parents array");

        unsafe {
            // Last data block is polychromatic with bottom boundary (row 3 = bits 48-63)
            let block = decoder.blocks_state.get_unchecked_mut(last_data_blk);
            block.boundary = 0xFFFF_0000_0000_0000u64;
            block.occupied = 0xFFFF_0000_0000_0000u64;
            block.root = u32::MAX; // Polychromatic
            block.valid_mask = !0;
            block.effective_mask = !0;

            // Each bottom-row node starts as its own singleton cluster.
            for i in 48..64 {
                *decoder.parents.get_unchecked_mut(base + i) = (base + i) as u32;
            }

            decoder.process_poly::<false>(last_data_blk);

            // The virtual boundary node is the last slot of the parents array.
            let boundary_node = (decoder.parents.len() - 1) as u32;
            let root = decoder.find((base + 48) as u32);
            assert_eq!(root, boundary_node, "Bottom row of last data block should connect to boundary in poly");
        }
    }
1771
    #[test]
    fn test_process_mono_cached_root_invalidation() {
        // A block's cached root can go stale after unions elsewhere;
        // process_mono must re-resolve it via find() before using it.
        // Test cached root invalidation path (lines 140-145)
        // Use a 16x16 grid with STRIDE_Y=16 (not 32) to test the generic path
        // Block 1 is interior (not at top/bottom), neighbors are valid
        extern crate std;
        let mut memory = std::vec![0u8; 1024 * 1024];
        let mut arena = Arena::new(&mut memory);
        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);

        // Disable horizontal boundary checks
        decoder.boundary_config = BoundaryConfig {
            check_top: true,    // Won't trigger because block 1 is not at top
            check_bottom: true, // Won't trigger because block 1 is not at bottom
            check_left: false,
            check_right: false,
        };

        unsafe {
            // Use block 1 (nodes 64-127, rows 4-7)
            // Interior position: row 4, col 1 = node 4*16+1 = 65, which is bit 1 in block 1
            // Stale root points to node 65, actual root at node 66
            let block = decoder.blocks_state.get_unchecked_mut(1);
            block.boundary = 1 << 1; // bit 1 = node 65
            block.occupied = 1 << 1;
            block.root = 65; // Cached root points to node 65
            block.valid_mask = !0;
            block.effective_mask = !0;

            // Node 65's parent is actually node 66 (cache is stale)
            *decoder.parents.get_unchecked_mut(65) = 66;
            *decoder.parents.get_unchecked_mut(66) = 66;

            // Process should detect stale cache and update
            decoder.process_mono::<false>(1, 65);

            // The find operation during process_mono should have resolved this
            let root65 = decoder.find(65);
            assert_eq!(root65, 66, "Should use actual root after cache invalidation");
        }
    }
1813
    #[test]
    fn test_process_mono_up_neighbor_same_cluster_after_resolution() {
        // The up neighbor's cached root differs from ours, but resolving it
        // via find() reveals the same cluster — no union should be needed.
        // Test up neighbor same cluster after root resolution (lines 318-323)
        extern crate std;
        let mut memory = std::vec![0u8; 1024 * 1024];
        let mut arena = Arena::new(&mut memory);
        // 16x16 grid with STRIDE_Y=16 gives 4 blocks
        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);

        unsafe {
            // Block 1 mono spreads up to block 0 mono
            let block1 = decoder.blocks_state.get_unchecked_mut(1);
            block1.boundary = 0xFFFF; // Row 0
            block1.occupied = 0xFFFF;
            block1.root = 64;
            block1.valid_mask = !0;
            block1.effective_mask = !0;

            // All of block 1's row 0 (nodes 64-79) is rooted at node 64.
            *decoder.parents.get_unchecked_mut(64) = 64;
            for i in 65..80 {
                *decoder.parents.get_unchecked_mut(i) = 64;
            }

            // Block 0 has a cached root that needs resolution
            let block0 = decoder.blocks_state.get_unchecked_mut(0);
            block0.boundary = 0;
            block0.occupied = 0xFFFF_0000_0000_0000u64; // Row 3 (bits 48-63)
            block0.root = 48; // Cached root
            block0.valid_mask = !0;
            block0.effective_mask = !0;

            // But 48's actual root is 64 (same cluster)
            *decoder.parents.get_unchecked_mut(48) = 64;
            for i in 49..64 {
                *decoder.parents.get_unchecked_mut(i) = 64;
            }

            decoder.process_mono::<false>(1, 64);

            // Should have recognized same cluster and just grown
            let root48 = decoder.find(48);
            let root64 = decoder.find(64);
            assert_eq!(root48, root64);
        }
    }
1859
    #[test]
    fn test_process_mono_down_neighbor_same_cluster_after_resolution() {
        // Mirror of the up-neighbor case: the down neighbor's cached root
        // resolves (via find) to our own cluster — no union needed.
        // Test down neighbor same cluster after root resolution (lines 435-440)
        extern crate std;
        let mut memory = std::vec![0u8; 1024 * 1024];
        let mut arena = Arena::new(&mut memory);
        // 16x16 grid with STRIDE_Y=16 gives 4 blocks
        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);

        unsafe {
            // Block 0 mono spreads down to block 1 mono
            let block0 = decoder.blocks_state.get_unchecked_mut(0);
            block0.boundary = 0xFFFF_0000_0000_0000u64; // Row 3 (bits 48-63)
            block0.occupied = 0xFFFF_0000_0000_0000u64;
            block0.root = 48;
            block0.valid_mask = !0;
            block0.effective_mask = !0;

            // All of block 0's row 3 (nodes 48-63) is rooted at node 48.
            *decoder.parents.get_unchecked_mut(48) = 48;
            for i in 49..64 {
                *decoder.parents.get_unchecked_mut(i) = 48;
            }

            // Block 1 has cached root that resolves to same cluster
            let block1 = decoder.blocks_state.get_unchecked_mut(1);
            block1.boundary = 0;
            block1.occupied = 0xFFFF; // Row 0
            block1.root = 64; // Cached root
            block1.valid_mask = !0;
            block1.effective_mask = !0;

            // But 64's actual root is 48 (same cluster)
            *decoder.parents.get_unchecked_mut(64) = 48;
            for i in 65..80 {
                *decoder.parents.get_unchecked_mut(i) = 48;
            }

            decoder.process_mono::<false>(0, 48);

            // Should have recognized same cluster
            let root48 = decoder.find(48);
            let root64 = decoder.find(64);
            assert_eq!(root48, root64);
        }
    }
1905
    #[test]
    fn test_very_small_stride_deep_shifts() {
        // Vertical spread with a tiny stride (lines 192-203).
        // NOTE(review): the original header said "shift_16 and shift_32 paths",
        // but with STRIDE_Y=2 the assertion below expects shift_2 behavior —
        // confirm which shift helpers this actually exercises.
        extern crate std;
        let mut memory = std::vec![0u8; 1024 * 1024];
        let mut arena = Arena::new(&mut memory);
        // For STRIDE_Y=2, max(width, height) must be <= 2
        // 2x2 grid: 4 nodes = fits in 1 block
        let mut decoder = DecodingState::<SquareGrid, 2>::new(&mut arena, 2, 2, 1);

        unsafe {
            let block = decoder.blocks_state.get_unchecked_mut(0);
            block.boundary = 1; // Single bit at position 0
            block.occupied = 1;
            block.root = 0;
            block.valid_mask = 0b1111; // Only 4 bits valid for 2x2 grid
            block.effective_mask = 0b1111;

            // Node 0 is its own root.
            *decoder.parents.get_unchecked_mut(0) = 0;

            decoder.process_mono::<false>(0, 0);

            // Should have spread vertically through shift_2
            // With stride 2, vertical spread to bit 2 (0 + stride);
            // horizontal spread would reach bit 1 — either counts as growth.
            let occupied = decoder.blocks_state.get_unchecked(0).occupied;
            assert!(occupied & (1 << 2) != 0 || occupied & (1 << 1) != 0,
                "Should spread with small stride");
        }
    }
1935
1936    #[test]
1937    fn test_process_block_dispatch_to_poly() {
1938        // Test process_block_small_stride dispatching to process_poly (line 905)
1939        extern crate std;
1940        let mut memory = std::vec![0u8; 1024 * 1024];
1941        let mut arena = Arena::new(&mut memory);
1942        let mut decoder = DecodingState::<SquareGrid, 8>::new(&mut arena, 8, 8, 1);
1943
1944        unsafe {
1945            // Setup block as definitely polychromatic (multiple boundary bits with different roots)
1946            let block = decoder.blocks_state.get_unchecked_mut(0);
1947            block.boundary = 0b11; // Two adjacent bits
1948            block.occupied = 0b11;
1949            block.root = u32::MAX; // Polychromatic marker
1950            block.valid_mask = !0;
1951            block.effective_mask = !0;
1952
1953            // Make them have different roots
1954            *decoder.parents.get_unchecked_mut(0) = 0;
1955            *decoder.parents.get_unchecked_mut(1) = 1;
1956
1957            // Call the dispatch function
1958            let expanded = decoder.process_block_small_stride::<false>(0);
1959
1960            // Should have processed and merged
1961            assert!(expanded);
1962            let root0 = decoder.find(0);
1963            let root1 = decoder.find(1);
1964            assert_eq!(root0, root1, "Poly path should merge adjacent nodes");
1965        }
1966    }
1967
1968    #[test]
1969    fn test_process_block_single_boundary_bit_mono_conversion() {
1970        // Test single boundary bit becomes mono (lines 864-871)
1971        extern crate std;
1972        let mut memory = std::vec![0u8; 1024 * 1024];
1973        let mut arena = Arena::new(&mut memory);
1974        let mut decoder = DecodingState::<SquareGrid, 8>::new(&mut arena, 8, 8, 1);
1975
1976        unsafe {
1977            let block = decoder.blocks_state.get_unchecked_mut(0);
1978            block.boundary = 1; // Single bit
1979            block.occupied = 1;
1980            block.root = u32::MAX; // Unknown (will be determined)
1981            block.valid_mask = !0;
1982            block.effective_mask = !0;
1983
1984            *decoder.parents.get_unchecked_mut(0) = 0;
1985
1986            decoder.process_block_small_stride::<false>(0);
1987
1988            // Block should now be marked as monochromatic
1989            let new_root = decoder.blocks_state.get_unchecked(0).root;
1990            assert_ne!(new_root, u32::MAX, "Single boundary bit should convert to mono");
1991        }
1992    }
1993
    #[test]
    fn test_process_block_multi_boundary_mono_check() {
        // Several boundary bits that all resolve to one root should make the
        // dispatcher reclassify the block as monochromatic.
        // Test multiple boundary bits with same root becomes mono (lines 875-901)
        // Use a 16x16 grid with STRIDE_Y=16 (not 32) to test the generic path
        // Block 1 is interior (not at top/bottom)
        extern crate std;
        let mut memory = std::vec![0u8; 1024 * 1024];
        let mut arena = Arena::new(&mut memory);
        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);

        // Disable horizontal boundary checks
        decoder.boundary_config = BoundaryConfig {
            check_top: true,    // Won't trigger because block 1 is not at top
            check_bottom: true, // Won't trigger because block 1 is not at bottom
            check_left: false,
            check_right: false,
        };

        unsafe {
            // Use block 1 (nodes 64-127, rows 4-7)
            // Use three adjacent nodes at row 4, cols 1-3: nodes 65, 66, 67 = bits 1, 2, 3
            let block = decoder.blocks_state.get_unchecked_mut(1);
            block.boundary = 0b1110; // Three bits at positions 1, 2, 3 (nodes 65-67)
            block.occupied = 0b1110;
            block.root = u32::MAX; // Unknown (poly initially)
            block.valid_mask = !0;
            block.effective_mask = !0;

            // All three share the same root (node 65)
            *decoder.parents.get_unchecked_mut(65) = 65;
            *decoder.parents.get_unchecked_mut(66) = 65;
            *decoder.parents.get_unchecked_mut(67) = 65;

            decoder.process_block_small_stride::<false>(1);

            // Block should be recognized as monochromatic
            let new_root = decoder.blocks_state.get_unchecked(1).root;
            assert_eq!(new_root, 65, "All same root should be recognized as mono");
        }
    }
2034
    #[test]
    fn test_process_mono_up_neighbor_same_cluster_grow() {
        // Up neighbor already shares our root (fast path); processing must
        // still report expansion from growth into unoccupied cells.
        // Test up neighbor same cluster fast path with grow (lines 303-306)
        extern crate std;
        let mut memory = std::vec![0u8; 1024 * 1024];
        let mut arena = Arena::new(&mut memory);
        // 16x16 grid with STRIDE_Y=16
        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);

        // Disable boundary checks
        decoder.boundary_config = BoundaryConfig {
            check_top: false,
            check_bottom: false,
            check_left: false,
            check_right: false,
        };

        unsafe {
            // Block 1 has boundary at row 0 (spreading up to block 0)
            let block1 = decoder.blocks_state.get_unchecked_mut(1);
            block1.boundary = 0xFFFF; // Row 0 (bits 0-15)
            block1.occupied = 0xFFFF;
            block1.root = 64;
            block1.valid_mask = !0;
            block1.effective_mask = !0;

            // Block 0 (up neighbor) has same root but some unoccupied space
            let block0 = decoder.blocks_state.get_unchecked_mut(0);
            block0.boundary = 0xFFFF_0000_0000_0000u64; // Row 3 (bottom of block 0)
            block0.occupied = 0xFFFF_0000_0000_0000u64; // Only row 3 occupied
            block0.root = 64; // Same root!
            block0.valid_mask = !0;
            block0.effective_mask = !0;

            // Set up parents - both share root 64
            *decoder.parents.get_unchecked_mut(64) = 64;
            for i in 48..64 {
                *decoder.parents.get_unchecked_mut(i) = 64;
            }
            for i in 65..80 {
                *decoder.parents.get_unchecked_mut(i) = 64;
            }

            let expanded = decoder.process_mono::<false>(1, 64);

            // Processing should report expansion.
            // NOTE(review): block 0's landing row (row 3) is already occupied,
            // so the growth presumably comes from spread within block 1's own
            // unoccupied rows rather than into block 0 — confirm against
            // process_mono's grow logic.
            assert!(expanded, "Should expand when growing into same-cluster neighbor");
        }
    }
2084
    #[test]
    fn test_process_mono_last_block_bottom_boundary_check() {
        // With check_bottom enabled, a mono cluster on the last data block's
        // bottom row must be unioned with the virtual boundary node.
        // Test bottom boundary check for last block (lines 473-484)
        extern crate std;
        let mut memory = std::vec![0u8; 1024 * 1024];
        let mut arena = Arena::new(&mut memory);
        // 16x16 grid with STRIDE_Y=16
        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);

        // Enable bottom boundary check
        decoder.boundary_config = BoundaryConfig {
            check_top: false,
            check_bottom: true, // Enable bottom boundary
            check_left: false,
            check_right: false,
        };

        unsafe {
            let num_blocks = decoder.blocks_state.len();
            // Use the last data block (block 3 for 16x16 grid)
            let last_data_blk = num_blocks - 2;
            let base = last_data_blk * 64; // first node index of that block

            // Last block has boundary at bottom row
            let block = decoder.blocks_state.get_unchecked_mut(last_data_blk);
            block.boundary = 0xFFFF_0000_0000_0000u64; // Row 3 (bottom of block)
            block.occupied = 0xFFFF_0000_0000_0000u64;
            block.root = (base + 48) as u32;
            block.valid_mask = !0;
            block.effective_mask = !0;

            // All bottom-row nodes are rooted at the first one (base + 48).
            *decoder.parents.get_unchecked_mut(base + 48) = (base + 48) as u32;
            for i in 49..64 {
                *decoder.parents.get_unchecked_mut(base + i) = (base + 48) as u32;
            }

            decoder.process_mono::<false>(last_data_blk, (base + 48) as u32);

            // Should connect to boundary node
            // (the virtual boundary node is the last slot of the parents array)
            let boundary_node = (decoder.parents.len() - 1) as u32;
            let root = decoder.find((base + 48) as u32);
            assert_eq!(root, boundary_node, "Last block bottom row should connect to boundary");
        }
    }
2129
    #[test]
    fn test_process_mono_different_clusters_grow() {
        // Up neighbor is mono with a DIFFERENT root: processing must union
        // the two clusters and report expansion.
        // Test different clusters grow path (lines 334-337)
        extern crate std;
        let mut memory = std::vec![0u8; 1024 * 1024];
        let mut arena = Arena::new(&mut memory);
        // 16x16 grid with STRIDE_Y=16
        let mut decoder = DecodingState::<SquareGrid, 16>::new(&mut arena, 16, 16, 1);

        // Disable boundary checks
        decoder.boundary_config = BoundaryConfig {
            check_top: false,
            check_bottom: false,
            check_left: false,
            check_right: false,
        };

        unsafe {
            // Block 1 spreading up to block 0 with different root
            let block1 = decoder.blocks_state.get_unchecked_mut(1);
            block1.boundary = 0xFFFF; // Row 0
            block1.occupied = 0xFFFF;
            block1.root = 64;
            block1.valid_mask = !0;
            block1.effective_mask = !0;

            // Block 0 has different root and unoccupied space for grow
            let block0 = decoder.blocks_state.get_unchecked_mut(0);
            block0.boundary = 0xFFFF_0000_0000_0000u64; // Row 3
            block0.occupied = 0xFFFF_0000_0000_0000u64; // Only row 3 occupied
            block0.root = 48; // Different root!
            block0.valid_mask = !0;
            block0.effective_mask = !0;

            // Set up different roots
            // (block 0's row 3 rooted at 48, block 1's row 0 rooted at 64)
            *decoder.parents.get_unchecked_mut(48) = 48;
            *decoder.parents.get_unchecked_mut(64) = 64;
            for i in 49..64 {
                *decoder.parents.get_unchecked_mut(i) = 48;
            }
            for i in 65..80 {
                *decoder.parents.get_unchecked_mut(i) = 64;
            }

            let expanded = decoder.process_mono::<false>(1, 64);

            // Should expand (union different clusters and grow)
            assert!(expanded, "Should expand when merging different clusters");
            // Clusters should be merged
            let root48 = decoder.find(48);
            let root64 = decoder.find(64);
            assert_eq!(root48, root64, "Different clusters should be merged");
        }
    }
2184}