// prav_core/decoder/growth/unrolled.rs

1#![allow(unsafe_op_in_unsafe_fn)]
2use crate::decoder::state::DecodingState;
3use crate::intrinsics::{blsr, spread_syndrome_masked, tzcnt};
4use crate::topology::Topology;
5
6impl<'a, T: Topology, const STRIDE_Y: usize> DecodingState<'a, T, STRIDE_Y> {
7    /// Unrolled processing for exactly 16 blocks (1024 nodes) with STRIDE_Y=32.
8    /// Returns (any_expanded, next_active_mask).
9    ///
10    /// # Safety
11    ///
12    /// Caller must ensure the decoder state is properly initialized with exactly 16 blocks.
    /// Unrolled processing for exactly 16 blocks (1024 nodes) with STRIDE_Y=32.
    /// Returns (any_expanded, next_active_mask).
    ///
    /// Each u64 block packs two 32-column rows (low 32 bits = one row, high
    /// 32 bits = the row below it), so 16 blocks cover a 32x32 grid. For
    /// every active block the boundary bits are spread one step in all four
    /// directions, adjacent occupied cells are unioned in the `parents`
    /// union-find, and cells that reach the grid edge are unioned with the
    /// virtual boundary node.
    ///
    /// # Safety
    ///
    /// Caller must ensure the decoder state is properly initialized with exactly 16 blocks.
    pub unsafe fn process_all_blocks_stride_32_unrolled_16(&mut self) -> (bool, u64) {
        // Column 0 / column 31 of both packed rows within one block.
        const ROW_START_MASK: u64 = 0x0000000100000001;
        const ROW_END_MASK: u64 = 0x8000000080000000;

        // Raw pointers so the macros below can index freely without re-borrowing self.
        let parents_ptr = self.parents.as_mut_ptr();
        let parents_len = self.parents.len();
        // The virtual boundary node is stored in the last parents slot.
        let boundary_node = (parents_len - 1) as u32;

        let blocks_state_ptr = self.blocks_state.as_mut_ptr();
        let block_dirty_mask_ptr = self.block_dirty_mask.as_mut_ptr();

        let active_mask = self.active_block_mask;
        let mut next_mask = 0u64;
        let mut any_expanded = false;

        // Flag block $blk as dirty so later passes re-examine its union-find entries.
        macro_rules! mark_dirty {
            ($blk:expr) => {
                let mask_idx = $blk >> 6;
                let mask_bit = $blk & 63;
                let m_ptr = block_dirty_mask_ptr.add(mask_idx);
                *m_ptr |= 1 << mask_bit;
            };
        }

        // Union node $u with the virtual boundary node (path-halving find first).
        macro_rules! union_boundary {
            ($u:expr) => {
                let mut root_u = *parents_ptr.add($u as usize);
                while root_u != *parents_ptr.add(root_u as usize) {
                    let gp = *parents_ptr.add(root_u as usize);
                    *parents_ptr.add(root_u as usize) = gp;
                    mark_dirty!(root_u as usize >> 6);
                    root_u = gp;
                }

                if root_u != boundary_node {
                    // Link so that the larger node id stays the root.
                    if root_u < boundary_node {
                        *parents_ptr.add(root_u as usize) = boundary_node;
                        mark_dirty!(root_u as usize >> 6);
                    } else {
                        *parents_ptr.add(boundary_node as usize) = root_u;
                        mark_dirty!(boundary_node as usize >> 6);
                    }
                    any_expanded = true;
                }
            };
        }

        // For every set bit in $mask, union node ($base_global + $offset + bit)
        // with the virtual boundary node.
        macro_rules! connect_bits {
            ($mask:expr, $base_global:expr, $offset:expr) => {
                let mut m = $mask;
                while m != 0 {
                    let bit = tzcnt(m);
                    m = blsr(m);
                    let u = (($base_global as isize) + ($offset as isize) + (bit as isize)) as u32;
                    union_boundary!(u);
                }
            };
        }

        // Grow block $blk by $grow_mask: new cells become both occupied and
        // boundary, and the block stays active for the next round.
        macro_rules! fast_grow {
            ($blk:expr, $grow_mask:expr) => {
                let g = $grow_mask;
                if g != 0 {
                    mark_dirty!($blk);
                    (*blocks_state_ptr.add($blk)).occupied |= g;
                    (*blocks_state_ptr.add($blk)).boundary |= g;
                    next_mask |= 1 << $blk;
                }
            };
        }

        // Union each set bit of $mask as node ($base_src + $shift + bit) with
        // node ($base_target + bit), peeling off runs of consecutive bits.
        macro_rules! merge_shifted_macro {
            ($mask:expr, $base_src:expr, $shift:expr, $base_target:expr) => {
                let mut m = $mask;
                while m != 0 {
                    // Locate the next run of consecutive set bits.
                    let start_bit = m.trailing_zeros();
                    let shifted = m >> start_bit;
                    let run_len = (!shifted).trailing_zeros();

                    if run_len == 64 {
                        m = 0;
                    } else {
                        m ^= ((1u64 << run_len) - 1) << start_bit;
                    }

                    let base_u = ($base_src as isize + start_bit as isize + $shift) as u32;
                    let base_v = ($base_target + start_bit as usize) as u32;

                    for k in 0..run_len {
                        let u = base_u + k;
                        let v = base_v + k;
                        let pu = *parents_ptr.add(u as usize);
                        let pv = *parents_ptr.add(v as usize);
                        if pu == u && pv == v {
                            // Fast path: both nodes are singleton roots, link directly.
                            if u != v {
                                if u < v {
                                    *parents_ptr.add(u as usize) = v;
                                    mark_dirty!(u as usize >> 6);
                                } else {
                                    *parents_ptr.add(v as usize) = u;
                                    mark_dirty!(v as usize >> 6);
                                }
                                any_expanded = true;
                            }
                        } else {
                            // General path: find both roots with path halving, then link.
                            let mut root_u = pu;
                            while root_u != *parents_ptr.add(root_u as usize) {
                                let gp = *parents_ptr.add(root_u as usize);
                                *parents_ptr.add(root_u as usize) = gp;
                                mark_dirty!(root_u as usize >> 6);
                                root_u = gp;
                            }
                            let mut root_v = pv;
                            while root_v != *parents_ptr.add(root_v as usize) {
                                let gp = *parents_ptr.add(root_v as usize);
                                *parents_ptr.add(root_v as usize) = gp;
                                mark_dirty!(root_v as usize >> 6);
                                root_v = gp;
                            }
                            if root_u != root_v {
                                if root_u < root_v {
                                    *parents_ptr.add(root_u as usize) = root_v;
                                    mark_dirty!(root_u as usize >> 6);
                                } else {
                                    *parents_ptr.add(root_v as usize) = root_u;
                                    mark_dirty!(root_v as usize >> 6);
                                }
                                any_expanded = true;
                            }
                        }
                    }
                }
            };
        }

        // Expand one block. $up_type/$down_type select BOUNDARY (grid edge)
        // or NEIGHBOR (adjacent block) handling for the rows above/below.
        macro_rules! process_block_unrolled {
            ($blk:expr, $up_type:ident, $down_type:ident) => {
                if (active_mask & (1 << $blk)) != 0 {
                    const BLK: usize = $blk;
                    const BASE_GLOBAL: usize = BLK * 64;

                    let mut boundary = (*blocks_state_ptr.add(BLK)).boundary;

                    if boundary != 0 {
                        let mut occupied = (*blocks_state_ptr.add(BLK)).occupied;
                        let initial_occupied = occupied;
                        let initial_boundary = boundary;

                        let valid_mask = (*blocks_state_ptr.add(BLK)).valid_mask;
                        // Use effective_mask from Hot state
                        let effective_mask = (*blocks_state_ptr.add(BLK)).effective_mask;
                        let mask = effective_mask;

                        // Horizontal spread, then vertical spread between the
                        // two rows packed into this block.
                        let mut spread_boundary = spread_syndrome_masked(boundary, mask, ROW_END_MASK, ROW_START_MASK);
                        {
                            let up = (spread_boundary << 32) & mask;
                            let down = (spread_boundary >> 32) & mask;
                            spread_boundary |= up | down;
                        }

                        // Cells adjacent to invalid territory touch the grid boundary.
                        let internal_bottom_edge = spread_boundary & (!valid_mask >> 32);
                        let internal_top_edge = spread_boundary & (!valid_mask << 32);
                        let internal_boundary_mask = internal_bottom_edge | internal_top_edge;
                        if internal_boundary_mask != 0 {
                            connect_bits!(internal_boundary_mask, BASE_GLOBAL, 0);
                        }

                        // Intra-block Vertical Union
                        let vertical_pairs = spread_boundary & (spread_boundary >> 32) & 0xFFFFFFFF;
                        if vertical_pairs != 0 {
                            merge_shifted_macro!(vertical_pairs, BASE_GLOBAL, 32, BASE_GLOBAL);
                        }

                        // Intra-block Horizontal Union
                        let horizontal_pairs = spread_boundary & (spread_boundary >> 1) & !ROW_END_MASK;
                        if horizontal_pairs != 0 {
                            merge_shifted_macro!(horizontal_pairs, BASE_GLOBAL, 1, BASE_GLOBAL);
                        }

                        let new_occupied = occupied | spread_boundary;
                        if new_occupied != occupied {
                            occupied = new_occupied;
                            any_expanded = true;
                            next_mask |= 1 << BLK;
                        }

                        process_block_unrolled!(@UP $up_type, spread_boundary, BASE_GLOBAL);
                        process_block_unrolled!(@DOWN $down_type, spread_boundary, BASE_GLOBAL);

                        // Left/right grid-edge columns connect to the boundary node.
                        let row_start_hits = spread_boundary & ROW_START_MASK;
                        if row_start_hits != 0 { connect_bits!(row_start_hits, BASE_GLOBAL, 0); }
                        let row_end_hits = spread_boundary & ROW_END_MASK;
                        if row_end_hits != 0 { connect_bits!(row_end_hits, BASE_GLOBAL, 0); }

                        // Spread cells stop being boundary; write back only on change.
                        boundary &= !spread_boundary;
                        if occupied != initial_occupied || boundary != initial_boundary {
                             (*blocks_state_ptr.add(BLK)).occupied = occupied;
                             (*blocks_state_ptr.add(BLK)).boundary = boundary;
                        }
                    }
                }
            };

            // Top grid edge: the low-half row of block 0 unions with the boundary node.
            (@UP BOUNDARY, $spread_boundary:expr, $base_global:expr) => {
                let hits = $spread_boundary & 0xFFFFFFFF;
                if hits != 0 { connect_bits!(hits, $base_global, 0); }
            };
            // Spread into the block above: hit invalid cells, grow, or merge.
            (@UP NEIGHBOR, $spread_boundary:expr, $base_global:expr) => {
                const BLK_UP: usize = BLK - 1;
                let valid_up = (*blocks_state_ptr.add(BLK_UP)).valid_mask;
                let spread_to_up = $spread_boundary << 32;
                let boundary_hits = spread_to_up & !valid_up;
                if boundary_hits != 0 { connect_bits!(boundary_hits, $base_global, -32); }
                if valid_up != 0 {
                    let occupied_up = (*blocks_state_ptr.add(BLK_UP)).occupied;
                    let effective_up = (*blocks_state_ptr.add(BLK_UP)).effective_mask;
                    let grow_up = spread_to_up & !occupied_up & effective_up;
                    fast_grow!(BLK_UP, grow_up);
                    if grow_up != 0 { any_expanded = true; }
                    let merge_up = spread_to_up & occupied_up;
                    if merge_up != 0 { merge_shifted_macro!(merge_up, $base_global, -32, BLK_UP * 64); }
                }
            };

            // Bottom grid edge: the high-half row of block 15 unions with the boundary node.
            (@DOWN BOUNDARY, $spread_boundary:expr, $base_global:expr) => {
                let hits = $spread_boundary >> 32;
                if hits != 0 { connect_bits!(hits, $base_global, 32); }
            };
            // Spread into the block below: hit invalid cells, grow, or merge.
            (@DOWN NEIGHBOR, $spread_boundary:expr, $base_global:expr) => {
                const BLK_DOWN: usize = BLK + 1;
                let valid_down = (*blocks_state_ptr.add(BLK_DOWN)).valid_mask;
                let spread_to_down = $spread_boundary >> 32;
                let boundary_hits = spread_to_down & !valid_down;
                if boundary_hits != 0 { connect_bits!(boundary_hits, $base_global, 32); }
                if valid_down != 0 {
                    let occupied_down = (*blocks_state_ptr.add(BLK_DOWN)).occupied;
                    let effective_down = (*blocks_state_ptr.add(BLK_DOWN)).effective_mask;
                    let grow_down = spread_to_down & !occupied_down & effective_down;
                    fast_grow!(BLK_DOWN, grow_down);
                    if grow_down != 0 { any_expanded = true; }
                    let merge_down = spread_to_down & occupied_down;
                    if merge_down != 0 { merge_shifted_macro!(merge_down, $base_global, 32, BLK_DOWN * 64); }
                }
            };
        }

        // Fully unrolled: block 0 touches the top grid edge, block 15 the bottom.
        process_block_unrolled!(0, BOUNDARY, NEIGHBOR);
        process_block_unrolled!(1, NEIGHBOR, NEIGHBOR);
        process_block_unrolled!(2, NEIGHBOR, NEIGHBOR);
        process_block_unrolled!(3, NEIGHBOR, NEIGHBOR);
        process_block_unrolled!(4, NEIGHBOR, NEIGHBOR);
        process_block_unrolled!(5, NEIGHBOR, NEIGHBOR);
        process_block_unrolled!(6, NEIGHBOR, NEIGHBOR);
        process_block_unrolled!(7, NEIGHBOR, NEIGHBOR);
        process_block_unrolled!(8, NEIGHBOR, NEIGHBOR);
        process_block_unrolled!(9, NEIGHBOR, NEIGHBOR);
        process_block_unrolled!(10, NEIGHBOR, NEIGHBOR);
        process_block_unrolled!(11, NEIGHBOR, NEIGHBOR);
        process_block_unrolled!(12, NEIGHBOR, NEIGHBOR);
        process_block_unrolled!(13, NEIGHBOR, NEIGHBOR);
        process_block_unrolled!(14, NEIGHBOR, NEIGHBOR);
        process_block_unrolled!(15, NEIGHBOR, BOUNDARY);

        (any_expanded, next_mask)
    }
278
279    /// Optimized processing for stride-64 grids (64x64).
280    /// Returns (any_expanded, next_active_mask).
281    ///
282    /// # Safety
283    ///
284    /// Caller must ensure the decoder state is properly initialized for stride-64 processing.
285    pub unsafe fn process_all_blocks_stride_64(&mut self) -> (bool, u64) {
286        const ROW_START_MASK: u64 = 0x0000000000000001; // Bit 0 only
287        const ROW_END_MASK: u64 = 0x8000000000000000; // Bit 63 only
288
289        let parents_ptr = self.parents.as_mut_ptr();
290        let parents_len = self.parents.len();
291        let boundary_node = (parents_len - 1) as u32;
292
293        let blocks_state_ptr = self.blocks_state.as_mut_ptr();
294        let block_dirty_mask_ptr = self.block_dirty_mask.as_mut_ptr();
295
296        let num_blocks = self.blocks_state.len().min(65); // Max 64 data blocks + boundary
297        let active_mask = self.active_block_mask;
298        let mut next_mask = 0u64;
299        let mut any_expanded = false;
300
301        macro_rules! mark_dirty {
302            ($blk:expr) => {
303                let mask_idx = $blk >> 6;
304                let mask_bit = $blk & 63;
305                let m_ptr = block_dirty_mask_ptr.add(mask_idx);
306                *m_ptr |= 1 << mask_bit;
307            };
308        }
309
310        macro_rules! union_boundary {
311            ($u:expr) => {
312                let mut root_u = *parents_ptr.add($u as usize);
313                while root_u != *parents_ptr.add(root_u as usize) {
314                    let gp = *parents_ptr.add(root_u as usize);
315                    *parents_ptr.add(root_u as usize) = gp;
316                    mark_dirty!(root_u as usize >> 6);
317                    root_u = gp;
318                }
319                if root_u != boundary_node {
320                    if root_u < boundary_node {
321                        *parents_ptr.add(root_u as usize) = boundary_node;
322                        mark_dirty!(root_u as usize >> 6);
323                    } else {
324                        *parents_ptr.add(boundary_node as usize) = root_u;
325                        mark_dirty!(boundary_node as usize >> 6);
326                    }
327                    any_expanded = true;
328                }
329            };
330        }
331
332        macro_rules! connect_bits {
333            ($mask:expr, $base_global:expr, $offset:expr) => {
334                let mut m = $mask;
335                while m != 0 {
336                    let bit = tzcnt(m);
337                    m = blsr(m);
338                    let u = (($base_global as isize) + ($offset as isize) + (bit as isize)) as u32;
339                    union_boundary!(u);
340                }
341            };
342        }
343
344        macro_rules! fast_grow {
345            ($blk:expr, $grow_mask:expr) => {
346                let g = $grow_mask;
347                if g != 0 {
348                    mark_dirty!($blk);
349                    (*blocks_state_ptr.add($blk)).occupied |= g;
350                    (*blocks_state_ptr.add($blk)).boundary |= g;
351                    next_mask |= 1 << $blk;
352                }
353            };
354        }
355
356        macro_rules! merge_horizontal {
357            ($mask:expr, $base:expr) => {
358                let mut m = $mask;
359                while m != 0 {
360                    let start_bit = m.trailing_zeros();
361                    let shifted = m >> start_bit;
362                    let run_len = (!shifted).trailing_zeros();
363                    if run_len == 64 {
364                        m = 0;
365                    } else {
366                        m ^= ((1u64 << run_len) - 1) << start_bit;
367                    }
368
369                    let base_u = ($base + start_bit as usize + 1) as u32;
370                    let base_v = ($base + start_bit as usize) as u32;
371
372                    for k in 0..run_len {
373                        let u = base_u + k;
374                        let v = base_v + k;
375                        let pu = *parents_ptr.add(u as usize);
376                        let pv = *parents_ptr.add(v as usize);
377                        if pu == u && pv == v {
378                            if u != v {
379                                if u < v {
380                                    *parents_ptr.add(u as usize) = v;
381                                    mark_dirty!(u as usize >> 6);
382                                } else {
383                                    *parents_ptr.add(v as usize) = u;
384                                    mark_dirty!(v as usize >> 6);
385                                }
386                                any_expanded = true;
387                            }
388                        } else {
389                            let mut root_u = pu;
390                            while root_u != *parents_ptr.add(root_u as usize) {
391                                let gp = *parents_ptr.add(root_u as usize);
392                                *parents_ptr.add(root_u as usize) = gp;
393                                mark_dirty!(root_u as usize >> 6);
394                                root_u = gp;
395                            }
396                            let mut root_v = pv;
397                            while root_v != *parents_ptr.add(root_v as usize) {
398                                let gp = *parents_ptr.add(root_v as usize);
399                                *parents_ptr.add(root_v as usize) = gp;
400                                mark_dirty!(root_v as usize >> 6);
401                                root_v = gp;
402                            }
403                            if root_u != root_v {
404                                if root_u < root_v {
405                                    *parents_ptr.add(root_u as usize) = root_v;
406                                    mark_dirty!(root_u as usize >> 6);
407                                } else {
408                                    *parents_ptr.add(root_v as usize) = root_u;
409                                    mark_dirty!(root_v as usize >> 6);
410                                }
411                                any_expanded = true;
412                            }
413                        }
414                    }
415                }
416            };
417        }
418
419        // Process all active blocks
420        let mut blk_mask = active_mask;
421        while blk_mask != 0 {
422            let blk = tzcnt(blk_mask) as usize;
423            blk_mask = blsr(blk_mask);
424
425            if blk >= num_blocks {
426                continue;
427            }
428
429            let base_global = blk * 64;
430            let mut boundary = (*blocks_state_ptr.add(blk)).boundary;
431
432            if boundary == 0 {
433                continue;
434            }
435
436            let mut occupied = (*blocks_state_ptr.add(blk)).occupied;
437            let initial_occupied = occupied;
438            let initial_boundary = boundary;
439
440            let _valid_mask = (*blocks_state_ptr.add(blk)).valid_mask;
441            let effective_mask = (*blocks_state_ptr.add(blk)).effective_mask;
442
443            // Horizontal spread only (no intra-block vertical for stride-64)
444            let spread_boundary =
445                crate::intrinsics::spread_syndrome_linear(boundary, effective_mask);
446
447            // Horizontal union (adjacent bits within same row)
448            let horizontal_pairs = spread_boundary & (spread_boundary >> 1) & !ROW_END_MASK;
449            if horizontal_pairs != 0 {
450                merge_horizontal!(horizontal_pairs, base_global);
451            }
452
453            let new_occupied = occupied | spread_boundary;
454            if new_occupied != occupied {
455                occupied = new_occupied;
456                any_expanded = true;
457                next_mask |= 1 << blk;
458            }
459
460            // UP neighbor (previous block = previous row)
461            if blk > 0 {
462                let blk_up = blk - 1;
463                let valid_up = (*blocks_state_ptr.add(blk_up)).valid_mask;
464                let boundary_hits = spread_boundary & !valid_up;
465                if boundary_hits != 0 {
466                    connect_bits!(boundary_hits, base_global, 0);
467                }
468
469                let occupied_up = (*blocks_state_ptr.add(blk_up)).occupied;
470                let effective_up = (*blocks_state_ptr.add(blk_up)).effective_mask;
471                let grow_up = spread_boundary & !occupied_up & effective_up;
472                fast_grow!(blk_up, grow_up);
473                if grow_up != 0 {
474                    any_expanded = true;
475                }
476
477                // Merge with occupied cells in up block
478                let merge_up = spread_boundary & occupied_up;
479                if merge_up != 0 {
480                    let mut m = merge_up;
481                    while m != 0 {
482                        let bit = tzcnt(m) as usize;
483                        m = blsr(m);
484                        let u = (base_global + bit) as u32;
485                        let v = (blk_up * 64 + bit) as u32;
486                        let pu = *parents_ptr.add(u as usize);
487                        let pv = *parents_ptr.add(v as usize);
488                        if pu != pv {
489                            let mut root_u = pu;
490                            while root_u != *parents_ptr.add(root_u as usize) {
491                                let gp = *parents_ptr.add(root_u as usize);
492                                *parents_ptr.add(root_u as usize) = gp;
493                                root_u = gp;
494                            }
495                            let mut root_v = pv;
496                            while root_v != *parents_ptr.add(root_v as usize) {
497                                let gp = *parents_ptr.add(root_v as usize);
498                                *parents_ptr.add(root_v as usize) = gp;
499                                root_v = gp;
500                            }
501                            if root_u != root_v {
502                                if root_u < root_v {
503                                    *parents_ptr.add(root_u as usize) = root_v;
504                                } else {
505                                    *parents_ptr.add(root_v as usize) = root_u;
506                                }
507                                any_expanded = true;
508                            }
509                        }
510                    }
511                }
512            } else {
513                // Block 0: top boundary
514                let hits = spread_boundary;
515                if hits != 0 {
516                    connect_bits!(hits, base_global, 0);
517                }
518            }
519
520            // DOWN neighbor (next block = next row)
521            let blk_down = blk + 1;
522            if blk_down < num_blocks {
523                let valid_down = (*blocks_state_ptr.add(blk_down)).valid_mask;
524                let boundary_hits = spread_boundary & !valid_down;
525                if boundary_hits != 0 {
526                    connect_bits!(boundary_hits, base_global, 0);
527                }
528
529                let occupied_down = (*blocks_state_ptr.add(blk_down)).occupied;
530                let effective_down = (*blocks_state_ptr.add(blk_down)).effective_mask;
531                let grow_down = spread_boundary & !occupied_down & effective_down;
532                fast_grow!(blk_down, grow_down);
533                if grow_down != 0 {
534                    any_expanded = true;
535                }
536
537                // Merge with occupied cells in down block
538                let merge_down = spread_boundary & occupied_down;
539                if merge_down != 0 {
540                    let mut m = merge_down;
541                    while m != 0 {
542                        let bit = tzcnt(m) as usize;
543                        m = blsr(m);
544                        let u = (base_global + bit) as u32;
545                        let v = (blk_down * 64 + bit) as u32;
546                        let pu = *parents_ptr.add(u as usize);
547                        let pv = *parents_ptr.add(v as usize);
548                        if pu != pv {
549                            let mut root_u = pu;
550                            while root_u != *parents_ptr.add(root_u as usize) {
551                                let gp = *parents_ptr.add(root_u as usize);
552                                *parents_ptr.add(root_u as usize) = gp;
553                                root_u = gp;
554                            }
555                            let mut root_v = pv;
556                            while root_v != *parents_ptr.add(root_v as usize) {
557                                let gp = *parents_ptr.add(root_v as usize);
558                                *parents_ptr.add(root_v as usize) = gp;
559                                mark_dirty!(root_v as usize >> 6);
560                                root_v = gp;
561                            }
562                            if root_u != root_v {
563                                if root_u < root_v {
564                                    *parents_ptr.add(root_u as usize) = root_v;
565                                } else {
566                                    *parents_ptr.add(root_v as usize) = root_u;
567                                }
568                                any_expanded = true;
569                            }
570                        }
571                    }
572                }
573            } else {
574                // Last block: bottom boundary
575                let hits = spread_boundary;
576                if hits != 0 {
577                    connect_bits!(hits, base_global, 0);
578                }
579            }
580
581            // Left/Right edge hits
582            let left_hits = spread_boundary & ROW_START_MASK;
583            if left_hits != 0 {
584                connect_bits!(left_hits, base_global, 0);
585            }
586            let right_hits = spread_boundary & ROW_END_MASK;
587            if right_hits != 0 {
588                connect_bits!(right_hits, base_global, 0);
589            }
590
591            // Update boundary
592            boundary &= !spread_boundary;
593            if occupied != initial_occupied || boundary != initial_boundary {
594                (*blocks_state_ptr.add(blk)).occupied = occupied;
595                (*blocks_state_ptr.add(blk)).boundary = boundary;
596            }
597        }
598
599        (any_expanded, next_mask)
600    }
601
602    /// # Safety
603    ///
604    /// This function is deprecated and always returns false.
605    #[inline(always)]
606    pub unsafe fn process_block_optimized_32<const SILENT: bool>(
607        &mut self,
608        _blk_idx: usize,
609    ) -> bool {
610        // Removed as part of code simplification. Stride 32 now uses small_stride.
611        false
612    }
613}
614
615#[cfg(test)]
616mod tests {
617    use crate::arena::Arena;
618    use crate::decoder::state::DecodingState;
619    use crate::topology::SquareGrid;
620
621    #[test]
622    fn test_optimized_32_unrolled_16() {
623        extern crate std;
624        let mut memory = std::vec![0u8; 10 * 1024 * 1024];
625        let mut arena = Arena::new(&mut memory);
626        let mut decoder = DecodingState::<SquareGrid, 32>::new(&mut arena, 32, 32, 1);
627        let boundary_node = (decoder.parents.len() - 1) as u32;
628        unsafe {
629            let mut syndromes = [0u64; 16];
630            syndromes[0] = 1;
631            syndromes[1] = 1;
632            syndromes[15] = 1u64 << 63;
633            decoder.load_dense_syndromes(&syndromes);
634            decoder.active_block_mask = (1 << 0) | (1 << 1) | (1 << 15);
635            let (expanded, next_mask) = decoder.process_all_blocks_stride_32_unrolled_16();
636            assert!(expanded);
637            assert_eq!(next_mask & (1 << 0), 1 << 0);
638            assert_eq!(next_mask & (1 << 1), 1 << 1);
639            assert_eq!(next_mask & (1 << 15), 1 << 15);
640            assert_eq!(decoder.find(0), boundary_node);
641            assert_eq!(decoder.find(64), boundary_node);
642            assert_eq!(decoder.find(1023), boundary_node);
643        }
644    }
645
646    /// Test stride_64 processing for 64x64 grids.
647    #[test]
648    fn test_stride_64_basic() {
649        extern crate std;
650        let mut memory = std::vec![0u8; 50 * 1024 * 1024];
651        let mut arena = Arena::new(&mut memory);
652        let mut decoder = DecodingState::<SquareGrid, 64>::new(&mut arena, 64, 64, 1);
653        let boundary_node = (decoder.parents.len() - 1) as u32;
654
655        unsafe {
656            // Set syndrome in first block (top-left corner)
657            let mut syndromes = std::vec![0u64; decoder.blocks_state.len()];
658            syndromes[0] = 1; // bit 0 = node at (0, 0)
659            decoder.load_dense_syndromes(&syndromes);
660            decoder.active_block_mask = 1;
661
662            let (expanded, next_mask) = decoder.process_all_blocks_stride_64();
663            assert!(expanded);
664            assert!(next_mask != 0);
665            assert_eq!(decoder.find(0), boundary_node, "Corner node should connect to boundary");
666        }
667    }
668
669    /// Test stride_64 with interior nodes (not at boundary).
670    #[test]
671    fn test_stride_64_interior_nodes() {
672        extern crate std;
673        let mut memory = std::vec![0u8; 50 * 1024 * 1024];
674        let mut arena = Arena::new(&mut memory);
675        let mut decoder = DecodingState::<SquareGrid, 64>::new(&mut arena, 64, 64, 1);
676
677        unsafe {
678            // Set adjacent syndromes in middle block to trigger horizontal merge
679            let mut syndromes = std::vec![0u64; decoder.blocks_state.len()];
680            // Block 32 (middle row), nodes at positions 10 and 11 (adjacent)
681            syndromes[32] = 0b11 << 10;
682            decoder.load_dense_syndromes(&syndromes);
683            decoder.active_block_mask = 1u64 << 32;
684
685            let (expanded, _next_mask) = decoder.process_all_blocks_stride_64();
686            assert!(expanded);
687
688            // Nodes at (10, 32) and (11, 32) should be connected
689            let node_a = 32 * 64 + 10;
690            let node_b = 32 * 64 + 11;
691            assert_eq!(decoder.find(node_a), decoder.find(node_b), "Adjacent nodes should merge");
692        }
693    }
694
695    /// Test stride_64 with last block (bottom boundary).
696    #[test]
697    fn test_stride_64_last_block() {
698        extern crate std;
699        let mut memory = std::vec![0u8; 50 * 1024 * 1024];
700        let mut arena = Arena::new(&mut memory);
701        let mut decoder = DecodingState::<SquareGrid, 64>::new(&mut arena, 64, 64, 1);
702        let boundary_node = (decoder.parents.len() - 1) as u32;
703        let num_blocks = decoder.blocks_state.len();
704
705        unsafe {
706            // Set syndrome in last data block
707            let mut syndromes = std::vec![0u64; num_blocks];
708            let last_data_block = num_blocks - 2; // sentinel is at end
709            syndromes[last_data_block] = 1u64 << 63; // bottom-right corner
710            decoder.load_dense_syndromes(&syndromes);
711            decoder.active_block_mask = 1u64 << last_data_block;
712
713            let (expanded, _next_mask) = decoder.process_all_blocks_stride_64();
714            assert!(expanded);
715            assert_eq!(decoder.find((last_data_block * 64 + 63) as u32), boundary_node);
716        }
717    }
718
719    /// Test stride_64 with vertical neighbor growth (up).
720    #[test]
721    fn test_stride_64_vertical_up() {
722        extern crate std;
723        let mut memory = std::vec![0u8; 50 * 1024 * 1024];
724        let mut arena = Arena::new(&mut memory);
725        let mut decoder = DecodingState::<SquareGrid, 64>::new(&mut arena, 64, 64, 1);
726
727        unsafe {
728            // Place defect in block 5, it should spread to block 4 (up)
729            let mut syndromes = std::vec![0u64; decoder.blocks_state.len()];
730            syndromes[5] = 1 << 10; // Node in block 5
731            decoder.load_dense_syndromes(&syndromes);
732            decoder.active_block_mask = 1u64 << 5;
733
734            // Run multiple iterations
735            for _ in 0..10 {
736                let (_, next_mask) = decoder.process_all_blocks_stride_64();
737                decoder.active_block_mask = next_mask;
738                if next_mask == 0 {
739                    break;
740                }
741            }
742            // Just verify it completes without panic
743        }
744    }
745
746    /// Test stride_64 with vertical neighbor growth (down).
747    #[test]
748    fn test_stride_64_vertical_down() {
749        extern crate std;
750        let mut memory = std::vec![0u8; 50 * 1024 * 1024];
751        let mut arena = Arena::new(&mut memory);
752        let mut decoder = DecodingState::<SquareGrid, 64>::new(&mut arena, 64, 64, 1);
753
754        unsafe {
755            // Place defects in block 5 and 6 to test down neighbor merging
756            let mut syndromes = std::vec![0u64; decoder.blocks_state.len()];
757            syndromes[5] = 1 << 10;
758            syndromes[6] = 1 << 10; // Same column, next row
759            decoder.load_dense_syndromes(&syndromes);
760            decoder.active_block_mask = (1u64 << 5) | (1u64 << 6);
761
762            // Run iterations
763            for _ in 0..20 {
764                let (_, next_mask) = decoder.process_all_blocks_stride_64();
765                decoder.active_block_mask = next_mask;
766                if next_mask == 0 {
767                    break;
768                }
769            }
770
771            // Nodes should be connected via growth
772            let node_a = 5 * 64 + 10;
773            let node_b = 6 * 64 + 10;
774            assert_eq!(decoder.find(node_a as u32), decoder.find(node_b as u32));
775        }
776    }
777
778    /// Test stride_32 with interior vertical pairs.
779    #[test]
780    fn test_stride_32_vertical_pairs() {
781        extern crate std;
782        let mut memory = std::vec![0u8; 10 * 1024 * 1024];
783        let mut arena = Arena::new(&mut memory);
784        let mut decoder = DecodingState::<SquareGrid, 32>::new(&mut arena, 32, 32, 1);
785
786        unsafe {
787            // Place two defects that form a vertical pair within a block
788            // Block layout for stride 32: 2 rows of 32 bits each
789            let mut syndromes = [0u64; 17];
790            // Bits 0 and 32 are vertically adjacent in stride-32
791            syndromes[5] = (1 << 0) | (1 << 32);
792            decoder.load_dense_syndromes(&syndromes);
793            decoder.active_block_mask = 1u64 << 5;
794
795            let (expanded, _) = decoder.process_all_blocks_stride_32_unrolled_16();
796            assert!(expanded);
797
798            // The two nodes should be connected
799            let node_a = 5 * 64 + 0;
800            let node_b = 5 * 64 + 32;
801            assert_eq!(decoder.find(node_a as u32), decoder.find(node_b as u32));
802        }
803    }
804
805    /// Test stride_32 with horizontal pairs.
806    #[test]
807    fn test_stride_32_horizontal_pairs() {
808        extern crate std;
809        let mut memory = std::vec![0u8; 10 * 1024 * 1024];
810        let mut arena = Arena::new(&mut memory);
811        let mut decoder = DecodingState::<SquareGrid, 32>::new(&mut arena, 32, 32, 1);
812
813        unsafe {
814            // Place two horizontally adjacent defects
815            let mut syndromes = [0u64; 17];
816            syndromes[5] = (1 << 10) | (1 << 11);
817            decoder.load_dense_syndromes(&syndromes);
818            decoder.active_block_mask = 1u64 << 5;
819
820            let (expanded, _) = decoder.process_all_blocks_stride_32_unrolled_16();
821            assert!(expanded);
822
823            let node_a = 5 * 64 + 10;
824            let node_b = 5 * 64 + 11;
825            assert_eq!(decoder.find(node_a as u32), decoder.find(node_b as u32));
826        }
827    }
828
829    /// Test stride_32 inter-block growth (up).
830    #[test]
831    fn test_stride_32_inter_block_up() {
832        extern crate std;
833        let mut memory = std::vec![0u8; 10 * 1024 * 1024];
834        let mut arena = Arena::new(&mut memory);
835        let mut decoder = DecodingState::<SquareGrid, 32>::new(&mut arena, 32, 32, 1);
836
837        unsafe {
838            // Place defects in blocks 7 and 6 (adjacent)
839            let mut syndromes = [0u64; 17];
840            syndromes[7] = 1 << 0; // Top of block 7
841            syndromes[6] = 1u64 << 63; // Bottom of block 6
842            decoder.load_dense_syndromes(&syndromes);
843            decoder.active_block_mask = (1u64 << 7) | (1u64 << 6);
844
845            // Run multiple iterations
846            for _ in 0..20 {
847                let (_, next_mask) = decoder.process_all_blocks_stride_32_unrolled_16();
848                decoder.active_block_mask = next_mask;
849                if next_mask == 0 {
850                    break;
851                }
852            }
853
854            // Both should connect
855            let root_a = decoder.find((7 * 64) as u32);
856            let root_b = decoder.find((6 * 64 + 63) as u32);
857            assert_eq!(root_a, root_b);
858        }
859    }
860
861    /// Test stride_32 inter-block growth (down).
862    #[test]
863    fn test_stride_32_inter_block_down() {
864        extern crate std;
865        let mut memory = std::vec![0u8; 10 * 1024 * 1024];
866        let mut arena = Arena::new(&mut memory);
867        let mut decoder = DecodingState::<SquareGrid, 32>::new(&mut arena, 32, 32, 1);
868
869        unsafe {
870            // Place defects in blocks 7 and 8
871            let mut syndromes = [0u64; 17];
872            syndromes[7] = 1u64 << 63; // Bottom of block 7
873            syndromes[8] = 1 << 0; // Top of block 8
874            decoder.load_dense_syndromes(&syndromes);
875            decoder.active_block_mask = (1u64 << 7) | (1u64 << 8);
876
877            for _ in 0..20 {
878                let (_, next_mask) = decoder.process_all_blocks_stride_32_unrolled_16();
879                decoder.active_block_mask = next_mask;
880                if next_mask == 0 {
881                    break;
882                }
883            }
884
885            let root_a = decoder.find((7 * 64 + 63) as u32);
886            let root_b = decoder.find((8 * 64) as u32);
887            assert_eq!(root_a, root_b);
888        }
889    }
890
891    /// Test empty active mask produces no expansion.
892    #[test]
893    fn test_stride_32_empty_active() {
894        extern crate std;
895        let mut memory = std::vec![0u8; 10 * 1024 * 1024];
896        let mut arena = Arena::new(&mut memory);
897        let mut decoder = DecodingState::<SquareGrid, 32>::new(&mut arena, 32, 32, 1);
898
899        unsafe {
900            decoder.active_block_mask = 0;
901            let (expanded, next_mask) = decoder.process_all_blocks_stride_32_unrolled_16();
902            assert!(!expanded);
903            assert_eq!(next_mask, 0);
904        }
905    }
906
907    /// Test stride_64 empty active mask.
908    #[test]
909    fn test_stride_64_empty_active() {
910        extern crate std;
911        let mut memory = std::vec![0u8; 50 * 1024 * 1024];
912        let mut arena = Arena::new(&mut memory);
913        let mut decoder = DecodingState::<SquareGrid, 64>::new(&mut arena, 64, 64, 1);
914
915        unsafe {
916            decoder.active_block_mask = 0;
917            let (expanded, next_mask) = decoder.process_all_blocks_stride_64();
918            assert!(!expanded);
919            assert_eq!(next_mask, 0);
920        }
921    }
922
923    /// Test process_block_optimized_32 (stub function).
924    #[test]
925    fn test_process_block_optimized_32_stub() {
926        extern crate std;
927        let mut memory = std::vec![0u8; 10 * 1024 * 1024];
928        let mut arena = Arena::new(&mut memory);
929        let mut decoder = DecodingState::<SquareGrid, 32>::new(&mut arena, 32, 32, 1);
930
931        unsafe {
932            let result = decoder.process_block_optimized_32::<false>(0);
933            assert!(!result);
934
935            let result_silent = decoder.process_block_optimized_32::<true>(0);
936            assert!(!result_silent);
937        }
938    }
939}