rust-par2 0.1.2

Pure Rust PAR2 verify and repair with SIMD-accelerated Galois field arithmetic
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
//! PAR2 repair engine.
//!
//! Repairs damaged or missing files using Reed-Solomon recovery data.
//!
//! Algorithm (D×D reduced approach — D = damaged blocks, N = total blocks):
//! 1. Verify to identify damaged/missing blocks
//! 2. Load recovery blocks from volume files
//! 3. Build a D×D Vandermonde submatrix (only damaged columns × recovery rows)
//! 4. Invert the D×D matrix — O(D³) instead of O(N²D) for the full N×N
//! 5. Compute adjusted recovery: subtract intact-block contributions from recovery data
//! 6. Apply the D×D inverse to produce repaired blocks
//! 7. Write repaired blocks back to files

use std::collections::HashMap;
use std::collections::hash_map::Entry;
use std::fmt;
use std::io::{Read, Seek, SeekFrom, Write};
use std::path::Path;

use rayon::prelude::*;
use tracing::{debug, info};

use crate::gf;
use crate::gf_simd;
use crate::matrix::{GfMatrix, par2_input_constants};
use crate::recovery::{RecoveryBlock, load_recovery_blocks};
use crate::types::{Par2FileSet, VerifyResult};
use crate::verify;

/// Result of a repair operation.
#[derive(Debug)]
pub struct RepairResult {
    /// Whether the repair succeeded (all files now intact).
    pub success: bool,
    /// Number of blocks repaired.
    pub blocks_repaired: u32,
    /// Number of files repaired.
    pub files_repaired: usize,
    /// Descriptive message.
    pub message: String,
}

impl fmt::Display for RepairResult {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if self.success {
            write!(
                f,
                "Repair complete: {} blocks repaired across {} files",
                self.blocks_repaired, self.files_repaired
            )
        } else {
            write!(f, "Repair failed: {}", self.message)
        }
    }
}

/// Errors that can occur during repair.
#[derive(Debug, thiserror::Error)]
pub enum RepairError {
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
    #[error("Insufficient recovery data: need {needed} blocks, have {available}")]
    InsufficientRecovery { needed: u32, available: u32 },
    #[error("Decode matrix is singular — cannot repair with these recovery blocks")]
    SingularMatrix,
    #[error("No damage detected — nothing to repair")]
    NoDamage,
    #[error("Verification after repair failed: {0}")]
    VerifyFailed(String),
}

/// Repair damaged/missing files in a PAR2 set.
///
/// This is a blocking operation. For async contexts, wrap in `spawn_blocking`.
///
/// Runs verification internally to identify damage. If you already have a
/// [`VerifyResult`] from a prior [`verify()`](crate::verify) call, use
/// [`repair_from_verify`] instead to skip the redundant verification pass.
pub fn repair(file_set: &Par2FileSet, dir: &Path) -> Result<RepairResult, RepairError> {
    let verify_result = verify::verify(file_set, dir);
    repair_from_verify_inner(file_set, dir, &verify_result, true)
}

/// Repair using a pre-computed [`VerifyResult`].
///
/// This skips the initial verification pass, saving significant time when the
/// caller has already called [`verify()`](crate::verify). The `verify_result`
/// must have been computed against the same `file_set` and `dir`.
///
/// Uses a reduced D×D matrix approach where D = number of damaged blocks.
/// The full N×N decode matrix is never constructed — only a small Vandermonde
/// submatrix covering damaged positions is built and inverted. This reduces
/// matrix inversion from O(N²D) to O(D³), which is the difference between
/// minutes and milliseconds for typical repair scenarios.
///
/// After repair, a full MD5 verification pass confirms all files are correct.
/// To skip re-verification, use [`repair_from_verify_no_reverify`].
///
/// This is a blocking operation. For async contexts, wrap in `spawn_blocking`.
pub fn repair_from_verify(
    file_set: &Par2FileSet,
    dir: &Path,
    verify_result: &VerifyResult,
) -> Result<RepairResult, RepairError> {
    repair_from_verify_inner(file_set, dir, verify_result, true)
}

/// Like [`repair_from_verify`], but skips re-verification after repair.
///
/// The Reed-Solomon math is deterministic — if the matrix inverted and I/O
/// succeeded, the output is correct. Skipping re-verify saves a full read
/// of all files (~6s per 5GB).
pub fn repair_from_verify_no_reverify(
    file_set: &Par2FileSet,
    dir: &Path,
    verify_result: &VerifyResult,
) -> Result<RepairResult, RepairError> {
    repair_from_verify_inner(file_set, dir, verify_result, false)
}

fn repair_from_verify_inner(
    file_set: &Par2FileSet,
    dir: &Path,
    verify_result: &VerifyResult,
    re_verify: bool,
) -> Result<RepairResult, RepairError> {
    if verify_result.all_correct() {
        return Err(RepairError::NoDamage);
    }

    let blocks_needed = verify_result.blocks_needed();
    info!(
        blocks_needed,
        damaged = verify_result.damaged.len(),
        missing = verify_result.missing.len(),
        "Repair: damage detected"
    );

    // Step 1: Load recovery blocks from all volume files
    let recovery_blocks = load_recovery_blocks(dir, &file_set.recovery_set_id, file_set.slice_size);

    if (recovery_blocks.len() as u32) < blocks_needed {
        return Err(RepairError::InsufficientRecovery {
            needed: blocks_needed,
            available: recovery_blocks.len() as u32,
        });
    }

    // Step 2: Map files to global block indices
    let block_map = build_block_map(file_set);
    let total_input_blocks = block_map.total_blocks as usize;

    let damaged_indices = find_damaged_block_indices(verify_result, &block_map);
    let num_damaged = damaged_indices.len();
    info!(
        damaged_block_count = num_damaged,
        total_input_blocks, "Mapped damaged blocks to global indices"
    );

    // Select exactly D recovery blocks (one per damaged block)
    let recovery_to_use: Vec<&RecoveryBlock> = recovery_blocks.iter().take(num_damaged).collect();
    let recovery_exponents: Vec<u32> = recovery_to_use.iter().map(|b| b.exponent).collect();

    // Step 3: Build and invert the D×D Vandermonde submatrix.
    //
    // The full encoding equation for recovery block e is:
    //   R_e = Σ_i (c_i^exp_e × D_i)   for all input blocks i
    //
    // Rearranging for the damaged blocks only:
    //   R_e - Σ_{intact i} (c_i^exp_e × D_i) = Σ_{damaged j} (c_j^exp_e × D_j)
    //
    // The D×D matrix V[e][j] = c_{damaged_j}^{exp_e} is what we invert.
    let constants = par2_input_constants(total_input_blocks);

    let mut vandermonde = GfMatrix::zeros(num_damaged, num_damaged);
    for (e, &exp) in recovery_exponents.iter().enumerate() {
        for (j, &dmg_idx) in damaged_indices.iter().enumerate() {
            vandermonde.set(e, j, gf::pow(constants[dmg_idx], exp));
        }
    }

    let inverse = vandermonde.invert().ok_or(RepairError::SingularMatrix)?;

    info!(
        "D×D decode matrix inverted ({}×{})",
        num_damaged, num_damaged
    );

    // Step 4: Compute adjusted recovery buffers.
    //
    // adjusted[e] = R_e ⊕ Σ_{intact i} (c_i^exp_e × D_i)
    //
    // In GF(2^16), subtraction = addition = XOR, so we XOR-accumulate the
    // intact-block contributions into the recovery data.
    //
    // Cache-tiled approach: the D output buffers (D × slice_size) typically
    // exceed L3 cache. Iterating source-major (one source at a time against
    // all D outputs) thrashes cache on every source. Instead we read batches
    // of B source blocks, then process each output buffer against the entire
    // batch — keeping the output buffer hot in L1/L2 while applying B sources
    // via mul_add_multi's pair-batched AVX2 path.

    let slice_size = file_set.slice_size as usize;

    // Build a fast lookup for damaged indices
    let damaged_set: std::collections::HashSet<usize> = damaged_indices.iter().copied().collect();

    // Initialize adjusted recovery from the actual recovery block data
    let mut adjusted: Vec<Vec<u8>> = recovery_to_use.iter().map(|rb| rb.data.clone()).collect();

    // Collect intact block indices for batch reading
    let intact_indices: Vec<usize> = (0..total_input_blocks)
        .filter(|i| !damaged_set.contains(i))
        .collect();

    // Batch size: chosen so B × slice_size fits comfortably in L3 cache (~16-32MB).
    // With 768KB slices, 24 blocks = ~18MB. Leaves room for output buffer in L1/L2.
    const BATCH_SIZE: usize = 24;

    // Read and process source blocks in cache-friendly batches.
    // I/O is sequential so OS readahead keeps the pipeline full.
    let mut file_handles: HashMap<String, std::fs::File> = HashMap::new();

    for batch in intact_indices.chunks(BATCH_SIZE) {
        // Read this batch of source blocks
        let batch_data: Vec<Vec<u8>> = batch
            .iter()
            .map(|&idx| read_source_block(dir, &block_map, idx, slice_size, &mut file_handles))
            .collect::<std::io::Result<Vec<_>>>()?;

        let batch_refs: Vec<&[u8]> = batch_data.iter().map(|v| v.as_slice()).collect();

        // For each output buffer (parallelized across cores), apply all sources
        // in this batch at once. The output buffer stays in L1/L2 for the whole
        // batch, and mul_add_multi uses pair-batched AVX2 to halve dst bandwidth.
        adjusted
            .par_iter_mut()
            .enumerate()
            .for_each(|(e, adj_buf)| {
                let coeffs: Vec<u16> = batch
                    .iter()
                    .map(|&src_idx| gf::pow(constants[src_idx], recovery_exponents[e]))
                    .collect();
                gf_simd::mul_add_multi(adj_buf, &batch_refs, &coeffs);
            });
    }

    info!("Intact-block contributions subtracted from recovery data");

    // Step 5: Apply the D×D inverse to produce repaired blocks.
    //
    // repaired[j] = Σ_e (V^{-1}[j][e] × adjusted[e])
    //
    // This is D × D mul-adds of slice_size — small relative to the streaming phase.

    let adj_refs: Vec<&[u8]> = adjusted.iter().map(|v| v.as_slice()).collect();

    let mut outputs: Vec<Vec<u8>> = (0..num_damaged).map(|_| vec![0u8; slice_size]).collect();

    outputs.par_iter_mut().enumerate().for_each(|(j, dst)| {
        let coeffs: Vec<u16> = (0..num_damaged).map(|e| inverse.get(j, e)).collect();
        gf_simd::mul_add_multi(dst, &adj_refs, &coeffs);
    });

    info!("Repaired blocks reconstructed via D×D inverse");

    let repaired_blocks: Vec<(usize, Vec<u8>)> =
        damaged_indices.iter().copied().zip(outputs).collect();

    // Step 6: Write repaired blocks back to files
    let mut files_touched = std::collections::HashSet::new();

    for (global_idx, data) in &repaired_blocks {
        let (filename, file_offset, write_len) = block_map.global_to_file(*global_idx, slice_size);

        let file_path = dir.join(&filename);
        debug!(
            filename,
            global_block = global_idx,
            offset = file_offset,
            len = write_len,
            "Writing repaired block"
        );

        let mut f = std::fs::OpenOptions::new()
            .create(true)
            .truncate(false)
            .write(true)
            .open(&file_path)?;

        // Ensure file is the right size (for missing files)
        let expected_size = block_map
            .files
            .iter()
            .find(|bf| bf.filename == filename)
            .map(|bf| bf.file_size)
            .unwrap_or(0);
        let current_size = f.metadata()?.len();
        if current_size < expected_size {
            f.set_len(expected_size)?;
        }

        f.seek(SeekFrom::Start(file_offset as u64))?;
        f.write_all(&data[..write_len])?;
        files_touched.insert(filename.clone());
    }

    // Step 7: Optional re-verify
    if re_verify {
        let verification = verify::verify(file_set, dir);
        if verification.all_correct() {
            info!(
                blocks = repaired_blocks.len(),
                files = files_touched.len(),
                "Repair successful — all files verified"
            );
            Ok(RepairResult {
                success: true,
                blocks_repaired: repaired_blocks.len() as u32,
                files_repaired: files_touched.len(),
                message: "All files repaired and verified".to_string(),
            })
        } else {
            Err(RepairError::VerifyFailed(format!("{verification}")))
        }
    } else {
        info!(
            blocks = repaired_blocks.len(),
            files = files_touched.len(),
            "Repair complete (re-verify skipped)"
        );
        Ok(RepairResult {
            success: true,
            blocks_repaired: repaired_blocks.len() as u32,
            files_repaired: files_touched.len(),
            message: "All files repaired (re-verify skipped)".to_string(),
        })
    }
}

// ---------------------------------------------------------------------------
// Block mapping
// ---------------------------------------------------------------------------

/// Maps between global block indices and per-file block positions.
struct BlockMap {
    files: Vec<BlockFile>,
    total_blocks: u32,
}

struct BlockFile {
    filename: String,
    file_size: u64,
    block_count: u32,
    /// First global block index for this file.
    start_block: u32,
}

fn build_block_map(file_set: &Par2FileSet) -> BlockMap {
    let slice_size = file_set.slice_size;
    let mut files = Vec::new();
    let mut block_offset = 0u32;

    // Sort files by file ID for deterministic ordering (same as par2cmdline)
    let mut sorted_files: Vec<_> = file_set.files.values().collect();
    sorted_files.sort_by_key(|f| f.file_id);

    for f in sorted_files {
        let block_count = if slice_size == 0 {
            0
        } else {
            f.size.div_ceil(slice_size) as u32
        };
        files.push(BlockFile {
            filename: f.filename.clone(),
            file_size: f.size,
            block_count,
            start_block: block_offset,
        });
        block_offset += block_count;
    }

    BlockMap {
        files,
        total_blocks: block_offset,
    }
}

impl BlockMap {
    /// Convert a global block index to (filename, file_byte_offset, bytes_to_write).
    fn global_to_file(&self, global_idx: usize, slice_size: usize) -> (String, usize, usize) {
        let global = global_idx as u32;
        for f in &self.files {
            if global >= f.start_block && global < f.start_block + f.block_count {
                let local_block = (global - f.start_block) as usize;
                let file_offset = local_block * slice_size;
                // Last block may be shorter than slice_size
                let remaining = f.file_size as usize - file_offset;
                let write_len = remaining.min(slice_size);
                return (f.filename.clone(), file_offset, write_len);
            }
        }
        panic!("Global block index {global_idx} out of range");
    }
}

fn find_damaged_block_indices(verify_result: &VerifyResult, block_map: &BlockMap) -> Vec<usize> {
    let mut indices = Vec::new();

    for damaged in &verify_result.damaged {
        if let Some(bf) = block_map
            .files
            .iter()
            .find(|f| f.filename == damaged.filename)
        {
            if damaged.damaged_block_indices.is_empty() {
                // No per-block info — assume all blocks damaged
                for i in 0..bf.block_count {
                    indices.push((bf.start_block + i) as usize);
                }
            } else {
                // Use precise per-block damage info
                for &local_idx in &damaged.damaged_block_indices {
                    indices.push((bf.start_block + local_idx) as usize);
                }
            }
        }
    }

    for missing in &verify_result.missing {
        if let Some(bf) = block_map
            .files
            .iter()
            .find(|f| f.filename == missing.filename)
        {
            for i in 0..bf.block_count {
                indices.push((bf.start_block + i) as usize);
            }
        }
    }

    indices.sort();
    indices.dedup();
    indices
}

/// Read a single source block from disk, reusing file handles.
fn read_source_block(
    dir: &Path,
    block_map: &BlockMap,
    global_idx: usize,
    slice_size: usize,
    file_handles: &mut HashMap<String, std::fs::File>,
) -> std::io::Result<Vec<u8>> {
    let (filename, file_offset, _) = block_map.global_to_file(global_idx, slice_size);

    let handle = match file_handles.entry(filename.clone()) {
        Entry::Occupied(e) => e.into_mut(),
        Entry::Vacant(e) => {
            let path = dir.join(&filename);
            e.insert(std::fs::File::open(&path)?)
        }
    };
    handle.seek(SeekFrom::Start(file_offset as u64))?;

    let mut buf = vec![0u8; slice_size]; // zero-initialized for last-block padding
    let mut total = 0;
    while total < slice_size {
        match handle.read(&mut buf[total..]) {
            Ok(0) => break,
            Ok(n) => total += n,
            Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(buf)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Test that the basic RS encode→decode round-trip works with the D×D approach.
    /// 2 data blocks, 2 recovery blocks, lose both data blocks, recover.
    #[test]
    fn test_rs_roundtrip_simple() {
        // 2 input "blocks" of 4 bytes each (2 u16 values)
        let input0: Vec<u8> = vec![0x01, 0x00, 0x02, 0x00]; // [1, 2] as u16 LE
        let input1: Vec<u8> = vec![0x03, 0x00, 0x04, 0x00]; // [3, 4] as u16 LE

        let input_count = 2;
        let recovery_exponents = vec![0u32, 1u32];

        // Build encoding matrix to compute recovery blocks
        let enc = GfMatrix::par2_encoding_matrix(input_count, &recovery_exponents);

        // Compute recovery blocks
        let slice_size = 4;
        let u16_per_slice = slice_size / 2;
        let inputs = [&input0, &input1];

        let mut recovery0 = vec![0u8; slice_size];
        let mut recovery1 = vec![0u8; slice_size];

        for pos in 0..u16_per_slice {
            let off = pos * 2;
            let mut r0: u16 = 0;
            let mut r1: u16 = 0;
            for (i, inp) in inputs.iter().enumerate() {
                let val = u16::from_le_bytes([inp[off], inp[off + 1]]);
                r0 = gf::add(r0, gf::mul(enc.get(2, i), val));
                r1 = gf::add(r1, gf::mul(enc.get(3, i), val));
            }
            recovery0[off] = r0 as u8;
            recovery0[off + 1] = (r0 >> 8) as u8;
            recovery1[off] = r1 as u8;
            recovery1[off + 1] = (r1 >> 8) as u8;
        }

        // Now "lose" both input blocks. Use D×D approach to recover.
        // D = 2 (both blocks damaged), N = 2.
        // Since all blocks are damaged, there are no intact blocks to subtract.
        // adjusted[e] = recovery[e] (no intact contributions to remove).

        let constants = par2_input_constants(input_count);
        let damaged_indices = [0usize, 1usize];

        // Build D×D Vandermonde: V[e][j] = c_{damaged_j}^{exp_e}
        let num_damaged = damaged_indices.len();
        let mut vandermonde = GfMatrix::zeros(num_damaged, num_damaged);
        for (e, &exp) in recovery_exponents.iter().enumerate() {
            for (j, &dmg_idx) in damaged_indices.iter().enumerate() {
                vandermonde.set(e, j, gf::pow(constants[dmg_idx], exp));
            }
        }

        let inv = vandermonde.invert().expect("Should be invertible");

        // Apply inverse: repaired[j] = Σ_e inv[j][e] * adjusted[e]
        let adjusted = [&recovery0[..], &recovery1[..]];
        let mut result0 = vec![0u8; slice_size];
        let mut result1 = vec![0u8; slice_size];

        for pos in 0..u16_per_slice {
            let off = pos * 2;
            let mut out0: u16 = 0;
            let mut out1: u16 = 0;
            for (e, adj) in adjusted.iter().enumerate() {
                let val = u16::from_le_bytes([adj[off], adj[off + 1]]);
                out0 = gf::add(out0, gf::mul(inv.get(0, e), val));
                out1 = gf::add(out1, gf::mul(inv.get(1, e), val));
            }
            result0[off] = out0 as u8;
            result0[off + 1] = (out0 >> 8) as u8;
            result1[off] = out1 as u8;
            result1[off + 1] = (out1 >> 8) as u8;
        }

        assert_eq!(result0, input0, "Recovered block 0 should match original");
        assert_eq!(result1, input1, "Recovered block 1 should match original");
    }

    /// Test D×D approach with partial damage (some blocks intact).
    /// 4 data blocks, lose 2, recover using 2 recovery blocks.
    #[test]
    fn test_rs_roundtrip_partial_damage() {
        let slice_size = 4;
        let input_count = 4;
        let recovery_exponents = vec![0u32, 1u32];

        let inputs: Vec<Vec<u8>> = vec![
            vec![0x01, 0x00, 0x02, 0x00],
            vec![0x03, 0x00, 0x04, 0x00],
            vec![0x05, 0x00, 0x06, 0x00],
            vec![0x07, 0x00, 0x08, 0x00],
        ];

        let enc = GfMatrix::par2_encoding_matrix(input_count, &recovery_exponents);

        // Compute recovery blocks
        let mut recovery = vec![vec![0u8; slice_size]; 2];
        for pos in 0..(slice_size / 2) {
            let off = pos * 2;
            for (e, rec) in recovery.iter_mut().enumerate() {
                let mut val: u16 = 0;
                for (i, inp) in inputs.iter().enumerate() {
                    let d = u16::from_le_bytes([inp[off], inp[off + 1]]);
                    val = gf::add(val, gf::mul(enc.get(input_count + e, i), d));
                }
                rec[off] = val as u8;
                rec[off + 1] = (val >> 8) as u8;
            }
        }

        // "Lose" blocks 1 and 3. Blocks 0 and 2 are intact.
        let damaged_indices = [1usize, 3usize];
        let intact_indices: Vec<usize> = (0..input_count)
            .filter(|i| !damaged_indices.contains(i))
            .collect();
        let num_damaged = damaged_indices.len();

        let constants = par2_input_constants(input_count);

        // Build D×D Vandermonde
        let mut vandermonde = GfMatrix::zeros(num_damaged, num_damaged);
        for (e, &exp) in recovery_exponents.iter().enumerate() {
            for (j, &dmg_idx) in damaged_indices.iter().enumerate() {
                vandermonde.set(e, j, gf::pow(constants[dmg_idx], exp));
            }
        }
        let inv = vandermonde.invert().expect("Should be invertible");

        // Compute adjusted recovery: subtract intact contributions
        let mut adjusted = recovery.clone();
        for &intact_idx in &intact_indices {
            let c_i = constants[intact_idx];
            for (e, adj) in adjusted.iter_mut().enumerate() {
                let coeff = gf::pow(c_i, recovery_exponents[e]);
                gf_simd::mul_add_buffer(adj, &inputs[intact_idx], coeff);
            }
        }

        // Apply inverse
        let adj_refs: Vec<&[u8]> = adjusted.iter().map(|v| v.as_slice()).collect();
        let mut outputs: Vec<Vec<u8>> = (0..num_damaged).map(|_| vec![0u8; slice_size]).collect();

        for (j, dst) in outputs.iter_mut().enumerate() {
            let coeffs: Vec<u16> = (0..num_damaged).map(|e| inv.get(j, e)).collect();
            gf_simd::mul_add_multi(dst, &adj_refs, &coeffs);
        }

        // Verify recovery
        assert_eq!(outputs[0], inputs[1], "Recovered block 1 should match");
        assert_eq!(outputs[1], inputs[3], "Recovered block 3 should match");
    }
}