// md-cli 0.6.0 - Docs.rs

//! Integration tests for `md repair` (v0.6.0; Tranche B.6 of v0.22.x
//! follow-ups cycle per plan §4.B.3).
//!
//! Covers all 5 cells locked in the plan:
//!   1. `repair_single_chunk_happy_path`
//!   2. `repair_multi_chunk_all_valid_passthrough`
//!   3. `repair_multi_chunk_one_corrupted`
//!   4. `repair_multi_chunk_atomic_failure_per_d28`
//!   5. `repair_json_multi_chunk_envelope_shape`
//!
//! Fixtures are generated by shelling out to `md encode` (mirrors the
//! `tests/cmd_decode.rs::encode()` idiom for self-contained
//! reproducibility). Multi-chunk fixtures use `--force-chunked` to
//! guarantee 2+ chunks regardless of the underlying template size.

#![allow(missing_docs)]

use assert_cmd::Command;
use std::process::Command as StdCommand;

/// Codex32 alphabet — mirrors `md_codec::chunk::CODEX32_ALPHABET` (which
/// is module-private) for deterministic single-char corruption. Stable
/// per BIP 173.
///
/// The index of a byte in this array is the 5-bit symbol value it
/// encodes (`q` = 0, `p` = 1, … `l` = 31); `corrupt_at` relies on that
/// mapping in both directions (character → symbol → character).
const CODEX32_ALPHABET: &[u8; 32] = b"qpzry9x8gf2tvdw0s3jn54khce6mua7l";

// ──────────────────────────────────────────────────────────────────────────
// Fixture generators
// ──────────────────────────────────────────────────────────────────────────

/// Runs `md encode --force-chunked <template>` and returns the emitted
/// chunk strings in output order.
///
/// Only stdout lines that begin with `md1` are kept, which drops the
/// leading `chunk-set-id: 0x...` line. Every fixture in this file goes
/// through the chunked encoding, whatever the chunk count, because
/// `md_codec::decode_with_correction` is chunked-format-only (it runs the
/// chunk-header parse + reassemble pipeline after BCH correction);
/// non-chunked single-string md1 must use `decode_md1_string` instead.
///
/// # Panics
///
/// Panics if the binary cannot be launched, exits unsuccessfully, writes
/// non-UTF-8 stdout, or emits no `md1` line at all.
fn encode_chunked(template: &str) -> Vec<String> {
    let md_bin = assert_cmd::cargo::cargo_bin("md");
    let output = StdCommand::new(md_bin)
        .arg("encode")
        .arg("--force-chunked")
        .arg(template)
        .output()
        .expect("invoke md encode --force-chunked");
    assert!(
        output.status.success(),
        "md encode --force-chunked {template:?} failed: stderr={}",
        String::from_utf8_lossy(&output.stderr)
    );

    let text = String::from_utf8(output.stdout).expect("stdout utf-8");
    let mut chunks: Vec<String> = Vec::new();
    for line in text.lines() {
        // Anything that is not a chunk (e.g. the chunk-set-id header) is skipped.
        if line.starts_with("md1") {
            chunks.push(line.to_owned());
        }
    }
    assert!(!chunks.is_empty(), "expected at least one chunk; got {chunks:?}");
    chunks
}

/// Multi-chunk fixture: 4-cosigner sortedmulti with per-cosigner deep
/// hardened paths. Mirrors md-codec's `tests/bch_decode.rs::multi_chunk_descriptor`
/// (per-cosigner path body ~180 bits × 4 cosigners ~720 bits — comfortably
/// above the 320-bit single-string limit, so chunking is required).
///
/// The trailing `\` on each source line is a string-literal line
/// continuation: the newline and the next line's leading whitespace are
/// dropped, so the value is one unbroken template with no embedded
/// whitespace.
const MULTI_CHUNK_TEMPLATE: &str =
    "wsh(sortedmulti(2,@0/1'/2'/3'/4'/5'/6'/7'/8'/9'/10'/11'/12'/13'/14'/15'/<0;1>/*,\
     @1/101'/102'/103'/104'/105'/106'/107'/108'/109'/110'/111'/112'/113'/114'/115'/<0;1>/*,\
     @2/201'/202'/203'/204'/205'/206'/207'/208'/209'/210'/211'/212'/213'/214'/215'/<0;1>/*,\
     @3/301'/302'/303'/304'/305'/306'/307'/308'/309'/310'/311'/312'/313'/314'/315'/<0;1>/*))";

/// Encodes [`MULTI_CHUNK_TEMPLATE`] through [`encode_chunked`] and returns
/// the resulting chunks.
///
/// Cells 2/3/4/5 exercise multi-chunk semantics, so this helper refuses to
/// hand back fewer than two chunks.
///
/// # Panics
///
/// Panics if the encoding yields fewer than 2 chunks (or if
/// `encode_chunked` itself panics).
fn encode_multi_chunk() -> Vec<String> {
    let chunks = encode_chunked(MULTI_CHUNK_TEMPLATE);
    let produced = chunks.len();
    assert!(
        produced >= 2,
        "MULTI_CHUNK_TEMPLATE must produce 2+ chunks; got {produced}: {chunks:?}"
    );
    chunks
}

/// Replaces exactly one data-part symbol of `chunk` with a different one.
///
/// `pos` is 0-indexed into the data-part, i.e. the characters after the
/// 3-character `md1` prefix. The character there is looked up
/// (case-insensitively) in [`CODEX32_ALPHABET`], its 5-bit symbol value is
/// XORed with `xor_mask & 0x1F`, and the lowercase alphabet character for
/// the new value is written back. All other characters are untouched.
///
/// Because the effective mask is required to be non-zero, the returned
/// string always differs from `chunk` in exactly one position; a chunk
/// that was valid before the call therefore no longer matches its
/// checksum (a single-symbol error, which the tests expect the BCH layer
/// to detect).
///
/// # Panics
///
/// - if the low five bits of `xor_mask` are all zero (the "corruption"
///   would silently be a no-op),
/// - if `pos` lies outside the data-part of `chunk`,
/// - if the targeted character is not in the codex32 alphabet.
fn corrupt_at(chunk: &str, pos: usize, xor_mask: u8) -> String {
    const HRP_LEN: usize = 3; // "md1"

    // Only the low five bits can affect a 5-bit symbol.
    let delta = xor_mask & 0x1F;
    assert!(
        delta != 0,
        "xor_mask {xor_mask:#04x} has no bits set in the low 5; corruption would be a no-op"
    );

    let mut chars: Vec<char> = chunk.chars().collect();
    let abs_idx = HRP_LEN + pos;
    assert!(
        abs_idx < chars.len(),
        "pos {pos} is outside the data-part of a {}-char chunk",
        chars.len()
    );

    // Compare as `char` rather than truncating with `as u8`, so a
    // non-ASCII character can never alias an alphabet byte.
    let target = chars[abs_idx].to_ascii_lowercase();
    let original_sym = CODEX32_ALPHABET
        .iter()
        .position(|&b| char::from(b) == target)
        .expect("char in codex32 alphabet");

    // Both operands are < 32, so the XOR stays a valid alphabet index.
    let new_sym = original_sym ^ usize::from(delta);
    chars[abs_idx] = char::from(CODEX32_ALPHABET[new_sym]);
    chars.into_iter().collect()
}

/// Applies a sequence of single-symbol corruptions to `chunk`.
///
/// Each `(pos, mask)` pair is forwarded to [`corrupt_at`] in slice order,
/// every call operating on the output of the previous one. An empty slice
/// returns `chunk` unchanged.
///
/// Callers should use distinct positions: two entries that hit the same
/// position XOR-combine and can cancel each other out. The tests pass five
/// distinct positions with the intent of exceeding the BCH correction
/// capacity (t=4) — NOTE(review): that is an expectation about the codec,
/// not something this helper can enforce.
fn corrupt_many(chunk: &str, positions: &[(usize, u8)]) -> String {
    let mut damaged = chunk.to_string();
    for &(pos, mask) in positions {
        damaged = corrupt_at(&damaged, pos, mask);
    }
    damaged
}

// ──────────────────────────────────────────────────────────────────────────
// Cell 1: single-chunk happy path — 1 char corruption, exit 5, correction
// restores original chunk. Fixture is a small wpkh template encoded with
// --force-chunked to produce 1 chunked-form chunk (decode_with_correction
// requires chunked-form input; non-chunked single-string md1 fails the
// chunk-header parse + reassemble step that the BCH pipeline forwards to).
// ──────────────────────────────────────────────────────────────────────────
#[test]
fn repair_single_chunk_happy_path() {
    let chunks = encode_chunked("wpkh(@0/<0;1>/*)");
    assert_eq!(
        chunks.len(),
        1,
        "single-chunk fixture must produce exactly 1 chunk; got {chunks:?}"
    );
    let original = chunks[0].as_str();

    // The chunk header is 37 bits (~8 codex32 symbols); data-part position
    // 10 therefore lands past the header, inside the payload.
    let damaged = corrupt_at(original, 10, 0b10110);

    let output = Command::cargo_bin("md")
        .unwrap()
        .arg("repair")
        .arg(&damaged)
        .output()
        .expect("invoke md repair");

    let exit_code = output.status.code().expect("exited normally");
    assert_eq!(
        exit_code,
        5,
        "expected exit 5 (REPAIR_APPLIED); stderr={}",
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout).expect("stdout utf-8");

    // Human-readable report: header plus the per-chunk correction line.
    assert!(
        stdout.contains("# Repair report"),
        "expected `# Repair report` header; got {stdout:?}"
    );
    assert!(
        stdout.contains("md1 chunk 0: 1 correction at position 10"),
        "expected per-chunk correction line at position 10; got {stdout:?}"
    );

    // The corrected chunk must be emitted on a line of its own and be
    // identical to the pre-corruption encoding.
    let restored = stdout.lines().any(|line| line == original);
    assert!(
        restored,
        "expected corrected chunk to match the original valid md1; got {stdout:?}"
    );
}

// ──────────────────────────────────────────────────────────────────────────
// Cell 2: multi-chunk all-valid pass-through — 2+ valid chunks, exit 0,
// no Repair report header, all chunks echoed.
// ──────────────────────────────────────────────────────────────────────────
#[test]
fn repair_multi_chunk_all_valid_passthrough() {
    // The 4-cosigner sortedmulti template is large enough to need chunking
    // on its own; --force-chunked (inside the helper) makes that certain.
    let chunks = encode_multi_chunk();

    let output = Command::cargo_bin("md")
        .unwrap()
        .arg("repair")
        .args(&chunks)
        .output()
        .expect("invoke md repair");

    let exit_code = output.status.code().expect("exited normally");
    assert_eq!(
        exit_code,
        0,
        "expected exit 0 for clean multi-chunk pass-through; stderr={}",
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout).expect("stdout utf-8");
    assert!(
        !stdout.contains("# Repair report"),
        "clean input must not emit a Repair report; got {stdout:?}"
    );

    // Pass-through: every input chunk shows up verbatim as its own line.
    let emitted: Vec<&str> = stdout.lines().collect();
    for c in &chunks {
        assert!(
            emitted.contains(&c.as_str()),
            "expected pass-through chunk on stdout; missing {c:?}; got {stdout:?}"
        );
    }
}

// ──────────────────────────────────────────────────────────────────────────
// Cell 3: multi-chunk one-corrupted — 2+ chunks (`encode_multi_chunk` only
// asserts a minimum of 2), 1 corrupted by 1 char, exit 5, exactly 1
// correction with the correct chunk_index.
// ──────────────────────────────────────────────────────────────────────────
#[test]
fn repair_multi_chunk_one_corrupted() {
    let chunks = encode_multi_chunk();

    // Damage the middle chunk so the reported chunk_index is non-zero.
    let target_idx = chunks.len() / 2;
    let corrupted_chunks: Vec<String> = chunks
        .iter()
        .enumerate()
        .map(|(i, c)| {
            if i == target_idx {
                corrupt_at(c, 3, 0b01011)
            } else {
                c.clone()
            }
        })
        .collect();

    let output = Command::cargo_bin("md")
        .unwrap()
        .arg("repair")
        .args(&corrupted_chunks)
        .output()
        .expect("invoke md repair");

    let exit_code = output.status.code().expect("exited normally");
    assert_eq!(
        exit_code,
        5,
        "expected exit 5 (REPAIR_APPLIED); stderr={}",
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout).expect("stdout utf-8");
    assert!(
        stdout.contains("# Repair report"),
        "expected `# Repair report` header; got {stdout:?}"
    );

    // D27 schema: the correction line must carry the damaged chunk's index.
    let expected_line = format!("md1 chunk {target_idx}: 1 correction at position 3");
    assert!(
        stdout.contains(&expected_line),
        "expected per-chunk correction line {expected_line:?}; got {stdout:?}"
    );

    // After repair, stdout must contain every original chunk verbatim.
    let emitted: Vec<&str> = stdout.lines().collect();
    for c in &chunks {
        assert!(
            emitted.contains(&c.as_str()),
            "expected restored chunk on stdout; missing {c:?}; got {stdout:?}"
        );
    }
}

// ──────────────────────────────────────────────────────────────────────────
// Cell 4: multi-chunk atomic-failure per D28 — 2+ chunks (`encode_multi_chunk`
// only asserts a minimum of 2), chunk N has 5+ errors → exit 2,
// chunk_index=N named in stderr, NO partial output on stdout (atomic).
// ──────────────────────────────────────────────────────────────────────────
#[test]
fn repair_multi_chunk_atomic_failure_per_d28() {
    let chunks = encode_multi_chunk();

    // Index 1 (deliberately not the first chunk) checks that chunk_index
    // survives the error path.
    let target_idx = 1usize;
    let victim = &chunks[target_idx];

    // Five single-symbol flips spread over the data-part — one more than
    // the BCH correction capacity of t=4.
    let dp_len = victim.len() - 3; // strip "md1"
    let positions: [(usize, u8); 5] = [
        (0, 0b00001),
        (dp_len / 5, 0b00010),
        (2 * dp_len / 5, 0b00100),
        (3 * dp_len / 5, 0b01000),
        (dp_len - 1, 0b10000),
    ];
    let mut corrupted_chunks = chunks.clone();
    corrupted_chunks[target_idx] = corrupt_many(victim, &positions);

    let output = Command::cargo_bin("md")
        .unwrap()
        .arg("repair")
        .args(&corrupted_chunks)
        .output()
        .expect("invoke md repair");

    let exit_code = output.status.code().expect("exited normally");
    assert_eq!(
        exit_code,
        2,
        "expected exit 2 (atomic-fail per D28); stderr={}",
        String::from_utf8_lossy(&output.stderr)
    );

    // D28: the failing chunk's index has to be named on stderr.
    let stderr = String::from_utf8_lossy(&output.stderr);
    let expected_chunk_phrase = format!("chunk {target_idx}");
    assert!(
        stderr.contains(&expected_chunk_phrase),
        "expected stderr to name failing chunk {target_idx:?} (looking for {expected_chunk_phrase:?}); got stderr={stderr:?}"
    );

    // D28: nothing may reach stdout on failure — md_codec is itself atomic
    // and the CLI does not emit until the call returns Ok.
    let stdout = String::from_utf8(output.stdout).expect("stdout utf-8");
    assert!(
        stdout.is_empty(),
        "D28 atomic-fail: stdout must be empty; got {stdout:?}"
    );
    // Redundant with the emptiness check by design: spell out that no md1
    // chunk leaked.
    assert!(
        !stdout.contains("md1"),
        "D28 atomic-fail: no md1 chunks on stdout; got {stdout:?}"
    );
}

// ──────────────────────────────────────────────────────────────────────────
// Cell 5: JSON multi-chunk envelope shape — `--json` over 2+ chunks
// (`encode_multi_chunk` only asserts a minimum of 2) with 1 corrupted
// emits the RepairJson envelope with schema_version="1", kind="md1",
// multi-chunk corrected_chunks + repairs arrays per D27.
// Schema byte-matches `mnemonic-toolkit/src/cmd/repair.rs::RepairJson`.
// ──────────────────────────────────────────────────────────────────────────
#[test]
fn repair_json_multi_chunk_envelope_shape() {
    let chunks = encode_multi_chunk();

    // One single-symbol flip in the middle chunk.
    let target_idx = chunks.len() / 2;
    let mut corrupted_chunks = chunks.clone();
    corrupted_chunks[target_idx] = corrupt_at(&chunks[target_idx], 7, 0b11001);

    let output = Command::cargo_bin("md")
        .unwrap()
        .arg("repair")
        .arg("--json")
        .args(&corrupted_chunks)
        .output()
        .expect("invoke md repair --json");

    let exit_code = output.status.code().expect("exited normally");
    assert_eq!(
        exit_code,
        5,
        "expected exit 5 for JSON-mode multi-chunk repair; stderr={}",
        String::from_utf8_lossy(&output.stderr)
    );

    let stdout = String::from_utf8(output.stdout).expect("stdout utf-8");
    let envelope: serde_json::Value =
        serde_json::from_str(stdout.trim()).expect("stdout parses as JSON");

    // Envelope header fields (D27 schema, mirroring the toolkit's
    // `RepairJson` shape).
    assert_eq!(
        envelope["schema_version"],
        serde_json::Value::from("1"),
        "schema_version must equal \"1\" (string)"
    );
    assert_eq!(
        envelope["kind"],
        serde_json::Value::from("md1"),
        "kind must equal \"md1\""
    );

    // corrected_chunks: one entry per input chunk, same order, each equal
    // to the pre-corruption encoding.
    let corrected = envelope["corrected_chunks"]
        .as_array()
        .expect("corrected_chunks must be a JSON array");
    assert_eq!(
        corrected.len(),
        chunks.len(),
        "corrected_chunks length must equal input chunk count"
    );
    for (i, (got, want)) in corrected.iter().zip(&chunks).enumerate() {
        assert_eq!(
            *got,
            serde_json::Value::from(want.as_str()),
            "corrected_chunks[{i}] must equal the restored chunk"
        );
    }

    // repairs: pass-through chunks are filtered out, so only the single
    // damaged chunk is listed.
    let repairs = envelope["repairs"]
        .as_array()
        .expect("repairs must be a JSON array");
    assert_eq!(repairs.len(), 1, "exactly 1 corrupted chunk → 1 repair entry");

    let repair = &repairs[0];
    assert_eq!(
        repair["chunk_index"],
        serde_json::Value::from(target_idx as u64),
        "chunk_index must equal the corrupted chunk's index"
    );
    assert_eq!(
        repair["original_chunk"],
        serde_json::Value::from(corrupted_chunks[target_idx].as_str())
    );
    assert_eq!(
        repair["corrected_chunk"],
        serde_json::Value::from(chunks[target_idx].as_str())
    );

    // corrected_positions: exactly the one flipped symbol, with distinct
    // before/after characters.
    let positions = repair["corrected_positions"]
        .as_array()
        .expect("corrected_positions must be a JSON array");
    assert_eq!(positions.len(), 1, "single-flip → one position entry");

    let entry = &positions[0];
    assert_eq!(entry["position"], serde_json::Value::from(7u32));
    assert!(entry["was"].is_string(), "was must be a string");
    assert!(entry["now"].is_string(), "now must be a string");
    assert_ne!(entry["was"], entry["now"], "was != now for a real correction");
}