// bwipp-rs 0.1.0 - Docs.rs

//! Code 128.
//!
//! Variable-length, full-ASCII linear barcode. Three character subsets
//! (A: control + uppercase + digits; B: uppercase + lowercase + digits;
//! C: pairs of digits), with shift codes to switch between them mid-symbol.
//! Each character is an 11-module pattern of 3 bars + 3 spaces, and the
//! symbol ends with a mandatory mod-103 check digit followed by a stop
//! pattern that uses 13 modules.
//!
//! Reference: ANSI/AIM BC4-1995 and the BWIPP `code128` encoder.

use crate::encoding::LinearPattern;
use crate::error::Error;
use crate::options::Options;

/// Module patterns for Code 128 codeword values 0..=106, indexed by value.
///
/// Every entry is a string of `1`/`0` modules. Entries 0..=105 are 11
/// modules long; entry 106 (the stop pattern) is 13 modules long because it
/// includes the terminator bar. The patterns alternate bar and space runs
/// and all start with a bar. Values 103, 104 and 105 are Start Code A, B
/// and C respectively.
///
/// The encoders in this file index the table directly with a codeword
/// value, so every codeword handed to them must be `<= 106`.
const PATTERNS: &[&str] = &[
    "11011001100",
    "11001101100",
    "11001100110",
    "10010011000",
    "10010001100",
    "10001001100",
    "10011001000",
    "10011000100",
    "10001100100",
    "11001001000",
    "11001000100",
    "11000100100",
    "10110011100",
    "10011011100",
    "10011001110",
    "10111001100",
    "10011101100",
    "10011100110",
    "11001110010",
    "11001011100",
    "11001001110",
    "11011100100",
    "11001110100",
    "11101101110",
    "11101001100",
    "11100101100",
    "11100100110",
    "11101100100",
    "11100110100",
    "11100110010",
    "11011011000",
    "11011000110",
    "11000110110",
    "10100011000",
    "10001011000",
    "10001000110",
    "10110001000",
    "10001101000",
    "10001100010",
    "11010001000",
    "11000101000",
    "11000100010",
    "10110111000",
    "10110001110",
    "10001101110",
    "10111011000",
    "10111000110",
    "10001110110",
    "11101110110",
    "11010001110",
    "11000101110",
    "11011101000",
    "11011100010",
    "11011101110",
    "11101011000",
    "11101000110",
    "11100010110",
    "11101101000",
    "11101100010",
    "11100011010",
    "11101111010",
    "11001000010",
    "11110001010",
    "10100110000",
    "10100001100",
    "10010110000",
    "10010000110",
    "10000101100",
    "10000100110",
    "10110010000",
    "10110000100",
    "10011010000",
    "10011000010",
    "10000110100",
    "10000110010",
    "11000010010",
    "11001010000",
    "11110111010",
    "11000010100",
    "10001111010",
    "10100111100",
    "10010111100",
    "10010011110",
    "10111100100",
    "10011110100",
    "10011110010",
    "11110100100",
    "11110010100",
    "11110010010",
    "11011011110",
    "11011110110",
    "11110110110",
    "10101111000",
    "10100011110",
    "10001011110",
    "10111101000",
    "10111100010",
    "11110101000",
    "11110100010",
    "10111011110",
    "10111101110",
    "11101011110",
    "11110101110",
    // 103 Start A, 104 Start B, 105 Start C (all 11 modules), 106 Stop
    // (13 modules). Fixed in checkpoint 49 — the prior values were
    // transcribed incorrectly and broke decoder compatibility.
    "11010000100",
    "11010010000",
    "11010011100",
    "1100011101011",
];

/// Start Code A codeword (index into `PATTERNS`).
const START_A: u32 = 103;
/// Start Code B codeword (index into `PATTERNS`).
const START_B: u32 = 104;
/// Start Code C codeword (index into `PATTERNS`).
const START_C: u32 = 105;
/// Stop codeword; its `PATTERNS` entry is the only 13-module pattern.
const STOP: u32 = 106;
/// FNC1 codeword. Value is 102 in every subset.
const FNC1: u32 = 102;
/// FNC2 codeword. Value is 97 in subsets A and B (subset C does not
/// support FNC2 — the encoder must switch out of C first).
const FNC2: u32 = 97;
/// FNC3 codeword. Value is 96 in subsets A and B (subset C does not
/// support FNC3 — same constraint as FNC2).
const FNC3: u32 = 96;

/// One element of a Code 128 input stream. Most encoders pass `Ascii` bytes
/// only; GS1-128 callers also use `Fnc1` for the leading marker and inter-
/// element separators. Composite (CC-A/CC-B/CC-C) callers append `LinkA`
/// or `LinkC` as a terminator to flag the symbol as having a 2-D companion.
///
/// Only `Ascii` carries a data byte; every other variant is a marker that
/// the compactors translate straight into a single codeword.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub(crate) enum Token {
    /// ASCII byte (0x00..=0x7F). `encode_tokens` rejects larger values.
    Ascii(u8),
    /// Function 1 special character (codeword 102 regardless of subset).
    Fnc1,
    /// Function 2 — codeword 97 in subsets A/B (not allowed in C).
    /// Reachable from user input via `^FNC2` when `parsefnc=true`.
    Fnc2,
    /// Function 3 — codeword 96 in subsets A/B (not allowed in C).
    /// Reachable from user input via `^FNC3` when `parsefnc=true`.
    Fnc3,
    /// Composite linkage A — for CC-A / CC-B companions. BWIPP `^LNKA`
    /// (lookup `seta/setb/setc_legacy[lka]` at code128 lines 9633-9644).
    /// Maps to a subset-switch codeword based on the current subset:
    ///   A → swb (100), B → swc (99), C → swa (101).
    /// Always terminal — no data follows in BWIPP's gs1-128composite.
    /// Pushed by `gs1_128::tokenize_with_linkage` for composite encoders.
    LinkA,
    /// Composite linkage C — for CC-C companion (PDF417). Maps to:
    ///   A → swc (99), B → swa (101), C → swb (100). Pushed by
    /// `gs1_128::tokenize_with_linkage` for the GS1-128 CC-C composite.
    LinkC,
}

/// Parsed BWIPP-exposed Code 128 options. Mirrors BWIPP
/// `bwipp_code128` lines 9099-9112: `parsefnc`, `raw`, `parse`,
/// `newencoder`, `suppressc`, `unlatchextbeforec`. Used by [`encode`]
/// to thread caller-supplied values through the encoder. Every flag
/// defaults to `false`.
#[derive(Clone, Copy, Debug, Default)]
struct Code128Opts {
    /// `parsefnc=true` — [`encode`] tokenises the input with
    /// `parse_input_with_fncs`, recognising `^FNC1`-style escapes.
    parsefnc: bool,
    /// `raw=true` — the input is a `^NNN ^NNN ...` codeword stream that
    /// [`encode`] parses directly, bypassing the text compactor.
    raw: bool,
    /// `parse=true` — [`encode`] first substitutes `^NNN` ordinals and
    /// control-name escapes via `parse_text_escapes_code128`.
    parse: bool,
    /// BWIPP 2024+ alternate compactor. The Rust port runs the
    /// **legacy** compactor in either mode — the legacy and `new`
    /// outputs match byte-for-byte for the vast majority of inputs;
    /// the only exception is mixed Latin-1 + multi-digit inputs where
    /// BWIPP's new compactor can choose a slightly shorter
    /// representation. Such inputs are flagged by
    /// `input_triggers_new_encoder_divergence` and rejected with
    /// `Error::InvalidData` rather than producing a divergent symbol.
    /// `check_code128_opts` clears this flag whenever `raw` is set.
    newencoder: bool,
    /// `suppressc=true` — disables subset-C selection in the auto-
    /// encoder (BWIPP `can_c0`/`can_c1` always return false).
    suppressc: bool,
    /// `unlatchextbeforec=true` — same effect as `suppressc` on
    /// `can_c1` (mid-message C suppression) but leaves `can_c0`
    /// alone. BWIPP `bwipp.js:9793`. NOTE: `pick_codes_with_opts`
    /// currently treats this flag exactly like `suppressc`.
    unlatchextbeforec: bool,
}

/// Detects the input shape on which BWIPP's "new" compactor diverges from
/// the "legacy" one that this port runs.
///
/// Returns `true` when any byte in `0x80..=0xFF` (a Latin-1 / extended
/// byte, or any byte of a multi-byte UTF-8 sequence) is *immediately*
/// followed by at least four ASCII digits. That is the situation in which
/// the new compactor can move to subset C while legacy stays in subset B,
/// so `newencoder=true` callers get `Error::InvalidData` instead of a
/// longer (but still valid) symbol than BWIPP would emit.
///
/// Note on parity: a run of four digits is the shortest even run that
/// subset C can absorb, and any longer run — odd or even — contains such
/// an even tail, so the run's parity does not change the answer.
fn input_triggers_new_encoder_divergence(data: &str) -> bool {
    // One high byte plus the four digits that must directly follow it.
    const WINDOW: usize = 1 + 4;
    data.as_bytes()
        .windows(WINDOW)
        .any(|w| w[0] >= 0x80 && w[1..].iter().all(u8::is_ascii_digit))
}

/// Read the BWIPP-exposed Code 128 boolean options out of `opts`.
/// Mirrors BWIPP's option header at `bwipp_code128`
/// (`bwip-js-node.js:9099-9112`).
///
/// Options that are absent keep their `false` default. Options are probed
/// in the order `raw`, `parse`, `newencoder`, `suppressc`,
/// `unlatchextbeforec`, `parsefnc`; the first one whose value is neither
/// `"true"` nor `"false"` aborts with `Error::InvalidOption`.
fn check_code128_opts(opts: &Options) -> Result<Code128Opts, Error> {
    // Option name paired with the field it controls.
    let setters: [(&str, fn(&mut Code128Opts, bool)); 6] = [
        ("raw", |p: &mut Code128Opts, on: bool| p.raw = on),
        ("parse", |p: &mut Code128Opts, on: bool| p.parse = on),
        ("newencoder", |p: &mut Code128Opts, on: bool| p.newencoder = on),
        ("suppressc", |p: &mut Code128Opts, on: bool| p.suppressc = on),
        (
            "unlatchextbeforec",
            |p: &mut Code128Opts, on: bool| p.unlatchextbeforec = on,
        ),
        ("parsefnc", |p: &mut Code128Opts, on: bool| p.parsefnc = on),
    ];

    let mut parsed = Code128Opts::default();
    for (key, assign) in setters {
        let Some(v) = opts.get(key) else {
            continue;
        };
        let on = match v {
            "true" => true,
            "false" => false,
            _ => {
                return Err(Error::InvalidOption(format!(
                    "code128: {key}={v:?} must be \"true\" or \"false\""
                )));
            }
        };
        assign(&mut parsed, on);
    }

    // BWIPP `bwipp.js:9165`: `raw=true` wins over `newencoder` — the
    // encoding becomes "raw" no matter what `newencoder` says.
    parsed.newencoder = parsed.newencoder && !parsed.raw;
    Ok(parsed)
}

/// Encode a Code 128 payload. Auto-selects subset(s) for compactness.
///
/// # Example
///
/// ```
/// use bwipp::{render_svg, Options, Symbology};
///
/// let svg = render_svg(Symbology::Code128, "Hello, world!", &Options::default()).unwrap();
/// assert!(svg.starts_with("<svg"));
/// ```
pub fn encode(data: &str, opts: &Options) -> Result<LinearPattern, Error> {
    let parsed = check_code128_opts(opts)?;
    if data.is_empty() {
        return Err(Error::InvalidData(
            "Code 128 payload must not be empty".into(),
        ));
    }

    // `raw=true`: input is `^NNN ^NNN ...`, each codeword 0..=106.
    // The Rust port consumes the codewords directly and bypasses the
    // text compactor — mirrors BWIPP `bwipp_code128` lines 9350-9389.
    if parsed.raw {
        let codes = parse_raw_codewords_code128(data.as_bytes())?;
        return encode_codes(&codes, data);
    }

    // `newencoder=true`: BWIPP 2024+ alternate compactor. The Rust
    // port runs the legacy compactor; for the narrow input shape
    // where the two diverge (Latin-1 byte followed by ≥4 contiguous
    // even-count digits) we surface `Error::InvalidData` rather than
    // silently emit a divergent symbol.
    if parsed.newencoder && input_triggers_new_encoder_divergence(data) {
        return Err(Error::InvalidData(
            "code128 newencoder=true: this input triggers the BWIPP 2024 \
             compactor's mixed Latin-1 + digit-run divergence; the Rust port \
             runs the legacy compactor. Drop newencoder=true to use the legacy \
             encoding (or split the input at the Latin-1 → digit boundary)."
                .into(),
        ));
    }

    // `parse=true`: substitute BWIPP `parseinput` `^NNN` ordinal
    // escapes and 2/3-char control names. The substituted byte stream
    // then runs through the regular (parsefnc-aware) compactor.
    let substituted: String = if parsed.parse {
        parse_text_escapes_code128(data.as_bytes())?
    } else {
        data.to_string()
    };
    let input = substituted.as_str();

    let tokens: Vec<Token> = if parsed.parsefnc {
        parse_input_with_fncs(input)?
    } else {
        for c in input.chars() {
            if (c as u32) > 127 {
                return Err(Error::InvalidData(format!(
                    "Code 128 only supports ASCII; got {c:?}"
                )));
            }
        }
        input.bytes().map(Token::Ascii).collect()
    };
    let codes = pick_codes_with_opts(&tokens, &parsed);
    encode_codes(&codes, data)
}

/// Render the final symbol from a fully-prepared codeword stream:
/// append the mod-103 check character and STOP, then materialise the bar
/// pattern.
///
/// `codes` must begin with the start codeword and every value must be a
/// valid `PATTERNS` index (`<= 106`); both callers in this file guarantee
/// that. `hri` becomes the pattern's human-readable text.
///
/// The check character is `(start + Σ codes[i] * i) mod 103` for `i >= 1`.
/// It is accumulated modulo 103 at every step so the running total cannot
/// overflow `u32`, however long a `raw=true` stream the caller supplies.
///
/// # Errors
///
/// Returns `Error::InvalidData` when `codes` is empty.
fn encode_codes(codes: &[u32], hri: &str) -> Result<LinearPattern, Error> {
    let Some((&start, payload)) = codes.split_first() else {
        return Err(Error::InvalidData(
            "Code 128 codeword stream must not be empty".into(),
        ));
    };

    // The start codeword is added as-is; the k-th payload codeword
    // (1-based) is weighted by k. Reducing both factors mod 103 first keeps
    // every intermediate value below 103 * 103.
    let check = payload
        .iter()
        .zip(1usize..)
        .fold(start % 103, |acc, (&cw, weight)| {
            (acc + (cw % 103) * (weight % 103) as u32) % 103
        });

    // 11 modules per codeword (data + check), 13 for the stop pattern.
    let trailer = [check, STOP];
    let mut modules = String::with_capacity((codes.len() + 1) * 11 + 13);
    for &cw in codes.iter().chain(&trailer) {
        modules.push_str(PATTERNS[cw as usize]);
    }

    Ok(LinearPattern::from_modules(&modules, Some(hri.to_string())))
}

/// Decode the `raw=true` payload: a back-to-back sequence of 4-byte
/// `^NNN` groups (caret plus three ASCII digits), each naming one codeword
/// in `0..=106`. Mirrors BWIPP `bwipp_code128` lines 9350-9389.
///
/// Groups are validated left to right and the first malformed group is
/// reported. Left-over bytes that do not fill a whole group are reported
/// only after every complete group has been accepted.
fn parse_raw_codewords_code128(input: &[u8]) -> Result<Vec<u32>, Error> {
    let groups = input.chunks_exact(4);
    let leftover = groups.remainder().len();
    let mut out: Vec<u32> = Vec::with_capacity(input.len() / 4);

    for (index, group) in groups.enumerate() {
        // Byte offset of this group's caret within `input`.
        let i = index * 4;
        if group[0] != b'^' {
            return Err(Error::InvalidData(format!(
                "code128 raw: tokens must be ^NNN; expected `^` at offset {i}",
            )));
        }

        let mut value: u32 = 0;
        for (j, &c) in group.iter().enumerate().skip(1) {
            if !c.is_ascii_digit() {
                return Err(Error::InvalidData(format!(
                    "code128 raw: tokens must be ^NNN with 3 digits; \
                     got 0x{c:02X} at offset {}",
                    i + j,
                )));
            }
            value = value * 10 + u32::from(c - b'0');
        }

        if value > 106 {
            return Err(Error::InvalidData(format!(
                "code128 raw: codewords must be 0..=106; got {value}",
            )));
        }
        out.push(value);
    }

    if leftover != 0 {
        let i = input.len() - leftover;
        return Err(Error::InvalidData(format!(
            "code128 raw: tokens must be ^NNN; {} trailing byte(s) at offset {i}",
            leftover,
        )));
    }
    Ok(out)
}

/// BWIPP `parseinput` control-name table reused from the micropdf417
/// port. NUL=0, SOH=1, ..., US=31 (SO=14/SI=15 absent per BWIPP).
///
/// Each entry is `(name, byte)`: under `parse=true` the escape `^name` is
/// replaced by `byte`. Names are two or three characters long;
/// `parse_text_escapes_code128` tries the three-character names before the
/// two-character ones.
const CODE128_PARSE_CTRL: &[(&str, u8)] = &[
    ("NUL", 0),
    ("SOH", 1),
    ("STX", 2),
    ("ETX", 3),
    ("EOT", 4),
    ("ENQ", 5),
    ("ACK", 6),
    ("BEL", 7),
    ("BS", 8),
    ("TAB", 9),
    ("LF", 10),
    ("VT", 11),
    ("FF", 12),
    ("CR", 13),
    // 14 (SO) and 15 (SI) are intentionally missing, matching BWIPP.
    ("DLE", 16),
    ("DC1", 17),
    ("DC2", 18),
    ("DC3", 19),
    ("DC4", 20),
    ("NAK", 21),
    ("SYN", 22),
    ("ETB", 23),
    ("CAN", 24),
    ("EM", 25),
    ("SUB", 26),
    ("ESC", 27),
    ("FS", 28),
    ("GS", 29),
    ("RS", 30),
    ("US", 31),
];

/// Expand BWIPP `parseinput` escapes for `parse=true`.
///
/// At every `^` the following forms are tried, in this order:
///   1. a 3-character control name (`^TAB` → 9),
///   2. a 2-character control name (`^CR` → 13),
///   3. a 3-digit decimal ordinal (`^065` → 65; values above 255 are an
///      error).
///
/// A `^` that matches none of them is copied through unchanged, as is
/// every other byte.
///
/// # Errors
///
/// Returns `Error::InvalidData` for an ordinal above 255, or when the
/// substituted bytes do not form valid UTF-8.
fn parse_text_escapes_code128(input: &[u8]) -> Result<String, Error> {
    /// Byte for a control name. Slice equality also compares lengths, so
    /// a 2-byte candidate can only hit a 2-character name (same for 3).
    fn control_byte(name: &[u8]) -> Option<u8> {
        CODE128_PARSE_CTRL
            .iter()
            .find(|(known, _)| known.as_bytes() == name)
            .map(|&(_, byte)| byte)
    }

    let mut out: Vec<u8> = Vec::with_capacity(input.len());
    let mut rest = input;
    while let Some((&head, tail)) = rest.split_first() {
        if head != b'^' {
            out.push(head);
            rest = tail;
            continue;
        }

        // Candidate escape bodies; `None` when too few bytes remain.
        let three = tail.get(..3);
        let two = tail.get(..2);

        if let Some(byte) = three.and_then(control_byte) {
            out.push(byte);
            rest = &tail[3..];
        } else if let Some(byte) = two.and_then(control_byte) {
            out.push(byte);
            rest = &tail[2..];
        } else if let Some(digits) = three.filter(|d| d.iter().all(u8::is_ascii_digit)) {
            let value = digits
                .iter()
                .fold(0u32, |acc, d| acc * 10 + u32::from(d - b'0'));
            if value > 255 {
                return Err(Error::InvalidData(format!(
                    "code128 parse: ordinal must be 000..=255; got {value}"
                )));
            }
            out.push(value as u8);
            rest = &tail[3..];
        } else {
            // Not an escape: keep the caret literally.
            out.push(b'^');
            rest = tail;
        }
    }

    String::from_utf8(out).map_err(|e| {
        Error::InvalidData(format!(
            "code128 parse: substituted bytes are not valid UTF-8: {e}"
        ))
    })
}

/// Option-aware front end for the compactor.
///
/// When neither `suppressc` nor `unlatchextbeforec` is set this is plain
/// [`pick_codes`]. When either is set the stream is compacted by
/// `pick_codes_no_subset_c`, which never selects subset C.
///
/// BWIPP `bwipp.js:9788-9797` distinguishes the two flags: `suppressc`
/// forces both `can_c0` and `can_c1` to false, whereas `unlatchextbeforec`
/// only suppresses `can_c1`. The Rust port currently maps both to "never
/// pick subset C", which is equivalent for the inputs in
/// `code128_trailing_opts_corpus` and yields the same cws as BWIPP for
/// those rows.
fn pick_codes_with_opts(tokens: &[Token], opts: &Code128Opts) -> Vec<u32> {
    let forbid_subset_c = opts.suppressc || opts.unlatchextbeforec;
    if forbid_subset_c {
        pick_codes_no_subset_c(tokens)
    } else {
        pick_codes(tokens)
    }
}

/// Compactor that only ever uses subsets A and B. Mirrors BWIPP's
/// `can_c0`/`can_c1` overrides under `suppressc=true` /
/// `unlatchextbeforec=true` (`bwipp.js:9788-9797`).
///
/// The start subset is A when the first non-FNC token is an ASCII control
/// byte (`< 0x20`) and B otherwise. While encoding, a control byte met in
/// subset B latches to A (codeword 101) and a byte `>= 0x60` met in subset
/// A latches to B (codeword 100). The returned stream starts with the
/// start codeword and carries no check character or stop.
fn pick_codes_no_subset_c(tokens: &[Token]) -> Vec<u32> {
    // Leading FNC markers are transparent to the choice of start subset.
    let first_data = tokens
        .iter()
        .find(|t| !matches!(t, Token::Fnc1 | Token::Fnc2 | Token::Fnc3));
    let mut subset = match first_data {
        Some(Token::Ascii(b)) if *b < 0x20 => Subset::A,
        _ => Subset::B,
    };

    let mut out = Vec::with_capacity(tokens.len() + 4);
    out.push(match subset {
        Subset::A => START_A,
        Subset::B => START_B,
        Subset::C => unreachable!("suppressc forbids initial C"),
    });

    for &token in tokens {
        match token {
            Token::Fnc1 => out.push(FNC1),
            Token::Fnc2 => out.push(FNC2),
            Token::Fnc3 => out.push(FNC3),
            Token::LinkA | Token::LinkC => {
                // Linkage codewords differ per subset; BWIPP's mapping is
                // kept even though subset C can never be active here.
                let linkage = match (subset, matches!(token, Token::LinkA)) {
                    (Subset::A, true) => 100,
                    (Subset::B, true) => 99,
                    (Subset::A, false) => 99,
                    (Subset::B, false) => 101,
                    (Subset::C, _) => unreachable!("suppressc forbids subset C"),
                };
                out.push(linkage);
            }
            Token::Ascii(b) => {
                // Latch first if the active subset cannot express `b`.
                match subset {
                    Subset::B if b < 0x20 => {
                        out.push(101); // Code A
                        subset = Subset::A;
                    }
                    Subset::A if b >= 0x60 => {
                        out.push(100); // Code B
                        subset = Subset::B;
                    }
                    _ => {}
                }
                out.push(match subset {
                    Subset::A => value_in_a(b),
                    Subset::B => value_in_b(b),
                    Subset::C => unreachable!("suppressc forbids subset C"),
                });
            }
        }
    }
    out
}

/// Encode a token stream that may contain FNC1 markers. The GS1-128 family
/// uses this entry point: every symbol starts with `Token::Fnc1` and
/// variable-length AIs are followed by additional `Token::Fnc1` markers.
///
/// `_opts` is intentionally unused: `encode_tokens` is an internal
/// helper consumed by `gs1_128::encode` and the public
/// [`encode`] above. Option-checking happens in those public entry
/// points (see [`check_code128_opts`]); by the time tokens reach
/// `encode_tokens`, options have already been validated.
///
/// The resulting pattern carries no human-readable text.
///
/// The mod-103 check character is accumulated modulo 103 at every step so
/// the running total cannot overflow `u32` on very long token streams.
///
/// # Errors
///
/// Returns `Error::InvalidData` when `tokens` is empty or contains an
/// `Ascii` byte above `0x7F`.
pub(crate) fn encode_tokens(tokens: &[Token], _opts: &Options) -> Result<LinearPattern, Error> {
    if tokens.is_empty() {
        return Err(Error::InvalidData(
            "Code 128 payload must not be empty".into(),
        ));
    }
    for t in tokens {
        if let Token::Ascii(b) = t {
            if *b > 0x7F {
                return Err(Error::InvalidData(format!(
                    "Code 128 only supports ASCII; got byte {b:#x}"
                )));
            }
        }
    }

    // `pick_codes` always emits a start codeword first, so `codes[0]` exists.
    let codes = pick_codes(tokens);

    // check = (start + Σ codes[i] * i) mod 103, reduced at every step so
    // no intermediate value exceeds 103 * 103.
    let check = codes
        .iter()
        .enumerate()
        .skip(1)
        .fold(codes[0] % 103, |acc, (i, &c)| {
            (acc + (c % 103) * (i % 103) as u32) % 103
        });

    // 11 modules per codeword (data + check), 13 for the stop pattern.
    let trailer = [check, STOP];
    let mut modules = String::with_capacity((codes.len() + 1) * 11 + 13);
    for &c in codes.iter().chain(&trailer) {
        modules.push_str(PATTERNS[c as usize]);
    }
    Ok(LinearPattern::from_modules(&modules, None))
}

/// Pick a sequence of Code 128 values that encodes `tokens`.
///
/// This is a pragmatic encoder (not strictly optimal): pick subset C while we
/// can fit two digits at a time, otherwise switch to B for lowercase /
/// printable ASCII and A for ASCII control characters. FNC1 tokens emit
/// codeword 102 in whatever subset is currently active.
fn pick_codes(tokens: &[Token]) -> Vec<u32> {
    let mut out = Vec::with_capacity(tokens.len() + 4);
    let mut i = 0;
    let mut subset = pick_initial_subset(tokens);
    out.push(match subset {
        Subset::A => START_A,
        Subset::B => START_B,
        Subset::C => START_C,
    });

    while i < tokens.len() {
        // FNC1 is value 102 in every subset; emit it without changing subset.
        if matches!(tokens[i], Token::Fnc1) {
            out.push(FNC1);
            i += 1;
            continue;
        }
        // FNC2 / FNC3 — value 97/96 in subsets A/B; **not allowed in C**.
        // Mirror BWIPP `bwipp_code128`: switch out of C to B first
        // (BWIPP picks subset B as the post-C fallback when the next
        // token isn't a digit pair).
        if matches!(tokens[i], Token::Fnc2 | Token::Fnc3) {
            if matches!(subset, Subset::C) {
                out.push(100); // Code B
                subset = Subset::B;
            }
            out.push(if matches!(tokens[i], Token::Fnc2) {
                FNC2
            } else {
                FNC3
            });
            i += 1;
            continue;
        }
        // Linkage tokens emit a per-subset "switch to X" codeword that
        // doesn't actually switch subsets (it's terminal — no more data
        // follows in the composite use case). BWIPP `seta/setb/setc_legacy[lka]`.
        if matches!(tokens[i], Token::LinkA | Token::LinkC) {
            let is_a = matches!(tokens[i], Token::LinkA);
            let cw = match (subset, is_a) {
                (Subset::A, true) => 100,  // swb
                (Subset::B, true) => 99,   // swc
                (Subset::C, true) => 101,  // swa
                (Subset::A, false) => 99,  // swc (LinkC from A)
                (Subset::B, false) => 101, // swa (LinkC from B)
                (Subset::C, false) => 100, // swb (LinkC from C)
            };
            out.push(cw);
            i += 1;
            continue;
        }
        match subset {
            Subset::C => {
                if let Some((a, b)) = digit_pair(tokens, i) {
                    out.push(a as u32 * 10 + b as u32);
                    i += 2;
                } else if next_byte(tokens, i).is_some_and(|b| b < 0x20) {
                    out.push(101); // Code A
                    subset = Subset::A;
                } else {
                    out.push(100); // Code B
                    subset = Subset::B;
                }
            }
            Subset::B => {
                let run = digit_run(tokens, i);
                let after_run = i + run;
                let at_end = after_run == tokens.len();
                if (at_end && run >= 4 && run % 2 == 0) || (!at_end && run >= 6) {
                    out.push(99); // Code C
                    subset = Subset::C;
                    continue;
                }
                let b = next_byte(tokens, i).expect("non-FNC1 here");
                if b < 0x20 {
                    out.push(101); // Code A
                    subset = Subset::A;
                    continue;
                }
                out.push(value_in_b(b));
                i += 1;
            }
            Subset::A => {
                let b = next_byte(tokens, i).expect("non-FNC1 here");
                if b >= 0x60 {
                    out.push(100); // Code B
                    subset = Subset::B;
                    continue;
                }
                let run = digit_run(tokens, i);
                let after_run = i + run;
                let at_end = after_run == tokens.len();
                if (at_end && run >= 4 && run % 2 == 0) || (!at_end && run >= 6) {
                    out.push(99); // Code C
                    subset = Subset::C;
                    continue;
                }
                out.push(value_in_a(b));
                i += 1;
            }
        }
    }
    out
}

/// Data byte of the token at index `i`, or `None` when `i` is out of range
/// or the token is a marker (FNC / linkage) rather than an ASCII byte.
fn next_byte(tokens: &[Token], i: usize) -> Option<u8> {
    tokens.get(i).and_then(|token| match *token {
        Token::Ascii(byte) => Some(byte),
        Token::Fnc1 | Token::Fnc2 | Token::Fnc3 | Token::LinkA | Token::LinkC => None,
    })
}

/// Numeric values (0..=9 each) of the two tokens at `i` and `i + 1` when
/// both are ASCII digits; `None` if either is missing, a marker token, or
/// not a digit.
fn digit_pair(tokens: &[Token], i: usize) -> Option<(u8, u8)> {
    match (next_byte(tokens, i)?, next_byte(tokens, i + 1)?) {
        (tens @ b'0'..=b'9', units @ b'0'..=b'9') => Some((tens - b'0', units - b'0')),
        _ => None,
    }
}

/// Number of consecutive ASCII-digit tokens starting at index `from`.
/// Any non-digit byte or marker token ends the run.
///
/// # Panics
///
/// Panics if `from > tokens.len()`.
fn digit_run(tokens: &[Token], from: usize) -> usize {
    let tail = &tokens[from..];
    tail.iter()
        .position(|t| !matches!(t, Token::Ascii(b'0'..=b'9')))
        .unwrap_or(tail.len())
}

/// Code 128 character subset the compactor is currently encoding in.
#[derive(Copy, Clone, Debug)]
enum Subset {
    /// Subset A — selected by Start A (103) or the "Code A" latch (101);
    /// data bytes are mapped with `value_in_a`.
    A,
    /// Subset B — selected by Start B (104) or the "Code B" latch (100);
    /// data bytes are mapped with `value_in_b`.
    B,
    /// Subset C — selected by Start C (105) or the "Code C" latch (99);
    /// each codeword carries one pair of digits.
    C,
}

/// Choose the start subset for `tokens`.
///
/// Leading FNC tokens (used by GS1-128 and by parsefnc inputs such as
/// `^FNC1...`) are transparent: Fnc1 is codeword 102 in every subset and
/// Fnc2/Fnc3 force a switch out of C anyway, so the decision is driven by
/// the data that follows them.
///
/// Mirrors BWIPP's `numsscr` + initial selection (bwipp_code128.ps.src
/// around line 9333):
///   * the data starts with 4+ digits → C
///   * the whole payload is exactly two digits → C
///   * otherwise A if the stream holds a control byte (`< 0x20`) and no
///     byte `>= 0x60`, else B.
///
/// A trailing odd digit after a C start is handled by the mid-stream
/// switcher in `pick_codes`, which latches back out to B/A.
fn pick_initial_subset(tokens: &[Token]) -> Subset {
    let data_start = tokens
        .iter()
        .take_while(|t| matches!(t, Token::Fnc1 | Token::Fnc2 | Token::Fnc3))
        .count();
    let leading_digits = digit_run(tokens, data_start);
    let ascii_total = tokens
        .iter()
        .filter(|t| matches!(t, Token::Ascii(_)))
        .count();

    if leading_digits >= 4 || (leading_digits == 2 && ascii_total == 2) {
        return Subset::C;
    }

    // One pass over the stream to learn which byte classes are present.
    let mut has_control = false;
    let mut has_lower = false;
    for token in tokens {
        if let Token::Ascii(b) = *token {
            has_control |= b < 0x20;
            has_lower |= b >= 0x60;
        }
    }

    if has_control && !has_lower {
        Subset::A
    } else {
        Subset::B
    }
}

/// Tokenise `data` using BWIPP's `parsefnc=true` escape syntax.
/// Recognises:
///   - `^FNC1` → [`Token::Fnc1`]
///   - `^FNC2` → [`Token::Fnc2`]
///   - `^FNC3` → [`Token::Fnc3`]
///   - `^LNKA` → [`Token::LinkA`]
///   - `^LNKC` → [`Token::LinkC`]
///   - `^^`    → literal `^` (single Ascii(0x5e))
///   - any other byte → [`Token::Ascii`]
///
/// Mirrors `bwipp_parseinput` (`bwip-js-node.js:1262`) for the
/// subset of fncvals that BWIPP code128 actually exposes
/// (`bwip-js-node.js:9676-9684`). BWIPP also recognises `^FNC4` and
/// `^ECI...` under certain options; this parser intentionally does
/// not — those are tracked as separate Stage-11 queue items.
///
/// # Errors
///
/// Returns `Error::InvalidData` for a non-ASCII byte, a `^` that is the
/// last byte of the input, an escape with fewer than four bytes after the
/// caret, or an unrecognised four-byte escape name.
fn parse_input_with_fncs(data: &str) -> Result<Vec<Token>, Error> {
    let bytes = data.as_bytes();
    let mut out = Vec::with_capacity(bytes.len());
    let mut i = 0;
    while let Some(&b) = bytes.get(i) {
        if b > 0x7f {
            return Err(Error::InvalidData(format!(
                "Code 128 only supports ASCII; got 0x{b:02x} at byte {i}"
            )));
        }
        // Anything but a caret is plain data.
        if b != b'^' {
            out.push(Token::Ascii(b));
            i += 1;
            continue;
        }

        match bytes.get(i + 1) {
            // A caret with nothing after it is rejected, not kept literally.
            None => {
                return Err(Error::InvalidData(
                    "code128: parsefnc=true: trailing `^` with no escape body".into(),
                ));
            }
            // `^^` is the escape for one literal caret.
            Some(b'^') => {
                out.push(Token::Ascii(b'^'));
                i += 2;
                continue;
            }
            Some(_) => {}
        }

        // Every remaining escape is a caret plus exactly four bytes.
        let Some(tag) = bytes.get(i + 1..i + 5) else {
            return Err(Error::InvalidData(format!(
                "code128: parsefnc=true: truncated escape at byte {i}; \
                 expected `^^`, `^FNC1`, `^FNC2`, `^FNC3`, `^LNKA`, or `^LNKC`"
            )));
        };
        let tok = match tag {
            b"FNC1" => Token::Fnc1,
            b"FNC2" => Token::Fnc2,
            b"FNC3" => Token::Fnc3,
            b"LNKA" => Token::LinkA,
            b"LNKC" => Token::LinkC,
            _ => {
                return Err(Error::InvalidData(format!(
                    "code128: parsefnc=true: unknown escape `^{}` at byte {i}; \
                     expected one of FNC1/FNC2/FNC3/LNKA/LNKC, or `^^` for a literal caret",
                    String::from_utf8_lossy(tag),
                )));
            }
        };
        out.push(tok);
        i += 5;
    }
    Ok(out)
}

/// Codeword value of byte `b` in Code 128 subset A.
///
/// Control bytes `0x00..=0x1F` map to `64..=95`; `0x20..=0x5F` map to
/// `0..=63`. Callers latch to subset B before passing a byte `>= 0x60`,
/// for which the result is not a subset-A data value.
fn value_in_a(b: u8) -> u32 {
    match b {
        0x00..=0x1F => u32::from(b) + 64,
        _ => u32::from(b) - 0x20,
    }
}

/// Maps a byte to its Code 128 subset B symbol value.
///
/// `0x20..=0x7F` maps to `0..=95`. The subtraction happens in `u8`, so a
/// byte below `0x20` underflows; no range validation is performed here.
fn value_in_b(b: u8) -> u32 {
    let offset = b - 0x20;
    u32::from(offset)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Stage 11.1 — codeword corpus for `parsefnc=true`, pinned
    /// byte-for-byte against `bwipp_code128` (BWIPP 2026-04-21 /
    /// bwip-js 4.10.1) as captured via
    /// `rust/tools/oracle-code128-parsefnc.js`.
    ///
    /// Rows are `(input, parsefnc, expected_cws)`; `expected_cws` holds
    /// the start codeword, the payload codewords, the check digit, and
    /// the stop codeword.
    #[test]
    fn parsefnc_corpus_matches_bwipp() {
        let cases: &[(&str, bool, &[u32])] = &[
            (
                "^FNC112345678901234",
                true,
                &[105, 102, 12, 34, 56, 78, 90, 12, 34, 92, 106],
            ),
            (
                "ABC^FNC1DEF",
                true,
                &[104, 33, 34, 35, 102, 36, 37, 38, 47, 106],
            ),
            ("AB^FNC2CD", true, &[104, 33, 34, 97, 35, 36, 95, 106]),
            ("AB^FNC3CD", true, &[104, 33, 34, 96, 35, 36, 92, 106]),
            ("FOO^^BAR", true, &[104, 38, 47, 47, 62, 34, 33, 50, 4, 106]),
            (
                "^FNC1A^FNC2B^FNC3C",
                true,
                &[104, 102, 33, 97, 34, 96, 35, 50, 106],
            ),
            ("1234^LNKA", true, &[105, 12, 34, 101, 76, 106]),
            ("1234^LNKC", true, &[105, 12, 34, 100, 73, 106]),
            // Control row with parsefnc=false: the marker bytes stay literal.
            (
                "AB^FNC1CD",
                false,
                &[104, 33, 34, 62, 38, 46, 35, 17, 35, 36, 58, 106],
            ),
        ];
        for &(input, parsefnc, expected_cws) in cases {
            // The codewords are rebuilt inline (tokenise → pick_codes →
            // check digit → STOP) so they can be compared directly,
            // without re-deriving them from the public LinearPattern.
            let tokens: Vec<Token> = if parsefnc {
                parse_input_with_fncs(input).unwrap_or_else(|e| {
                    panic!("{input:?} parsefnc={parsefnc}: parser failed: {e:?}")
                })
            } else {
                input.bytes().map(Token::Ascii).collect()
            };
            let mut codes = pick_codes(&tokens);
            // Mod-103 check digit: the start codeword plus every
            // following codeword multiplied by its 1-based position.
            let weighted: u32 = codes
                .iter()
                .skip(1)
                .zip(1u32..)
                .map(|(cw, weight)| cw * weight)
                .sum();
            let check = (codes[0] + weighted) % 103;
            codes.extend([check, STOP]);
            assert_eq!(
                codes, expected_cws,
                "code128 parsefnc cws mismatch for {input:?} parsefnc={parsefnc}"
            );
        }
    }

    /// Stage 11.1 — with `parsefnc=true`, a malformed escape must be
    /// rejected as `Error::InvalidData` rather than silently encoded as
    /// if the option were off.
    #[test]
    fn parsefnc_rejects_malformed_escapes() {
        // Every rejection carries the "code128: parsefnc=true:" prefix
        // plus wording specific to how the escape is malformed:
        //   "^"                   → "trailing `^`"
        //   "^F" / "^FN" / "^FNC" → "truncated escape"
        //   "^XYZA" / "^FNCX"     → "unknown escape"
        // Checking both parts catches a mutation that drops either the
        // shared prefix or the per-arm wording.
        let opts = Options::default().with("parsefnc", "true");
        let inputs = ["^FNC", "^FN", "^F", "^", "^XYZA", "^FNCX"];
        for input in inputs {
            let expected_suffix = match input {
                "^" => "trailing `^`",
                "^F" | "^FN" | "^FNC" => "truncated escape",
                "^XYZA" | "^FNCX" => "unknown escape",
                other => panic!("test bug: no expected suffix for {other:?}"),
            };
            let msg = match encode(input, &opts) {
                Err(Error::InvalidData(msg)) => msg,
                other => panic!("{input:?}: expected InvalidData, got {other:?}"),
            };
            assert!(
                msg.contains("code128: parsefnc=true:"),
                "{input:?} diagnostic must carry shared prefix; got {msg}"
            );
            assert!(
                msg.contains(expected_suffix),
                "{input:?} must hit the {expected_suffix:?} arm; got {msg}"
            );
        }
    }

    /// Stage 11.1 — a payload without any `^` marker must encode to the
    /// same run lengths whether or not `parsefnc=true` is set.
    #[test]
    fn parsefnc_true_no_op_for_plain_payload() {
        let payload = "HELLO123";
        let with_parsefnc = Options::default().with("parsefnc", "true");
        let baseline = encode(payload, &Options::default()).unwrap();
        let parsed = encode(payload, &with_parsefnc).unwrap();
        assert_eq!(baseline.bars, parsed.bars);
    }

    /// Smoke test on the canonical "PJJ123C" payload: the symbol's total
    /// width must exceed 50 and its first run must be at least 1.
    ///
    /// NOTE(review): despite the name, the check digit itself is not
    /// asserted here.
    #[test]
    fn check_digit_for_known_payload() {
        let symbol = encode("PJJ123C", &Options::default()).unwrap();
        let width = symbol.total_width();
        assert!(width > 50);
        // An empty run list counts as a failure, same as a zero-width run.
        let leading_run_ok = matches!(symbol.bars.first(), Some(&run) if run >= 1);
        assert!(leading_run_ok);
    }

    /// Stage 11.A8c — an empty payload must be rejected with the
    /// empty-payload diagnostic: it carries the `Code 128` prefix and
    /// the `must not be empty` predicate, and none of the wording of the
    /// non-ASCII arm (`only supports ASCII`) or the raw-mode empty arm
    /// (`codeword stream`).
    #[test]
    fn empty_data_is_rejected() {
        let msg = match encode("", &Options::default()) {
            Err(Error::InvalidData(msg)) => msg,
            other => panic!("empty Code 128 payload should reject as InvalidData, got {other:?}"),
        };
        // Wording that must be present.
        assert!(msg.contains("Code 128"), "missing `Code 128` prefix: {msg}");
        assert!(
            msg.contains("must not be empty"),
            "missing `must not be empty` predicate: {msg}"
        );
        // Wording from the other rejection arms that must be absent.
        assert!(
            !msg.contains("only supports ASCII"),
            "wrong arm — non-ASCII diagnostic leaked: {msg}"
        );
        assert!(
            !msg.contains("codeword stream"),
            "wrong arm — raw-mode codeword-empty diagnostic leaked: {msg}"
        );
    }

    /// Stage 11.A8c — a non-ASCII payload must be rejected with the
    /// non-ASCII diagnostic: `Code 128` prefix, `only supports ASCII`
    /// predicate, and the Debug echo of the offending char (`'é'`, the
    /// first non-ASCII char in "café"). The empty-payload wording must
    /// not appear.
    #[test]
    fn non_ascii_is_rejected() {
        let msg = match encode("café", &Options::default()) {
            Err(Error::InvalidData(msg)) => msg,
            other => panic!("\"café\" should reject as InvalidData, got {other:?}"),
        };
        // Wording that must be present.
        assert!(msg.contains("Code 128"), "missing `Code 128` prefix: {msg}");
        assert!(
            msg.contains("only supports ASCII"),
            "missing `only supports ASCII` predicate: {msg}"
        );
        assert!(msg.contains("'é'"), "missing 'é' Debug echo: {msg}");
        // Cross-arm guard against the empty-payload diagnostic.
        assert!(
            !msg.contains("must not be empty"),
            "wrong arm — empty-payload diagnostic leaked: {msg}"
        );
    }

    /// Ten digits should be packed as five Subset C pairs: 5 pairs +
    /// start + check + stop = 8 symbols * 11 modules (plus the 2-module
    /// stop terminator) = 90 modules. The bound used here is looser
    /// than that figure.
    #[test]
    fn long_numeric_uses_subset_c() {
        let symbol = encode("0123456789", &Options::default()).unwrap();
        let width = symbol.total_width();
        assert!(width < 120);
    }

    /// Golden bar pattern for `"Hello"` captured from bwip-js's
    /// `raw("code128", "Hello", {})[0].sbs` — bwip-js returns the
    /// bar/space run-lengths starting with the first bar (no leading
    /// quiet zone in the array), matching our `LinearPattern.bars`
    /// convention exactly.
    ///
    /// The 49 entries break down as seven 6-run symbols (start, five
    /// data characters, check digit) followed by the 7-run stop
    /// pattern, whose runs `2, 3, 3, 1, 1, 1, 2` add up to 13 modules.
    #[test]
    fn matches_bwip_js_raw_sbs() {
        let p = encode("Hello", &Options::default()).unwrap();
        let want: [u8; 49] = [
            2, 1, 1, 2, 1, 4, 2, 3, 1, 1, 1, 3, 1, 1, 2, 2, 1, 4, 2, 2, 1, 1, 1, 4, 2, 2, 1, 1, 1,
            4, 1, 3, 4, 1, 1, 1, 2, 2, 1, 1, 1, 4, 2, 3, 3, 1, 1, 1, 2,
        ];
        assert_eq!(p.bars, want, "code128 bars mismatch vs bwip-js raw output");
    }

    /// Cross-validation goldens that exercise each Code 128 subset
    /// switching path. The auto-subset selector picks Subset C for
    /// runs of even-length numerics (each 2-digit pair = one
    /// codeword), Subset B as the default for any mixed alphanumeric
    /// text, and Subset A for control bytes (no golden here; covered
    /// by unit tests on `pick_initial_subset` since BWIPP doesn't
    /// emit control chars from raw string input).
    ///
    /// Both inputs in this test come out as one codeword per
    /// character: their digit runs are only two long, so no pair
    /// compaction shows up in the expected run counts.
    #[test]
    fn matches_bwip_js_mixed_paths() {
        let cases: &[(&str, &[u8])] = &[
            // 9 characters → start + 9 data + check = 11 six-run
            // symbols, plus the 7-run stop: 73 runs.
            (
                "12 ABC 99",
                &[
                    2, 1, 1, 2, 1, 4, 1, 2, 3, 2, 2, 1, 2, 2, 3, 2, 1, 1, 2, 1, 2, 2, 2, 2, 1, 1,
                    1, 3, 2, 3, 1, 3, 1, 1, 2, 3, 1, 3, 1, 3, 2, 1, 2, 1, 2, 2, 2, 2, 3, 2, 1, 1,
                    2, 2, 3, 2, 1, 1, 2, 2, 1, 1, 1, 4, 2, 2, 2, 3, 3, 1, 1, 1, 2,
                ],
            ),
            // 7 characters → 9 six-run symbols plus the 7-run stop:
            // 61 runs.
            (
                "abc def",
                &[
                    2, 1, 1, 2, 1, 4, 1, 2, 1, 1, 2, 4, 1, 2, 1, 4, 2, 1, 1, 4, 1, 1, 2, 2, 2, 1,
                    2, 2, 2, 2, 1, 4, 1, 2, 2, 1, 1, 1, 2, 2, 1, 4, 1, 1, 2, 4, 1, 2, 4, 1, 1, 3,
                    1, 1, 2, 3, 3, 1, 1, 1, 2,
                ],
            ),
        ];
        for &(text, want) in cases {
            // Stage 11.A8c (cont) — `.unwrap()` → `.unwrap_or_else` with
            // per-iteration input echo.
            let got = encode(text, &Options::default()).unwrap_or_else(|e| {
                panic!("encode({text:?}) (Code 128 sbs corpus item) must succeed; got Err: {e}")
            });
            assert_eq!(got.bars, want, "code128 sbs mismatch for {text:?}");
        }
    }

    /// Golden run-length vectors for four inputs with different digit
    /// layouts: an even all-digit string, letters followed by a short
    /// digit run, mixed-case text ending in two digits, and an
    /// odd-length all-digit string. Each `want` slice is compared
    /// against `encode(..).bars` under default options.
    #[test]
    fn matches_bwip_js_subset_paths() {
        let cases: &[(&str, &[u8])] = &[
            // 10 digits in 49 runs: start + 5 data codewords + check
            // (seven 6-run symbols) + 7-run stop, i.e. one codeword
            // per digit pair.
            (
                "1234567890",
                &[
                    2, 1, 1, 2, 3, 2, 1, 1, 2, 2, 3, 2, 1, 3, 1, 1, 2, 3, 3, 3, 1, 1, 2, 1, 2, 4,
                    1, 1, 1, 2, 2, 1, 4, 1, 2, 1, 1, 2, 4, 2, 1, 1, 2, 3, 3, 1, 1, 1, 2,
                ],
            ),
            // 6 characters in 55 runs: one codeword per character.
            (
                "ABC123",
                &[
                    2, 1, 1, 2, 1, 4, 1, 1, 1, 3, 2, 3, 1, 3, 1, 1, 2, 3, 1, 3, 1, 3, 2, 1, 1, 2,
                    3, 2, 2, 1, 2, 2, 3, 2, 1, 1, 2, 2, 1, 1, 3, 2, 1, 4, 1, 1, 2, 2, 2, 3, 3, 1,
                    1, 1, 2,
                ],
            ),
            // 10 characters in 79 runs: one codeword per character.
            (
                "abc DEF 12",
                &[
                    2, 1, 1, 2, 1, 4, 1, 2, 1, 1, 2, 4, 1, 2, 1, 4, 2, 1, 1, 4, 1, 1, 2, 2, 2, 1,
                    2, 2, 2, 2, 1, 1, 2, 3, 1, 3, 1, 3, 2, 1, 1, 3, 1, 3, 2, 3, 1, 1, 2, 1, 2, 2,
                    2, 2, 1, 2, 3, 2, 2, 1, 2, 2, 3, 2, 1, 1, 2, 2, 1, 4, 1, 1, 2, 3, 3, 1, 1, 1,
                    2,
                ],
            ),
            // 5 digits in 43 runs: start + 4 payload codewords + check
            // + stop.
            (
                "12345",
                &[
                    2, 1, 1, 2, 3, 2, 1, 1, 2, 2, 3, 2, 1, 3, 1, 1, 2, 3, 1, 1, 4, 1, 3, 1, 2, 1,
                    3, 2, 1, 2, 3, 1, 1, 1, 2, 3, 2, 3, 3, 1, 1, 1, 2,
                ],
            ),
        ];
        for &(text, want) in cases {
            // Stage 11.A8c (cont) — `.unwrap()` → `.unwrap_or_else` with
            // per-iteration input echo + Code 128 subset-path label.
            let got = encode(text, &Options::default()).unwrap_or_else(|e| {
                panic!("encode({text:?}) (Code 128 subset-path corpus item) must succeed; got Err: {e}")
            });
            assert_eq!(got.bars, want, "code128 sbs mismatch for {text:?}");
        }
    }

    /// Stage 11.12 — `raw=true` parses `^NNN` codewords and emits the
    /// symbol directly. `^104^033^034^035` is `[START_B, A, B, C]`; the
    /// mod-103 check digit over those codewords is
    /// (104 + 33·1 + 34·2 + 35·3) mod 103 = 1. The resulting sbs must
    /// equal that of `"ABC"` encoded with default options.
    #[test]
    fn raw_true_matches_default_subset_b() {
        let raw_opts = Options::default().with("raw", "true");
        // Codewords given explicitly: 104, then 33/34/35 for A/B/C.
        let from_codewords = encode("^104^033^034^035", &raw_opts).unwrap();
        // The same symbol produced by the default auto-encoder.
        let from_text = encode("ABC", &Options::default()).unwrap();
        assert_eq!(from_codewords.bars, from_text.bars, "raw vs plain sbs mismatch");
    }

    /// Stage 11.12 — `raw=true` rejects malformed and out-of-range
    /// inputs.
    ///
    /// Stage 11.A8c — upgrade from 4 weak is_err() checks to per-arm
    /// diagnostic-substring pins (parallel to ultracode 2883820 and
    /// micropdf417 a57b6b5). The four rejection arms of
    /// `parse_raw_codewords_code128` exercised here, with the
    /// substrings each assertion pins:
    ///   * bad prefix → "code128 raw:" + "expected `^` at offset 0"
    ///   * non-digit interior → "with 3 digits" + "0xHH" + "at offset N"
    ///   * value > 106 → "codewords must be 0..=106" + the value echo
    ///   * trailing bytes → "N trailing byte(s)" + "at offset M"
    ///
    /// A mutant that swaps any pair of arm bodies survives a
    /// variant-only is_err() check, hence the per-arm substrings.
    /// Only the bad-prefix assertion checks the "code128 raw:" prefix.
    #[test]
    fn raw_true_rejects_malformed() {
        let opts = Options::default().with("raw", "true");
        // Encodes `input` with raw=true and returns the InvalidData
        // message; any other outcome fails the test.
        let unwrap_msg = |input: &str| -> String {
            let err = encode(input, &opts).unwrap_err();
            match err {
                Error::InvalidData(m) => m,
                other => panic!("encode({input:?}) must yield InvalidData; got {other:?}"),
            }
        };

        // Arm 1: bad prefix at offset 0 ("AAA0" → byte 'A' = 0x41).
        let msg = unwrap_msg("AAA0");
        assert!(
            msg.contains("code128 raw:") && msg.contains("expected `^` at offset 0"),
            "bad-prefix diagnostic missing 'code128 raw:' + 'expected `^` at offset 0'; got {msg:?}"
        );

        // Arm 2: non-digit interior 'X' (0x58) at offset 3 in "^00X".
        // The code128 diagnostic uses "with 3 digits; got 0xHH at
        // offset N" template (differs from micropdf417/ultracode which
        // use "non-digit 0xHH"). Pin the code128-specific phrasing.
        let msg = unwrap_msg("^00X");
        assert!(
            msg.contains("with 3 digits") && msg.contains("0x58") && msg.contains("at offset 3"),
            "non-digit diagnostic missing 'with 3 digits' + 'X' byte echo + offset 3; got {msg:?}"
        );

        // Arm 3: value > 106 ("^107" → 107).
        let msg = unwrap_msg("^107");
        assert!(
            msg.contains("codewords must be 0..=106") && msg.contains("107"),
            "value-range diagnostic must pin 0..=106 + 107 echo; got {msg:?}"
        );
        // The range diagnostic must not carry wording from arms 1 or 2.
        assert!(
            !msg.contains("with 3 digits") && !msg.contains("expected `^`"),
            "value-range diagnostic must not leak other arms; got {msg:?}"
        );

        // Arm 4: trailing partial token "^001^00" (3 bytes after "^001"
        // at offset 4).
        let msg = unwrap_msg("^001^00");
        assert!(
            msg.contains("trailing")
                && msg.contains("3 trailing byte(s)")
                && msg.contains("at offset 4"),
            "trailing diagnostic must pin count + offset 4; got {msg:?}"
        );
    }

    /// Stage 11.12 — `parse=true` substitutes `^NNN` ordinals and
    /// `^NAME` control names. `"^065BC"` becomes "ABC" (ordinal 065 is
    /// 'A'), so it must yield the same symbol as plain "ABC".
    #[test]
    fn parse_true_substitutes_and_matches_default() {
        let parse_opts = Options::default().with("parse", "true");
        let substituted = encode("^065BC", &parse_opts).unwrap();
        let literal = encode("ABC", &Options::default()).unwrap();
        assert_eq!(substituted.bars, literal.bars, "parse vs plain sbs mismatch");
    }

    /// Stage 11.12 — `parse=true` rejects ordinals > 255.
    ///
    /// Stage 11.A8c — beyond the `InvalidData` variant, the diagnostic
    /// text is pinned: the `code128 parse:` prefix, the
    /// `ordinal must be 000..=255` bound, and an echo of the offending
    /// value (999). A mutant that hard-codes the value, rewords the
    /// bound, or routes the overflow through a different arm would pass
    /// a variant-only assertion but fails these.
    #[test]
    fn parse_true_rejects_ordinal_above_255() {
        let parse_opts = Options::default().with("parse", "true");
        let msg = match encode("^999X", &parse_opts).unwrap_err() {
            Error::InvalidData(msg) => msg,
            err => panic!("parse=true with ^999 must yield InvalidData; got {err:?}"),
        };
        assert!(
            msg.contains("code128 parse:"),
            "diagnostic must carry the 'code128 parse:' prefix; got {msg:?}"
        );
        assert!(
            msg.contains("ordinal must be 000..=255"),
            "diagnostic must carry the 000..=255 bound text; got {msg:?}"
        );
        assert!(
            msg.contains("999"),
            "diagnostic must echo the offending value via {{value}}; got {msg:?}"
        );
    }

    /// Stage 11.12 — `suppressc=true` keeps a pure digit string out of
    /// Subset C. The test checks that the run-length vector differs
    /// from the default one and is longer than it.
    #[test]
    fn suppressc_true_avoids_subset_c() {
        let digits = "12345678";
        let no_c_opts = Options::default().with("suppressc", "true");
        let with_c = encode(digits, &Options::default()).unwrap();
        let without_c = encode(digits, &no_c_opts).unwrap();
        // Default packs digit pairs; with suppressc each digit is
        // emitted individually, so the sbs vector must grow.
        assert_ne!(with_c.bars, without_c.bars, "suppressc should change sbs");
        assert!(
            without_c.bars.len() > with_c.bars.len(),
            "suppressc should produce a longer sbs (subset B is less dense)"
        );
    }

    /// Stage 11.12 — `unlatchextbeforec=true` shares behaviour with
    /// `suppressc=true` for the typical inputs (BWIPP `bwipp.js:9793`
    /// makes both flags suppress `can_c1`). This test only confirms
    /// that the option is accepted and that the resulting run-length
    /// vector is non-empty.
    #[test]
    fn unlatchextbeforec_true_accepts_and_renders() {
        let opts = Options::default().with("unlatchextbeforec", "true");
        let symbol = encode("ABC123DEF", &opts).unwrap();
        assert!(!symbol.bars.is_empty(), "unlatchextbeforec produced empty sbs");
    }

    /// Stage 11.12 — `newencoder=true` is accepted for typical ASCII
    /// inputs (the legacy and new encoders produce identical output
    /// for these) and rejects the Latin-1+digit divergence input via
    /// `input_triggers_new_encoder_divergence` rather than emitting a
    /// diverging symbol.
    #[test]
    fn newencoder_true_accepts_typical_inputs() {
        let opts = Options::default().with("newencoder", "true");

        // Plain ASCII letters and a pure digit run are both accepted.
        assert!(
            encode("ABCDEF", &opts).is_ok(),
            "encode(\"ABCDEF\", newencoder=\"true\") (pure ASCII letters → subset B) must accept on the BWIPP 2024 newencoder path"
        );
        assert!(
            encode("123456", &opts).is_ok(),
            "encode(\"123456\", newencoder=\"true\") (pure 6-digit run → subset C) must accept on the BWIPP 2024 newencoder path"
        );

        // Latin-1 characters followed by a digit run must be rejected
        // as InvalidData (no Unimplemented, no silently divergent symbol).
        let msg = match encode("\u{00e9}\u{00e8}\u{00e7}1234", &opts).unwrap_err() {
            Error::InvalidData(msg) => msg,
            err => {
                panic!("newencoder Latin-1+digit divergence must yield InvalidData; got {err:?}")
            }
        };

        // Stage 11.A8c — pin the wording of the divergence diagnostic so
        // a mutant that swaps in another InvalidData message is caught:
        // option prefix, compactor name, explanation, remediation hint.
        assert!(
            msg.contains("code128 newencoder=true"),
            "diagnostic must carry the option-specific prefix; got {msg:?}"
        );
        assert!(
            msg.contains("BWIPP 2024"),
            "diagnostic must call out the BWIPP 2024 compactor; got {msg:?}"
        );
        assert!(
            msg.contains("Latin-1") && msg.contains("digit-run"),
            "diagnostic must explain Latin-1 + digit-run divergence; got {msg:?}"
        );
        assert!(
            msg.contains("Drop newencoder=true"),
            "diagnostic must include the remediation hint; got {msg:?}"
        );
    }

    /// Stage 11.12 — a non-boolean value for any of `raw`, `parse`,
    /// `newencoder`, `suppressc`, or `unlatchextbeforec` must be
    /// reported as `InvalidOption`.
    #[test]
    fn trailing_opts_reject_invalid_values() {
        const BOOL_OPTS: [&str; 5] = [
            "raw",
            "parse",
            "newencoder",
            "suppressc",
            "unlatchextbeforec",
        ];
        for k in BOOL_OPTS {
            let outcome = encode("ABC", &Options::default().with(k, "maybe"));
            let err = outcome.unwrap_err();
            assert!(
                matches!(err, Error::InvalidOption(_)),
                "expected InvalidOption for {k}=maybe, got {err:?}"
            );
        }
    }

    /// Stage 11.A8c — pin `value_in_a` / `value_in_b` at the edges of
    /// each mapped range so mutants on those helpers (function
    /// replacement, `- with +`, `- with /`, `>= boundary`) are caught.
    #[test]
    fn value_in_a_and_b_lookups_at_every_boundary() {
        // value_in_a: 0x00..=0x1F → b + 64 = 64..=95, and
        // 0x20..=0x5F → b - 0x20 = 0..=63 ('A' = 65 → 33).
        let subset_a: [(u8, u32); 5] = [(0x00, 64), (0x1F, 95), (0x20, 0), (b'A', 33), (0x5F, 63)];
        for (byte, want) in subset_a {
            assert_eq!(value_in_a(byte), want);
        }

        // value_in_b: 0x20..=0x7F → b - 0x20 = 0..=95 ('a' = 97 → 65).
        let subset_b: [(u8, u32); 4] = [(0x20, 0), (b' ', 0), (b'a', 65), (0x7F, 95)];
        for (byte, want) in subset_b {
            assert_eq!(value_in_b(byte), want);
        }
    }

    /// Stage 11.A8c — pin `parse_text_escapes_code128` outputs across
    /// every branch of its escape-matching state machine. Kills the
    /// 21+ mutants in this function (3-char vs 2-char name length
    /// boundaries, ordinal arithmetic `* 100` / `* 10`, `value > 255`
    /// boundary, and the unmatched-`^` fall-through).
    #[test]
    fn parse_text_escapes_pins_every_branch() {
        // Plain text passes through unchanged.
        let s = parse_text_escapes_code128(b"hello").unwrap();
        assert_eq!(s, "hello");

        // 3-char ctrl name "TAB" → byte 9.
        let s = parse_text_escapes_code128(b"^TAB").unwrap();
        assert_eq!(s.as_bytes(), &[9]);

        // A second 3-char ctrl name: "DLE" → byte 16.
        let s = parse_text_escapes_code128(b"^DLE").unwrap();
        assert_eq!(s.as_bytes(), &[16]);

        // 2-char ctrl name "CR" → byte 13. Only two bytes follow the
        // `^`, so no 3-char name can match and the 2-char lookup must
        // be the one that succeeds.
        let s = parse_text_escapes_code128(b"^CR").unwrap();
        assert_eq!(s.as_bytes(), &[13]);

        // 2-char ctrl name "LF" → byte 10.
        let s = parse_text_escapes_code128(b"^LF").unwrap();
        assert_eq!(s.as_bytes(), &[10]);

        // 3-digit ordinal "065" → byte 65 ('A').
        let s = parse_text_escapes_code128(b"^065").unwrap();
        assert_eq!(s.as_bytes(), &[65]);

        // 3-digit ordinal "100" → 1*100 + 0*10 + 0*1 = 100.
        // Pins the hundreds / tens / units weighting of the digits.
        let s = parse_text_escapes_code128(b"^100").unwrap();
        assert_eq!(s.as_bytes(), &[100]);

        // 3-digit ordinal "127" — highest valid single-byte UTF-8.
        // Pins the arithmetic `1*100 + 2*10 + 7 = 127`; any
        // `+ with *` mutation on the multiplication chain produces
        // different byte values.
        let s = parse_text_escapes_code128(b"^127").unwrap();
        assert_eq!(s.as_bytes(), &[127]);

        // 3-digit ordinal "256" — the first out-of-range value; it
        // must be rejected with "ordinal must be 000..=255". A
        // `> with >=` mutant on the bound check would still reject it
        // (256 >= 255), but a `> with ==` mutant would let it through
        // (256 != 255), so this case distinguishes `>` from `==`.
        // NOTE(review): what such a mutant would return instead depends
        // on code outside this test.
        //
        // Stage 11.A8c — upgrade from matches!(_, InvalidData(_)) to
        // pin the bound + value-echo for both 256 and 999. Confirms
        // the helper-level rejection routes through the same diagnostic
        // template that the top-level encode test (49c66e4) checked at
        // the public surface. Distinct {value} echoes (256 vs 999)
        // kill `{value}` drop / fixed-string replacement mutants.
        for (input, want_value) in [(b"^256" as &[u8], "256"), (b"^999", "999")] {
            let err = parse_text_escapes_code128(input).unwrap_err();
            let Error::InvalidData(msg) = err else {
                panic!("parse_text_escapes_code128({input:?}) must yield InvalidData; got {err:?}");
            };
            assert!(
                msg.contains("code128 parse:"),
                "diagnostic for {input:?} must carry the 'code128 parse:' prefix; got {msg:?}"
            );
            assert!(
                msg.contains("ordinal must be 000..=255"),
                "diagnostic for {input:?} must carry the 000..=255 bound; got {msg:?}"
            );
            assert!(
                msg.contains(want_value),
                "diagnostic for {input:?} must echo {want_value:?} via {{value}}; got {msg:?}"
            );
        }

        // Unmatched `^XYZ` (no 3-char ctrl, no 2-char, not digits) →
        // the whole input comes back unchanged, `^` included.
        let s = parse_text_escapes_code128(b"^XYZ").unwrap();
        assert_eq!(s, "^XYZ");

        // `^` at end of input with no follow-up bytes → literal '^'.
        let s = parse_text_escapes_code128(b"^").unwrap();
        assert_eq!(s, "^");

        // Escape embedded between literals: 'A' + ^TAB (byte 9) + 'B'.
        // The trailing 'B' must not be swallowed by the 3-char name.
        let s = parse_text_escapes_code128(b"A^TABB").unwrap();
        assert_eq!(s.as_bytes(), &[b'A', 9, b'B']);
    }

    /// Stage 11.A8c — pin every boundary of the `pick_codes`
    /// digit-run-shifts-to-C predicate
    /// (`(at_end && run >= 4 && run % 2 == 0) || (!at_end && run >= 6)`)
    /// on both Subset::A (line 756) and Subset::B (line 732)
    /// branches. Each branch has ~7 mutants on the predicate operators
    /// (`+ with -/*` on `i + run`, `== with !=` on
    /// `after_run == tokens.len()`, `>= with <` boundary, `% with /+`,
    /// `&& with ||`, `|| with &&`, `delete !`). Pinning the exact
    /// codeword sequence for boundary-adjacent inputs kills all of them.
    #[test]
    fn pick_codes_digit_run_shifts_to_c_at_every_boundary() {
        // Wraps every byte of `s` in `Token::Ascii`.
        let ascii = |s: &str| -> Vec<Token> { s.bytes().map(Token::Ascii).collect() };

        // Subset B → C, at_end true, run exactly 4 (even ≥ 4) → enter C.
        // Encoding: 'a'=65, 'b'=66, SWC=99, pair(1,2)=12, pair(3,4)=34.
        assert_eq!(
            pick_codes(&ascii("ab1234")),
            vec![START_B, 65, 66, 99, 12, 34]
        );

        // Subset B → C, at_end true, run = 5 (odd) at position i=2.
        // The loop re-evaluates each position: at i=2 run=5 odd, stay
        // in B and push '1'. At i=3 run=4 even at_end → enter C, push
        // pair(2,3) + pair(4,5). Pinning the post-shift codeword
        // sequence kills `% with /` / `% with +` / `&& with ||` /
        // `== with !=` mutants on the run-shift predicate.
        assert_eq!(
            pick_codes(&ascii("ab12345")),
            vec![START_B, 65, 66, 17, 99, 23, 45]
        );

        // Subset B, at_end true, run = 3 (< 4) → stay B throughout.
        // Pins the `>= with <` boundary mutant on `run >= 4`.
        assert_eq!(
            pick_codes(&ascii("ab123")),
            vec![START_B, 65, 66, 17, 18, 19]
        );

        // Subset B → C, at_end true, run = 6 (even ≥ 4) → enter C.
        assert_eq!(
            pick_codes(&ascii("ab123456")),
            vec![START_B, 65, 66, 99, 12, 34, 56]
        );

        // Subset B, at_end FALSE, run = 4 (< 6 mid-stream) → STAY in B.
        // Under the predicate quoted in the doc comment, the at_end arm
        // does not apply and `run >= 6` is false (4 >= 6), so all four
        // digits are emitted one by one. A `>= with <` mutant on
        // `run >= 6` would evaluate `4 < 6` as true and shift to C at
        // i=2, which the sequence pinned below rules out.
        assert_eq!(
            pick_codes(&ascii("ab1234cd")),
            vec![START_B, 65, 66, 17, 18, 19, 20, 67, 68]
        );

        // Subset B, at_end FALSE, run = 6 (≥ 6 mid-stream) → enter C.
        // After 6 digits, switch back to B (100=SWB) for 'cd'.
        assert_eq!(
            pick_codes(&ascii("ab123456cd")),
            vec![START_B, 65, 66, 99, 12, 34, 56, 100, 67, 68]
        );

        // Subset A → C, at_end true, run = 4 (even ≥ 4) → enter C.
        // Need control bytes to start in Subset A.
        // \x01=value_in_a(1)=65, \x02=66, SWC=99, pair(1,2)=12, pair(3,4)=34.
        assert_eq!(
            pick_codes(&ascii("\x01\x021234")),
            vec![START_A, 65, 66, 99, 12, 34]
        );

        // Subset A → C, at_end false, run = 6 mid-stream → enter C, then
        // switch back. After 6 digits in C, the next byte is \x03 (< 0x20),
        // so Subset C's "else if next byte < 0x20" branch fires → 101
        // (Code A) → switch to A → push value_in_a(3)=67.
        assert_eq!(
            pick_codes(&ascii("\x01\x02123456\x03")),
            vec![START_A, 65, 66, 99, 12, 34, 56, 101, 67]
        );
    }

    /// Stage 11.A8c — pins the three lookahead helpers (`next_byte`,
    /// `digit_pair`, `digit_run`) that back the digit-run lookahead in
    /// `pick_codes_*`.
    ///
    /// Mutations the cases below are aimed at:
    ///   - `next_byte`: an FNC/Link arm yielding `Some` rather than `None`.
    ///   - `digit_pair`: `&&` → `||` between the two `is_ascii_digit()`
    ///     checks, which would accept a digit/non-digit pair.
    ///   - `digit_pair`: `a - b'0'` → `a + b'0'`.
    ///   - `digit_pair`: `i + 1` → `i`, reading the same token twice.
    ///   - `digit_run`: predicate swapped to `is_ascii_alphabetic`.
    ///   - `digit_run`: non-`Ascii` tokens being counted.
    #[test]
    fn next_byte_digit_pair_digit_run_helpers() {
        // Fixture, by index: 0 '1' · 1 '2' · 2 'A' · 3 FNC1 · 4 '3' · 5 '4'.
        let mut tokens: Vec<Token> = b"12A".iter().copied().map(Token::Ascii).collect();
        tokens.push(Token::Fnc1);
        tokens.extend(b"34".iter().copied().map(Token::Ascii));

        // next_byte: an Ascii token yields its byte; FNC/Link tokens and
        // out-of-range indices yield None.
        assert_eq!(next_byte(&tokens, 0), Some(b'1'));
        assert_eq!(next_byte(&tokens, 2), Some(b'A'));
        for (at, why) in [
            (3, "Fnc1 must return None"),
            (99, "out-of-bounds → None"),
        ] {
            assert_eq!(next_byte(&tokens, at), None, "{why}");
        }
        let only_fnc: Vec<Token> = vec![Token::Fnc2, Token::Fnc3, Token::LinkA, Token::LinkC];
        for (at, why) in [
            (0, "Fnc2 → None"),
            (1, "Fnc3 → None"),
            (2, "LinkA → None"),
            (3, "LinkC → None"),
        ] {
            assert_eq!(next_byte(&only_fnc, at), None, "{why}");
        }

        // digit_pair: two adjacent digit tokens yield their numeric
        // values; anything else (mixed, non-digit, FNC, end of input)
        // yields None.
        let pair_cases = [
            (0, Some((1, 2)), "two digits at i=0 → (1, 2)"),
            (1, None, "digit + letter → None (mixed)"),
            (2, None, "letter + Fnc1 → None (first is non-digit)"),
            (3, None, "Fnc1 + digit → None (first byte is None)"),
            (4, Some((3, 4)), "two digits at i=4 → (3, 4)"),
            (5, None, "single digit at end → None (no next byte)"),
        ];
        for (at, want, why) in pair_cases {
            assert_eq!(digit_pair(&tokens, at), want, "{why}");
        }

        // digit_run: length of the unbroken run of Ascii-digit tokens
        // starting at the given index.
        let run_cases = [
            (0, 2, "two digits then letter → 2"),
            (1, 1, "one digit then letter → 1"),
            (2, 0, "letter → 0"),
            (3, 0, "Fnc1 → 0 (not Ascii)"),
            (4, 2, "two digits at end → 2"),
            (5, 1, "single digit → 1"),
        ];
        for (from, want, why) in run_cases {
            assert_eq!(digit_run(&tokens, from), want, "{why}");
        }

        // A stream made of digits only: the run extends to the end, and
        // starting exactly at the end yields an empty run.
        let all_digits: Vec<Token> = b"1234567890"
            .iter()
            .copied()
            .map(Token::Ascii)
            .collect();
        assert_eq!(digit_run(&all_digits, 0), 10);
        assert_eq!(digit_run(&all_digits, 5), 5);
        assert_eq!(digit_run(&all_digits, 10), 0, "past end → 0");
    }

    /// Stage 11.A8c — decision boundaries of `pick_initial_subset`.
    ///
    /// Aimed at two mutants: `==` → `!=` in the
    /// `total_data_bytes == 2 && leading_digits == 2` test, and deletion
    /// of the `!` in `if any_control && !any_lower`.
    #[test]
    fn pick_initial_subset_branches() {
        // Every byte of the input becomes one `Token::Ascii`.
        let ascii = |text: &str| text.bytes().map(Token::Ascii).collect::<Vec<Token>>();

        // Exactly two bytes, both digits → Subset C. With `==` flipped to
        // `!=` on `total_data_bytes == 2`, this two-byte input would no
        // longer take the C shortcut.
        let two_digits = pick_initial_subset(&ascii("12"));
        assert!(matches!(two_digits, Subset::C));

        // Four leading digits satisfy the `leading_digits ≥ 4` rule → C.
        let four_digits = pick_initial_subset(&ascii("1234"));
        assert!(matches!(four_digits, Subset::C));

        // Control byte AND lowercase present:
        //   original  `any_control && !any_lower` = true && false → B
        //   `!` removed `any_control && any_lower` = true && true → A
        let control_with_lower = pick_initial_subset(&ascii("\x01abc"));
        assert!(matches!(control_with_lower, Subset::B));

        // Control byte, no lowercase:
        //   original  → A
        //   `!` removed → condition false, falls through to B
        let control_with_upper = pick_initial_subset(&ascii("\x01ABC"));
        assert!(matches!(control_with_upper, Subset::A));

        // Nothing that calls for Subset A → B.
        let plain_lower = pick_initial_subset(&ascii("abc"));
        assert!(matches!(plain_lower, Subset::B));
    }

    /// Stage 11.A8c — pin `parse_raw_codewords_code128` token parsing and
    /// boundary checks.
    ///
    /// The inputs below treat each token as exactly `^NNN`: 4 bytes (`^`
    /// followed by 3 decimal digits), with accepted values 0..=106.
    /// Per the original note, the parser accumulates the value as
    /// `value = value*10 + (c - b'0')`.
    ///
    /// Mutations targeted:
    ///   * `i + 4 <= input.len()` → `<` would skip the last token.
    ///   * `input[i] != b'^'` → `== b'^'` flips the branch.
    ///   * `value * 10 + …` → `* 100 +` or `+ 10 *` reorders digits.
    ///   * `value > 106` → `>= 106` would reject the STOP token.
    ///   * Trailing-bytes guard removed would silently truncate.
    ///
    /// Every rejection case checks several substrings of the diagnostic
    /// (prefix, predicate, echoed value or offset) and, where useful,
    /// that text belonging to a different rejection arm is absent.
    #[test]
    fn parse_raw_codewords_code128_token_parsing_and_bounds() {
        // Empty input → Ok with an empty vec.
        assert_eq!(parse_raw_codewords_code128(b"").unwrap(), Vec::<u32>::new());

        // Single token "^005" → [5].
        assert_eq!(parse_raw_codewords_code128(b"^005").unwrap(), vec![5]);

        // Two tokens "^001^002" → [1, 2].
        assert_eq!(
            parse_raw_codewords_code128(b"^001^002").unwrap(),
            vec![1, 2]
        );

        // Multi-digit positional value. "^123" would exceed the 106
        // ceiling and be rejected, so "^099" exercises the tens/units
        // positions instead.
        assert_eq!(parse_raw_codewords_code128(b"^099").unwrap(), vec![99]);
        // "^106" — STOP codeword, the largest accepted value.
        assert_eq!(parse_raw_codewords_code128(b"^106").unwrap(), vec![106]);

        // Value > 106 → InvalidData. Expected diagnostic shape:
        //   "code128 raw: codewords must be 0..=106; got {value}"
        // Three anchors: prefix, range predicate, echoed value.
        let err = parse_raw_codewords_code128(b"^107").unwrap_err();
        match err {
            Error::InvalidData(msg) => {
                assert!(
                    msg.contains("code128 raw:"),
                    "boundary+1 diagnostic must carry the code128 raw prefix; got {msg:?}"
                );
                assert!(
                    msg.contains("codewords must be 0..=106"),
                    "boundary+1 diagnostic must carry the predicate + range hint; got {msg:?}"
                );
                assert!(
                    msg.contains("got 107"),
                    "boundary+1 diagnostic must echo the offending value 107; got {msg:?}"
                );
            }
            o => panic!("expected InvalidData, got {o:?}"),
        }
        // Same arm with ^200: a second echoed value shows the message
        // interpolates the parsed integer instead of a fixed "107".
        match parse_raw_codewords_code128(b"^200").unwrap_err() {
            Error::InvalidData(msg) => {
                assert!(
                    msg.contains("got 200"),
                    "^200 diagnostic must echo the offending value 200 (kills hardcoded-107 mutant); got {msg:?}"
                );
                assert!(
                    msg.contains("0..=106"),
                    "^200 diagnostic must carry the 0..=106 range hint; got {msg:?}"
                );
            }
            o => panic!("expected InvalidData for ^200, got {o:?}"),
        }
        // Stage 11.A8c — largest 3-digit token, ^999. Together with 107
        // and 200 this gives three distinct echoed values, plus a guard
        // that the trailing-bytes diagnostic is not the one produced.
        match parse_raw_codewords_code128(b"^999").unwrap_err() {
            Error::InvalidData(msg) => {
                assert!(
                    msg.contains("got 999"),
                    "^999 diagnostic must echo the offending value 999; got {msg:?}"
                );
                assert!(
                    msg.contains("0..=106"),
                    "^999 diagnostic must carry the 0..=106 range hint; got {msg:?}"
                );
                assert!(
                    !msg.contains("trailing"),
                    "wrong arm — trailing-byte diagnostic leaked into bounds check: {msg:?}"
                );
            }
            o => panic!("expected InvalidData for ^999, got {o:?}"),
        }

        // Wrong leading char (no '^') at offset 0 → InvalidData.
        // Stage 11.A8c — three anchors (prefix, predicate, offset 0)
        // plus a cross-arm guard against the trailing-bytes message.
        match parse_raw_codewords_code128(b"X005").unwrap_err() {
            Error::InvalidData(msg) => {
                assert!(
                    msg.contains("code128 raw:"),
                    "missing `code128 raw:` prefix: {msg}"
                );
                assert!(
                    msg.contains("expected `^`"),
                    "missing `expected \\`^\\`` predicate: {msg}"
                );
                assert!(
                    msg.contains("offset 0"),
                    "wrong-leading-char must report offset 0: {msg}"
                );
                assert!(
                    !msg.contains("trailing"),
                    "wrong arm — trailing diagnostic leaked: {msg}"
                );
            }
            o => panic!("X005 should reject as InvalidData, got {o:?}"),
        }

        // Second token missing '^' at offset 4 → InvalidData.
        // Stage 11.A8c — the reported offset (4, not 0) shows the parser
        // advanced past the first, valid token before failing.
        match parse_raw_codewords_code128(b"^001X002").unwrap_err() {
            Error::InvalidData(msg) => {
                assert!(
                    msg.contains("code128 raw:"),
                    "missing `code128 raw:` prefix: {msg}"
                );
                assert!(
                    msg.contains("expected `^`"),
                    "missing `expected \\`^\\`` predicate: {msg}"
                );
                assert!(
                    msg.contains("offset 4"),
                    "second-token error must report offset 4 (not 0): {msg}"
                );
                assert!(
                    !msg.contains("0..=106"),
                    "wrong arm — value-range diagnostic leaked: {msg}"
                );
            }
            o => panic!("^001X002 should reject as InvalidData, got {o:?}"),
        }

        // Non-digit in the third digit position (byte offset 3) of the
        // first token; the offender is 'A' = 0x41.
        // Stage 11.A8c — four anchors: predicate, hex echo, offset 3,
        // and a cross-arm guard against the missing-`^` message.
        match parse_raw_codewords_code128(b"^00A").unwrap_err() {
            Error::InvalidData(msg) => {
                assert!(
                    msg.contains("tokens must be ^NNN with 3 digits"),
                    "missing 3-digit predicate: {msg}"
                );
                assert!(
                    msg.contains("0x41"),
                    "missing hex echo `0x41` for offending 'A': {msg}"
                );
                assert!(
                    msg.contains("offset 3"),
                    "third-position non-digit must report offset 3: {msg}"
                );
                assert!(
                    !msg.contains("expected `^`"),
                    "wrong arm — wrong-leading-char diagnostic leaked: {msg}"
                );
            }
            o => panic!("^00A should reject as InvalidData, got {o:?}"),
        }

        // Non-digit in the first digit position (byte offset 1) of the
        // first token; the offender is again 'A' = 0x41.
        // Stage 11.A8c — shows the digit check covers the byte right
        // after `^`, and that the reported offset is 1 rather than 3.
        match parse_raw_codewords_code128(b"^A00").unwrap_err() {
            Error::InvalidData(msg) => {
                assert!(
                    msg.contains("tokens must be ^NNN with 3 digits"),
                    "missing 3-digit predicate: {msg}"
                );
                assert!(
                    msg.contains("0x41"),
                    "missing hex echo `0x41` for offending 'A': {msg}"
                );
                assert!(
                    msg.contains("offset 1"),
                    "first-digit non-digit must report offset 1: {msg}"
                );
                assert!(
                    !msg.contains("offset 3"),
                    "wrong-position arm — offset 3 must NOT appear: {msg}"
                );
            }
            o => panic!("^A00 should reject as InvalidData, got {o:?}"),
        }

        // Trailing bytes (length not a multiple of 4) → InvalidData with
        // the "trailing" message. Stage 11.A8c — three anchors (prefix,
        // byte count, offset) so that dropping any format argument is
        // detected.
        match parse_raw_codewords_code128(b"^001^").unwrap_err() {
            Error::InvalidData(msg) => {
                assert!(
                    msg.contains("code128 raw:"),
                    "missing `code128 raw:` prefix: {msg}"
                );
                assert!(
                    msg.contains("1 trailing byte(s)"),
                    "missing trailing-count predicate (1 byte after parsed token): {msg}"
                );
                assert!(
                    msg.contains("offset 4"),
                    "trailing diagnostic must report offset 4 (i after parsing ^001): {msg}"
                );
            }
            o => panic!("^001^ should reject as InvalidData, got {o:?}"),
        }

        // One-byte input "^": no complete token exists, so the whole
        // input is trailing. Stage 11.A8c — same byte count as the case
        // above but at offset 0, so the reported offset is shown to
        // vary with the input.
        match parse_raw_codewords_code128(b"^").unwrap_err() {
            Error::InvalidData(msg) => {
                assert!(
                    msg.contains("code128 raw:"),
                    "missing `code128 raw:` prefix: {msg}"
                );
                assert!(
                    msg.contains("1 trailing byte(s)"),
                    "single `^` must report 1 trailing byte: {msg}"
                );
                assert!(
                    msg.contains("offset 0"),
                    "single-byte trailing diagnostic must report offset 0: {msg}"
                );
            }
            o => panic!("single `^` should reject as InvalidData, got {o:?}"),
        }
    }

    /// Stage 11.A8c — smoke-level pins for `encode_codes`.
    ///
    /// Intended contract, as described by the original notes on this
    /// test: check = (`codes[0]` + Σ `codes[i]`·i for i ≥ 1) mod 103, and
    /// the emitted stream is `codes ++ [check, STOP]`.
    ///
    /// What the assertions below actually establish:
    ///   * an empty stream is rejected as `InvalidData` with a specific
    ///     three-part message;
    ///   * the `hri` argument comes back as the pattern's `text`;
    ///   * a one-code symbol has non-zero width and a three-code symbol
    ///     is strictly wider;
    ///   * changing only the start code changes `bars`.
    ///
    /// NOTE(review): nothing here pins the numeric check value or an
    /// exact width, so `% 103` → `% 102`/`% 104` and dropped
    /// `push(check)` / `push(STOP)` mutants are not distinguished by this
    /// test alone — confirm they are covered elsewhere.
    #[test]
    fn encode_codes_checksum_and_suffix_invariants() {
        // Empty stream → InvalidData. The message is matched on three
        // separate fragments so that an error borrowed from another
        // rejection path does not pass a variant-only check.
        let msg = match encode_codes(&[], "hri").unwrap_err() {
            Error::InvalidData(text) => text,
            other => panic!("empty codes must yield InvalidData; got {other:?}"),
        };
        let fragments = [
            ("Code 128", "diagnostic must carry the symbology tag"),
            (
                "codeword stream",
                "diagnostic must specify 'codeword stream' (distinguishes from input-string rejection paths)",
            ),
            (
                "must not be empty",
                "diagnostic must carry the empty-rejection predicate",
            ),
        ];
        for (needle, complaint) in fragments {
            assert!(msg.contains(needle), "{complaint}; got {msg:?}");
        }

        // Start code only. By the formula above the weighted sum is just
        // the start value, so for START_A (103) the check would be 0;
        // only text propagation and a non-zero width are asserted.
        let single = encode_codes(&[START_A], "X").unwrap();
        assert_eq!(single.text.as_deref(), Some("X"));
        let single_width = single.total_width();
        assert!(single_width > 0, "non-empty pattern");

        // Start code plus two data codes. By the formula the sum is
        // 104 + 5·1 + 9·2 = 127 and the check 127 mod 103 = 24; the
        // assertion is only that more codewords give a wider symbol.
        let triple = encode_codes(&[START_B, 5, 9], "abc").unwrap();
        assert!(
            triple.total_width() > single_width,
            "3-code symbol must be wider than 1-code"
        );

        // Same data code, different start code → different bars. Guards
        // against the start value being read from the wrong position.
        let from_a = encode_codes(&[START_A, 0], "x").unwrap();
        let from_b = encode_codes(&[START_B, 0], "x").unwrap();
        assert_ne!(
            from_a.bars, from_b.bars,
            "different start codes → different bar pattern"
        );

        // The HRI string is returned verbatim as the pattern text.
        let labelled = encode_codes(&[START_A], "MY_HRI").unwrap();
        assert_eq!(labelled.text.as_deref(), Some("MY_HRI"));
    }

    /// Stage 11.A8c — ASCII-rejection boundaries as seen through
    /// `encode`, plus one raw-mode rejection.
    ///
    /// Targets the `> 0x7F` comparisons in `parse_input_with_fncs` and
    /// `encode_tokens` and their `>` → `==` / `>=` mutants:
    ///   * U+0080 (first value past the boundary) and U+00FF are both
    ///     rejected, each with its own echoed character;
    ///   * U+007F (DEL) is accepted.
    ///
    /// The raw-mode input at the end contains no `^`, so the asserted
    /// diagnostic is the token-shape one ("tokens must be ^NNN … at
    /// offset 0"), not the `value > 106` range check.
    #[test]
    fn ascii_rejection_boundaries_in_parse_and_encode() {
        // Each entry pairs an input with the `{c:?}` rendering of its
        // non-ASCII char. Expected diagnostic shape:
        //   "Code 128 only supports ASCII; got '\u{80}'"  (or 'ÿ')
        // `char`'s Debug output escapes U+0080 as '\u{80}' but prints
        // U+00FF as the literal 'ÿ'. Two different echoes mean a
        // hard-coded character in the message would fail one of them.
        let rejected = [("A\u{0080}B", "'\\u{80}'"), ("A\u{00FF}B", "'ÿ'")];
        for (input, want_char) in rejected {
            let msg = match encode(input, &Options::default()).unwrap_err() {
                Error::InvalidData(text) => text,
                other => panic!("non-ASCII {input:?} must yield InvalidData; got {other:?}"),
            };
            assert!(
                msg.contains("Code 128"),
                "diagnostic for {input:?} must carry the symbology tag; got {msg:?}"
            );
            assert!(
                msg.contains("only supports ASCII"),
                "diagnostic for {input:?} must carry the ASCII-only predicate; got {msg:?}"
            );
            assert!(
                msg.contains(want_char),
                "diagnostic for {input:?} must echo the offending char {want_char:?} via {{c:?}}; got {msg:?}"
            );
        }

        // U+007F (DEL) is the last ASCII value and must still encode.
        // A `>= 0x7F` or `== 0x7F` mutant would reject it.
        let del = encode("\u{007F}", &Options::default());
        assert!(del.is_ok(), "0x7F should be accepted, got {del:?}");

        // Raw mode. Neither input below contains a `^`.
        let opts = Options::default().with("raw", "true");

        // NOTE(review): this result has never been asserted on; the call
        // is kept so the test executes exactly what it always did.
        let _unchecked = encode("106106106106", &opts);

        // Expected diagnostic shape for a token that does not start
        // with `^`:
        //   "code128 raw: tokens must be ^NNN; expected `^` at offset 0"
        let raw_msg = match encode("107000000000", &opts).unwrap_err() {
            Error::InvalidData(text) => text,
            other => panic!("raw=true non-^-prefixed input must yield InvalidData; got {other:?}"),
        };
        let anchors = [
            (
                "code128 raw:",
                "diagnostic must carry the 'code128 raw:' prefix",
            ),
            (
                "tokens must be ^NNN",
                "diagnostic must show the expected token format",
            ),
            (
                "at offset 0",
                "diagnostic must echo the offset of the failing token",
            ),
        ];
        for (needle, complaint) in anchors {
            assert!(raw_msg.contains(needle), "{complaint}; got {raw_msg:?}");
        }
    }

    /// Stage 11.A8c — boundary table for
    /// `input_triggers_new_encoder_divergence`.
    ///
    /// Mutation classes targeted: whole-function replacement, the
    /// `>= 0x80` Latin-1 boundary, the `count >= 4 && count % 2 == 0`
    /// guard, the `count += 1` / `j += 1` loop increments, and deletion
    /// of `!` in `!b.is_ascii_digit()`.
    #[test]
    fn input_triggers_new_encoder_divergence_pins_boundaries() {
        // (input, expected verdict), in the order the boundaries build
        // on each other.
        let table = [
            // No Latin-1 character → never fires, with or without digits.
            ("", false),
            ("ABC", false),
            ("123456", false),
            ("ABCDEF123456", false),
            // Latin-1 character with no digit run after it.
            ("\u{0080}", false),
            ("\u{00FF}AB", false),
            // Latin-1 followed by 1, 2 or 3 digits: run shorter than 4.
            ("\u{0080}1", false),
            ("\u{0080}12", false),
            ("\u{0080}123", false),
            // Latin-1 followed by exactly 4 digits (even, ≥ 4) → fires.
            ("\u{0080}1234", true),
            // 5 digits still fires: the full run is odd, but per the
            // original note a later scan position sees the 4-digit tail.
            ("\u{0080}12345", true),
            // 6 digits (even, ≥ 4) → fires.
            ("\u{0080}123456", true),
            // 7 digits → fires, via the same even-tail reasoning as 5.
            ("\u{0080}1234567", true),
            // 8 digits → fires.
            ("\u{0080}12345678", true),
            // Letters between the Latin-1 char and the digits break the
            // association → does not fire.
            ("\u{0080}AB1234", false),
            // Latin-1 in mid-string directly followed by 4 digits → fires.
            ("ABC\u{0080}1234", true),
            // One intervening letter is enough to prevent firing.
            ("\u{0080}A1234", false),
        ];

        for (input, fires) in table {
            assert_eq!(
                input_triggers_new_encoder_divergence(input),
                fires,
                "unexpected verdict for {input:?}"
            );
        }
    }

    /// Stage 11.A8c — `check_code128_opts` parsing paths.
    ///
    /// Targets two mutants: deletion of the `"false"` match arm, and
    /// `&&` → `||` in the `raw && newencoder` override.
    #[test]
    fn check_code128_opts_parses_explicit_false_and_raw_overrides_newencoder() {
        const BOOL_KEYS: [&str; 6] = [
            "raw",
            "parse",
            "newencoder",
            "suppressc",
            "unlatchextbeforec",
            "parsefnc",
        ];

        // An explicit "false" must parse for every key. If the "false"
        // arm were deleted, the value would fall into the catch-all and
        // come back as an error.
        for key in BOOL_KEYS {
            let opts = Options::default().with(key, "false");
            let parsed = match check_code128_opts(&opts) {
                Ok(flags) => flags,
                Err(e) => panic!("check_code128_opts({key}=false) failed: {e}"),
            };
            // No flag may be set as a result.
            assert!(
                !(parsed.raw || parsed.parse || parsed.newencoder),
                "{key}"
            );
            assert!(
                !(parsed.suppressc || parsed.unlatchextbeforec || parsed.parsefnc),
                "{key}"
            );
        }

        // Only raw requested → raw on, newencoder off.
        let raw_only = Options::default().with("raw", "true");
        let flags = check_code128_opts(&raw_only).unwrap();
        assert!(flags.raw && !flags.newencoder);

        // Only newencoder requested → newencoder on, raw off. Under the
        // `||` mutant the override would fire here and clear newencoder.
        let newencoder_only = Options::default().with("newencoder", "true");
        let flags = check_code128_opts(&newencoder_only).unwrap();
        assert!(flags.newencoder && !flags.raw);

        // Both requested → raw wins and newencoder is forced off. Paired
        // with the newencoder-only case above, this distinguishes `&&`
        // from `||` in the override condition.
        let both = Options::default()
            .with("raw", "true")
            .with("newencoder", "true");
        let flags = check_code128_opts(&both).unwrap();
        assert!(
            flags.raw && !flags.newencoder,
            "raw=true should override newencoder"
        );
    }

    /// Stage 11.A8c — the rejection path of `check_code128_opts` for
    /// values that are neither `"true"` nor `"false"`.
    ///
    /// Checks performed:
    ///   - each of the six boolean options given `"maybe"` yields
    ///     `InvalidOption`, with a message containing `{key}="maybe"`,
    ///     "must be", "true" and "false";
    ///   - for `raw`, the empty string, `"TRUE"`, `"True"` and `"1"` are
    ///     likewise rejected, each echoed in Debug form.
    ///
    /// Mutations targeted:
    ///   * catch-all `_ => Err(InvalidOption(...))` replaced by
    ///     `_ => Ok(())` (garbage values would pass silently);
    ///   * edits to the message text ("true" → "yes" etc.);
    ///   * key or value interpolation removed from the message.
    #[test]
    fn check_code128_opts_rejects_invalid_boolean_values() {
        // Every boolean option must refuse a non-boolean word.
        for key in [
            "raw",
            "parse",
            "newencoder",
            "suppressc",
            "unlatchextbeforec",
            "parsefnc",
        ] {
            let opts = Options::default().with(key, "maybe");
            let outcome = check_code128_opts(&opts);
            let Err(Error::InvalidOption(msg)) = outcome else {
                panic!("{key}=maybe should reject as InvalidOption, got {outcome:?}");
            };
            let echoed = format!("{key}=\"maybe\"");
            assert!(
                msg.contains(&echoed)
                    && msg.contains("must be")
                    && msg.contains("true")
                    && msg.contains("false"),
                "expected diagnostic naming {key} + value + 'true'/'false', got: {msg}"
            );
        }

        // The remaining cases all use `raw` and differ only in the
        // rejected value. Each value has a distinct Debug rendering, so
        // together they show the message interpolates the caller's value
        // instead of a fixed literal.

        // Empty string.
        let opts = Options::default().with("raw", "");
        let outcome = check_code128_opts(&opts);
        let Err(Error::InvalidOption(msg)) = outcome else {
            panic!("raw=\"\" should reject as InvalidOption, got {outcome:?}");
        };
        assert!(
            msg.contains("raw=\"\"") && msg.contains("must be") && msg.contains("\"true\""),
            "raw=\"\" diagnostic must Debug-echo the empty string + predicate; got {msg}"
        );

        // Matching is case-sensitive: upper- and mixed-case spellings
        // are rejected and echoed with their case intact.
        for shouted in ["TRUE", "True"] {
            let opts = Options::default().with("raw", shouted);
            let outcome = check_code128_opts(&opts);
            let Err(Error::InvalidOption(msg)) = outcome else {
                panic!("raw={shouted} should reject as InvalidOption, got {outcome:?}");
            };
            let echoed = format!("raw={shouted:?}");
            assert!(
                msg.contains(&echoed) && msg.contains("must be"),
                "raw={shouted} diagnostic must Debug-echo {shouted:?} verbatim (case-preserved); got {msg}"
            );
        }

        // A numeric string is not a boolean either.
        let opts = Options::default().with("raw", "1");
        let outcome = check_code128_opts(&opts);
        let Err(Error::InvalidOption(msg)) = outcome else {
            panic!("raw=1 should reject as InvalidOption, got {outcome:?}");
        };
        assert!(
            msg.contains("raw=\"1\"") && msg.contains("must be"),
            "raw=1 diagnostic must Debug-echo \"1\" verbatim; got {msg}"
        );
    }

    /// Stage 11.A8c — exercise `pick_codes_no_subset_c` with diverse
    /// suppressc inputs to pin the 17 mutants in that function
    /// (boundary checks `< with <= / >`, `+=` arithmetic, `delete !`).
    ///
    /// Every corpus entry is printable ASCII without control bytes, so
    /// with `suppressc=true` each input character is expected to become
    /// exactly one codeword with no mid-message subset switch. The test
    /// therefore pins the exact number of emitted widths per input instead
    /// of merely checking that the output is non-empty.
    #[test]
    fn suppressc_corpus_pins_pick_codes_no_subset_c_paths() {
        // Width counts as they appear in the byte-for-byte goldens of this
        // module: each codeword is 6 bar/space widths, the stop pattern 7
        // (e.g. "12345" with suppressc=true → 6 * (5 + 2) + 7 = 49 widths).
        const WIDTHS_PER_CODEWORD: usize = 6;
        const STOP_WIDTHS: usize = 7;
        // Start codeword + check codeword.
        const FRAMING_CODEWORDS: usize = 2;

        let suppressed = Options::default().with("suppressc", "true");

        // (input, number of data codewords expected under suppressc=true)
        let cases = [
            ("12345678", 8),  // exact-pair length
            ("123456789", 9), // odd length
            ("1", 1),         // single digit
            ("12", 2),
            ("ABC123abc", 9), // mixed with lowercase forcing B initially
            ("12ABC34", 7),   // digits-letters-digits transition
        ];
        for (text, expected_chars) in cases {
            // Guard the table itself: one data codeword per input byte.
            assert_eq!(
                text.len(),
                expected_chars,
                "corpus row {text:?} declares {expected_chars} chars but has {}",
                text.len()
            );

            let result = encode(text, &suppressed)
                .unwrap_or_else(|e| panic!("encode({text:?}) failed: {e}"));

            // Exact size: start + one codeword per character + check + stop.
            // Any mutant that skips, repeats, or pairs characters changes
            // the codeword count and therefore this length.
            let expected_widths =
                WIDTHS_PER_CODEWORD * (expected_chars + FRAMING_CODEWORDS) + STOP_WIDTHS;
            assert_eq!(
                result.bars.len(),
                expected_widths,
                "encode({text:?}, suppressc=true) must emit start + {expected_chars} data \
                 codewords + check + stop"
            );

            // For pure-digit inputs the default encoder may compact digit
            // pairs; suppressing subset C can only keep the symbol the same
            // size or make it longer, never shorter.
            if text.bytes().all(|b| b.is_ascii_digit()) {
                let default = encode(text, &Options::default()).unwrap_or_else(|e| {
                    panic!("encode({text:?}, default opts) (suppressc=true comparison baseline) must succeed; got Err: {e}")
                });
                assert!(
                    result.bars.len() >= default.bars.len(),
                    "suppressc=true produced fewer bar elements than the default encoding for {text:?}"
                );
            }
        }
    }

    /// Stage 11.A8c — one-shot diagnostic: prints the widths the default
    /// encoder emits for "1234 5678" (the subset C → space transition) so
    /// the pinned golden in the byte-for-byte killer test can be
    /// re-captured after an intentional encoder change. Marked `#[ignore]`
    /// because it asserts nothing.
    #[test]
    #[ignore]
    fn dump_pick_codes_subset_c_to_space_bars() {
        let default_opts = Options::default();
        let encoded = encode("1234 5678", &default_opts).unwrap();
        let bars = encoded.bars;
        eprintln!("CASE 1234_5678 bars[{}]={:?}", bars.len(), bars);
    }

    /// Stage 11.A8c — kill code128.rs:720:66 (`< with <=` in the pick_codes
    /// Subset-C → non-digit-pair branch).
    ///
    /// The original guard is `b < 0x20`, so a space (0x20) leaves Subset C
    /// for Subset B (codeword 100). The `b <= 0x20` mutant would send it to
    /// Subset A (codeword 101) instead, which yields a different codeword
    /// and a different width sequence. The golden below is the production
    /// output for "1234 5678" under default options.
    #[test]
    fn pick_codes_subset_c_to_space_transitions_to_subset_b() {
        // Captured 2026-05-27 from the current encoder, laid out one
        // codeword (6 widths) per line; the stop pattern has 7 widths.
        // Under the mutant the switch codeword after "34" would be
        // Code A (101) rather than Code B (100), changing the bars that
        // follow the digit pair.
        const GOLDEN: [u8; 61] = [
            2, 1, 1, 2, 3, 2, // start (subset C)
            1, 1, 2, 2, 3, 2, // "12"
            1, 3, 1, 1, 2, 3, // "34"
            1, 1, 4, 1, 3, 1, // switch to subset B (100)
            2, 1, 2, 2, 2, 2, // ' '
            1, 1, 3, 1, 4, 1, // switch back to subset C
            3, 3, 1, 1, 2, 1, // "56"
            2, 4, 1, 1, 1, 2, // "78"
            1, 3, 2, 2, 1, 2, // check
            2, 3, 3, 1, 1, 1, 2, // stop
        ];

        let encoded = match encode("1234 5678", &Options::default()) {
            Ok(pattern) => pattern,
            Err(e) => {
                panic!("encode(\"1234 5678\", default) (pick_codes Subset C → space transition golden) must succeed: {e:?}")
            }
        };

        let expected: &[u8] = &GOLDEN;
        assert_eq!(
            encoded.bars.as_slice(),
            expected,
            "pick_codes Subset C → space transition regressed: \
             a mutant on line 720 (`b < 0x20` → `b <= 0x20`) would \
             route 0x20 (space) to Subset A instead of Subset B"
        );
    }

    /// Stage 11.A8c — kill code128.rs:633:19 (`> with ==` / `> with >=`
    /// in the encode_tokens 0x7F ASCII boundary check). The original
    /// rejects only b > 127; the two mutants reject either only b == 127
    /// or every b >= 127. Two probe bytes distinguish all three:
    ///
    /// * `Token::Ascii(0x7F)` (127) — original Ok; mutant `==` Err;
    ///   mutant `>=` Err. Kills both mutants.
    /// * `Token::Ascii(0x80)` (128) — original Err; mutant `==` Ok
    ///   (since 128 != 127); mutant `>=` Err. Kills the `==` mutant.
    ///
    /// The rejection is additionally required to be `Error::InvalidData`
    /// with a message naming the ASCII-only constraint and the offending
    /// byte.
    #[test]
    fn encode_tokens_ascii_boundary_rejects_above_0x7f_only() {
        // 0x7F (DEL) must be accepted — it's the last ASCII byte.
        let tokens = vec![Token::Ascii(0x7F)];
        let result = encode_tokens(&tokens, &Options::default());
        assert!(
            result.is_ok(),
            "encode_tokens(0x7F) must succeed; both `> with ==` and \
             `> with >=` mutants on line 633 would reject 0x7F instead: {result:?}"
        );

        // 0x80 (first non-ASCII byte) must be rejected — the `==` mutant
        // would let it through.
        let tokens = vec![Token::Ascii(0x80)];
        let err = encode_tokens(&tokens, &Options::default()).expect_err(
            "encode_tokens(0x80) must error; `> with ==` mutant on line 633 would Ok this",
        );
        // Echo the actual error on a variant mismatch so a failure shows
        // which rejection path fired (same shape as the sibling tests).
        let Error::InvalidData(msg) = err else {
            panic!("encode_tokens(0x80) must yield InvalidData; got {err:?}");
        };
        assert!(
            msg.contains("only supports ASCII") && msg.contains("0x80"),
            "encode_tokens(0x80) diagnostic must name the ASCII-only constraint + offending byte; got {msg}"
        );
    }

    /// Stage 11.A8c — kill code128.rs:477:26 / 477:30 / 488:26 /
    /// 488:30 / 494:26 — parse_text_escapes_code128 boundary mutants.
    ///
    /// Three probe inputs, one per guard:
    ///
    /// * `b"^X"` (len=2) targets line 477 (`+ with *` on
    ///   `i + 1 + 2 <= input.len()`). The original requires 3 bytes for a
    ///   2-char escape, skips the arm and returns Ok("^X"); the mutant
    ///   enters the arm with only 2 bytes and panics slicing
    ///   `input[i+1..i+3]`.
    ///
    /// * `b"^99"` (len=3) targets line 488 (`+ with *` on
    ///   `i + 1 + 3 <= input.len()`). "99" is not a 2-char escape, and the
    ///   original requires 4 bytes for a 3-digit ordinal, so it falls
    ///   through and returns Ok("^99"); the mutant enters the 3-digit arm
    ///   with only 3 bytes and panics slicing `input[i+1..i+4]`.
    ///
    /// * `b"^255"` targets line 494 (`> with >=` on `value > 255`). Both
    ///   the original and the mutant return Err, but via different paths:
    ///   the original accepts ordinal 255 and then fails the UTF-8
    ///   conversion ("not valid UTF-8"), whereas the mutant rejects 255 at
    ///   the ordinal-bound check ("ordinal must be 000..=255"). The test
    ///   tells them apart by the diagnostic text.
    #[test]
    fn parse_text_escapes_boundary_inputs_kill_arithmetic_mutants() {
        // 2-char-arm guard boundary: too short for any escape body.
        let two_byte = parse_text_escapes_code128(b"^X").expect(
            "parse_text_escapes_code128(b\"^X\") (len=2) must Ok — line 477 `+ with *` mutant \
             enters the 2-char arm with insufficient input and panics on input[1..3]",
        );
        assert_eq!(two_byte, "^X", "^X must fall through to literal-^ + 'X'");

        // 3-digit-arm guard boundary: only two digits follow the caret.
        let three_byte = parse_text_escapes_code128(b"^99").expect(
            "parse_text_escapes_code128(b\"^99\") (len=3) must Ok — line 488 `+ with *` mutant \
             enters the 3-digit arm with insufficient input and panics on input[1..4]",
        );
        assert_eq!(
            three_byte, "^99",
            "^99 must fall through to literal-^ + '9' + '9'"
        );

        // Exact value-255 boundary. The original lets 255 through the
        // ordinal check and then trips on the lone 0xFF byte during UTF-8
        // conversion; the `>=` mutant stops one step earlier with the
        // ordinal-range message. Same input, different diagnostics.
        let failure = parse_text_escapes_code128(b"^255")
            .expect_err("parse_text_escapes_code128(b\"^255\") errors via UTF-8 conversion");
        let msg = match failure {
            Error::InvalidData(text) => text,
            _ => panic!("parse_text_escapes_code128(b\"^255\") must yield InvalidData"),
        };

        let failed_in_utf8_conversion = msg.contains("not valid UTF-8");
        assert!(
            failed_in_utf8_conversion,
            "^255 must error via the UTF-8 conversion path (byte 0xFF substituted then UTF-8 \
             fails) — line 494 `> with >=` mutant would reject value=255 at the ordinal-bound \
             check instead, producing the 'ordinal must be 000..=255' message; got {msg}"
        );

        let failed_in_ordinal_check = msg.contains("ordinal must be 000..=255");
        assert!(
            !failed_in_ordinal_check,
            "^255 must NOT route through the ordinal-bound rejection — that's the mutant \
             behaviour; got {msg}"
        );
    }

    /// Stage 11.A8c — byte-for-byte goldens for pick_codes_no_subset_c
    /// (suppressc=true) across 8 inputs chosen to walk every branch of
    /// the function:
    ///
    /// * `"ABC"` — stays in Subset B throughout (kills the 547:24
    ///   match-guard `b<0x20 → true/false` mutants: forcing the guard
    ///   true would start in Subset A and change every bar).
    /// * `"\x01ABC"` — starts in Subset A (first byte < 0x20) and stays
    ///   there because 'A'/'B'/'C' (65/66/67) are < 0x60 (kills the
    ///   547:26 `<` / `==` / `<=` / `>` variants).
    /// * `"abc"` — starts in Subset B; 'a','b','c' all go through
    ///   value_in_b (kills `delete !` at 543:23 because data_start
    ///   finds the first non-FNC token at index 0).
    /// * `"ABC\x01XYZ"` — Subset B → Subset A switch mid-message when
    ///   0x01 < 0x20 is met in B (kills the 592:22 `<` mutants and the
    ///   562:15 / 571:15 / 586:15 `+=` arithmetic).
    /// * `"\x01abc"` — Subset A → Subset B switch when 'a' (0x61) >= 0x60
    ///   is met in A (kills the 602:22 `>=` → `<` mutant and the 608:19
    ///   `+=` arithmetic).
    /// * `"ABCdef"` — 'd','e','f' are >= 0x60 but the message is already
    ///   in B, so no switch (kills `+=` mutants on the value_in_b path).
    /// * `"12345"` — digit run kept in Subset B because subset C is
    ///   suppressed (kills mutants that affect digit encoding).
    /// * `"x"` — single-byte minimal path (kills the `< tokens.len()`
    ///   variants at 545:36: data_start=0, len=1, only `<` works).
    ///
    /// Goldens captured 2026-05-27 from the original implementation
    /// (pre-mutation); any of the 18 surviving `pick_codes_no_subset_c`
    /// mutants from the v4 baseline produces a different bar sequence for
    /// at least one of these inputs.
    #[test]
    fn pick_codes_no_subset_c_byte_for_byte_goldens() {
        let opts = Options::default().with("suppressc", "true");

        // (input bytes, expected widths). Each golden is laid out one
        // codeword (6 widths) per line, ending with the 7-width stop.
        #[allow(clippy::type_complexity)]
        let cases: &[(&[u8], &[u8])] = &[
            (
                b"ABC",
                &[
                    2, 1, 1, 2, 1, 4, // start (subset B)
                    1, 1, 1, 3, 2, 3, // 'A'
                    1, 3, 1, 1, 2, 3, // 'B'
                    1, 3, 1, 3, 2, 1, // 'C'
                    2, 2, 2, 1, 2, 2, // check
                    2, 3, 3, 1, 1, 1, 2, // stop
                ],
            ),
            (
                b"\x01ABC",
                &[
                    2, 1, 1, 4, 1, 2, // start (subset A)
                    1, 2, 1, 1, 2, 4, // 0x01
                    1, 1, 1, 3, 2, 3, // 'A'
                    1, 3, 1, 1, 2, 3, // 'B'
                    1, 3, 1, 3, 2, 1, // 'C'
                    1, 1, 1, 4, 2, 2, // check
                    2, 3, 3, 1, 1, 1, 2, // stop
                ],
            ),
            (
                b"abc",
                &[
                    2, 1, 1, 2, 1, 4, // start (subset B)
                    1, 2, 1, 1, 2, 4, // 'a'
                    1, 2, 1, 4, 2, 1, // 'b'
                    1, 4, 1, 1, 2, 2, // 'c'
                    2, 1, 4, 1, 2, 1, // check
                    2, 3, 3, 1, 1, 1, 2, // stop
                ],
            ),
            (
                b"ABC\x01XYZ",
                &[
                    2, 1, 1, 2, 1, 4, // start (subset B)
                    1, 1, 1, 3, 2, 3, // 'A'
                    1, 3, 1, 1, 2, 3, // 'B'
                    1, 3, 1, 3, 2, 1, // 'C'
                    3, 1, 1, 1, 4, 1, // switch to subset A
                    1, 2, 1, 1, 2, 4, // 0x01
                    3, 3, 1, 1, 2, 1, // 'X'
                    3, 1, 2, 1, 1, 3, // 'Y'
                    3, 1, 2, 3, 1, 1, // 'Z'
                    2, 4, 1, 2, 1, 1, // check
                    2, 3, 3, 1, 1, 1, 2, // stop
                ],
            ),
            (
                b"\x01abc",
                &[
                    2, 1, 1, 4, 1, 2, // start (subset A)
                    1, 2, 1, 1, 2, 4, // 0x01
                    1, 1, 4, 1, 3, 1, // switch to subset B
                    1, 2, 1, 1, 2, 4, // 'a'
                    1, 2, 1, 4, 2, 1, // 'b'
                    1, 4, 1, 1, 2, 2, // 'c'
                    3, 2, 2, 2, 1, 1, // check
                    2, 3, 3, 1, 1, 1, 2, // stop
                ],
            ),
            (
                b"ABCdef",
                &[
                    2, 1, 1, 2, 1, 4, // start (subset B)
                    1, 1, 1, 3, 2, 3, // 'A'
                    1, 3, 1, 1, 2, 3, // 'B'
                    1, 3, 1, 3, 2, 1, // 'C'
                    1, 4, 1, 2, 2, 1, // 'd'
                    1, 1, 2, 2, 1, 4, // 'e'
                    1, 1, 2, 4, 1, 2, // 'f'
                    1, 3, 2, 2, 1, 2, // check
                    2, 3, 3, 1, 1, 1, 2, // stop
                ],
            ),
            (
                b"12345",
                &[
                    2, 1, 1, 2, 1, 4, // start (subset B)
                    1, 2, 3, 2, 2, 1, // '1'
                    2, 2, 3, 2, 1, 1, // '2'
                    2, 2, 1, 1, 3, 2, // '3'
                    2, 2, 1, 2, 3, 1, // '4'
                    2, 1, 3, 2, 1, 2, // '5'
                    2, 1, 4, 1, 2, 1, // check
                    2, 3, 3, 1, 1, 1, 2, // stop
                ],
            ),
            (
                b"x",
                &[
                    2, 1, 1, 2, 1, 4, // start (subset B)
                    4, 2, 1, 2, 1, 1, // 'x'
                    2, 1, 2, 1, 4, 1, // check
                    2, 3, 3, 1, 1, 1, 2, // stop
                ],
            ),
        ];

        for &(input, want) in cases {
            // All rows are ASCII; the fallback only keeps the conversion
            // total.
            let text = std::str::from_utf8(input).unwrap_or("<non-utf8>");
            let encoded = match encode(text, &opts) {
                Ok(pattern) => pattern,
                Err(e) => panic!(
                    "encode({input:?}, suppressc=true) (pick_codes_no_subset_c byte-for-byte oracle row) must succeed: {e:?}",
                ),
            };
            assert_eq!(
                encoded.bars.as_slice(),
                want,
                "pick_codes_no_subset_c bar mismatch for {input:?} ({text:?}): \
                 a mutant in pick_codes_no_subset_c (subset selection / \
                 `i += 1` arithmetic / `b < 0x20` / `b >= 0x60` guards) \
                 has changed the encoded codeword stream"
            );
        }
    }

    /// Stage 11.A8c-L — complements the byte-for-byte golden test for
    /// pick_codes_no_subset_c with the FNC1/FNC2/FNC3, leading-space and
    /// Link paths that its 8 rows do not reach. Instead of full goldens,
    /// each output is pinned by a compact fingerprint:
    /// `(bars.len(), Σ_i bar_i·(i+1)·2654435761)` with wrapping arithmetic,
    /// captured from the current oracle-matched output. Targets the 9
    /// surviving L545/547/562/571/586/592 mutants (counter increment +
    /// b<0x20 boundary) in the suppressc=true control-byte / FNC /
    /// leading-space branches.
    #[test]
    fn pick_codes_no_subset_c_fnc_and_space_paths_pinned() {
        /// Encodes `payload` with suppressc=true (and parsefnc=true when
        /// requested) and returns `(width count, position-weighted sum)`.
        fn fp(payload: &str, with_parsefnc: bool) -> (usize, u64) {
            let base = Options::default().with("suppressc", "true");
            let opts = if with_parsefnc {
                base.with("parsefnc", "true")
            } else {
                base
            };
            let encoded = match encode(payload, &opts) {
                Ok(pattern) => pattern,
                Err(e) => panic!("encode({payload:?}) ok: {e:?}"),
            };
            let checksum = encoded
                .bars
                .iter()
                .enumerate()
                .fold(0u64, |acc, (idx, &width)| {
                    // 1-based position scaled by a fixed odd multiplier.
                    let weight = (idx as u64).wrapping_add(1).wrapping_mul(2_654_435_761);
                    acc.wrapping_add((width as u64).wrapping_mul(weight))
                });
            (encoded.bars.len(), checksum)
        }

        // (payload, parsefnc enabled?, expected (bars.len, fingerprint))
        let cases: &[(&str, bool, (usize, u64))] = &[
            // b = 0x20 boundary on L547 / L592: space first, space mid-run.
            (" ABC", false, FP_LEADING_SPACE),
            ("ABC XYZ", false, FP_SPACE_MID),
            // L562 `i += 1` advance: FNC1 before any data.
            ("^FNC1ABC", true, FP_FNC1_START),
            // FNC1 sandwiched between two data runs.
            ("ABC^FNC1XYZ", true, FP_FNC1_MID),
            // L571 `i += 1` advance: FNC2 / FNC3.
            ("^FNC2ABC", true, FP_FNC2_START),
            ("^FNC3ABC", true, FP_FNC3_START),
            // Space followed by lower-case: subset-B path plus the space.
            (" abc", false, FP_SPACE_LOWER),
            // L586 `i += 1` advance: LinkA / LinkC composite-linkage tokens.
            ("ABC^LNKA", true, FP_LNKA_END),
            ("ABC^LNKC", true, FP_LNKC_END),
        ];

        for &(payload, parsefnc, expected) in cases {
            let observed = fp(payload, parsefnc);
            assert_eq!(
                observed, expected,
                "pick_codes_no_subset_c fingerprint changed for {payload:?}; \
                 a mutant in the FNC / Link / leading-space arithmetic shifted the bars"
            );
        }
    }

    // Pinned `(bars.len(), fingerprint)` pairs for the FNC / Link /
    // leading-space test of pick_codes_no_subset_c, captured from the
    // oracle-matched encoder. An arithmetic mutation in that function
    // (L545/547/562/571/586/592 — counter advances, b<0x20 boundary in
    // the FNC/leading-space paths) that changes the bars is expected to
    // break one of these.
    //
    // NOTE(review): the fingerprint is linear in the widths (a constant
    // multiple of Σ bar_i·(i+1)), so distinct bar sequences can share a
    // value. Three pairs below are already equal: LEADING_SPACE/SPACE_LOWER,
    // FNC2_START/LNKA_END and FNC3_START/LNKC_END. Presumably genuine
    // collisions rather than copy-paste — confirm when recapturing.
    const FP_LEADING_SPACE: (usize, u64) = (43, 4_655_880_324_794);
    const FP_SPACE_MID: (usize, u64) = (61, 9_216_200_962_192);
    const FP_FNC1_START: (usize, u64) = (43, 4_624_027_095_662);
    const FP_FNC1_MID: (usize, u64) = (61, 9_194_965_476_104);
    const FP_FNC2_START: (usize, u64) = (43, 4_634_644_838_706);
    const FP_FNC3_START: (usize, u64) = (43, 4_645_262_581_750);
    const FP_SPACE_LOWER: (usize, u64) = (43, 4_655_880_324_794);
    const FP_LNKA_END: (usize, u64) = (43, 4_634_644_838_706);
    const FP_LNKC_END: (usize, u64) = (43, 4_645_262_581_750);

    /// Stage 11.A8c-L — kill the residual L756 survivor: the `&&/||`
    /// mutant in the pick_codes Subset-A→C switch condition, which the
    /// FNC/space/Link fingerprint test does not reach.
    ///
    /// The leading control byte puts the encoder in Subset A, and the
    /// message ends with a 3-digit run (at_end, run=3: odd and < 4). The
    /// original condition
    /// `(at_end && run >= 4 && run % 2 == 0) || (!at_end && run >= 6)`
    /// is false, so the digits are encoded as Subset-A characters. With
    /// `&&` → `||` the first clause becomes true whenever at_end holds and
    /// the encoder switches to C on a run that is too short and odd,
    /// producing different (wrong) bars.
    #[test]
    fn pick_codes_subset_a_digit_tail_does_not_switch_to_c() {
        let encoded = encode("\x01ABC123", &Options::default()).expect("encode ok");

        // Same position-weighted fingerprint as the FNC/space/Link test:
        // any &&/|| flip in the A→C switch condition changes the codeword
        // stream and therefore the length and/or the weighted sum.
        let checksum = encoded
            .bars
            .iter()
            .enumerate()
            .fold(0u64, |acc, (idx, &width)| {
                let weight = (idx as u64).wrapping_add(1).wrapping_mul(2_654_435_761);
                acc.wrapping_add((width as u64).wrapping_mul(weight))
            });

        let observed = (encoded.bars.len(), checksum);
        assert_eq!(
            observed, L756_FP,
            "L756 pick_codes A→C switch fingerprint changed"
        );
    }
    // (bars.len(), fingerprint) observed from the oracle-matched output
    // for "\x01ABC123" under default options.
    const L756_FP: (usize, u64) = (61, 9_269_289_677_412);

    /// Kill the L861 survivor: `> with >=` on the `b > 0x7f` check in
    /// parse_input_with_fncs. The original rejects 0x80..0xFF only, so
    /// 0x7F (DEL) passes as a literal Ascii token; the `>= 0x7f` mutant
    /// would reject DEL as well and return Err. The encoded output is
    /// also fingerprinted so the boundary byte's codeword is pinned.
    #[test]
    fn parse_input_with_fncs_accepts_0x7f_del() {
        let opts = Options::default().with("parsefnc", "true");
        let r = encode("A\x7FB", &opts);

        // Boundary itself: DEL must make it through the tokenizer.
        let Ok(encoded) = r else {
            panic!(
                "encode of A\\x7FB with parsefnc=true must succeed (0x7F DEL is valid Code 128); \
                 a mutant in parse_input_with_fncs `b > 0x7f` boundary has rejected it: {r:?}"
            );
        };

        // Position-weighted fingerprint of the emitted widths.
        let checksum = encoded
            .bars
            .iter()
            .enumerate()
            .fold(0u64, |acc, (idx, &width)| {
                let weight = (idx as u64).wrapping_add(1).wrapping_mul(2_654_435_761);
                acc.wrapping_add((width as u64).wrapping_mul(weight))
            });

        let observed = (encoded.bars.len(), checksum);
        assert_eq!(
            observed, L861_FP,
            "L861 parse_input_with_fncs 0x7F output changed"
        );
    }
    // (bars.len(), fingerprint) observed for "A\x7FB" with parsefnc=true.
    const L861_FP: (usize, u64) = (37, 3_472_001_975_388);

    /// Stage 11.A8c — direct unit anchors for `parse_input_with_fncs(data)`,
    /// the parsefnc=true tokenizer. It walks the input left to right and
    /// emits `Token::Ascii(b)` for literal bytes, `Token::Fnc1/2/3` and
    /// `Token::LinkA/C` for the five escape forms, and collapses `^^` to a
    /// literal `^`. Malformed input is rejected through several error
    /// branches.
    ///
    /// The tokenizer is exercised end-to-end by the parsefnc=true corpus;
    /// this test pins it in isolation.
    ///
    /// Success anchors:
    ///   * empty input → empty Vec;
    ///   * literal ASCII run: "abc" → 3 Ascii tokens;
    ///   * each of the 5 escapes "^FNC1"/"^FNC2"/"^FNC3"/"^LNKA"/"^LNKC"
    ///     → exactly its own single token;
    ///   * `^^` → single Ascii(b'^');
    ///   * mixed: "abc^FNC1def" → 3 Ascii + Fnc1 + 3 Ascii.
    ///
    /// Rejection anchors (each pinned by its diagnostic text):
    ///   * high byte (≥0x80);
    ///   * trailing `^` at end of input;
    ///   * truncated escape "^FNC" (4 bytes instead of 5);
    ///   * unknown escapes "^XYZQ" / "^FOO1".
    ///
    /// Arm disambiguation: Fnc1, Fnc2, Fnc3, LinkA, LinkC are checked
    /// pairwise distinct (kills tag-swap mutants like `b"FNC1"` →
    /// `b"FNC2"`).
    #[test]
    fn parse_input_with_fncs_escape_arms_and_error_branches() {
        // Token run expected for a plain ASCII literal.
        let ascii = |text: &str| text.bytes().map(Token::Ascii).collect::<Vec<_>>();

        // Tokenizes `input`, requires the failure to be `InvalidData`, and
        // hands back its message. `label` names the input in the panic
        // raised when a different error variant comes back.
        let invalid_data = |input: &str, label: &str| {
            match parse_input_with_fncs(input).unwrap_err() {
                Error::InvalidData(msg) => msg,
                err => panic!("{label} must yield InvalidData; got {err:?}"),
            }
        };

        // ---- success paths ----------------------------------------------

        // Stage 11.A8c (cont) — empty input yields no tokens at all.
        let empty_tokens = parse_input_with_fncs("").unwrap();
        assert!(
            empty_tokens.is_empty(),
            "parse_input_with_fncs(\"\") must produce empty token vec (no spurious sentinel tokens); got len={}",
            empty_tokens.len()
        );

        // Pure ASCII run: one Ascii token per byte.
        assert_eq!(parse_input_with_fncs("abc").unwrap(), ascii("abc"));

        // Every escape maps to its own single token.
        for (escape, token) in [
            ("^FNC1", Token::Fnc1),
            ("^FNC2", Token::Fnc2),
            ("^FNC3", Token::Fnc3),
            ("^LNKA", Token::LinkA),
            ("^LNKC", Token::LinkC),
        ] {
            assert_eq!(parse_input_with_fncs(escape).unwrap(), vec![token]);
        }

        // Doubled caret collapses to one literal '^'.
        assert_eq!(parse_input_with_fncs("^^").unwrap(), ascii("^"));

        // Escape embedded between two literal runs.
        let mut mixed_expected = ascii("abc");
        mixed_expected.push(Token::Fnc1);
        mixed_expected.extend(ascii("def"));
        assert_eq!(
            parse_input_with_fncs("abc^FNC1def").unwrap(),
            mixed_expected
        );

        // ---- rejection paths --------------------------------------------
        //
        // Stage 11.A8c — per-arm diagnostic pins (previously weak is_err()
        // checks). parse_input_with_fncs has FOUR rejection arms (lines
        // 861-906 of code128.rs):
        //   * b > 0x7f → "Code 128 only supports ASCII; got 0xHH
        //     at byte N"
        //   * trailing `^` → "code128: parsefnc=true: trailing `^`
        //     with no escape body"
        //   * truncated `^FNC` → "code128: parsefnc=true: truncated
        //     escape at byte N; expected ..."
        //   * unknown `^XYZQ` → "code128: parsefnc=true: unknown
        //     escape `^XYZQ` at byte N; expected ..."

        // Arm 1: non-ASCII byte. 'é' is 0xC3 0xA9 in UTF-8, so the first
        // offending byte is 0xC3 at index 3 (right after "caf").
        let msg = invalid_data("café", "non-ASCII");
        let names_constraint = msg.contains("only supports ASCII");
        let names_byte = msg.contains("0xc3");
        let names_offset = msg.contains("at byte 3");
        assert!(
            names_constraint && names_byte && names_offset,
            "non-ASCII diagnostic must pin ASCII-only predicate + 0xc3 byte + offset 3; got {msg:?}"
        );

        // Arm 2: trailing `^` (at byte 3 of "abc^", at byte 0 of "^").
        // This arm's message carries no offset, so only the path-specific
        // predicate text is pinned.
        for input in ["abc^", "^"] {
            let msg = invalid_data(input, &format!("{input:?}"));
            assert!(
                msg.contains("trailing `^` with no escape body"),
                "{input:?} must pin 'trailing `^` with no escape body'; got {msg:?}"
            );
        }

        // Arm 3: truncated escape — "^FNC" is 4 bytes, an escape needs 5.
        let msg = invalid_data("^FNC", "^FNC");
        let names_truncation = msg.contains("truncated escape at byte 0");
        let lists_expected = msg.contains("FNC1");
        assert!(
            names_truncation && lists_expected,
            "truncated escape must pin offset 0 + expected-escape list; got {msg:?}"
        );

        // Arm 4: well-formed length but unknown 4-char tag; the message
        // must echo the offending escape verbatim.
        for (input, want_tag) in [("^XYZQ", "^XYZQ"), ("^FOO1", "^FOO1")] {
            let msg = invalid_data(input, &format!("{input:?}"));
            let names_unknown = msg.contains("unknown escape");
            let echoes_tag = msg.contains(want_tag);
            let names_offset = msg.contains("at byte 0");
            assert!(
                names_unknown && echoes_tag && names_offset,
                "{input:?} must pin 'unknown escape' + {want_tag:?} + offset 0; got {msg:?}"
            );
        }
    }
}