# mrrc 0.8.2 - Docs.rs

# Error-handling test coverage manifest.
#
# Documentation-as-spec for mrrc's error handling: every documented
# error code in docs/reference/error-codes.md is paired here with one
# or more cases that should trigger it. The harness
# (tests/error_coverage.rs and the Python equivalent) reads this
# manifest and asserts the documented variant, code, slug, and
# positional context fire when the parser exercises the trigger —
# not when an error is constructed in test code.
#
# A single code can have multiple cases when the docs describe
# multiple distinct trigger patterns (e.g., E101 fires on both
# "non-digit length in a directory entry" and "missing field
# terminator before base address"). Each case is exercised
# independently; wiring is tracked per case, not per code, so the
# coverage tally reflects the granularity of the spec.
#
# Per-trigger coverage convention: when a single MarcError variant has
# multiple distinct production fire sites (different functions, files,
# or conditions), each fire site gets its own manifest case — even
# when the variant slug is shared. E404 `WriterError` is the canonical
# example: three `writer` cases (size cap, non-3-ASCII tag,
# finished-writer reuse) share the slug `record_too_large_for_iso2709`
# but each exercises a distinct path. The harness's `exercise_writer` /
# `exercise_accessor` helpers branch on the case id to pick the right
# driver.
# Adding a new fire site for an existing variant requires both a new
# `[[case]]` here and a branch in the relevant `exercise_*` helper.
#
# Cases marked `wired = false` describe trigger patterns where the
# parser does not currently produce the documented variant for the
# given input. The `skip_reason` field states what happens today, in
# technical terms, so a contributor implementing the missing detection
# knows what they are replacing. Adding a new error code requires
# adding a case here AND a fixture (or other trigger mechanism).
#
# Schema:
#   id               unique identifier "<code>_<short_trigger>"; used
#                    as the test display name and disambiguates cases
#                    that share a code
#   code             "Exxx" identifier (stable across releases)
#   variant          MarcError variant name (Rust)
#   slug             snake_case slug (stable across releases)
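#   trigger_kind     how the harness exercises the trigger:
#                      "parse_iso2709"  — feed bytes to MarcReader
#                      "parse_marcxml"  — feed string to marcxml_to_record
#                      "parse_marcjson" — feed string to marcjson_to_record
#                      "io_error"       — inject a Read source that errors
#                      "recovery_cap"   — drive lenient mode past max_errors
#                      "accessor"       — call a Record accessor on a parsed record
#                      "writer"         — construct a record and attempt to write it
#                    Cases in this manifest also use these kinds:
#                      "parse_iso2709_lenient"     — parse in lenient mode; the error is
#                                                    pushed onto record.errors, not raised
#                      "parse_authority"           — feed authority-record bytes to
#                                                    AuthorityMarcReader
#                      "io_error_parse_path"       — inject a Read source that errors
#                                                    mid-record
#                      "programmatic_validator"    — build a Leader and invoke
#                                                    RecordStructureValidator directly
#                      "programmatic_writer_check" — call check_iso2709_size directly
#                    The authoritative list of trigger_kinds the Rust
#                    harness exercises is the module docstring in
#                    tests/error_coverage.rs. A kind the Rust harness
#                    can't drive (e.g. parse_marcjson — no Rust
#                    str-to-Record entry point; covered in the Python
#                    harness instead) skips with a manifest-driven reason.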
#   trigger_fixture  path to fixture bytes (parse_iso2709) or text
#                    (parse_marcxml / parse_marcjson) that should fire
#                    `code`. Required for parse_* kinds; optional for
#                    others (their semantics are described in the
#                    case's `description` and `skip_reason`).
#   description      one-line description of the malformation or trigger
#   expected_context positional fields the variant must populate when
#                    fired (per docs/reference/error-codes.md)
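#   recovery_modes   recovery modes the harness exercises on this case
#                    (parse kinds only; declared so the harness can
#                    extend without manifest churn)
#   validation_level optional; validation level the case is parsed at
#                    (e.g. "strict_marc", the level at which
#                    RecordStructureValidator and IndicatorValidator
#                    run). Behavior when the key is omitted is defined
#                    by the harness — see tests/error_coverage.rs.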
#   wired            true if the parser currently emits the documented
#                    variant for this trigger
#   skip_reason      required when wired = false; describes current
#                    behavior in technical terms

schema_version = 1

# === Stream / leader (E0xx) =========================================

# E001: leader bytes 0-4 not five ASCII digits.
[[case]]
id = "e001_record_length_non_digit"
code = "E001"
variant = "RecordLengthInvalid"
slug = "record_length_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e001_record_length_non_digit.bin"
description = "Leader byte 0 ('0' of '00150') replaced with 'X'."
expected_context = ["record_index", "byte_offset"]
recovery_modes = ["strict"]
wired = true

# E001: leader bytes 0-4 parse but the resulting length is < 24 (the
# leader alone is 24 bytes, so any smaller value is impossible).
# Distinct production path from the non-digit case above: parsed via
# Leader::validate_for_reading after Leader::from_bytes succeeds.
[[case]]
id = "e001_record_length_below_24"
code = "E001"
variant = "RecordLengthInvalid"
slug = "record_length_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e001_record_length_below_24.bin"
description = "Leader bytes 0-4 set to '00010' (decimal 10 < 24-byte minimum)."
expected_context = ["record_index", "byte_offset"]
recovery_modes = ["strict"]
wired = true

# E002: leader byte 10 (indicator count, normally '2') is non-digit.
# Audit note: docs/reference/error-codes.md describes E002 as also
# firing on malformations like "reserved bytes 20-23 not '4500'" or
# "encoding indicator out of range", but the current parser does not
# inspect those bytes. Today's E002 path covers indicator count parse
# failure, subfield code count parse failure, and (overlapping with
# the docs-claimed E001/E003 paths) record_length < 24 and
# data_base_address < 24. The reserved-field check is a docs-vs-code
# gap separate from the wiring work for E001/E003/E004.
[[case]]
id = "e002_indicator_count_non_digit"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_indicator_count_non_digit.bin"
description = "Leader byte 10 (indicator count, normally '2') replaced with 'X'."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
wired = true

# E002: leader byte 5 (record_status) is byte-valid but not in the MARC 21
# allowed set {a, c, d, n, p}. RecordStructureValidator runs only at
# validation_level=strict_marc and reports this as InvalidLeader.
[[case]]
id = "e002_invalid_record_status"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_record_status.bin"
description = "Leader byte 5 (record status) is 'x' (not in {a, c, d, n, p})."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 6 (type of record) outside MARC 21
# allowed set.
[[case]]
id = "e002_invalid_record_type"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_record_type.bin"
description = "Leader byte 6 (type of record) is 'q' (not in the documented set)."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 7 (bibliographic level) outside
# {a, b, c, d, i, m, s}.
[[case]]
id = "e002_invalid_bibliographic_level"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_bibliographic_level.bin"
description = "Leader byte 7 (bibliographic level) is 'q' (not in {a, b, c, d, i, m, s})."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 8 (control record type) outside
# {' ', 'a'}.
[[case]]
id = "e002_invalid_control_record_type"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_control_record_type.bin"
description = "Leader byte 8 (control record type) is 'q' (not in {' ', 'a'})."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 9 (character coding) outside {' ', 'a'}.
[[case]]
id = "e002_invalid_character_coding"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_character_coding.bin"
description = "Leader byte 9 (character coding) is 'q' (not in {' ', 'a'})."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 10 (indicator count) is a byte-valid
# digit but != 2 (MARC 21 requires exactly 2). Distinct from the
# structural e002_indicator_count_non_digit case, which fails the
# digit check in Leader::from_bytes.
[[case]]
id = "e002_indicator_count_not_two"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_indicator_count_not_two.bin"
description = "Leader byte 10 (indicator count) is '3' (digit but MARC 21 requires 2)."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 11 (subfield code count) is a
# byte-valid digit but != 2.
[[case]]
id = "e002_subfield_code_count_not_two"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_subfield_code_count_not_two.bin"
description = "Leader byte 11 (subfield code count) is '3' (digit but MARC 21 requires 2)."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 17 (encoding level) outside the MARC 21
# allowed set.
[[case]]
id = "e002_invalid_encoding_level"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_encoding_level.bin"
description = "Leader byte 17 (encoding level) is 'q' (not in {' ', 1-5, 7, 8, u, z})."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 18 (cataloging form) outside
# {' ', a, c, i, n, u}.
[[case]]
id = "e002_invalid_cataloging_form"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_cataloging_form.bin"
description = "Leader byte 18 (cataloging form) is 'q' (not in {' ', a, c, i, n, u})."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (semantic) — leader byte 19 (multipart level) outside
# {' ', a, b, c}.
[[case]]
id = "e002_invalid_multipart_level"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e002_invalid_multipart_level.bin"
description = "Leader byte 19 (multipart level) is 'q' (not in {' ', a, b, c})."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "bytes_near"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E002 (defensive) — data_base_address > 99999. Unreachable from any
# 5-digit-ASCII-parsed leader (max value 99999), but reachable via
# programmatic Leader construction (public struct, public fields).
# The defensive check at record_validation.rs:91-96 guards against
# this constructed state and is exercised here by building a Leader
# with the bad value and invoking RecordStructureValidator directly.
[[case]]
id = "e002_data_base_address_overflow_programmatic"
code = "E002"
variant = "InvalidLeader"
slug = "leader_invalid"
trigger_kind = "programmatic_validator"
description = "Construct Leader with data_base_address = 100_000 and invoke RecordStructureValidator::validate_leader directly."
expected_context = []
recovery_modes = ["strict"]
wired = true

# E003: leader bytes 12-16 not five ASCII digits.
[[case]]
id = "e003_base_address_non_digit"
code = "E003"
variant = "BaseAddressInvalid"
slug = "base_address_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e003_base_address_non_digit.bin"
description = "Leader byte 12 ('0' of base address '00061') replaced with 'X'."
expected_context = ["record_index", "byte_offset"]
recovery_modes = ["strict"]
wired = true

# E003: leader bytes 12-16 parse but the resulting base address is
# < 24 (the leader alone is 24 bytes). Distinct production path from
# the non-digit case above: parsed via Leader::validate_for_reading
# after Leader::from_bytes succeeds.
[[case]]
id = "e003_base_address_below_24"
code = "E003"
variant = "BaseAddressInvalid"
slug = "base_address_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e003_base_address_below_24.bin"
description = "Leader bytes 12-16 set to '00020' (decimal 20 < 24-byte leader minimum)."
expected_context = ["record_index", "byte_offset"]
recovery_modes = ["strict"]
wired = true

# E004: base address claims a value past the available bytes.
[[case]]
id = "e004_base_address_past_record"
code = "E004"
variant = "BaseAddressNotFound"
slug = "base_address_not_found"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e004_base_address_past_record.bin"
description = "Leader bytes 12-16 set to '99999' (> 150-byte record length)."
expected_context = ["record_index", "byte_offset"]
recovery_modes = ["strict"]
wired = true

# E005: stream EOF before reading the leader-claimed record length.
[[case]]
id = "e005_truncated_record"
code = "E005"
variant = "TruncatedRecord"
slug = "truncated_record"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e005_truncated_record.bin"
description = "Record claims 150 bytes; stream ends after 100."
expected_context = ["record_index", "byte_offset", "record_byte_offset"]
recovery_modes = ["strict"]
wired = true

# E005 lenient skeleton path: the same truncation that raises E005 in
# strict mode also fires in lenient mode, but as a non-raised error
# pushed onto the yielded record's `errors: Arc<Vec<MarcError>>`.
# Distinct fire behavior from the strict path, so a separate case.
[[case]]
id = "e005_truncated_record_lenient"
code = "E005"
variant = "TruncatedRecord"
slug = "truncated_record"
trigger_kind = "parse_iso2709_lenient"
trigger_fixture = "tests/data/error_fixtures/e005_truncated_record.bin"
description = "Same truncated fixture as the strict case; lenient mode pushes E005 onto record.errors instead of raising."
expected_context = ["record_index", "byte_offset", "record_byte_offset"]
recovery_modes = ["lenient"]
wired = true

# E006: byte at the leader's claimed end position is not 0x1D.
[[case]]
id = "e006_no_record_terminator"
code = "E006"
variant = "EndOfRecordNotFound"
slug = "end_of_record_not_found"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e006_no_record_terminator.bin"
description = "Final byte (0x1D RECORD_TERMINATOR) replaced with 0x00."
expected_context = ["record_index", "byte_offset", "record_byte_offset"]
recovery_modes = ["strict"]
wired = true

# E007: underlying I/O error from the reader source. Two distinct paths
# construct MarcError::IoError, each covered by a case below:
#   - raw-io / leader boundary: the read fails before the parser has
#     begun a record (read_leader_bytes), so no positional context is
#     available and the context-free From<io::Error> fallback is used.
#   - parse path: the read fails mid-record in read_record_data, which
#     enriches the error via ParseContext::err_io with the in-progress
#     record's record_index, byte_offset, and source_name.
[[case]]
id = "e007_io_failure"
code = "E007"
variant = "IoError"
slug = "io_error"
trigger_kind = "io_error"
description = "Reader source errors on the first read (leader boundary); no positional context available."
expected_context = []
recovery_modes = ["strict"]
wired = true

[[case]]
id = "e007_io_failure_parse_path"
code = "E007"
variant = "IoError"
slug = "io_error"
trigger_kind = "io_error_parse_path"
description = "Reader source errors mid-record while reading the data area; IoError carries the in-progress record's context."
expected_context = ["record_index", "byte_offset", "source_name"]
recovery_modes = ["strict"]
wired = true

# E099: lenient/permissive recovered-error cap exceeded. Wired in
# MarcReader::read_record (constructs MarcError::FatalReaderError when
# the per-stream cap is hit); the harness exercises it once a
# trigger_kind=recovery_cap mechanism lands.
[[case]]
id = "e099_recovery_cap_exceeded"
code = "E099"
variant = "FatalReaderError"
slug = "fatal_reader_error"
trigger_kind = "recovery_cap"
description = "Stream of malformed records exceeds MarcReader::with_max_errors(N)."
expected_context = []
recovery_modes = ["lenient", "permissive"]
wired = true

# === Directory / field header (E1xx) ================================

# E101: directory has no FIELD_TERMINATOR before base address.
[[case]]
id = "e101_no_field_terminator"
code = "E101"
variant = "DirectoryInvalid"
slug = "directory_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e101_directory_no_terminator.bin"
description = "Byte 60 (FIELD_TERMINATOR 0x1E ending the directory) replaced with '0', producing a partial trailing entry."
expected_context = ["record_index", "byte_offset", "record_byte_offset"]
recovery_modes = ["strict"]
wired = true

# E101: directory entry length field contains a non-digit byte.
[[case]]
id = "e101_non_digit_length"
code = "E101"
variant = "DirectoryInvalid"
slug = "directory_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e101_directory_non_digit_length.bin"
description = "Directory entry length field 'X025' (was '0025') for the 100 entry."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "field_tag"]
recovery_modes = ["strict"]
wired = true

# E101: directory entry tag contains a non-ASCII byte. Tags are 3 ASCII
# bytes per the codec; lossy UTF-8 conversion would replace the byte
# with U+FFFD (3 bytes), producing a tag the writer can't fit back into
# the directory's fixed-width tag field. Round-trip-breaking; the
# error-classification fuzz target's round-trip assertion surfaces it.
[[case]]
id = "e101_non_ascii_tag"
code = "E101"
variant = "DirectoryInvalid"
slug = "directory_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e101_non_ascii_tag.bin"
description = "Directory entry 1 tag's first byte replaced with 0xCC (non-ASCII)."
# field_tag omitted: the tag bytes are themselves the malformation, so the
# parser does not set current_field_tag on this path (unlike the E106
# data-field guard, where the tag is known).
expected_context = ["record_index", "byte_offset", "record_byte_offset"]
recovery_modes = ["strict"]
wired = true

# E101: directory entry start-position byte is non-digit. Distinct
# production path from non_digit_length (mutates the length field's
# bytes 3-6 of the entry) — this mutates the start-position field's
# bytes 7-11 of the entry. Different code branch in the directory walker.
[[case]]
id = "e101_non_digit_start"
code = "E101"
variant = "DirectoryInvalid"
slug = "directory_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e101_directory_non_digit_start.bin"
description = "Directory entry 1 start-position byte 7 of 11 set to 'X' (first byte of the 5-byte start field)."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "field_tag"]
recovery_modes = ["strict"]
wired = true

# E105: accessor lookup for a tag the record does not contain.
[[case]]
id = "e105_field_not_found"
code = "E105"
variant = "FieldNotFound"
slug = "field_not_found"
trigger_kind = "accessor"
trigger_fixture = "tests/data/simple_book.mrc"
description = "Parse simple_book.mrc cleanly, then call record.get_field_or_err('999')."
expected_context = ["field_tag"]
recovery_modes = ["strict"]
wired = true

# E105 from AuthorityRecord — the same accessor pattern fires from the
# authority record type via the parity API.
[[case]]
id = "e105_authority_field_not_found"
code = "E105"
variant = "FieldNotFound"
slug = "field_not_found"
trigger_kind = "accessor"
trigger_fixture = "tests/data/simple_authority.mrc"
description = "Parse simple_authority.mrc cleanly, then call record.get_field_or_err('999')."
expected_context = ["field_tag"]
recovery_modes = ["strict"]
wired = true

# E105 from HoldingsRecord — the same accessor pattern fires from the
# holdings record type via the parity API.
[[case]]
id = "e105_holdings_field_not_found"
code = "E105"
variant = "FieldNotFound"
slug = "field_not_found"
trigger_kind = "accessor"
trigger_fixture = "tests/data/simple_holdings.mrc"
description = "Parse simple_holdings.mrc cleanly, then call record.get_field_or_err('999')."
expected_context = ["field_tag"]
recovery_modes = ["strict"]
wired = true

# E106: directory entry claims a field length larger than the data area.
[[case]]
id = "e106_field_length_past_data"
code = "E106"
variant = "InvalidField"
slug = "invalid_field"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e106_field_length_past_data.bin"
description = "Directory entry for tag 100 claims length '9999' (was '0025'); declared field bytes extend past the data area."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "field_tag"]
recovery_modes = ["strict"]
wired = true

# E106: byte after data-field indicators is not the subfield delimiter.
# Bibliographic reader (SubfieldStructureMode::Strict) fires this in
# parse_subfields; authority and holdings tolerate the same byte under
# SubfieldStructureMode::Permissive.
[[case]]
id = "e106_expected_subfield_delimiter"
code = "E106"
variant = "InvalidField"
slug = "invalid_field"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e106_expected_subfield_delimiter.bin"
description = "Byte 63 (first subfield delimiter 0x1F of the 100 field) replaced with 'X'."
expected_context = ["record_index", "byte_offset", "record_byte_offset", "field_tag"]
recovery_modes = ["strict"]
wired = true

# E106 authority field-too-short: the documented per-reader minimum
# guard ("authority returns Err when field_bytes.len() < 2") fires
# from AuthorityMarcReader when a *data* field is below the 2-byte
# indicator minimum. The guard runs only on data fields — control
# fields (001-009) decode on a separate path and are exempt — so the
# fixture mutates the 100 heading field's directory length to '0001'.
# An earlier trial that mutated the 005 control field could never trip
# it, which is what the data-field-only gating predicts.
[[case]]
id = "e106_authority_field_too_short"
code = "E106"
variant = "InvalidField"
slug = "invalid_field"
trigger_kind = "parse_authority"
trigger_fixture = "tests/data/error_fixtures/e106_authority_field_too_short.bin"
description = "Authority record's 100 data-field directory length set to '0001' (1 byte; below the 2-byte indicator minimum)."
expected_context = ["record_index", "byte_offset", "field_tag"]
recovery_modes = ["strict"]
wired = true

# E106 recovery path: when a truncated record drops into try_recover_record
# in lenient mode, the recovery directory walker calls parse_4digits /
# parse_5digits directly. A non-digit length/start byte there fires E106
# InvalidField (via MarcError::invalid_field_msg) — distinct from the
# main parser path's E101 DirectoryInvalid for the same kind of malformed
# directory byte. The fixture combines truncation (so the recovery path
# runs) with a malformed length field (so parse_4digits errors). Both
# E005 and E106 land on record.errors; the harness picks the E106 one.
[[case]]
id = "e106_recovery_invalid_field"
code = "E106"
variant = "InvalidField"
slug = "invalid_field"
trigger_kind = "parse_iso2709_lenient"
trigger_fixture = "tests/data/error_fixtures/e106_recovery_invalid_field.bin"
description = "Leader claims 999-byte record; only the directory (with a non-digit length field 'ABCD') is provided. Truncation routes through try_recover_record, which calls parse_4digits and pushes InvalidField."
expected_context = []
recovery_modes = ["lenient"]
wired = true

# === Subfield / indicator (E2xx) ====================================

# E201: indicator byte not digit/space.
[[case]]
id = "e201_bad_indicator_245"
code = "E201"
variant = "InvalidIndicator"
slug = "invalid_indicator"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e201_bad_indicator.bin"
description = "First indicator byte of field 245 is ':' (not digit/space)."
expected_context = [
    "record_index",
    "byte_offset",
    "record_byte_offset",
    "field_tag",
    "indicator_position",
    "found",
    "expected",
]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E201: indicator byte is digit-valid but violates the per-tag MARC 21 rule
# for the field. IndicatorValidator runs in addition to the universal
# byte-validity check at strict_marc.
[[case]]
id = "e201_per_tag_indicator_245"
code = "E201"
variant = "InvalidIndicator"
slug = "invalid_indicator"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e201_per_tag_indicator_245.bin"
description = "First indicator of field 245 is '9' (byte-valid digit but per-tag rule allows only 0/1)."
expected_context = [
    "record_index",
    "byte_offset",
    "record_byte_offset",
    "field_tag",
    "indicator_position",
    "found",
    "expected",
]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E202: subfield code byte not printable ASCII.
[[case]]
id = "e202_non_printable_subfield_code"
code = "E202"
variant = "BadSubfieldCode"
slug = "bad_subfield_code"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e202_non_printable_subfield_code.bin"
description = "Byte 64 (subfield code 'a' after 0x1F at byte 63) replaced with 0x00."
expected_context = [
    "record_index",
    "byte_offset",
    "record_byte_offset",
    "field_tag",
    "subfield_code",
]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# === Encoding (E3xx) ================================================

# E301: subfield value contains invalid UTF-8 bytes.
[[case]]
id = "e301_invalid_utf8_in_subfield"
code = "E301"
variant = "EncodingError"
slug = "utf8_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e301_invalid_utf8_in_subfield.bin"
description = "Byte 70 (inside the 100$a 'Fitzgerald' subfield value) replaced with 0xFF."
expected_context = ["record_index"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E301: bibliographic CONTROL field invalid UTF-8 (distinct production
# path from the subfield case above — fires from
# iso2709_skeleton.rs's control-field reader at strict_marc, not from
# parse_subfields).
[[case]]
id = "e301_invalid_utf8_in_control_field"
code = "E301"
variant = "EncodingError"
slug = "utf8_invalid"
trigger_kind = "parse_iso2709"
trigger_fixture = "tests/data/error_fixtures/e301_invalid_utf8_in_control_field.bin"
description = "Byte 49 (first byte of 008 control field in with_control_fields.mrc) replaced with 0xFF."
expected_context = ["record_index"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# E301 authority CONTROL field invalid UTF-8 at strict_marc. The
# fixture's leader is the simple_authority.mrc leader with position 18
# (punctuation policy) set to ' ' so it conforms to the MARC 21 Authority
# Format allowed-value sets; the per-record-type leader validators in
# `RecordStructureValidator` let the parse proceed past the leader and
# fire E301 on the corrupted control-field byte (0xFF at offset 73).
[[case]]
id = "e301_invalid_utf8_in_authority_control_field"
code = "E301"
variant = "EncodingError"
slug = "utf8_invalid"
trigger_kind = "parse_authority"
trigger_fixture = "tests/data/error_fixtures/e301_invalid_utf8_in_authority_control_field.bin"
description = "Byte 73 (first byte of first control field in simple_authority.mrc) replaced with 0xFF; leader position 18 set to ' ' so the MARC 21 Authority Format allowed-value sets accept the leader at strict_marc."
expected_context = ["record_index"]
recovery_modes = ["strict"]
validation_level = "strict_marc"
wired = true

# === Serialization (E4xx) ===========================================

# E401: malformed MARCXML (mismatched closing tag). Triggers
# marcxml.rs's read_event_into wrapping at lines 157/252/454/497 —
# the closing-tag mismatch fires at whichever read_event_into call is
# active when the bad tag is consumed.
[[case]]
id = "e401_marcxml_close_tag_mismatch"
code = "E401"
variant = "XmlError"
slug = "marcxml_invalid"
trigger_kind = "parse_marcxml"
trigger_fixture = "tests/data/error_fixtures/e401_malformed_marcxml.xml"
description = "Subfield 'a' closing tag is </WRONG> instead of </subfield>."
expected_context = []
recovery_modes = ["strict"]
wired = true

# E401: input is well-formed XML but contains no <record> element.
# Triggers marcxml.rs:458 (Custom error after the outer event loop
# exhausts without finding a <record> Start event).
[[case]]
id = "e401_no_record_element"
code = "E401"
variant = "XmlError"
slug = "marcxml_invalid"
trigger_kind = "parse_marcxml"
trigger_fixture = "tests/data/error_fixtures/e401_no_record_element.xml"
description = "Well-formed XML wrapper with no <record> element inside."
expected_context = []
recovery_modes = ["strict"]
wired = true

# E401: invalid numeric character reference in subfield text. Triggers
# marcxml.rs:278 (resolve_char_ref returns Err for out-of-range
# values like &#xFFFFFFF;).
[[case]]
id = "e401_bad_char_reference"
code = "E401"
variant = "XmlError"
slug = "marcxml_invalid"
trigger_kind = "parse_marcxml"
trigger_fixture = "tests/data/error_fixtures/e401_bad_char_reference.xml"
description = "Subfield 'a' text contains &#xFFFFFFF; (overflowing character reference)."
expected_context = []
recovery_modes = ["strict"]
wired = true

# E401: input truncated mid-subfield (file ends inside <subfield>
# before any closing tag). Triggers marcxml.rs:301 (Custom error
# "unexpected EOF inside <subfield>" from read_leaf_text's EOF
# handler).
[[case]]
id = "e401_unexpected_eof_in_subfield"
code = "E401"
variant = "XmlError"
slug = "marcxml_invalid"
trigger_kind = "parse_marcxml"
trigger_fixture = "tests/data/error_fixtures/e401_unexpected_eof_in_subfield.xml"
description = "XML truncated mid-text inside <subfield>; no closing tag."
expected_context = []
recovery_modes = ["strict"]
wired = true

# E402: malformed MARCJSON (truncated JSON).
[[case]]
id = "e402_marcjson_truncated"
code = "E402"
variant = "JsonError"
slug = "marcjson_invalid"
trigger_kind = "parse_marcjson"
trigger_fixture = "tests/data/error_fixtures/e402_malformed_marcjson.json"
description = "JSON document is truncated mid-string in a subfield value."
expected_context = []
recovery_modes = ["strict"]
wired = true

# E404: writer attempts to serialize a record exceeding the ISO 2709
# length limit. Wired in the writer path (MARCWriter and analogues
# construct MarcError::WriterError when total length or base address
# exceeds 99999); the harness drives it via trigger_kind=writer
# (`exercise_writer`, which branches on the case id).
[[case]]
id = "e404_record_too_large_for_iso2709"
code = "E404"
variant = "WriterError"
slug = "record_too_large_for_iso2709"
trigger_kind = "writer"
description = "Construct a record whose total length or base address exceeds 99999 and attempt to serialize it."
expected_context = ["record_index"]
recovery_modes = ["strict"]
wired = true

# E404 also fires from validate_directory_tag when a Field's tag is not
# 3 ASCII bytes. Distinct production trigger from the size cap above;
# per the per-trigger coverage convention, each fire site gets its own
# manifest case even when the variant slug is shared.
[[case]]
id = "e404_writer_non_ascii_tag"
code = "E404"
variant = "WriterError"
slug = "record_too_large_for_iso2709"
trigger_kind = "writer"
description = "Construct a record with a field whose tag is not 3 ASCII bytes and attempt to serialize it."
expected_context = ["record_index"]
recovery_modes = ["strict"]
wired = true

# E404 also fires from MarcWriter::write_record when called after
# finish(). Reachable from Rust but not from the Python `mrrc.MARCWriter`
# wrapper (which short-circuits via PyRuntimeError before reaching the
# Rust writer).
[[case]]
id = "e404_writer_finished_writer_reuse"
code = "E404"
variant = "WriterError"
slug = "record_too_large_for_iso2709"
trigger_kind = "writer"
description = "Call MarcWriter::finish(), then attempt another write_record on the same writer."
expected_context = []
recovery_modes = ["strict"]
wired = true

# E404 (defensive) — check_iso2709_size's base_address > 99999 guard.
# Unreachable from the writer's normal control flow (base_address <=
# record_length, so the record_length > 99999 check fires first), but
# reachable via direct invocation of the public helper. Exercised here
# by calling check_iso2709_size with record_length=1 and
# base_address=100_000, isolating the base_address branch.
[[case]]
id = "e404_check_iso2709_size_base_address_overflow_programmatic"
code = "E404"
variant = "WriterError"
slug = "record_too_large_for_iso2709"
trigger_kind = "programmatic_writer_check"
description = "Call iso2709::check_iso2709_size(record_length=1, base_address=100_000) directly to exercise the base_address > 99999 defensive branch."
expected_context = ["record_index"]
recovery_modes = ["strict"]
wired = true