use std::fs;
use std::io::{self, Cursor, Read};
use std::path::{Path, PathBuf};
use mrrc::{
AuthorityMarcReader, Field, HoldingsMarcReader, Leader, MarcReader, MarcWriter, Record,
RecordStructureValidator, RecoveryMode, Subfield, ValidationLevel,
};
use serde::Deserialize;
/// Deserialized form of the `tests/error_coverage.toml` manifest.
#[derive(Debug, Deserialize)]
struct Manifest {
    /// Manifest format revision; `manifest_is_well_formed` asserts it equals 1.
    #[allow(dead_code)]
    schema_version: u32,
    /// Every coverage case listed under the manifest's `case` key.
    case: Vec<Case>,
}
/// One error-coverage case from the manifest.
#[derive(Debug, Deserialize)]
struct Case {
    /// Unique case identifier; also used to pick the harness branch for
    /// id-dispatched trigger kinds (accessor, writer, programmatic checks).
    id: String,
    /// Error code the fired error must report via `code()`.
    code: String,
    /// Variant label; only used in failure messages by this harness.
    variant: String,
    /// Slug the fired error must report via `slug()`.
    slug: String,
    /// Which trigger the harness runs; defaults to `"parse_iso2709"` when absent.
    #[serde(default = "default_trigger_kind")]
    trigger_kind: String,
    /// Fixture path, joined onto `CARGO_MANIFEST_DIR` by `fixture_path`.
    #[serde(default)]
    trigger_fixture: Option<String>,
    /// Free-form description; not read by this harness.
    #[allow(dead_code)]
    description: String,
    /// Context fields that must be present and non-null in the error's JSON form.
    expected_context: Vec<String>,
    /// Recovery modes the case covers; the harness only checks for `"strict"`.
    recovery_modes: Vec<String>,
    /// Optional validation level name (`"structural"` or `"strict_marc"`).
    #[serde(default)]
    validation_level: Option<String>,
    /// Whether the harness should actually exercise this case.
    wired: bool,
    /// Why the case is not wired; required by `manifest_is_well_formed` when `wired` is false.
    #[serde(default)]
    skip_reason: Option<String>,
}
/// Translates the case's optional `validation_level` string into a [`ValidationLevel`].
///
/// A case without a `validation_level` gets [`ValidationLevel::Structural`].
///
/// # Panics
///
/// Panics when the manifest names anything other than `"structural"` or `"strict_marc"`.
fn parse_validation_level(case: &Case) -> ValidationLevel {
    let Some(requested) = case.validation_level.as_deref() else {
        return ValidationLevel::Structural;
    };
    match requested {
        "structural" => ValidationLevel::Structural,
        "strict_marc" => ValidationLevel::StrictMarc,
        other => panic!(
            "case {}: unknown validation_level {:?} (expected \"structural\" or \"strict_marc\")",
            case.id, other
        ),
    }
}
/// Serde default for `Case::trigger_kind`: cases that omit the key are ISO 2709 parse cases.
fn default_trigger_kind() -> String {
    String::from("parse_iso2709")
}
fn manifest_path() -> PathBuf {
Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/error_coverage.toml")
}
/// Reads and parses the coverage manifest.
///
/// # Panics
///
/// Panics when the file cannot be read or is not valid TOML for [`Manifest`].
fn load_manifest() -> Manifest {
    let location = manifest_path();
    let raw = fs::read_to_string(location).expect("read error_coverage.toml");
    toml::from_str::<Manifest>(&raw).expect("parse error_coverage.toml")
}
/// Resolves the case's `trigger_fixture` against the crate directory.
///
/// # Panics
///
/// Panics when the case has no `trigger_fixture`; callers only ask for a path
/// when the trigger kind needs one.
fn fixture_path(case: &Case) -> PathBuf {
    match &case.trigger_fixture {
        Some(rel) => Path::new(env!("CARGO_MANIFEST_DIR")).join(rel),
        None => panic!(
            "case {} has trigger_kind requiring a fixture but none set",
            case.id
        ),
    }
}
/// Loads the case's fixture as raw bytes.
///
/// # Panics
///
/// Panics when the case has no fixture or the file cannot be read.
fn fixture_bytes(case: &Case) -> Vec<u8> {
    let path = fixture_path(case);
    match fs::read(&path) {
        Ok(contents) => contents,
        Err(e) => panic!("read fixture {}: {e}", path.display()),
    }
}
/// Loads the case's fixture as a UTF-8 string.
///
/// # Panics
///
/// Panics when the case has no fixture, or the file cannot be read as UTF-8 text.
fn fixture_text(case: &Case) -> String {
    let path = fixture_path(case);
    match fs::read_to_string(&path) {
        Ok(contents) => contents,
        Err(e) => panic!("read fixture {}: {e}", path.display()),
    }
}
/// What happened when the harness tried to provoke a case's error.
enum TriggerOutcome {
    /// The trigger produced this error.
    Fired(mrrc::MarcError),
    /// The trigger ran to completion without any error.
    NoError,
    /// The harness could not run the trigger; the payload explains why.
    UnsupportedKind(String),
}
/// Runs an `accessor` case: parse one record from the fixture with the reader
/// matching the case id, then look up tag `999` through `get_field_or_err`.
///
/// Returns `Fired` with the lookup error, `NoError` when the lookup succeeds,
/// and `UnsupportedKind` when the fixture yields no record or the case id has
/// no branch here.
///
/// # Panics
///
/// Panics when the fixture is missing or unreadable; it is loaded before the
/// case id is inspected.
fn exercise_accessor(case: &Case) -> TriggerOutcome {
    let source = Cursor::new(fixture_bytes(case));
    match case.id.as_str() {
        "e105_field_not_found" => {
            let mut reader = MarcReader::new(source).with_recovery_mode(RecoveryMode::Strict);
            match reader.read_record() {
                Ok(Some(record)) => {
                    let lookup = record.get_field_or_err("999");
                    match lookup {
                        Ok(_) => TriggerOutcome::NoError,
                        Err(e) => TriggerOutcome::Fired(e),
                    }
                }
                // Both end-of-input and a parse error mean the fixture is unusable.
                _ => TriggerOutcome::UnsupportedKind(format!(
                    "{}: bibliographic fixture did not parse to a record",
                    case.id
                )),
            }
        }
        "e105_authority_field_not_found" => {
            let mut reader =
                AuthorityMarcReader::new(source).with_recovery_mode(RecoveryMode::Strict);
            match reader.read_record() {
                Ok(Some(record)) => {
                    let lookup = record.get_field_or_err("999");
                    match lookup {
                        Ok(_) => TriggerOutcome::NoError,
                        Err(e) => TriggerOutcome::Fired(e),
                    }
                }
                _ => TriggerOutcome::UnsupportedKind(format!(
                    "{}: authority fixture did not parse to a record",
                    case.id
                )),
            }
        }
        "e105_holdings_field_not_found" => {
            let mut reader =
                HoldingsMarcReader::new(source).with_recovery_mode(RecoveryMode::Strict);
            match reader.read_record() {
                Ok(Some(record)) => {
                    let lookup = record.get_field_or_err("999");
                    match lookup {
                        Ok(_) => TriggerOutcome::NoError,
                        Err(e) => TriggerOutcome::Fired(e),
                    }
                }
                _ => TriggerOutcome::UnsupportedKind(format!(
                    "{}: holdings fixture did not parse to a record",
                    case.id
                )),
            }
        }
        other => TriggerOutcome::UnsupportedKind(format!(
            "trigger_kind=accessor: case {other} has no harness branch; add one in exercise_accessor"
        )),
    }
}
/// Reads bibliographic records from `bytes` until the reader reports an error
/// or runs out of input.
///
/// Returns the first error `read_record` yields, or `None` when the stream
/// ends cleanly. Reading stops at whichever comes first.
fn first_iso2709_error(
    bytes: &[u8],
    mode: RecoveryMode,
    level: ValidationLevel,
) -> Option<mrrc::MarcError> {
    let mut reader = MarcReader::new(Cursor::new(bytes))
        .with_recovery_mode(mode)
        .with_validation_level(level);
    // `transpose` turns end-of-input (`Ok(None)`) into iterator exhaustion, so
    // the search ends on the first `Err` or when the reader is drained.
    std::iter::from_fn(|| reader.read_record().transpose()).find_map(Result::err)
}
/// Reads holdings records from `bytes` in strict recovery mode until the
/// reader reports an error or runs out of input.
///
/// Returns the first error `read_record` yields, or `None` when the stream
/// ends cleanly.
fn first_holdings_error(bytes: &[u8], level: ValidationLevel) -> Option<mrrc::MarcError> {
    let mut reader = HoldingsMarcReader::new(Cursor::new(bytes))
        .with_recovery_mode(RecoveryMode::Strict)
        .with_validation_level(level);
    // End-of-input becomes iterator exhaustion; the first `Err` ends the search.
    std::iter::from_fn(|| reader.read_record().transpose()).find_map(Result::err)
}
/// Reads authority records from `bytes` in strict recovery mode until the
/// reader reports an error or runs out of input.
///
/// Returns the first error `read_record` yields, or `None` when the stream
/// ends cleanly.
fn first_authority_error(bytes: &[u8], level: ValidationLevel) -> Option<mrrc::MarcError> {
    let mut reader = AuthorityMarcReader::new(Cursor::new(bytes))
        .with_recovery_mode(RecoveryMode::Strict)
        .with_validation_level(level);
    // End-of-input becomes iterator exhaustion; the first `Err` ends the search.
    std::iter::from_fn(|| reader.read_record().transpose()).find_map(Result::err)
}
/// A byte source that never yields data: every `read` call fails.
struct FailingReader;

impl Read for FailingReader {
    /// Always returns an `ErrorKind::Other` error; the destination buffer is untouched.
    fn read(&mut self, _dst: &mut [u8]) -> io::Result<usize> {
        let failure = io::Error::new(io::ErrorKind::Other, "synthetic read failure");
        Err(failure)
    }
}
/// Builds a synthetic ISO 2709 record whose only directory entry carries the
/// non-numeric field length `ABCD`.
///
/// Layout: 24-byte leader, one 12-byte directory entry plus field terminator,
/// then the record terminator. There is no field data.
fn build_bad_record() -> Vec<u8> {
    const FIELD_TERMINATOR: u8 = 0x1E;
    const RECORD_TERMINATOR: u8 = 0x1D;
    const LEADER_LEN: usize = 24;

    // Tag "245", length "ABCD" (deliberately not digits), start offset "00000".
    let mut directory = b"245ABCD00000".to_vec();
    directory.push(FIELD_TERMINATOR);

    let base_address = LEADER_LEN + directory.len();
    // Only the record terminator follows the directory.
    let record_length = base_address + 1;

    let leader = format!("{record_length:05}nam a22{base_address:05} i 4500");

    let mut out = Vec::with_capacity(record_length);
    out.extend_from_slice(leader.as_bytes());
    out.extend_from_slice(&directory);
    out.push(RECORD_TERMINATOR);
    out
}
/// Builds a minimal ISO 2709 record containing a single `001` control field
/// whose value is `control_001`.
///
/// Layout: 24-byte leader, one directory entry plus field terminator, the
/// terminated field data, then the record terminator.
fn build_valid_record(control_001: &str) -> Vec<u8> {
    const FIELD_TERMINATOR: u8 = 0x1E;
    const RECORD_TERMINATOR: u8 = 0x1D;
    const LEADER_LEN: usize = 24;

    let mut field_data = control_001.as_bytes().to_vec();
    field_data.push(FIELD_TERMINATOR);

    // Tag "001", four-digit field length (terminator included), start offset "00000".
    let mut directory = format!("001{:04}00000", field_data.len()).into_bytes();
    directory.push(FIELD_TERMINATOR);

    let base_address = LEADER_LEN + directory.len();
    let record_length = base_address + field_data.len() + 1;

    let leader = format!("{record_length:05}nam a22{base_address:05} i 4500");

    let mut out = Vec::with_capacity(record_length);
    out.extend_from_slice(leader.as_bytes());
    out.extend_from_slice(&directory);
    out.extend_from_slice(&field_data);
    out.push(RECORD_TERMINATOR);
    out
}
/// A byte source that serves `data` and then fails instead of signalling end-of-input.
struct FailAfterBuffer {
    /// Bytes handed out before the failure.
    data: Vec<u8>,
    /// Index of the next unread byte in `data`.
    pos: usize,
}

impl Read for FailAfterBuffer {
    /// Copies as many unread bytes as fit into `buf`; once `data` is used up,
    /// every call returns an `ErrorKind::Other` error.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let remaining = match self.data.get(self.pos..) {
            Some(rest) if !rest.is_empty() => rest,
            _ => {
                return Err(io::Error::new(
                    io::ErrorKind::Other,
                    "synthetic mid-record read failure",
                ));
            }
        };
        let n = remaining.len().min(buf.len());
        buf[..n].copy_from_slice(&remaining[..n]);
        self.pos += n;
        Ok(n)
    }
}
/// Runs the trigger named by `case.trigger_kind` and reports what happened.
///
/// Fixture-backed kinds read the case's fixture (and panic if it is missing);
/// synthetic kinds build their input in memory; id-dispatched kinds delegate to
/// the matching `exercise_*` function. Unknown kinds yield `UnsupportedKind`.
// One arm per trigger kind keeps the dispatch readable despite the length.
#[allow(clippy::too_many_lines)]
fn fire_trigger(case: &Case) -> TriggerOutcome {
    /// Maps "first error seen, if any" onto a trigger outcome.
    fn from_first_error(first: Option<mrrc::MarcError>) -> TriggerOutcome {
        if let Some(e) = first {
            TriggerOutcome::Fired(e)
        } else {
            TriggerOutcome::NoError
        }
    }

    /// Maps a fallible call onto a trigger outcome, discarding any success value.
    fn from_result<T>(result: Result<T, mrrc::MarcError>) -> TriggerOutcome {
        match result {
            Ok(_) => TriggerOutcome::NoError,
            Err(e) => TriggerOutcome::Fired(e),
        }
    }

    match case.trigger_kind.as_str() {
        "parse_iso2709" => {
            let bytes = fixture_bytes(case);
            let level = parse_validation_level(case);
            from_first_error(first_iso2709_error(&bytes, RecoveryMode::Strict, level))
        }
        "parse_iso2709_lenient" => {
            let bytes = fixture_bytes(case);
            // NOTE(review): presumably a max-errors value of 0 lifts the error cap — confirm.
            let mut reader = MarcReader::new(Cursor::new(&bytes[..]))
                .with_recovery_mode(RecoveryMode::Lenient)
                .with_max_errors(0)
                .with_validation_level(parse_validation_level(case));
            loop {
                let record = match reader.read_record() {
                    Ok(Some(record)) => record,
                    Ok(None) => return TriggerOutcome::NoError,
                    Err(e) => return TriggerOutcome::Fired(e),
                };
                // In lenient mode the error of interest may be attached to a
                // returned record rather than surfaced as `Err`.
                if let Some(err) = record.errors.iter().find(|e| e.code() == case.code) {
                    return TriggerOutcome::Fired(err.clone());
                }
            }
        }
        "parse_marcxml" => {
            let text = fixture_text(case);
            from_result(mrrc::marcxml::marcxml_to_record(&text))
        }
        "parse_marcjson" => TriggerOutcome::UnsupportedKind(
            "no public Rust str-to-Record API for MARCJSON; case is exercised in the Python harness".to_string(),
        ),
        "io_error" => {
            let mut reader =
                MarcReader::new(FailingReader).with_recovery_mode(RecoveryMode::Strict);
            from_result(reader.read_record())
        }
        "io_error_parse_path" => {
            // One complete record followed by only the 24-byte leader of a
            // second; the source then fails instead of reporting end-of-input.
            let mut stream = build_valid_record("rec1");
            let second = build_valid_record("rec2");
            stream.extend_from_slice(&second[..24]);
            let source = FailAfterBuffer {
                data: stream,
                pos: 0,
            };
            let mut reader = MarcReader::new(source)
                .with_recovery_mode(RecoveryMode::Strict)
                .with_source("synthetic-stream.mrc");
            match reader.read_record() {
                Ok(_) => from_result(reader.read_record()),
                Err(e) => TriggerOutcome::Fired(e),
            }
        }
        "recovery_cap" => {
            const CAP: usize = 1;
            // CAP + 2 bad records, i.e. more failures than the cap allows.
            let mut stream = Vec::new();
            for _ in 0..=(CAP + 1) {
                stream.extend_from_slice(&build_bad_record());
            }
            let mut reader = MarcReader::new(Cursor::new(stream))
                .with_recovery_mode(RecoveryMode::Lenient)
                .with_max_errors(CAP);
            loop {
                match reader.read_record() {
                    Ok(Some(_)) => continue,
                    Ok(None) => break TriggerOutcome::NoError,
                    Err(e) => break TriggerOutcome::Fired(e),
                }
            }
        }
        "parse_holdings" => {
            let bytes = fixture_bytes(case);
            let level = parse_validation_level(case);
            from_first_error(first_holdings_error(&bytes, level))
        }
        "parse_authority" => {
            let bytes = fixture_bytes(case);
            let level = parse_validation_level(case);
            from_first_error(first_authority_error(&bytes, level))
        }
        "programmatic_validator" => exercise_programmatic_validator(case),
        "programmatic_writer_check" => exercise_programmatic_writer_check(case),
        "accessor" => exercise_accessor(case),
        "writer" => exercise_writer(case),
        other => TriggerOutcome::UnsupportedKind(format!("unknown trigger_kind {other:?}")),
    }
}
/// Runs a `programmatic_validator` case: start from a parsed baseline leader,
/// mutate it in memory, and feed it to [`RecordStructureValidator`].
///
/// Returns `Fired` with the validator's error, `NoError` when validation
/// passes, and `UnsupportedKind` when the case id has no branch here.
///
/// # Panics
///
/// Panics when the baseline leader literal does not parse.
fn exercise_programmatic_validator(case: &Case) -> TriggerOutcome {
    // A MARC leader is exactly 24 bytes: positions 17-19 (three single-character
    // codes, blank here) sit between the base address and the "4500" entry map.
    let mut leader =
        Leader::from_bytes(b"00150nam a2200061   4500").expect("baseline leader parses");
    match case.id.as_str() {
        "e002_data_base_address_overflow_programmatic" => {
            // Six digits: one more than the five-digit base-address slot holds.
            leader.data_base_address = 100_000;
            match RecordStructureValidator::validate_leader(&leader) {
                Ok(()) => TriggerOutcome::NoError,
                Err(e) => TriggerOutcome::Fired(e),
            }
        },
        other => TriggerOutcome::UnsupportedKind(format!(
            "trigger_kind=programmatic_validator: case {other} has no harness branch; add one in exercise_programmatic_validator"
        )),
    }
}
/// Runs a `programmatic_writer_check` case by calling the ISO 2709 size check
/// directly with hand-picked arguments.
///
/// Returns `Fired` with the check's error, `NoError` when the check passes, and
/// `UnsupportedKind` when the case id has no branch here.
fn exercise_programmatic_writer_check(case: &Case) -> TriggerOutcome {
    match case.id.as_str() {
        "e404_check_iso2709_size_base_address_overflow_programmatic" => {
            // NOTE(review): argument meanings are not visible from this file;
            // per the case id, the 100_000 is presumably the base address.
            let verdict = mrrc::iso2709::check_iso2709_size(1, 100_000, Some(1), None);
            match verdict {
                Err(e) => TriggerOutcome::Fired(e),
                Ok(()) => TriggerOutcome::NoError,
            }
        }
        other => TriggerOutcome::UnsupportedKind(format!(
            "trigger_kind=programmatic_writer_check: case {other} has no harness branch; add one in exercise_programmatic_writer_check"
        )),
    }
}
fn exercise_writer(case: &Case) -> TriggerOutcome {
let leader =
Leader::from_bytes(b"00000nam a2200000 i 4500").expect("synthetic minimal leader parses");
match case.id.as_str() {
"e404_record_too_large_for_iso2709" => {
let mut record = Record::new(leader);
let big_value = "x".repeat(100_000);
let field = Field {
tag: "999".to_string(),
indicator1: ' ',
indicator2: ' ',
subfields: smallvec::smallvec![Subfield {
code: 'a',
value: big_value,
}],
};
record.add_field(field);
let mut buf = Vec::new();
let mut writer = MarcWriter::new(&mut buf);
match writer.write_record(&record) {
Ok(()) => TriggerOutcome::NoError,
Err(e) => TriggerOutcome::Fired(e),
}
},
"e404_writer_non_ascii_tag" => {
let mut record = Record::new(leader);
let field = Field {
tag: "12".to_string(),
indicator1: ' ',
indicator2: ' ',
subfields: smallvec::smallvec![Subfield {
code: 'a',
value: "x".to_string(),
}],
};
record.add_field(field);
let mut buf = Vec::new();
let mut writer = MarcWriter::new(&mut buf);
match writer.write_record(&record) {
Ok(()) => TriggerOutcome::NoError,
Err(e) => TriggerOutcome::Fired(e),
}
},
"e404_writer_finished_writer_reuse" => {
let record = Record::new(leader);
let mut buf = Vec::new();
let mut writer = MarcWriter::new(&mut buf);
if let Err(e) = writer.finish() {
return TriggerOutcome::UnsupportedKind(format!(
"{}: finish() unexpectedly failed before reuse trigger could fire: {e}",
case.id
));
}
match writer.write_record(&record) {
Ok(()) => TriggerOutcome::NoError,
Err(e) => TriggerOutcome::Fired(e),
}
},
other => TriggerOutcome::UnsupportedKind(format!(
"trigger_kind=writer: case {other} has no harness branch; add one in exercise_writer"
)),
}
}
/// Maps a manifest `expected_context` field name to the key probed in the
/// error's JSON form.
///
/// Three names are renamed; every other name is returned unchanged.
fn json_probe_key(field: &str) -> &str {
    const RENAMES: [(&str, &str); 3] = [
        ("bytes_near", "bytes_near_hex"),
        ("found", "found_hex"),
        ("source_name", "source"),
    ];
    for &(manifest_name, json_name) in &RENAMES {
        if manifest_name == field {
            return json_name;
        }
    }
    field
}
/// Result of checking one wired case against its manifest contract.
enum WiredOutcome {
    /// The error fired and matched the expected code, slug and context fields.
    Asserted,
    /// The harness could not or did not check the case; the payload says why.
    SkippedByHarness(String),
    /// The case was checked and did not meet its contract; the payload is the failure message.
    Failed(String),
}
/// Fires a wired case's trigger and checks the resulting error against the manifest.
///
/// The checks run in order: the error must fire, its `code()` and `slug()` must
/// match the case, and each `expected_context` field must be present and
/// non-null in the error's JSON form. The first mismatch is returned as `Failed`.
///
/// Cases whose trigger always runs in strict mode are skipped when the manifest
/// does not list `"strict"` among their recovery modes. Triggers the harness
/// cannot run are skipped as well.
fn run_wired(case: &Case) -> WiredOutcome {
    // These two trigger kinds configure lenient recovery themselves.
    let lenient_trigger = matches!(
        case.trigger_kind.as_str(),
        "recovery_cap" | "parse_iso2709_lenient"
    );
    if !lenient_trigger {
        let covers_strict = case.recovery_modes.iter().any(|m| m == "strict");
        if !covers_strict {
            return WiredOutcome::SkippedByHarness(
                "case contract does not cover strict mode; non-strict assertions pending"
                    .to_string(),
            );
        }
    }

    let err = match fire_trigger(case) {
        TriggerOutcome::UnsupportedKind(reason) => {
            return WiredOutcome::SkippedByHarness(reason);
        }
        TriggerOutcome::NoError => {
            return WiredOutcome::Failed(format!(
                "{} ({} / {}): expected {} error, got success",
                case.id, case.code, case.variant, case.code
            ));
        }
        TriggerOutcome::Fired(e) => e,
    };

    let actual_code = err.code();
    if actual_code != case.code {
        return WiredOutcome::Failed(format!(
            "{} ({}): expected code {}, got {} ({:?})",
            case.id, case.variant, case.code, actual_code, err
        ));
    }

    let actual_slug = err.slug();
    if actual_slug != case.slug {
        return WiredOutcome::Failed(format!(
            "{} ({}): expected slug {:?}, got {:?}",
            case.id, case.variant, case.slug, actual_slug
        ));
    }

    let dict = err.to_json_value();
    for field in &case.expected_context {
        let key = json_probe_key(field);
        // A key that is present but null counts as not populated.
        if matches!(dict.get(key), Some(v) if !v.is_null()) {
            continue;
        }
        return WiredOutcome::Failed(format!(
            "{} ({}): expected_context field {} not populated (probed via {:?}); error JSON: {}",
            case.id, case.variant, field, key, dict
        ));
    }
    WiredOutcome::Asserted
}
/// Checks the manifest's shape before any trigger runs: schema version,
/// non-empty case list, unique ids, fixtures present for fixture-backed trigger
/// kinds, and a `skip_reason` on every unwired case.
#[test]
fn manifest_is_well_formed() {
    // Trigger kinds that read `trigger_fixture`. Every kind `fire_trigger`
    // loads a fixture for is listed, so a missing file is reported here with a
    // clear message rather than as a panic in the middle of the coverage run.
    // `parse_marcjson` stays listed even though this harness does not run it.
    const FIXTURE_BACKED_KINDS: [&str; 7] = [
        "parse_iso2709",
        "parse_iso2709_lenient",
        "parse_marcxml",
        "parse_marcjson",
        "parse_holdings",
        "parse_authority",
        "accessor",
    ];

    let manifest = load_manifest();
    assert_eq!(manifest.schema_version, 1, "schema_version drift");
    assert!(!manifest.case.is_empty(), "manifest has no cases");

    // Borrowed ids are enough for the duplicate check; no per-case clone.
    let mut ids = std::collections::HashSet::new();
    for case in &manifest.case {
        let inserted = ids.insert(case.id.as_str());
        assert!(inserted, "duplicate case id {}", case.id);

        if FIXTURE_BACKED_KINDS.contains(&case.trigger_kind.as_str()) {
            assert!(
                case.trigger_fixture.is_some(),
                "case {}: trigger_kind {:?} requires a trigger_fixture",
                case.id,
                case.trigger_kind
            );
            let path = fixture_path(case);
            assert!(
                path.exists(),
                "case {}: fixture {} does not exist",
                case.id,
                path.display()
            );
        }

        if !case.wired {
            assert!(
                case.skip_reason.is_some(),
                "case {} is unwired but has no skip_reason",
                case.id
            );
        }
    }
}
/// Runs every wired case, prints a coverage summary to stderr, and fails if any
/// wired case did not meet its contract.
///
/// Unwired cases and harness-skipped cases are reported but never fail the test.
#[test]
fn coverage_assertions() {
    let manifest = load_manifest();
    let total = manifest.case.len();

    let mut wired_count = 0usize;
    let mut asserted = 0usize;
    let mut failures: Vec<String> = Vec::new();
    let mut harness_skips: Vec<(String, String)> = Vec::new();
    let mut unwired_skips: Vec<(String, String)> = Vec::new();

    for case in &manifest.case {
        if !case.wired {
            let reason = match &case.skip_reason {
                Some(text) => text.clone(),
                None => String::from("unwired (no reason provided)"),
            };
            unwired_skips.push((case.id.clone(), reason));
            continue;
        }

        wired_count += 1;
        match run_wired(case) {
            WiredOutcome::Failed(reason) => failures.push(reason),
            WiredOutcome::SkippedByHarness(reason) => {
                harness_skips.push((case.id.clone(), reason));
            }
            WiredOutcome::Asserted => asserted += 1,
        }
    }

    for (id, reason) in unwired_skips.iter() {
        eprintln!("[error_coverage] UNWIRED {id}: {reason}");
    }
    for (id, reason) in harness_skips.iter() {
        eprintln!("[error_coverage] HARNESS-SKIP {id}: {reason}");
    }

    let unwired_total = unwired_skips.len();
    let harness_total = harness_skips.len();
    eprintln!(
        "[error_coverage] wired in manifest: {wired_count}/{total} \
         | harness asserted: {asserted}/{wired_count} \
         | harness skipped: {} (unwired: {}, harness limitations: {})",
        unwired_total + harness_total,
        unwired_total,
        harness_total,
    );

    // Failures are collected first so one run reports every broken case.
    assert!(
        failures.is_empty(),
        "wired-case failures:\n - {}",
        failures.join("\n - ")
    );
}