from __future__ import annotations
import sys
from pathlib import Path
from typing import Any
import pytest
import mrrc
if sys.version_info >= (3, 11):
import tomllib
else:
import tomli as tomllib
# Repository root: two levels above the directory that contains this file.
_REPO_ROOT = Path(__file__).resolve().parents[2]
# Location of the error-coverage manifest, relative to the repository root.
_MANIFEST_PATH = _REPO_ROOT.joinpath("tests", "error_coverage.toml")
def _load_manifest() -> dict[str, Any]:
with _MANIFEST_PATH.open("rb") as f:
return tomllib.load(f)
# Parsed once at import time so the case list is available to the
# parametrize decorator below.
_MANIFEST = _load_manifest()
# The value stored under the manifest's "case" key: one dict per case.
_CASES: list[dict[str, Any]] = _MANIFEST["case"]
# Case id -> reason. Any case listed here is skipped by
# test_documented_error_fires with a "python-binding regression" message.
# Currently empty.
_PYTHON_BINDING_REGRESSIONS: dict[str, str] = {}
def _fixture_path(case: dict[str, Any]) -> Path:
    """Resolve a case's ``trigger_fixture`` entry against the repository root.

    Fails with an ``AssertionError`` naming the case id when the entry is
    missing or empty.
    """
    relative = case.get("trigger_fixture")
    assert relative, (
        f"case {case['id']}: trigger_kind requires a trigger_fixture but none set"
    )
    return _REPO_ROOT.joinpath(relative)
def _exercise_strict(case: dict[str, Any]) -> mrrc.MrrcException:
    """Run the trigger described by *case* and return the exception it raises.

    Dispatches on ``case["trigger_kind"]`` (default ``"parse_iso2709"``):

    * ``parse_iso2709`` -- iterate the fixture with a strict ``MARCReader``.
    * ``parse_marcxml`` / ``parse_marcjson`` -- feed the fixture text to
      ``mrrc.xml_to_record`` / ``mrrc.marcjson_to_record``.
    * ``recovery_cap`` -- iterate a lenient reader with ``max_errors=1`` over
      three concatenated copies of a malformed fixture.
    * ``accessor`` -- parse the fixture, then call an accessor on the first
      record (only ``e105_field_not_found`` has a branch).
    * ``writer`` -- write an oversize record (only
      ``e404_record_too_large_for_iso2709`` has an exercising branch).
    * every other known kind is skipped with an explanatory reason.

    Args:
        case: One manifest entry. ``id`` and ``code`` are always read;
            ``variant``, ``trigger_fixture`` and ``validation_level`` are
            read only by the branches that need them.

    Returns:
        The ``mrrc.MrrcException`` raised by the trigger.

    Raises:
        Nothing directly. ``pytest.fail`` is called when the trigger
        completes without raising ``mrrc.MrrcException``, when a ``writer``
        case has no branch, or when the trigger kind is unknown;
        ``pytest.skip`` is called for trigger kinds (and individual cases)
        this harness does not exercise.
    """
    kind = case.get("trigger_kind", "parse_iso2709")
    if kind == "parse_iso2709":
        bytes_ = _fixture_path(case).read_bytes()
        reader = mrrc.MARCReader(
            bytes_,
            recovery_mode="strict",
            validation_level=case.get("validation_level", "structural"),
        )
        try:
            # Drain the reader; the error may surface on any record.
            for _ in reader:
                pass
        except mrrc.MrrcException as e:
            return e
        pytest.fail(
            f"{case['id']} ({case['code']} / {case['variant']}): "
            f"expected {case['code']} error in strict mode, got clean iteration"
        )
    elif kind == "parse_marcxml":
        # Decode explicitly as UTF-8 so the fixture reads the same way
        # regardless of the platform's locale encoding.
        text = _fixture_path(case).read_text(encoding="utf-8")
        try:
            mrrc.xml_to_record(text)
        except mrrc.MrrcException as e:
            return e
        pytest.fail(
            f"{case['id']} ({case['code']}): expected {case['code']} error from "
            f"xml_to_record, got clean parse"
        )
    elif kind == "parse_marcjson":
        # Same explicit decoding as the MARCXML branch.
        text = _fixture_path(case).read_text(encoding="utf-8")
        try:
            mrrc.marcjson_to_record(text)
        except mrrc.MrrcException as e:
            return e
        pytest.fail(
            f"{case['id']} ({case['code']}): expected {case['code']} error from "
            f"marcjson_to_record, got clean parse"
        )
    elif kind == "parse_iso2709_lenient":
        pytest.skip(
            "parse_iso2709_lenient surfaces errors via record.errors in "
            "lenient mode rather than as raised exceptions. The Python "
            "side exposes record.errors as well (covered by "
            "tests/python/test_iter_with_errors.py), but the typed-"
            "exception harness here is shaped around raised exceptions "
            "only. The Rust harness asserts the typed variant and its "
            "positional context for these cases."
        )
    elif kind in ("io_error", "io_error_parse_path"):
        pytest.skip(
            "E007 on the Python side raises built-in OSError, not a typed "
            "mrrc.IoError (documented in docs/reference/error-codes.md#E007 "
            "as pymarc-compat). The Rust harness asserts the typed variant "
            "and its positional context; this typed-class framework cannot "
            "assert built-in OSError. The Python contract is asserted "
            "instead by tests/python/test_errors.py::TestFailingReadRaisesOSError."
        )
    elif kind == "recovery_cap":
        # Repeat one malformed record more times than the error cap allows
        # so the lenient reader is expected to exceed max_errors.
        cap = 1
        bad = (_REPO_ROOT / "tests" / "data" / "error_fixtures"
               / "e101_directory_non_digit_length.bin").read_bytes()
        stream = bad * (cap + 2)
        reader = mrrc.MARCReader(
            stream, recovery_mode="lenient", max_errors=cap
        )
        try:
            for _ in reader:
                pass
        except mrrc.MrrcException as e:
            return e
        pytest.fail(
            f"{case['id']} ({case['code']}): expected {case['code']} from "
            "recovery_cap stream, got clean iteration"
        )
    elif kind == "accessor":
        # The fixture itself must parse cleanly; the error under test comes
        # from the accessor call on the first record, not from parsing.
        bytes_ = _fixture_path(case).read_bytes()
        reader = mrrc.MARCReader(bytes_, recovery_mode="strict")
        try:
            record = next(iter(reader))
        except StopIteration:
            pytest.fail(
                f"{case['id']} ({case['code']}): fixture parsed to no records; "
                "accessor cannot be exercised"
            )
        except mrrc.MrrcException as e:
            pytest.fail(
                f"{case['id']} ({case['code']}): fixture failed to parse cleanly "
                f"({e}); accessor cannot be exercised"
            )
        if case["id"] == "e105_field_not_found":
            try:
                record.get_field_or_err("999")
            except mrrc.MrrcException as e:
                return e
            pytest.fail(
                f"{case['id']} ({case['code']}): get_field_or_err('999') returned "
                "a field on simple_book.mrc; expected FieldNotFound"
            )
        # NOTE(review): an accessor case without a branch is skipped, whereas
        # the equivalent writer case below fails -- confirm this asymmetry is
        # intended.
        pytest.skip(
            f"{case['id']}: trigger_kind=accessor case has no harness branch; "
            "add one in test_error_coverage.py"
        )
    elif kind == "writer":
        if case["id"] == "e404_record_too_large_for_iso2709":
            # One field with a 100,000-character subfield makes the record
            # oversize for the writer.
            record = mrrc.Record()
            record.add_field(
                mrrc.Field(
                    tag="999",
                    indicator1=" ",
                    indicator2=" ",
                    subfields=[mrrc.Subfield("a", "x" * 100_000)],
                )
            )
            import io
            buf = io.BytesIO()
            writer = mrrc.MARCWriter(buf)
            try:
                writer.write_record(record)
            except mrrc.MrrcException as e:
                return e
            pytest.fail(
                f"{case['id']} ({case['code']}): expected {case['code']} error from "
                "MARCWriter.write_record on an oversize record, got success"
            )
        if case["id"] == "e404_writer_non_ascii_tag":
            pytest.skip(
                "Python `mrrc.Field` validates `tag.len() != 3` at "
                "construction with `ValueError`; the underlying Rust "
                "WriterError E404 path is unreachable from the public "
                "Python API. Asserted in the Rust harness."
            )
        if case["id"] == "e404_writer_finished_writer_reuse":
            pytest.skip(
                "Python `mrrc.MARCWriter` raises `RuntimeError` on reuse "
                "after `close()` before reaching the Rust closed-writer "
                "path that would surface E404. Asserted in the Rust harness."
            )
        pytest.fail(
            f"{case['id']}: trigger_kind=writer case has no harness "
            "branch; add one in test_error_coverage.py"
        )
    elif kind == "parse_holdings":
        pytest.skip(
            "trigger_kind=parse_holdings exercises HoldingsMarcReader; the "
            "Python harness's typed-error assertion is the same shape as "
            "parse_iso2709, but plumbing it through HoldingsMarcReader "
            "requires Python bindings that don't yet route the typed "
            "Rust error consistently. Asserted in the Rust harness."
        )
    elif kind == "parse_authority":
        pytest.skip(
            "trigger_kind=parse_authority exercises AuthorityMarcReader; "
            "same Python-binding gap as parse_holdings. Asserted in the "
            "Rust harness."
        )
    elif kind in ("programmatic_validator", "programmatic_writer_check"):
        pytest.skip(
            f"trigger_kind={kind} exercises a Rust API directly with "
            "constructed state and has no Python wrapper analog. Asserted "
            "in the Rust harness."
        )
    else:
        pytest.fail(f"{case['id']}: unknown trigger_kind {kind!r}")
@pytest.mark.parametrize("case", _CASES, ids=lambda c: c["id"])
def test_documented_error_fires(case: dict[str, Any]) -> None:
    """Check that a wired manifest case raises its documented error.

    The raised exception must carry the manifest's ``code`` and ``slug``,
    and every attribute named in ``expected_context`` must be non-``None``.
    Cases that are unwired, that cover neither strict mode nor the
    ``recovery_cap`` trigger, or that appear in
    ``_PYTHON_BINDING_REGRESSIONS`` are skipped.
    """
    if not case["wired"]:
        pytest.skip(case.get("skip_reason", "unwired"))

    covers_strict = "strict" in case["recovery_modes"]
    if not (covers_strict or case.get("trigger_kind") == "recovery_cap"):
        pytest.skip(
            "case contract does not cover strict mode; non-strict "
            "assertions pending"
        )

    case_id = case["id"]
    if case_id in _PYTHON_BINDING_REGRESSIONS:
        regression = _PYTHON_BINDING_REGRESSIONS[case_id]
        pytest.skip(f"python-binding regression: {regression}")

    raised = _exercise_strict(case)

    expected_code = case["code"]
    assert raised.code == expected_code, (
        f"{case_id}: expected code {expected_code}, "
        f"got {raised.code} ({type(raised).__name__})"
    )

    expected_slug = case["slug"]
    assert raised.slug == expected_slug, (
        f"{case_id}: expected slug {expected_slug!r}, got {raised.slug!r}"
    )

    # Each listed context attribute must be populated on the exception.
    for attr_name in case["expected_context"]:
        populated = getattr(raised, attr_name, None)
        assert populated is not None, (
            f"{case_id}: expected_context field {attr_name!r} not populated; "
            f"exception attrs: {raised.to_dict()}"
        )
def test_manifest_is_well_formed() -> None:
    """Validate the manifest's structure independently of any error trigger.

    Checks the schema version, that at least one case exists, that case ids
    are unique, that cases with a parse trigger kind name a fixture file that
    exists, and that every unwired case carries a ``skip_reason``.
    """
    assert _MANIFEST["schema_version"] == 1, "schema_version drift"
    assert _CASES, "manifest has no cases"

    # Trigger kinds whose fixture file is validated here.
    fixture_backed_kinds = frozenset(
        ("parse_iso2709", "parse_marcxml", "parse_marcjson")
    )
    ids_so_far: set[str] = set()

    for entry in _CASES:
        case_id = entry["id"]
        assert case_id not in ids_so_far, f"duplicate case id {case_id}"
        ids_so_far.add(case_id)

        kind = entry.get("trigger_kind", "parse_iso2709")
        if kind in fixture_backed_kinds:
            assert "trigger_fixture" in entry, (
                f"case {case_id}: trigger_kind {kind!r} requires a trigger_fixture"
            )
            fixture_path = _REPO_ROOT / entry["trigger_fixture"]
            assert fixture_path.exists(), (
                f"case {case_id}: fixture {fixture_path} does not exist"
            )

        if entry["wired"]:
            continue
        assert entry.get("skip_reason"), (
            f"case {case_id} is unwired but has no skip_reason"
        )
def test_coverage_tally(capsys: pytest.CaptureFixture[str]) -> None:
    """Print how many manifest cases are wired, bypassing output capture.

    Informational only: nothing is asserted.
    """
    total = len(_CASES)
    wired = len([entry for entry in _CASES if entry["wired"]])
    unwired = total - wired
    summary = (
        f"\n[error_coverage] wired in manifest: {wired}/{total} (unwired: {unwired})"
    )
    with capsys.disabled():
        print(summary)