from __future__ import annotations
import io
from pathlib import Path
import pytest
import mrrc
# Third ancestor of this file's resolved path (``parents[2]``); used as the
# base directory for locating the fixture below.
_REPO_ROOT = Path(__file__).resolve().parents[2]
# Fixture whose raw bytes _build_corpus() uses as the well-formed record.
_VALID = _REPO_ROOT / "tests" / "data" / "simple_book.mrc"
# Number of records concatenated into the synthetic corpus.
_TOTAL = 100
# Corpus period: the last index of every run of this many records
# (9, 19, 29, ...) is replaced by the corrupted copy.
_MALFORMED_EVERY = 10
def _build_corpus() -> tuple[bytes, list[bool]]:
    """Assemble the synthetic record stream and its expected validity mask.

    The fixture at ``_VALID`` is read once and used as the well-formed
    record. A corrupted twin is made by overwriting bytes 24 through 35 with
    ASCII ``X``; the length of the record is left unchanged.

    Returns:
        A ``(stream, expected)`` pair. ``stream`` is ``_TOTAL`` records
        concatenated back to back. ``expected[i]`` is ``True`` when record
        ``i`` is the untouched fixture and ``False`` when it is the corrupted
        twin, which happens at the last index of every ``_MALFORMED_EVERY``
        records.
    """
    good = _VALID.read_bytes()

    # NOTE(review): offsets 24-35 presumably cover the first directory entry
    # that follows the record leader -- confirm against the MARC layout.
    scratch = bytearray(good)
    filler = ord("X")
    for offset in range(24, 36):
        scratch[offset] = filler
    bad = bytes(scratch)

    # Derive the mask first, then let it drive which copy goes in each slot.
    expected = [
        index % _MALFORMED_EVERY != _MALFORMED_EVERY - 1
        for index in range(_TOTAL)
    ]
    stream = b"".join(good if keep else bad for keep in expected)
    return stream, expected
def test_permissive_iteration_shape_matches_pymarc() -> None:
    """Check that mrrc and pymarc yield the same permissive iteration shape.

    Both readers are run over the stream from ``_build_corpus()`` with
    ``permissive=True``. Each yielded item is reduced to "is it not
    ``None``", and the resulting boolean sequences must each have ``_TOTAL``
    entries, match the corpus's expected pattern, and match one another.

    The test is skipped when ``pymarc`` cannot be imported.
    """
    pymarc = pytest.importorskip("pymarc")
    stream, expected = _build_corpus()

    # mrrc is handed the raw bytes; pymarc gets them wrapped in a BytesIO.
    mrrc_reader = mrrc.MARCReader(stream, permissive=True)
    mrrc_shape = [record is not None for record in mrrc_reader]

    pymarc_reader = pymarc.MARCReader(io.BytesIO(stream), permissive=True)
    pymarc_shape = [record is not None for record in pymarc_reader]

    # Length checks come first so a miscount is reported before a pattern
    # mismatch.
    assert len(mrrc_shape) == _TOTAL, (
        f"mrrc iterated {len(mrrc_shape)}, expected {_TOTAL}"
    )
    assert len(pymarc_shape) == _TOTAL, (
        f"pymarc iterated {len(pymarc_shape)}, expected {_TOTAL}"
    )

    assert mrrc_shape == expected, "mrrc shape diverged from the corpus pattern"
    assert pymarc_shape == expected, "pymarc shape diverged from the corpus pattern"

    assert mrrc_shape == pymarc_shape