from __future__ import annotations
from pathlib import Path
import pytest
import mrrc
# Directory two levels above the one containing this file; the tests below
# resolve every data file relative to it (presumably the repository root).
_REPO_ROOT = Path(__file__).resolve().parents[2]
# Location of the "error_fixtures" directory under tests/data.
_FIXTURES = _REPO_ROOT / "tests" / "data" / "error_fixtures"
def test_accessors_default_to_none() -> None:
    """Before anything is read, both per-record accessors report ``None``."""
    sample_path = _REPO_ROOT.joinpath("tests", "data", "simple_book.mrc")
    reader = mrrc.MARCReader(sample_path.read_bytes(), permissive=True)

    # No call to next() has happened yet, so neither accessor has a value.
    assert reader.current_exception is None
    assert reader.current_chunk is None
def test_clean_record_sets_chunk_and_leaves_exception_none() -> None:
    """Reading a valid record exposes its raw chunk and records no exception."""
    payload = _REPO_ROOT.joinpath("tests", "data", "simple_book.mrc").read_bytes()
    reader = mrrc.MARCReader(payload, permissive=True)

    parsed = next(reader)
    assert parsed is not None
    assert reader.current_exception is None

    chunk = reader.current_chunk
    assert chunk is not None

    # The chunk must open with five ASCII digits, and that number must equal
    # the size of the chunk itself.
    length_field = chunk[:5]
    assert length_field.decode("ascii").isdigit()
    assert len(chunk) == int(length_field)
def test_clean_iteration_clears_prior_exception() -> None:
    """A good record read after a rejected one resets ``current_exception``."""
    data_dir = _REPO_ROOT.joinpath("tests", "data")
    rejected = data_dir.joinpath(
        "fuzz-regressions", "error_classification", "non-ascii-tag-roundtrip.mrc"
    ).read_bytes()
    accepted = data_dir.joinpath("simple_book.mrc").read_bytes()

    # One stream: the record expected to be rejected, then the valid one.
    reader = mrrc.MARCReader(
        rejected + accepted, permissive=True, validation_level="strict_marc"
    )

    # First item: the reader yields None and records an exception.
    assert next(reader) is None
    assert reader.current_exception is not None

    # Second item: a record is returned and the earlier exception is gone.
    assert next(reader) is not None
    assert reader.current_exception is None
def test_permissive_swallow_populates_exception_and_chunk() -> None:
    """A record swallowed in permissive mode leaves its error and bytes on the reader."""
    fixture = _REPO_ROOT.joinpath(
        "tests",
        "data",
        "fuzz-regressions",
        "error_classification",
        "non-ascii-tag-roundtrip.mrc",
    )
    reader = mrrc.MARCReader(
        fixture.read_bytes(), permissive=True, validation_level="strict_marc"
    )

    # The record is expected to be swallowed: iteration yields None.
    assert next(reader) is None

    error = reader.current_exception
    assert error is not None
    assert error.code == "E101"

    # The raw bytes of the swallowed record stay available, and their
    # five-byte length prefix matches the chunk size.
    chunk = reader.current_chunk
    assert chunk is not None
    assert len(chunk) > 0
    assert len(chunk) == int(chunk[:5])
def test_strict_mode_raises_and_does_not_silence_via_accessors() -> None:
    """With ``recovery_mode="strict"`` the rejected record raises out of ``next``."""
    fixture = _REPO_ROOT.joinpath(
        "tests",
        "data",
        "fuzz-regressions",
        "error_classification",
        "non-ascii-tag-roundtrip.mrc",
    )
    reader = mrrc.MARCReader(
        fixture.read_bytes(),
        recovery_mode="strict",
        validation_level="strict_marc",
    )

    # Only the fact that an exception propagates is checked; the accessors
    # themselves are not inspected here.
    with pytest.raises(Exception):
        next(reader)
def test_current_chunk_tracks_each_record_in_a_multi_record_stream() -> None:
    """``current_chunk`` follows the reader from record to record.

    Iterates ``multi_records.mrc`` in permissive mode and, for every record,
    checks that a record was returned, that a chunk is available, and that
    the chunk's five-byte length prefix equals its actual size. Afterwards
    it requires at least two chunks to have been seen and the first two to
    differ.
    """
    bytes_ = (_REPO_ROOT / "tests" / "data" / "multi_records.mrc").read_bytes()
    reader = mrrc.MARCReader(bytes_, permissive=True)
    seen_chunks: list[bytes] = []
    for record in reader:
        # Two independent checks, one statement each.
        assert record is not None
        assert reader.current_chunk is not None
        declared = int(reader.current_chunk[:5])
        assert len(reader.current_chunk) == declared
        seen_chunks.append(reader.current_chunk)

    # The first two chunks differing shows the accessor moved on with the
    # iteration instead of staying on the first record.
    assert len(seen_chunks) >= 2
    assert seen_chunks[0] != seen_chunks[1]
def test_accessors_retain_last_values_after_stop_iteration() -> None:
    """Exhausting the reader leaves the accessors at their last-record values."""
    payload = _REPO_ROOT.joinpath("tests", "data", "simple_book.mrc").read_bytes()
    reader = mrrc.MARCReader(payload, permissive=True)

    assert next(reader) is not None
    chunk_before_exhaustion = reader.current_chunk
    assert chunk_before_exhaustion is not None

    # The test expects the stream to be exhausted after that one record.
    with pytest.raises(StopIteration):
        next(reader)

    # Hitting the end must not have reset or altered either accessor.
    assert reader.current_chunk == chunk_before_exhaustion
    assert reader.current_exception is None
def test_current_chunk_tracks_in_default_strict_mode() -> None:
    """``current_chunk`` is populated when the reader is built with default options."""
    sample_path = _REPO_ROOT.joinpath("tests", "data", "simple_book.mrc")
    # No permissive / recovery arguments: the reader runs with its defaults.
    reader = mrrc.MARCReader(sample_path.read_bytes())

    assert next(reader) is not None

    chunk = reader.current_chunk
    assert chunk is not None
    # The five-byte length prefix must equal the chunk's actual size.
    assert len(chunk) == int(chunk[:5])
    assert reader.current_exception is None
def test_iter_with_errors_does_not_clobber_accessors() -> None:
    """Draining ``iter_with_errors()`` leaves both accessors at ``None``."""
    fixture = _REPO_ROOT.joinpath(
        "tests",
        "data",
        "fuzz-regressions",
        "error_classification",
        "non-ascii-tag-roundtrip.mrc",
    )
    reader = mrrc.MARCReader(
        fixture.read_bytes(), permissive=True, validation_level="strict_marc"
    )

    results = list(reader.iter_with_errors())

    # At least one (record, errors) pair must report a missing record
    # together with one or more errors.
    assert any(
        item is None and len(problems) >= 1 for item, problems in results
    )

    # The accessors are expected to be untouched by iter_with_errors().
    assert reader.current_chunk is None
    assert reader.current_exception is None
def test_iteration_shape_matches_pymarc_default() -> None:
    """mrrc and pymarc yield the same number of items for a good+bad stream.

    Skipped when ``pymarc`` is not installed. Both readers consume the same
    bytes (a valid record followed by the fuzz-regression record); each
    yielded item is reduced to "is it a record or ``None``". The two
    sequences must have equal length, be non-empty, and start with a record.
    """
    pymarc = pytest.importorskip("pymarc")

    data_dir = _REPO_ROOT.joinpath("tests", "data")
    good = data_dir.joinpath("simple_book.mrc").read_bytes()
    bad = data_dir.joinpath(
        "fuzz-regressions", "error_classification", "non-ascii-tag-roundtrip.mrc"
    ).read_bytes()
    combined = good + bad

    import io

    # mrrc takes the raw bytes directly; pymarc is given a file-like wrapper.
    ours = mrrc.MARCReader(
        combined, permissive=True, validation_level="strict_marc"
    )
    mrrc_shape = [item is not None for item in ours]

    theirs = pymarc.MARCReader(io.BytesIO(combined))
    pymarc_shape = [item is not None for item in theirs]

    assert len(mrrc_shape) == len(pymarc_shape), (
        f"iteration count diverged: mrrc={len(mrrc_shape)} pymarc={len(pymarc_shape)}"
    )
    assert len(mrrc_shape) >= 1
    # The leading valid record must come through as a record in both readers.
    assert mrrc_shape[0] is True
    assert pymarc_shape[0] is True