import pickle
import pytest
import mrrc
class TestExceptionHierarchy:
    """Subclass relationships among the exception classes exposed by ``mrrc``."""

    def test_invalid_indicator_subclass_of_record_directory_invalid(self):
        """``InvalidIndicator`` is a subclass of ``RecordDirectoryInvalid``."""
        parent = mrrc.RecordDirectoryInvalid
        assert issubclass(mrrc.InvalidIndicator, parent)

    def test_bad_subfield_code_subclass_of_record_directory_invalid(self):
        """``BadSubfieldCode`` is a subclass of ``RecordDirectoryInvalid``."""
        parent = mrrc.RecordDirectoryInvalid
        assert issubclass(mrrc.BadSubfieldCode, parent)

    def test_invalid_field_subclass_of_record_directory_invalid(self):
        """``InvalidField`` is a subclass of ``RecordDirectoryInvalid``."""
        parent = mrrc.RecordDirectoryInvalid
        assert issubclass(mrrc.InvalidField, parent)

    def test_truncated_record_subclass_of_end_of_record_not_found(self):
        """``TruncatedRecord`` is a subclass of ``EndOfRecordNotFound``."""
        parent = mrrc.EndOfRecordNotFound
        assert issubclass(mrrc.TruncatedRecord, parent)

    def test_all_mrrc_specific_classes_subclass_mrrc_exception(self):
        """Each listed class derives from ``MrrcException``."""
        specific_classes = (
            mrrc.InvalidIndicator,
            mrrc.BadSubfieldCode,
            mrrc.InvalidField,
            mrrc.TruncatedRecord,
            mrrc.EncodingError,
            mrrc.XmlError,
            mrrc.JsonError,
            mrrc.WriterError,
        )
        for candidate in specific_classes:
            # The class itself is the assertion message so a failure names it.
            assert issubclass(candidate, mrrc.MrrcException), candidate

    def test_pymarc_style_catch_catches_new_subclasses(self):
        """An ``except RecordDirectoryInvalid`` clause catches ``InvalidIndicator``."""
        context = {
            "record_index": 1,
            "field_tag": "245",
            "indicator_position": 0,
            "found": b":",
            "expected": "digit or space",
        }
        try:
            raise mrrc.InvalidIndicator(**context)
        except mrrc.RecordDirectoryInvalid as caught:
            assert isinstance(caught, mrrc.InvalidIndicator)
# Rows of (class, expected ``code`` attribute, expected ``slug`` attribute).
# Consumed by the parametrized tests in ``TestErrorCodes``.
_CODE_TABLE = [
    # E0xx
    (mrrc.RecordLengthInvalid, "E001", "record_length_invalid"),
    (mrrc.RecordLeaderInvalid, "E002", "leader_invalid"),
    (mrrc.BaseAddressInvalid, "E003", "base_address_invalid"),
    (mrrc.BaseAddressNotFound, "E004", "base_address_not_found"),
    (mrrc.TruncatedRecord, "E005", "truncated_record"),
    (mrrc.EndOfRecordNotFound, "E006", "end_of_record_not_found"),
    (mrrc.FatalReaderError, "E099", "fatal_reader_error"),
    # E1xx
    (mrrc.RecordDirectoryInvalid, "E101", "directory_invalid"),
    (mrrc.FieldNotFound, "E105", "field_not_found"),
    (mrrc.InvalidField, "E106", "invalid_field"),
    # E2xx
    (mrrc.InvalidIndicator, "E201", "invalid_indicator"),
    (mrrc.BadSubfieldCode, "E202", "bad_subfield_code"),
    # E3xx
    (mrrc.EncodingError, "E301", "utf8_invalid"),
    # E4xx
    (mrrc.XmlError, "E401", "marcxml_invalid"),
    (mrrc.JsonError, "E402", "marcjson_invalid"),
    (mrrc.WriterError, "E404", "record_too_large_for_iso2709"),
    # Wxxx
    (mrrc.BadSubfieldCodeWarning, "W001", "bad_subfield_code_warning"),
]
class TestErrorCodes:
    """Checks the ``code`` / ``slug`` / ``help_url`` values against ``_CODE_TABLE``."""

    @pytest.mark.parametrize("cls, code, slug", _CODE_TABLE)
    def test_class_carries_canonical_code_and_slug(self, cls, code, slug):
        """The class-level ``code`` and ``slug`` equal the table row's values."""
        class_name = cls.__name__
        assert cls.code == code, f"{class_name}.code != {code!r}"
        assert cls.slug == slug, f"{class_name}.slug != {slug!r}"

    @pytest.mark.parametrize("cls, code, _slug", _CODE_TABLE)
    def test_help_url_anchors_on_docs_page(self, cls, code, _slug):
        """``help_url()`` is the docs base URL plus an anchor named after the code."""
        if cls is mrrc.BadSubfieldCodeWarning:
            pytest.skip("BadSubfieldCodeWarning is a warning, not an exception")

        instance = cls()

        from mrrc.exceptions import DOCS_BASE_URL

        expected_url = f"{DOCS_BASE_URL}/reference/error-codes/#{code}"
        assert instance.help_url() == expected_url

    def test_codes_are_unique(self):
        """No two rows of the table share a code."""
        codes = [row[1] for row in _CODE_TABLE]
        assert len(set(codes)) == len(codes), "duplicate error codes detected"

    def test_slugs_are_unique(self):
        """No two rows of the table share a slug."""
        slugs = [row[2] for row in _CODE_TABLE]
        assert len(set(slugs)) == len(slugs), "duplicate error slugs detected"
class TestBareConstructor:
    """Constructing exception classes with no arguments, and with a bogus one."""

    @pytest.mark.parametrize(
        "cls",
        (
            mrrc.MrrcException,
            mrrc.RecordLengthInvalid,
            mrrc.RecordLeaderInvalid,
            mrrc.BaseAddressInvalid,
            mrrc.BaseAddressNotFound,
            mrrc.RecordDirectoryInvalid,
            mrrc.EndOfRecordNotFound,
            mrrc.FieldNotFound,
            mrrc.FatalReaderError,
            mrrc.InvalidIndicator,
            mrrc.BadSubfieldCode,
            mrrc.InvalidField,
            mrrc.TruncatedRecord,
            mrrc.EncodingError,
            mrrc.XmlError,
            mrrc.JsonError,
            mrrc.WriterError,
        ),
    )
    def test_bare_construct(self, cls):
        """A no-argument instance has ``None`` for the three checked attributes."""
        bare = cls()
        for attr_name in ("record_index", "byte_offset", "field_tag"):
            assert getattr(bare, attr_name) is None

    def test_unknown_kwarg_raises_type_error(self):
        """An unrecognised keyword argument is rejected with ``TypeError``."""
        bogus_kwargs = {"not_a_real_field": "oops"}
        with pytest.raises(TypeError, match="unexpected keyword"):
            mrrc.InvalidIndicator(**bogus_kwargs)
class TestPickleRoundTrip:
    """``pickle.dumps`` / ``pickle.loads`` round trips of exception instances."""

    def test_round_trip_preserves_all_positional_attrs(self):
        """Every keyword passed to ``InvalidIndicator`` is readable after unpickling."""
        fields = {
            "record_index": 847,
            "record_control_number": "ocm01234567",
            "field_tag": "245",
            "indicator_position": 1,
            "found": b":",
            "expected": "digit or space",
            "byte_offset": 7217,
            "record_byte_offset": 42,
            "source": "harvest.mrc",
        }
        original = mrrc.InvalidIndicator(**fields)
        payload = pickle.dumps(original)
        restored = pickle.loads(payload)
        for attr_name, expected_value in fields.items():
            assert getattr(restored, attr_name) == expected_value

    def test_round_trip_preserves_subclass_extras(self):
        """``TruncatedRecord`` keeps its length attributes and ``record_index``."""
        original = mrrc.TruncatedRecord(
            record_index=12,
            expected_length=1024,
            actual_length=640,
        )
        payload = pickle.dumps(original)
        restored = pickle.loads(payload)
        assert restored.expected_length == 1024
        assert restored.actual_length == 640
        assert restored.record_index == 12

    def test_round_trip_preserves_fatal_reader_error_cap_fields(self):
        """``FatalReaderError`` keeps ``cap`` / ``errors_seen``, also in ``to_dict()``."""
        original = mrrc.FatalReaderError(
            cap=100,
            errors_seen=101,
            record_index=7,
            source="stream.mrc",
        )
        payload = pickle.dumps(original)
        restored = pickle.loads(payload)
        assert restored.cap == 100
        assert restored.errors_seen == 101
        assert restored.record_index == 7
        assert restored.source == "stream.mrc"

        as_dict = restored.to_dict()
        assert as_dict["cap"] == 100
        assert as_dict["errors_seen"] == 101

    def test_round_trip_preserves_invalid_field_message(self):
        """``InvalidField.message`` is unchanged after unpickling."""
        original = mrrc.InvalidField(
            record_index=5,
            field_tag="245",
            message="exceeds data area",
        )
        payload = pickle.dumps(original)
        restored = pickle.loads(payload)
        assert restored.message == "exceeds data area"

    def test_round_trip_bare_instance(self):
        """A no-argument instance unpickles with ``None`` context attributes."""
        original = mrrc.RecordLeaderInvalid()
        payload = pickle.dumps(original)
        restored = pickle.loads(payload)
        assert restored.record_index is None
        assert restored.byte_offset is None

    def test_setstate_rejects_unexpected_keys(self):
        """``__setstate__`` raises ``TypeError`` for a key it does not expect."""
        instance = mrrc.RecordLeaderInvalid()
        hostile_state = {"_format": "evil_lambda_replacement"}
        with pytest.raises(TypeError, match="unexpected"):
            instance.__setstate__(hostile_state)
@pytest.fixture
def invalid_indicator_full():
    """Return an ``InvalidIndicator`` built with nine context keywords set."""
    context = {
        "record_index": 847,
        "record_control_number": "ocm01234567",
        "field_tag": "245",
        "indicator_position": 1,
        "found": b":",
        "expected": "digit or space",
        "byte_offset": 7217,
        "record_byte_offset": 42,
        "source": "harvest.mrc",
    }
    return mrrc.InvalidIndicator(**context)
class TestSnapshotFormats:
    """Compares ``str()``, ``repr()`` and ``detailed()`` output to the ``snapshot`` fixture."""

    def test_str_invalid_indicator_full_context(
        self, invalid_indicator_full, snapshot
    ):
        """``str()`` of the fully populated ``InvalidIndicator``."""
        rendered = str(invalid_indicator_full)
        assert rendered == snapshot

    def test_repr_invalid_indicator_full_context(
        self, invalid_indicator_full, snapshot
    ):
        """``repr()`` of the fully populated ``InvalidIndicator``."""
        rendered = repr(invalid_indicator_full)
        assert rendered == snapshot

    def test_detailed_invalid_indicator_full_context(
        self, invalid_indicator_full, snapshot
    ):
        """``detailed()`` of the fully populated ``InvalidIndicator``."""
        rendered = invalid_indicator_full.detailed()
        assert rendered == snapshot

    def test_str_no_context_falls_back_to_class_name(self, snapshot):
        """``str()`` of a ``BaseAddressNotFound`` built without arguments."""
        rendered = str(mrrc.BaseAddressNotFound())
        assert rendered == snapshot

    def test_str_truncated_record(self, snapshot):
        """``str()`` of a ``TruncatedRecord`` with context and both lengths."""
        context = {
            "record_index": 12,
            "record_control_number": "oc00000012",
            "byte_offset": 16384,
            "record_byte_offset": 128,
            "source": "partial.mrc",
            "expected_length": 1024,
            "actual_length": 640,
        }
        err = mrrc.TruncatedRecord(**context)
        assert str(err) == snapshot

    def test_detailed_truncated_record(self, snapshot):
        """``detailed()`` of a ``TruncatedRecord`` with context and both lengths."""
        context = {
            "record_index": 12,
            "record_control_number": "oc00000012",
            "byte_offset": 16384,
            "record_byte_offset": 128,
            "source": "partial.mrc",
            "expected_length": 1024,
            "actual_length": 640,
        }
        err = mrrc.TruncatedRecord(**context)
        assert err.detailed() == snapshot

    def test_str_writer_error(self, snapshot):
        """``str()`` of a ``WriterError`` carrying a message."""
        context = {
            "record_index": 99,
            "record_control_number": "oc00000099",
            "message": "Record length exceeds 4GB limit (5000000000 bytes)",
        }
        err = mrrc.WriterError(**context)
        assert str(err) == snapshot
class TestStructuredSerialization:
    """Checks on the output of ``to_dict()`` and ``to_json()``."""

    def test_to_dict_full_schema_invalid_indicator(self):
        """``to_dict()`` of a fully populated ``InvalidIndicator`` has the expected entries."""
        err = mrrc.InvalidIndicator(
            record_index=847,
            record_control_number="ocm01234567",
            field_tag="245",
            indicator_position=1,
            found=b":",
            expected="digit or space",
            byte_offset=7217,
            record_byte_offset=42,
            source="harvest.mrc",
        )
        payload = err.to_dict()

        expected_entries = {
            "schema_version": 1,
            "class": "InvalidIndicator",
            "code": "E201",
            "slug": "invalid_indicator",
            "severity": "error",
            "record_index": 847,
            "record_control_number": "ocm01234567",
            "field_tag": "245",
            "indicator_position": 1,
            "expected": "digit or space",
            "byte_offset": 7217,
            "record_byte_offset": 42,
            "source": "harvest.mrc",
            "found_hex": "3a",
        }
        for key, value in expected_entries.items():
            assert payload[key] == value

        assert payload["help_url"].endswith("#E201")
        # The raw bytes value is reported as None next to its hex form.
        assert payload["found"] is None
        assert payload["_cause"] is None

    def test_to_dict_truncated_record_includes_length_extras(self):
        """``TruncatedRecord.to_dict()`` carries both lengths, class name and code."""
        err = mrrc.TruncatedRecord(expected_length=1024, actual_length=640)
        payload = err.to_dict()
        assert payload["expected_length"] == 1024
        assert payload["actual_length"] == 640
        assert payload["class"] == "TruncatedRecord"
        assert payload["code"] == "E005"

    def test_to_dict_invalid_field_includes_message(self):
        """``InvalidField.to_dict()`` carries ``message`` and ``field_tag``."""
        err = mrrc.InvalidField(message="exceeds data area", field_tag="245")
        payload = err.to_dict()
        assert payload["message"] == "exceeds data area"
        assert payload["field_tag"] == "245"

    def test_to_dict_cause_chain_populates_underscore_cause(self):
        """With ``raise ... from``, ``_cause`` holds the text of the cause."""
        try:
            try:
                raise FileNotFoundError("test path")
            except FileNotFoundError as root_cause:
                raise mrrc.WriterError(message="failed") from root_cause
        except mrrc.WriterError as chained:
            payload = chained.to_dict()
        assert payload["_cause"] == "test path"

    def test_to_dict_no_cause_chain_yields_null(self):
        """An instance that was never raised has ``_cause`` set to ``None``."""
        payload = mrrc.InvalidIndicator().to_dict()
        assert payload["_cause"] is None

    def test_to_dict_include_traceback(self):
        """``include_traceback=True`` adds a ``traceback`` list naming the class."""
        try:
            raise mrrc.InvalidIndicator()
        except mrrc.InvalidIndicator as raised:
            payload = raised.to_dict(include_traceback=True)
        assert "traceback" in payload
        frames = payload["traceback"]
        assert isinstance(frames, list)
        assert any("InvalidIndicator" in line for line in frames)

    def test_to_dict_default_omits_traceback(self):
        """Without the flag, ``to_dict()`` has no ``traceback`` key."""
        try:
            raise mrrc.InvalidIndicator()
        except mrrc.InvalidIndicator as raised:
            payload = raised.to_dict()
        assert "traceback" not in payload

    def test_to_json_returns_valid_json_string(self):
        """``to_json()`` output parses with ``json.loads``."""
        import json

        err = mrrc.InvalidIndicator(
            record_index=1,
            field_tag="245",
            found=b":",
        )
        text = err.to_json()
        parsed = json.loads(text)
        assert parsed["class"] == "InvalidIndicator"
        assert parsed["found_hex"] == "3a"

    def test_to_json_forwards_kwargs_to_json_dumps(self):
        """Passing ``indent=2`` yields output containing a newline."""
        pretty = mrrc.InvalidIndicator().to_json(indent=2)
        assert "\n" in pretty

    def test_to_json_include_traceback_routes_to_to_dict(self):
        """``to_json(include_traceback=True)`` output contains a ``traceback`` key."""
        import json

        try:
            raise mrrc.InvalidIndicator()
        except mrrc.InvalidIndicator as raised:
            text = raised.to_json(include_traceback=True)
        parsed = json.loads(text)
        assert "traceback" in parsed

    def test_to_dict_payload_size_bounded(self):
        """A 32-byte ``found`` value gives a 64-character ``found_hex``."""
        found_bytes = b"a" * 32
        payload = mrrc.InvalidIndicator(found=found_bytes).to_dict()
        assert len(payload["found_hex"]) == 64

    def test_to_dict_bytes_near_surfaces_hex_and_offset(self):
        """``bytes_near`` appears as hex plus offset; the raw entry is ``None``."""
        err = mrrc.InvalidIndicator(
            bytes_near=b" :0",
            bytes_near_offset=99,
        )
        payload = err.to_dict()
        assert payload["bytes_near"] is None
        assert payload["bytes_near_hex"] == "203a30"
        assert payload["bytes_near_offset"] == 99

    def test_to_dict_bytes_near_null_when_absent(self):
        """Without ``bytes_near`` there is no hex key and the offset is ``None``."""
        payload = mrrc.InvalidIndicator().to_dict()
        assert payload["bytes_near"] is None
        assert "bytes_near_hex" not in payload
        assert payload["bytes_near_offset"] is None
class TestHexDumpRendering:
    """Checks on the hex-dump section of ``detailed()`` output."""

    def test_detailed_includes_hex_dump_when_bytes_near_set(self):
        """With ``bytes_near`` set, ``detailed()`` contains the dump markers."""
        window = b"2023nyu " + b":0\x000 0 eng d\x1e245"
        err = mrrc.InvalidIndicator(
            record_index=847,
            field_tag="245",
            indicator_position=0,
            byte_offset=0x1C31,
            bytes_near=window,
            bytes_near_offset=0x1C21,
        )
        rendered = err.detailed()
        expected_fragments = (
            "bytes near offset 0x1C31:",
            "0x1C21:",
            "0x1C31:",
            "^^ offending byte",
            "|2023nyu",
        )
        for fragment in expected_fragments:
            # The full rendering is the message so a failure shows the dump.
            assert fragment in rendered, rendered

    def test_detailed_no_dump_when_bytes_near_absent(self):
        """Without ``bytes_near``, ``detailed()`` has no dump header."""
        rendered = mrrc.InvalidIndicator(record_index=1).detailed()
        assert "bytes near offset" not in rendered

    def test_detailed_no_caret_when_offset_outside_window(self):
        """With ``byte_offset`` past the window, the caret line is not rendered."""
        err = mrrc.InvalidIndicator(
            byte_offset=9999,
            bytes_near=b"abcdef",
            bytes_near_offset=0,
        )
        rendered = err.detailed()
        assert "bytes near offset 0x270F:" in rendered
        assert "^^ offending byte" not in rendered

    def test_pickle_preserves_bytes_near(self):
        """``bytes_near`` and ``bytes_near_offset`` are unchanged after unpickling."""
        import pickle

        err = mrrc.InvalidIndicator(
            bytes_near=b" :0",
            bytes_near_offset=99,
        )
        payload = pickle.dumps(err)
        restored = pickle.loads(payload)
        assert restored.bytes_near == b" :0"
        assert restored.bytes_near_offset == 99
def _build_minimal_marc_record(record_type: bytes = b"a") -> bytes:
field_245 = b"10\x1fa" + b"Test" + b"\x1e"
directory = b"245" + format(len(field_245), "04d").encode() + b"00000"
base_address = 24 + len(directory) + 1
record_length = base_address + len(field_245) + 1
leader = (
format(record_length, "05d").encode()
+ b"n"
+ record_type
+ b"m a2"
+ b"2"
+ format(base_address, "05d").encode()
+ b" 4500"
)
return leader + directory + b"\x1e" + field_245 + b"\x1d"
class TestFfiTypedExceptions:
    """Exceptions raised while iterating readers over malformed input.

    Each test feeds hand-built bytes to ``mrrc.MARCReader`` (or
    ``mrrc.AuthorityMARCReader``) through ``io.BytesIO`` and inspects the
    exception that iterating the reader raises.
    """

    def test_truncated_record_surfaces_as_typed_exception_with_byte_counts(self):
        """A record cut short by 10 bytes raises ``TruncatedRecord`` with both lengths."""
        import io

        full = _build_minimal_marc_record()
        truncated = full[: len(full) - 10]
        reader = mrrc.MARCReader(io.BytesIO(truncated))
        # Caught via the parent class; the subclass is asserted below.
        with pytest.raises(mrrc.EndOfRecordNotFound) as excinfo:
            list(reader)
        err = excinfo.value
        assert isinstance(err, mrrc.TruncatedRecord)
        assert err.expected_length is not None
        assert err.actual_length is not None
        assert err.actual_length < err.expected_length

    def test_invalid_leader_record_length_too_small(self):
        """A lone leader declaring a 10-byte record raises an ``MrrcException``."""
        import io

        leader = b"00010nam a2200025 i 4500"
        reader = mrrc.MARCReader(io.BytesIO(leader))
        with pytest.raises(mrrc.MrrcException):
            list(reader)

    def test_wrong_authority_record_type_raises_invalid_field(self):
        """A record of type ``a`` fed to ``AuthorityMARCReader`` raises for record 1."""
        import io

        bib_record = _build_minimal_marc_record(record_type=b"a")
        reader = mrrc.AuthorityMARCReader(io.BytesIO(bib_record))
        with pytest.raises(mrrc.MrrcException) as excinfo:
            list(reader)
        err = excinfo.value
        assert err.record_index == 1
        # The message is only inspected when the error is an InvalidField.
        if isinstance(err, mrrc.InvalidField):
            assert err.message is not None
            assert "authority" in err.message.lower()

    def test_typed_exception_carries_code_slug_help_url(self):
        """A reader-raised exception has an ``E`` code, a slug and a matching help URL."""
        import io

        from mrrc.exceptions import DOCS_BASE_URL

        leader = b"00010nam a2200025 i 4500"
        reader = mrrc.MARCReader(io.BytesIO(leader))
        with pytest.raises(mrrc.MrrcException) as excinfo:
            list(reader)
        err = excinfo.value
        assert err.code.startswith("E"), err.code
        assert err.slug
        assert (
            err.help_url()
            == f"{DOCS_BASE_URL}/reference/error-codes/#{err.code}"
        )

    def test_pre_parse_error_from_buffered_reader_carries_bytes_near(self):
        """The raised error has offset 0 and a ``bytes_near`` window holding the input."""
        import io

        bad = b"00010nam a2200025 i 4500"
        reader = mrrc.MARCReader(io.BytesIO(bad))
        with pytest.raises(mrrc.MrrcException) as excinfo:
            list(reader)
        err = excinfo.value
        assert err.byte_offset == 0
        assert err.bytes_near is not None
        assert len(err.bytes_near) > 0
        assert b"00010" in err.bytes_near
        assert "bytes near offset" in err.detailed()

    def test_leader_error_carries_bytes_near_via_with_bytes_near(self):
        """A ``RecordLeaderInvalid`` error has a ``bytes_near`` window at offset 0."""
        import io

        bad_leader = b"00050nam a2200010 i 4500" + b"\x00" * 26
        reader = mrrc.MARCReader(io.BytesIO(bad_leader))
        with pytest.raises(mrrc.MrrcException) as excinfo:
            list(reader)
        err = excinfo.value
        assert isinstance(err, mrrc.RecordLeaderInvalid)
        assert err.bytes_near is not None
        assert len(err.bytes_near) > 0
        assert err.byte_offset == 0
        assert err.bytes_near_offset == 0
        d = err.detailed()
        assert "bytes near offset" in d
        assert "^^ offending byte" in d
        assert "00050nam" in d

    def test_ffi_error_carries_bytes_near_window_for_hex_dump(self):
        """An oversized directory field length yields an error whose window spans its offset."""
        import io

        field_245 = b"10\x1faT\x1e"
        # The directory claims a field length of 9999, far more than field_245 holds.
        directory = b"245999900000"
        base_address = 24 + len(directory) + 1
        record_length = base_address + len(field_245) + 1
        leader = (
            f"{record_length:05d}".encode()
            + b"nam a22"
            + f"{base_address:05d}".encode()
            + b" i 4500"
        )
        record = leader + directory + b"\x1e" + field_245 + b"\x1d"
        reader = mrrc.MARCReader(io.BytesIO(record))
        with pytest.raises(mrrc.MrrcException) as excinfo:
            list(reader)
        err = excinfo.value
        assert err.bytes_near is not None, err
        assert len(err.bytes_near) > 0
        assert err.byte_offset is not None
        assert err.bytes_near_offset is not None
        # The reported offset must fall inside the captured window.
        assert (
            err.bytes_near_offset
            <= err.byte_offset
            <= err.bytes_near_offset + len(err.bytes_near)
        )
        d = err.detailed()
        assert "bytes near offset" in d
        assert "^^ offending byte" in d

    def test_to_dict_on_surfaced_exception_carries_positional_fields(self):
        """``to_dict()`` on a reader-raised exception contains the positional keys."""
        import io

        full = _build_minimal_marc_record()
        truncated = full[: len(full) - 10]
        reader = mrrc.MARCReader(io.BytesIO(truncated))
        with pytest.raises(mrrc.MrrcException) as excinfo:
            list(reader)
        d = excinfo.value.to_dict()
        assert d["schema_version"] == 1
        assert d["class"]
        assert d["code"].startswith("E")
        assert d["slug"]
        assert d["help_url"].endswith(f"#{d['code']}")
        for key in (
            "record_index",
            "record_control_number",
            "field_tag",
            "byte_offset",
            "record_byte_offset",
            "source",
            "bytes_near",
            "bytes_near_offset",
        ):
            assert key in d, f"missing key {key}"
        if d["class"] == "TruncatedRecord":
            assert d["expected_length"] is not None
            assert d["actual_length"] is not None
        assert "_cause" in d

    def test_no_silent_drops_in_pyo3_conversion(self):
        """Garbage input either reads cleanly or raises ``MrrcException`` / ``OSError``.

        Any other exception type propagates and fails the test.
        """
        import io

        garbage = b"00050nam a22000000 i 4500" + b"\x00" * 26
        reader = mrrc.MARCReader(io.BytesIO(garbage))
        try:
            list(reader)
        except mrrc.MrrcException:
            pass
        except OSError:
            pass