import io
import warnings
import pytest
import mrrc
def _build_valid_record_bytes() -> bytes:
    """Serialize one small record with ``mrrc.MARCWriter`` and return the raw bytes.

    The record holds three fields, added in this order: a ``001`` control
    field, an ``008`` control field, and a ``245`` field (indicators ``1``/``0``)
    with a single ``a`` subfield.
    """
    rec = mrrc.Record()

    control_number = mrrc.Field("001", data="test001")
    # NOTE(review): this 008 value is 30 characters long; MARC 21 defines 008 as
    # 40 characters. Confirm whether runs of spaces were lost from the literal.
    fixed_data = mrrc.Field("008", data="230327s2023 wau ef 000 1 eng d")
    title = mrrc.Field("245", "1", "0")
    title.add_subfield("a", "Test title")

    for field in (control_number, fixed_data, title):
        rec.add_field(field)

    # Write into memory so the helper never touches the filesystem.
    buffer = io.BytesIO()
    marc_writer = mrrc.MARCWriter(buffer)
    marc_writer.write_record(rec)
    marc_writer.close()
    return buffer.getvalue()
def _build_malformed_record_bytes() -> bytes:
leader = b"00050nam 2200025 4500"
directory = b"XXX000100000" + b"\x1e"
data_needed = 50 - len(leader) - len(directory) - 1 data = b"\xff" * data_needed + b"\x1d"
return leader + directory + data
def _build_two_record_stream(inject_bad_second: bool = False) -> bytes:
    """Return the bytes of two records concatenated back to back.

    Args:
        inject_bad_second: When true, the second record is the output of
            ``_build_malformed_record_bytes()``. Otherwise the valid record is
            simply repeated.

    Returns:
        The valid record's bytes followed by the second record's bytes.
    """
    # The valid record is built once and reused when no bad record is requested.
    first = _build_valid_record_bytes()
    second = _build_malformed_record_bytes() if inject_bad_second else first
    return first + second
class TestToUnicode:
    """Warning behaviour of the ``to_unicode`` keyword of ``mrrc.MARCReader``."""

    def test_to_unicode_true_no_warning(self):
        """Passing ``to_unicode=True`` must not emit any warning."""
        stream = io.BytesIO(_build_valid_record_bytes())
        with warnings.catch_warnings(record=True) as caught:
            # Record every warning, including ones normally filtered out.
            warnings.simplefilter("always")
            mrrc.MARCReader(stream, to_unicode=True)
        assert len(caught) == 0

    def test_to_unicode_default_no_warning(self):
        """Omitting ``to_unicode`` must not emit any warning."""
        stream = io.BytesIO(_build_valid_record_bytes())
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            mrrc.MARCReader(stream)
        assert len(caught) == 0

    def test_to_unicode_false_warns(self):
        """Passing ``to_unicode=False`` emits exactly one explanatory warning."""
        stream = io.BytesIO(_build_valid_record_bytes())
        with warnings.catch_warnings(record=True) as caught:
            warnings.simplefilter("always")
            mrrc.MARCReader(stream, to_unicode=False)
        assert len(caught) == 1
        message_text = str(caught[0].message)
        assert "to_unicode=False has no effect" in message_text

    def test_to_unicode_false_still_reads(self):
        """The reader still yields the record when ``to_unicode=False`` is given."""
        stream = io.BytesIO(_build_valid_record_bytes())
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("always")
            reader = mrrc.MARCReader(stream, to_unicode=False)
            parsed = list(reader)
        assert len(parsed) == 1
        control_fields = parsed[0].get_fields("001")
        assert control_fields[0].data == "test001"
class TestPermissive:
    """Behaviour of the ``permissive`` keyword of ``mrrc.MARCReader``.

    Streams are built with ``_build_two_record_stream``; when a bad record is
    injected it is always the second record in the stream.
    """

    def test_permissive_false_raises_on_bad_record(self):
        """With ``permissive=False`` the malformed second record raises an error."""
        data = _build_two_record_stream(inject_bad_second=True)
        reader = mrrc.MARCReader(io.BytesIO(data), permissive=False)

        record = next(reader)
        assert record is not None

        with pytest.raises(Exception) as excinfo:
            next(reader)
        # StopIteration is itself an Exception subclass, so without this check
        # the test would also pass if the reader just stopped at the bad record
        # instead of reporting it.
        assert not isinstance(excinfo.value, StopIteration), (
            "Reader ended iteration instead of raising on the malformed record"
        )

    def test_permissive_true_yields_none_for_bad_record(self):
        """With ``permissive=True`` the malformed record shows up as ``None``."""
        data = _build_two_record_stream(inject_bad_second=True)
        reader = mrrc.MARCReader(io.BytesIO(data), permissive=True)
        records = list(reader)
        assert any(r is not None for r in records), "Should have at least one valid record"
        assert any(r is None for r in records), "Should have None for malformed record"

    def test_permissive_true_valid_records_normal(self):
        """With ``permissive=True`` two valid records are both yielded as records."""
        data = _build_two_record_stream(inject_bad_second=False)
        reader = mrrc.MARCReader(io.BytesIO(data), permissive=True)
        records = list(reader)
        assert len(records) == 2
        assert all(r is not None for r in records)

    def test_permissive_pymarc_pattern(self):
        """The skip-on-``None`` loop sees at least one valid and one bad record."""
        data = _build_two_record_stream(inject_bad_second=True)
        reader = mrrc.MARCReader(io.BytesIO(data), permissive=True)

        valid_count = 0
        error_count = 0
        for record in reader:
            if record is None:
                # A None entry stands for a record that could not be read.
                error_count += 1
                continue
            valid_count += 1

        assert valid_count >= 1
        assert error_count >= 1
class TestRecoveryMode:
    """Accepted and rejected values of the ``recovery_mode`` keyword."""

    def test_recovery_mode_default_strict(self):
        """A reader built without ``recovery_mode`` reads the single valid record."""
        stream = io.BytesIO(_build_valid_record_bytes())
        parsed = list(mrrc.MARCReader(stream))
        assert len(parsed) == 1

    def test_recovery_mode_lenient(self):
        """``recovery_mode="lenient"`` is accepted and the valid record is read."""
        stream = io.BytesIO(_build_valid_record_bytes())
        parsed = list(mrrc.MARCReader(stream, recovery_mode="lenient"))
        assert len(parsed) == 1

    def test_recovery_mode_permissive(self):
        """``recovery_mode="permissive"`` is accepted and the valid record is read."""
        stream = io.BytesIO(_build_valid_record_bytes())
        parsed = list(mrrc.MARCReader(stream, recovery_mode="permissive"))
        assert len(parsed) == 1

    def test_recovery_mode_invalid_raises(self):
        """An unknown ``recovery_mode`` value is rejected with ``ValueError``."""
        stream = io.BytesIO(_build_valid_record_bytes())
        with pytest.raises(ValueError, match="Invalid recovery_mode"):
            mrrc.MARCReader(stream, recovery_mode="invalid")
class TestConflictValidation:
    """Combinations of ``permissive=True`` with an explicit ``recovery_mode``."""

    def test_permissive_with_lenient_raises(self):
        """``permissive=True`` plus ``recovery_mode="lenient"`` raises ``ValueError``."""
        stream = io.BytesIO(_build_valid_record_bytes())
        with pytest.raises(ValueError, match="Cannot combine"):
            mrrc.MARCReader(stream, permissive=True, recovery_mode="lenient")

    def test_permissive_with_permissive_recovery_raises(self):
        """``permissive=True`` plus ``recovery_mode="permissive"`` raises ``ValueError``."""
        stream = io.BytesIO(_build_valid_record_bytes())
        with pytest.raises(ValueError, match="Cannot combine"):
            mrrc.MARCReader(stream, permissive=True, recovery_mode="permissive")

    def test_permissive_with_strict_ok(self):
        """``permissive=True`` plus ``recovery_mode="strict"`` is allowed and reads the record."""
        stream = io.BytesIO(_build_valid_record_bytes())
        reader = mrrc.MARCReader(stream, permissive=True, recovery_mode="strict")
        parsed = list(reader)
        assert len(parsed) == 1