import io
import json
import os
import threading
from pathlib import Path
import pytest
import mrrc
class TestParityRustFileVsPythonFile:
    """Compare records read via a filesystem path with records read via a file object.

    The assertion messages in this suite label the path-based reader
    "RustFile" and the reader built from an open binary file "PythonFile";
    those labels are taken from the messages, nothing here inspects which
    backend the reader actually selected.
    """

    @staticmethod
    def _record_to_comparable(record):
        """Return the object parsed from ``record.to_marcjson()``."""
        return json.loads(record.to_marcjson())

    @staticmethod
    def read_all_records(source):
        """Exhaust ``mrrc.MARCReader(source)`` and return its records as a list."""
        return [record for record in mrrc.MARCReader(source)]

    def test_parity_simple_book_file_path(self):
        """A ``str`` path and an open handle yield equal records for simple_book."""
        test_file = "tests/data/simple_book.mrc"
        if not os.path.exists(test_file):
            pytest.skip(f"Test file not found: {test_file}")

        records_rustfile = self.read_all_records(test_file)
        with open(test_file, "rb") as handle:
            records_pythonfile = self.read_all_records(handle)

        assert len(records_rustfile) == len(records_pythonfile), (
            f"Record count mismatch: RustFile={len(records_rustfile)}, PythonFile={len(records_pythonfile)}"
        )

        # map() is lazy, so each pair is converted only when it is compared.
        to_json = self._record_to_comparable
        converted = zip(map(to_json, records_rustfile), map(to_json, records_pythonfile))
        for i, (from_path, from_handle) in enumerate(converted):
            assert from_path == from_handle, (
                f"Record {i} mismatch: RustFile vs PythonFile"
            )

    def test_parity_multi_records_file_path(self):
        """A ``str`` path and an open handle yield equal records for multi_records."""
        test_file = "tests/data/multi_records.mrc"
        if not os.path.exists(test_file):
            pytest.skip(f"Test file not found: {test_file}")

        by_path = self.read_all_records(test_file)
        with open(test_file, "rb") as handle:
            by_handle = self.read_all_records(handle)

        assert len(by_path) == len(by_handle)

        to_json = self._record_to_comparable
        converted = zip(map(to_json, by_path), map(to_json, by_handle))
        for i, (from_path, from_handle) in enumerate(converted):
            assert from_path == from_handle, f"Record {i} mismatch"

    def test_parity_pathlib_path(self):
        """A ``pathlib.Path`` source and an open handle yield equal records."""
        test_file = "tests/data/simple_book.mrc"
        if not os.path.exists(test_file):
            pytest.skip(f"Test file not found: {test_file}")

        by_path = self.read_all_records(Path(test_file))
        with open(test_file, "rb") as handle:
            by_handle = self.read_all_records(handle)

        assert len(by_path) == len(by_handle)

        to_json = self._record_to_comparable
        converted = zip(map(to_json, by_path), map(to_json, by_handle))
        for i, (from_path, from_handle) in enumerate(converted):
            assert from_path == from_handle, f"Record {i} mismatch"
class TestParityCursorBackendVsRustFile:
    """Compare records read from in-memory data with records read via a path.

    The assertion messages label the reader built from ``bytes`` as
    "CursorBackend" and the path-based reader as "RustFile"; those labels
    come from the messages, the tests do not inspect the backend in use.
    """

    @staticmethod
    def _record_to_comparable(record):
        """Return the object parsed from ``record.to_marcjson()``."""
        return json.loads(record.to_marcjson())

    @staticmethod
    def read_all_records(source):
        """Exhaust ``mrrc.MARCReader(source)`` and return its records as a list."""
        return [record for record in mrrc.MARCReader(source)]

    def test_parity_bytes_vs_file_path(self):
        """``bytes`` input and a ``str`` path yield equal records for simple_book."""
        test_file = "tests/data/simple_book.mrc"
        if not os.path.exists(test_file):
            pytest.skip(f"Test file not found: {test_file}")

        with open(test_file, "rb") as handle:
            payload = handle.read()

        in_memory = self.read_all_records(payload)
        on_disk = self.read_all_records(test_file)
        assert len(in_memory) == len(on_disk)

        # map() is lazy, so each pair is converted only when it is compared.
        to_json = self._record_to_comparable
        converted = zip(map(to_json, in_memory), map(to_json, on_disk))
        for i, (memory_json, disk_json) in enumerate(converted):
            assert memory_json == disk_json, (
                f"Record {i} mismatch: CursorBackend vs RustFile"
            )

    def test_parity_bytearray_vs_file_path(self):
        """``bytearray`` input and a ``str`` path yield equal records for multi_records."""
        test_file = "tests/data/multi_records.mrc"
        if not os.path.exists(test_file):
            pytest.skip(f"Test file not found: {test_file}")

        with open(test_file, "rb") as handle:
            payload = bytearray(handle.read())

        in_memory = self.read_all_records(payload)
        on_disk = self.read_all_records(test_file)
        assert len(in_memory) == len(on_disk)

        to_json = self._record_to_comparable
        converted = zip(map(to_json, in_memory), map(to_json, on_disk))
        for i, (memory_json, disk_json) in enumerate(converted):
            assert memory_json == disk_json, f"Record {i} mismatch"

    def test_parity_bytesio_vs_file_path(self):
        """``io.BytesIO`` input and a ``str`` path yield equal records for simple_book."""
        test_file = "tests/data/simple_book.mrc"
        if not os.path.exists(test_file):
            pytest.skip(f"Test file not found: {test_file}")

        with open(test_file, "rb") as handle:
            payload = handle.read()

        in_memory = self.read_all_records(io.BytesIO(payload))
        on_disk = self.read_all_records(test_file)
        assert len(in_memory) == len(on_disk)

        to_json = self._record_to_comparable
        converted = zip(map(to_json, in_memory), map(to_json, on_disk))
        for i, (memory_json, disk_json) in enumerate(converted):
            assert memory_json == disk_json, f"Record {i} mismatch"
class TestGILReleaseVerification:
    """Read MARC data from worker threads and check that the reads finish.

    NOTE(review): despite the class name, nothing here measures GIL release
    directly; the tests only verify that reads started in worker threads
    complete within a timeout, raise nothing, and return consistent counts.
    """

    def test_rustfile_and_cursor_backend_are_thread_safe(self):
        """Read the fixture in a worker thread, first by path and then from bytes.

        The two reads run one after the other, each in its own thread. Each
        must finish within 10 seconds, raise no exception and yield at least
        one record. Skipped when the fixture file is missing.
        """
        test_file = "tests/data/multi_records.mrc"
        if not os.path.exists(test_file):
            pytest.skip(f"Test file not found: {test_file}")

        # The "<source_type>_count" keys are deliberately NOT pre-seeded: a key
        # only appears once the worker has finished, which keeps the
        # "did not complete" assertions below meaningful.
        results = {"errors": []}
        lock = threading.Lock()

        def reader_thread(source, source_type):
            """Count the records in *source* and store the count under ``<source_type>_count``."""
            try:
                count = sum(1 for _ in mrrc.MARCReader(source))
                with lock:
                    results[f"{source_type}_count"] = count
            except Exception as e:
                # An exception in a worker never reaches the test thread, so
                # record it for the assertions below.
                with lock:
                    results["errors"].append(f"{source_type}: {e}")

        # daemon=True: a read that hangs past the join timeout must fail the
        # test, not keep the interpreter alive at exit.
        thread1 = threading.Thread(
            target=reader_thread, args=(test_file, "rustfile"), daemon=True
        )
        thread1.start()
        thread1.join(timeout=10)
        assert not thread1.is_alive(), "RustFile read timed out"
        # Check errors first so a crash is reported as itself rather than as
        # a missing or zero count.
        assert not results["errors"], f"Threading errors: {results['errors']}"
        assert "rustfile_count" in results, "RustFile read did not complete"
        assert results["rustfile_count"] > 0, "RustFile read returned no records"

        with open(test_file, "rb") as f:
            file_data = f.read()

        thread2 = threading.Thread(
            target=reader_thread, args=(file_data, "cursor"), daemon=True
        )
        thread2.start()
        thread2.join(timeout=10)
        assert not thread2.is_alive(), "CursorBackend read timed out"
        assert not results["errors"], f"Threading errors: {results['errors']}"
        assert "cursor_count" in results, "CursorBackend read did not complete"
        assert results["cursor_count"] > 0, "CursorBackend read returned no records"

    def test_concurrent_reads_same_file(self):
        """Read the same fixture path from three threads at once.

        Every thread must finish within its join timeout, none may raise, and
        all must report the same record count. Skipped when the fixture file
        is missing.
        """
        test_file = "tests/data/multi_records.mrc"
        if not os.path.exists(test_file):
            pytest.skip(f"Test file not found: {test_file}")

        results = {"counts": [], "errors": []}
        lock = threading.Lock()

        def reader_thread(thread_id):
            """Count the records in the fixture and append the count to ``results["counts"]``."""
            try:
                count = sum(1 for _ in mrrc.MARCReader(test_file))
                with lock:
                    results["counts"].append(count)
            except Exception as e:
                with lock:
                    results["errors"].append(f"Thread {thread_id}: {e}")

        # Start every thread before joining any, so the reads overlap.
        # daemon=True for the same reason as above: a hung read must not
        # block interpreter exit.
        threads = [
            threading.Thread(target=reader_thread, args=(i,), daemon=True)
            for i in range(3)
        ]
        for t in threads:
            t.start()

        for t in threads:
            t.join(timeout=10)
            assert not t.is_alive(), "Thread timed out"

        assert not results["errors"], f"Errors occurred: {results['errors']}"
        assert len(results["counts"]) == 3, "Not all threads completed"
        assert all(c == results["counts"][0] for c in results["counts"]), (
            f"Different record counts across threads: {results['counts']}"
        )
class TestBackendParityAcceptanceCriteria:
    """Acceptance gates comparing ``mrrc.MARCReader`` output across input types.

    The names "RustFile" (path input), "PythonFile" (open binary file) and
    "CursorBackend" (``bytes`` input) are the labels used in this suite's
    assertion messages; the tests do not inspect which backend was selected.
    """

    def test_gate_rustfile_equals_pythonfile(self):
        """Path input and file-object input must produce identical MARC-JSON.

        Skipped when the fixture file is missing.
        """
        test_file = "tests/data/multi_records.mrc"
        if not os.path.exists(test_file):
            pytest.skip(f"Test file not found: {test_file}")

        reader1 = mrrc.MARCReader(test_file)
        records_rust = [json.loads(r.to_marcjson()) for r in reader1]

        # Records are converted inside the ``with`` so the handle is still
        # open while the reader is being iterated.
        with open(test_file, "rb") as f:
            reader2 = mrrc.MARCReader(f)
            records_py = [json.loads(r.to_marcjson()) for r in reader2]

        assert records_rust == records_py, "RustFile and PythonFile outputs differ"

    def test_gate_cursorbackend_equals_rustfile(self):
        """``bytes`` input and path input must produce identical MARC-JSON.

        Skipped when the fixture file is missing.
        """
        test_file = "tests/data/multi_records.mrc"
        if not os.path.exists(test_file):
            pytest.skip(f"Test file not found: {test_file}")

        with open(test_file, "rb") as f:
            file_data = f.read()

        reader1 = mrrc.MARCReader(file_data)
        records_cursor = [json.loads(r.to_marcjson()) for r in reader1]

        reader2 = mrrc.MARCReader(test_file)
        records_rust = [json.loads(r.to_marcjson()) for r in reader2]

        assert records_cursor == records_rust, "CursorBackend and RustFile outputs differ"

    def test_gate_no_exceptions_or_panics(self):
        """Every available fixture must read to the end without raising.

        Each existing fixture is read once via its path and once via an open
        binary file object; any ``Exception`` is turned into a test failure
        naming the file. Missing fixtures are ignored individually, but the
        test is skipped when none exist, so it cannot pass without having
        read anything.
        """
        test_files = [
            "tests/data/simple_book.mrc",
            "tests/data/multi_records.mrc",
            "tests/data/with_control_fields.mrc",
        ]

        available = [path for path in test_files if os.path.exists(path)]
        if not available:
            # Same behaviour as the other tests in this file when their
            # fixture is missing: skip rather than report a vacuous pass.
            pytest.skip(f"Test file not found: {', '.join(test_files)}")

        for test_file in available:
            try:
                reader = mrrc.MARCReader(test_file)
                for _ in reader:
                    pass
            except Exception as e:
                pytest.fail(f"RustFile read failed for {test_file}: {e}")

            try:
                with open(test_file, "rb") as f:
                    reader = mrrc.MARCReader(f)
                    for _ in reader:
                        pass
            except Exception as e:
                pytest.fail(f"PythonFile read failed for {test_file}: {e}")