import pytest
from mrrc import ProducerConsumerPipeline
@pytest.fixture
def large_10k_mrc(fixture_10k, tmp_path):
    """Write the ``fixture_10k`` bytes to ``10k_records.mrc`` under ``tmp_path``.

    Returns:
        The path of the file that was written.
    """
    # NOTE(review): presumably ``fixture_10k`` holds 10,000 serialized
    # records -- confirm against the fixture's definition.
    mrc_file = tmp_path.joinpath("10k_records.mrc")
    mrc_file.write_bytes(fixture_10k)
    return mrc_file
def test_regression_records_spanning_chunk_boundaries(large_10k_mrc):
    """Regression check: iterating the pipeline must yield exactly 10000 records.

    Builds a ``ProducerConsumerPipeline`` from the fixture file, counts every
    item it yields, and fails if the count differs from 10000.
    """
    reader = ProducerConsumerPipeline.from_file(str(large_10k_mrc))

    # Count items one at a time; nothing is kept in memory.
    record_count = 0
    for _ in reader:
        record_count += 1

    assert record_count == 10000, (
        f"Expected 10000 records but got {record_count}. "
        "Check if records spanning chunk boundaries are being lost."
    )