use mrrc::{
marcjson, marcxml, Field, Leader, MarcError, MarcReader, MarcWriter, Record, RecoveryMode,
Subfield, ValidationLevel,
};
use proptest::prelude::*;
use smallvec::SmallVec;
use std::io::Cursor;
/// Strategy that builds a [`Leader`] from a fixed menu of characters per position.
///
/// `record_length` and `data_base_address` are left at 0, `indicator_count` and
/// `subfield_code_count` are fixed at 2, and `reserved` is always `"4500"`.
fn arb_leader() -> impl Strategy<Value = Leader> {
    let status = prop_oneof![Just('a'), Just('c'), Just('d'), Just('n'), Just('p')];
    let kind = prop_oneof![
        Just('a'),
        Just('c'),
        Just('d'),
        Just('e'),
        Just('f'),
        Just('g'),
        Just('i'),
        Just('j'),
        Just('k'),
        Just('m'),
        Just('o'),
        Just('p'),
        Just('r'),
        Just('t'),
    ];
    let level = prop_oneof![
        Just('a'),
        Just('b'),
        Just('c'),
        Just('d'),
        Just('i'),
        Just('m'),
        Just('s'),
    ];
    let coding = prop_oneof![Just(' '), Just('a')];
    // The blank is listed twice here, so it is drawn more often than 'a'.
    let control = prop_oneof![Just(' '), Just('a'), Just(' ')];
    let encoding = prop_oneof![Just(' '), Just('1'), Just('3'), Just('7')];
    let form = prop_oneof![Just(' '), Just('a'), Just('c'), Just('i')];
    let multipart = prop_oneof![Just(' '), Just('a'), Just('b'), Just('c')];

    (status, kind, level, coding, control, encoding, form, multipart).prop_map(
        |(status, kind, level, coding, control, encoding, form, multipart)| Leader {
            record_length: 0,
            record_status: status,
            record_type: kind,
            bibliographic_level: level,
            control_record_type: control,
            character_coding: coding,
            indicator_count: 2,
            subfield_code_count: 2,
            data_base_address: 0,
            encoding_level: encoding,
            cataloging_form: form,
            multipart_level: multipart,
            reserved: "4500".to_string(),
        },
    )
}
/// Strategy yielding one of the control-field tags `001`, `003`, `005`, `006`,
/// `007` or `008`.
fn arb_control_tag() -> impl Strategy<Value = String> {
    prop_oneof![
        Just(String::from("001")),
        Just(String::from("003")),
        Just(String::from("005")),
        Just(String::from("006")),
        Just(String::from("007")),
        Just(String::from("008")),
    ]
}
/// Strategy for control-field payloads: 1 to 40 characters drawn from the regex
/// class below, rejecting any string containing a byte in `0x1D..=0x1F`.
fn arb_control_value() -> impl Strategy<Value = String> {
    let pattern = "[a-zA-Z0-9 ./:;-]{1,40}";
    pattern.prop_filter("no MARC delimiters", |text: &String| {
        text.bytes().all(|byte| !matches!(byte, 0x1D | 0x1E | 0x1F))
    })
}
/// Strategy yielding a zero-padded three-digit data-field tag in `010..=999`.
fn arb_data_tag() -> impl Strategy<Value = String> {
    let numbers = 10u16..1000;
    numbers.prop_map(|number| format!("{:03}", number))
}
/// Strategy yielding an indicator character: a blank or a single ASCII digit.
fn arb_indicator() -> impl Strategy<Value = char> {
    prop_oneof![
        // Blank indicator first, then the ten digits in ascending order.
        Just(' '),
        Just('0'), Just('1'), Just('2'), Just('3'), Just('4'),
        Just('5'), Just('6'), Just('7'), Just('8'), Just('9'),
    ]
}
/// Strategy yielding a subfield code: a lowercase ASCII letter or an ASCII digit.
fn arb_subfield_code() -> impl Strategy<Value = char> {
    let letters = (b'a'..=b'z').prop_map(char::from);
    let digits = (b'0'..=b'9').prop_map(char::from);
    prop_oneof![letters, digits]
}
fn arb_subfield_value() -> BoxedStrategy<String> {
"[a-zA-Z0-9 .,:;()'/&-]{1,80}"
.prop_filter("no MARC delimiters", |s: &String| {
!s.bytes().any(|b| b == 0x1D || b == 0x1E || b == 0x1F)
})
.boxed()
}
fn arb_subfield_value_xml() -> BoxedStrategy<String> {
"[a-zA-Z0-9 .,:;()<>&\"'/-]{1,80}"
.prop_filter("no MARC delimiters", |s: &String| {
!s.bytes().any(|b| b == 0x1D || b == 0x1E || b == 0x1F)
})
.boxed()
}
/// Strategy for subfield values of 1 to 80 characters that mix alphanumerics and
/// light punctuation with tab, newline, carriage return, backslash and double
/// quote; used by the MARCJSON round-trip test.
fn arb_subfield_value_json() -> BoxedStrategy<String> {
    let json_char = prop_oneof![
        Just('\t'),
        Just('\n'),
        Just('\r'),
        Just('\\'),
        Just('"'),
        (b'a'..=b'z').prop_map(char::from),
        (b'A'..=b'Z').prop_map(char::from),
        (b'0'..=b'9').prop_map(char::from),
        Just(' '),
        Just('.'),
        Just(','),
    ];
    prop::collection::vec(json_char, 1..=80)
        .prop_map(|chars: Vec<char>| chars.iter().collect::<String>())
        .prop_filter("no MARC delimiters", |text: &String| {
            text.bytes().all(|byte| !matches!(byte, 0x1D | 0x1E | 0x1F))
        })
        .boxed()
}
/// Pairs a generated subfield code with a value drawn from `value` to form a
/// [`Subfield`].
fn arb_subfield_with(value: BoxedStrategy<String>) -> BoxedStrategy<Subfield> {
    let parts = (arb_subfield_code(), value);
    parts
        .prop_map(|(code, text)| Subfield { code, value: text })
        .boxed()
}
/// Builds a data [`Field`] with a generated tag, two generated indicators and
/// between one and five subfields whose values come from `value`.
fn arb_data_field_with(value: BoxedStrategy<String>) -> BoxedStrategy<Field> {
    let subfields = prop::collection::vec(arb_subfield_with(value), 1..=5);
    let parts = (arb_data_tag(), arb_indicator(), arb_indicator(), subfields);
    parts
        .prop_map(|(tag, indicator1, indicator2, subfields)| Field {
            tag,
            indicator1,
            indicator2,
            subfields: SmallVec::from_vec(subfields),
        })
        .boxed()
}
/// Control-field value strategy for the MARCXML tests; currently the same
/// alphabet as [`arb_control_value`], just boxed.
fn arb_control_value_xml() -> BoxedStrategy<String> {
    let values = arb_control_value();
    values.boxed()
}
/// Assembles a [`Record`] from a generated leader, up to three control fields
/// and up to ten data fields.
///
/// `subfield_value` supplies the subfield text and `control_value` the
/// control-field text. When the same control tag is generated more than once,
/// only its first occurrence is added to the record.
fn arb_record_with(
    subfield_value: BoxedStrategy<String>,
    control_value: BoxedStrategy<String>,
) -> BoxedStrategy<Record> {
    let controls = prop::collection::vec((arb_control_tag(), control_value), 0..=3);
    let fields = prop::collection::vec(arb_data_field_with(subfield_value), 0..=10);
    (arb_leader(), controls, fields)
        .prop_map(|(leader, controls, fields)| {
            let mut record = Record::new(leader);
            let mut used_tags = std::collections::HashSet::new();
            for (tag, value) in controls {
                // `insert` returns false for a tag that was already used.
                if !used_tags.insert(tag.clone()) {
                    continue;
                }
                record.add_control_field(tag, value);
            }
            fields.into_iter().for_each(|field| {
                record.add_field(field);
            });
            record
        })
        .boxed()
}
/// Record strategy using the plain subfield and control-value alphabets.
fn arb_record() -> BoxedStrategy<Record> {
    let subfields = arb_subfield_value();
    let controls = arb_control_value().boxed();
    arb_record_with(subfields, controls)
}
/// Record strategy whose subfield values include XML-significant characters.
fn arb_record_xml() -> BoxedStrategy<Record> {
    let subfields = arb_subfield_value_xml();
    let controls = arb_control_value_xml();
    arb_record_with(subfields, controls)
}
/// Record strategy whose subfield values include characters that need JSON
/// escaping (tab, newline, carriage return, backslash, double quote).
fn arb_record_json() -> BoxedStrategy<Record> {
    let subfields = arb_subfield_value_json();
    let controls = arb_control_value().boxed();
    arb_record_with(subfields, controls)
}
/// Number of leader bytes at the start of an emitted record; the directory
/// is read starting at this offset.
const LEADER_LEN: usize = 24;
/// Width of one directory entry: 3-byte tag, 4-byte length, 5-byte start.
const DIRECTORY_ENTRY_LEN: usize = 12;
/// Byte expected at the end of every field in the data area.
const FIELD_TERMINATOR: u8 = 0x1E;
/// Byte expected as the last byte of an emitted record.
const RECORD_TERMINATOR: u8 = 0x1D;
/// Byte that immediately precedes each subfield code.
const SUBFIELD_DELIMITER: u8 = 0x1F;
/// Reads the decimal record length stored in leader bytes `0..5`.
///
/// # Panics
/// Panics if `bytes` is shorter than five bytes, or if those bytes are not
/// UTF-8 text that parses as a `usize`.
fn parse_record_length(bytes: &[u8]) -> usize {
    let digits = std::str::from_utf8(&bytes[..5]).expect("leader length is ASCII");
    digits.parse::<usize>().expect("leader length is numeric")
}
/// Reads the decimal base address of data stored in leader bytes `12..17`.
///
/// # Panics
/// Panics if `bytes` is shorter than 17 bytes, or if those bytes are not
/// UTF-8 text that parses as a `usize`.
fn parse_data_base_address(bytes: &[u8]) -> usize {
    let digits = std::str::from_utf8(&bytes[12..17]).expect("leader base address is ASCII");
    digits
        .parse::<usize>()
        .expect("leader base address is numeric")
}
/// One decoded directory entry of an emitted record, as produced by
/// `parse_directory`.
#[derive(Debug)]
struct DirEntry {
// Three-character field tag (entry bytes 0..3).
tag: String,
// Field length in bytes (entry bytes 3..7); the tests expect it to include
// the trailing field terminator.
length: usize,
// Field start offset (entry bytes 7..12), relative to the base address of data.
start: usize,
}
/// Decodes the directory of an emitted record into [`DirEntry`] values.
///
/// The directory is taken to span from the end of the leader up to, but not
/// including, the byte just before the base address of data (the directory's
/// own terminator). Each 12-byte entry is split into tag, length and start.
///
/// # Panics
/// Panics if the base address does not leave a valid directory span inside
/// `bytes`, if the directory is not a whole number of entries, or if any
/// entry column is not ASCII decimal text. A partial trailing entry is
/// rejected rather than silently dropped, so a malformed directory cannot
/// slip past the structural tests.
fn parse_directory(bytes: &[u8]) -> Vec<DirEntry> {
    let base = parse_data_base_address(bytes);
    // `checked_sub` covers base == 0; `get` covers an end before the leader
    // or past the buffer.
    let dir = base
        .checked_sub(1)
        .and_then(|end| bytes.get(LEADER_LEN..end))
        .unwrap_or_else(|| {
            panic!(
                "base address {} leaves no valid directory span in a {}-byte buffer",
                base,
                bytes.len()
            )
        });
    let chunks = dir.chunks_exact(DIRECTORY_ENTRY_LEN);
    assert!(
        chunks.remainder().is_empty(),
        "directory is {} bytes, which is not a multiple of the {}-byte entry size",
        dir.len(),
        DIRECTORY_ENTRY_LEN
    );
    chunks
        .map(|chunk| DirEntry {
            tag: std::str::from_utf8(&chunk[0..3])
                .expect("tag is ASCII")
                .to_string(),
            length: std::str::from_utf8(&chunk[3..7])
                .expect("length is ASCII")
                .parse()
                .expect("length is numeric"),
            start: std::str::from_utf8(&chunk[7..12])
                .expect("start is ASCII")
                .parse()
                .expect("start is numeric"),
        })
        .collect()
}
/// Serializes `record` with [`MarcWriter`] into a fresh byte vector.
///
/// # Panics
/// Panics if the writer reports an error.
fn emit_binary(record: &Record) -> Vec<u8> {
    let mut bytes = Vec::new();
    // The writer is a temporary, so its borrow of `bytes` ends with this statement.
    MarcWriter::new(&mut bytes)
        .write_record(record)
        .expect("write should succeed");
    bytes
}
proptest! {
#![proptest_config(ProptestConfig {
cases: 64,
..ProptestConfig::default()
})]
// Writing a record to binary and reading it back must preserve the leader
// fields compared below, every control field, and every data field with its
// indicators and subfields; the buffer must hold exactly one record.
#[test]
fn binary_roundtrip(record in arb_record()) {
    let buffer = emit_binary(&record);
    let mut reader = MarcReader::new(Cursor::new(&buffer));
    let parsed = reader
        .read_record()
        .expect("read should succeed")
        .expect("should get a record");

    let (before, after) = (&record.leader, &parsed.leader);
    prop_assert_eq!(before.record_status, after.record_status);
    prop_assert_eq!(before.record_type, after.record_type);
    prop_assert_eq!(before.bibliographic_level, after.bibliographic_level);
    prop_assert_eq!(before.control_record_type, after.control_record_type);
    prop_assert_eq!(before.character_coding, after.character_coding);
    prop_assert_eq!(before.indicator_count, after.indicator_count);
    prop_assert_eq!(before.subfield_code_count, after.subfield_code_count);
    prop_assert_eq!(before.encoding_level, after.encoding_level);
    prop_assert_eq!(before.cataloging_form, after.cataloging_form);
    prop_assert_eq!(before.multipart_level, after.multipart_level);
    prop_assert_eq!(&before.reserved, &after.reserved);

    prop_assert_eq!(record.control_fields.len(), parsed.control_fields.len());
    for (tag, value) in &record.control_fields {
        prop_assert_eq!(
            Some(value),
            parsed.control_fields.get(tag),
            "control field {} mismatch",
            tag
        );
    }

    let written: Vec<&Field> = record.fields().collect();
    let reread: Vec<&Field> = parsed.fields().collect();
    prop_assert_eq!(written.len(), reread.len(), "field count mismatch");
    for (want, got) in written.iter().zip(&reread) {
        prop_assert_eq!(&want.tag, &got.tag);
        prop_assert_eq!(want.indicator1, got.indicator1);
        prop_assert_eq!(want.indicator2, got.indicator2);
        prop_assert_eq!(
            want.subfields.len(),
            got.subfields.len(),
            "subfield count mismatch in field {}",
            want.tag
        );
        for (want_sf, got_sf) in want.subfields.iter().zip(got.subfields.iter()) {
            prop_assert_eq!(want_sf.code, got_sf.code);
            prop_assert_eq!(&want_sf.value, &got_sf.value);
        }
    }

    // A second read must report end of input.
    let trailing = reader.read_record().expect("read should succeed");
    prop_assert!(trailing.is_none(), "expected exactly one record in buffer");
}
// Writing any generated record must succeed and produce at least one byte.
#[test]
fn serialization_never_panics(record in arb_record()) {
    let mut bytes = Vec::new();
    let mut writer = MarcWriter::new(&mut bytes);
    let outcome = writer.write_record(&record);
    prop_assert!(outcome.is_ok(), "MarcWriter failed: {:?}", outcome.err());
    let wrote_nothing = bytes.is_empty();
    prop_assert!(!wrote_nothing, "Serialized record is empty");
}
// The record length declared in the leader must equal the number of bytes
// actually emitted.
#[test]
fn leader_length_matches_emitted_bytes(record in arb_record()) {
    let emitted = emit_binary(&record);
    let declared = parse_record_length(&emitted);
    let actual = emitted.len();
    prop_assert_eq!(
        declared,
        actual,
        "leader.record_length ({}) != emitted byte count ({})",
        declared,
        actual
    );
}
// Directory entries must tile the data area: each entry starts where the
// previous one ended, is non-empty, ends on a field terminator, and the sum
// of all lengths plus the record terminator equals the data-area size.
#[test]
fn directory_entries_tile_data_area(record in arb_record()) {
    let buffer = emit_binary(&record);
    let base = parse_data_base_address(&buffer);
    let record_length = parse_record_length(&buffer);
    let entries = parse_directory(&buffer);
    let data_area_size = record_length - base;

    let final_byte = buffer[record_length - 1];
    prop_assert_eq!(final_byte, RECORD_TERMINATOR,
        "last byte should be RECORD_TERMINATOR");

    let mut next_start = 0usize;
    for entry in &entries {
        prop_assert_eq!(entry.start, next_start,
            "entry {} start {} != expected {}",
            entry.tag, entry.start, next_start);
        prop_assert!(entry.length >= 1,
            "entry {} length is zero", entry.tag);

        let end_in_data = entry.start + entry.length;
        prop_assert!(end_in_data < data_area_size,
            "entry {} extends past data area (end={}, data_area_size={})",
            entry.tag, end_in_data, data_area_size);

        // The last byte of the field, as an absolute buffer offset.
        let terminator_at = base + end_in_data - 1;
        prop_assert_eq!(buffer[terminator_at], FIELD_TERMINATOR,
            "entry {} should end with FIELD_TERMINATOR at {}",
            entry.tag, terminator_at);

        next_start += entry.length;
    }

    let lengths_sum: usize = entries.iter().map(|entry| entry.length).sum();
    prop_assert_eq!(lengths_sum + 1, data_area_size,
        "sum of field lengths ({}) + RECORD_TERMINATOR != data area size ({})",
        lengths_sum, data_area_size);
}
// For every directory entry whose tag is "010" or greater, the first two
// bytes of the field must each be a space or an ASCII digit.
#[test]
fn indicator_bytes_in_valid_set(record in arb_record()) {
    let buffer = emit_binary(&record);
    let base = parse_data_base_address(&buffer);
    let data_entries = parse_directory(&buffer)
        .into_iter()
        .filter(|entry| entry.tag.as_str() >= "010");
    for entry in data_entries {
        let field_at = base + entry.start;
        let indicators = [("ind1", buffer[field_at]), ("ind2", buffer[field_at + 1])];
        for (label, byte) in indicators {
            let acceptable = byte.is_ascii_digit() || byte == b' ';
            prop_assert!(
                acceptable,
                "{} for tag {} is 0x{:02x} (not digit or space)",
                label, entry.tag, byte
            );
        }
    }
}
// Every subfield delimiter in the emitted bytes must be followed by a code
// byte in [a-z0-9].
//
// The byte after the delimiter is fetched with checked access, so a
// delimiter at the very end of the buffer is reported as a property failure
// with a descriptive message instead of an out-of-bounds panic.
#[test]
fn subfield_codes_are_lower_alnum(record in arb_record()) {
    let buffer = emit_binary(&record);
    for (i, byte) in buffer.iter().enumerate() {
        if *byte != SUBFIELD_DELIMITER {
            continue;
        }
        match buffer.get(i + 1) {
            Some(code) => prop_assert!(
                code.is_ascii_lowercase() || code.is_ascii_digit(),
                "subfield code at offset {} is 0x{:02x} (expected [a-z0-9])",
                i + 1, code
            ),
            None => prop_assert!(
                false,
                "subfield delimiter at offset {} is the last byte; no code follows",
                i
            ),
        }
    }
}
// Serializing to MARCXML and parsing it back must yield an equal record, as
// judged by `assert_records_equal`.
#[test]
fn marcxml_roundtrip(record in arb_record_xml()) {
    let document = marcxml::record_to_marcxml(&record).expect("MARCXML serialize");
    let reparsed = marcxml::marcxml_to_record(&document).expect("MARCXML parse");
    assert_records_equal(&record, &reparsed)?;
}
// Serializing to MARCJSON and parsing it back must yield an equal record, as
// judged by `assert_records_equal`.
#[test]
fn marcjson_roundtrip(record in arb_record_json()) {
    let document = marcjson::record_to_marcjson(&record).expect("MARCJSON serialize");
    let reparsed = marcjson::marcjson_to_record(&document).expect("MARCJSON parse");
    assert_records_equal(&record, &reparsed)?;
}
}
/// Checks that `parsed` matches `orig` for round-trip purposes.
///
/// Compared: the leader's record status, record type, bibliographic level,
/// character coding and reserved string; all control fields; and all data
/// fields in order, including indicators and subfield codes/values.
///
/// # Errors
/// Returns the first failed comparison as a [`TestCaseError`].
fn assert_records_equal(orig: &Record, parsed: &Record) -> Result<(), TestCaseError> {
    let (want, got) = (&orig.leader, &parsed.leader);
    prop_assert_eq!(want.record_status, got.record_status);
    prop_assert_eq!(want.record_type, got.record_type);
    prop_assert_eq!(want.bibliographic_level, got.bibliographic_level);
    prop_assert_eq!(want.character_coding, got.character_coding);
    prop_assert_eq!(&want.reserved, &got.reserved);

    prop_assert_eq!(orig.control_fields.len(), parsed.control_fields.len());
    for (tag, values) in &orig.control_fields {
        prop_assert_eq!(
            Some(values),
            parsed.control_fields.get(tag),
            "control field {} mismatch",
            tag
        );
    }

    let expected: Vec<&Field> = orig.fields().collect();
    let actual: Vec<&Field> = parsed.fields().collect();
    prop_assert_eq!(expected.len(), actual.len(), "field count mismatch");

    for (want_field, got_field) in expected.iter().zip(&actual) {
        prop_assert_eq!(&want_field.tag, &got_field.tag);
        prop_assert_eq!(want_field.indicator1, got_field.indicator1);
        prop_assert_eq!(want_field.indicator2, got_field.indicator2);
        prop_assert_eq!(
            want_field.subfields.len(),
            got_field.subfields.len(),
            "subfield count in {} mismatch",
            want_field.tag
        );
        let pairs = want_field.subfields.iter().zip(got_field.subfields.iter());
        for (want_sf, got_sf) in pairs {
            prop_assert_eq!(want_sf.code, got_sf.code);
            prop_assert_eq!(&want_sf.value, &got_sf.value);
        }
    }
    Ok(())
}
/// Record strategy that always contains at least one data field (one to
/// five), plus up to two control fields with duplicate tags dropped.
fn arb_record_with_data_field() -> BoxedStrategy<Record> {
    let controls = prop::collection::vec((arb_control_tag(), arb_control_value()), 0..=2);
    let fields = prop::collection::vec(arb_data_field_with(arb_subfield_value()), 1..=5);
    (arb_leader(), controls, fields)
        .prop_map(|(leader, controls, fields)| {
            let mut record = Record::new(leader);
            let mut used_tags = std::collections::HashSet::new();
            for (tag, value) in controls {
                // Only the first occurrence of each control tag is kept.
                if !used_tags.insert(tag.clone()) {
                    continue;
                }
                record.add_control_field(tag, value);
            }
            fields.into_iter().for_each(|field| {
                record.add_field(field);
            });
            record
        })
        .boxed()
}
/// Strategy yielding any byte that is neither an ASCII digit nor a space.
fn arb_non_digit_non_space() -> impl Strategy<Value = u8> {
    let all_bytes = 0u8..=255u8;
    all_bytes.prop_filter("not digit or space", |byte| {
        !(byte.is_ascii_digit() || *byte == b' ')
    })
}
/// Strategy yielding any byte that is not an ASCII digit, `+` or `-`.
fn arb_non_numeric_byte() -> impl Strategy<Value = u8> {
    let all_bytes = 0u8..=255u8;
    all_bytes.prop_filter("not digit or sign", |byte| {
        !(byte.is_ascii_digit() || matches!(*byte, b'+' | b'-'))
    })
}
/// Strategy yielding any byte for which `u8::is_ascii_graphic` is false.
fn arb_non_graphic_subfield_byte() -> impl Strategy<Value = u8> {
    let all_bytes = 0u8..=255u8;
    all_bytes.prop_filter("not ASCII graphic", |byte| !u8::is_ascii_graphic(byte))
}
/// Finds the first directory entry whose tag is `"010"` or greater and
/// returns its tag together with the absolute buffer offset of the field's
/// first byte, or `None` when the directory has no such entry.
fn first_data_field_offset(buffer: &[u8]) -> Option<(String, usize)> {
    let base = parse_data_base_address(buffer);
    for entry in parse_directory(buffer) {
        if entry.tag.as_str() >= "010" {
            return Some((entry.tag, base + entry.start));
        }
    }
    None
}
/// Returns the offset of the first `SUBFIELD_DELIMITER` byte in `buffer`, or
/// `None` when there is none.
fn first_subfield_delimiter_offset(buffer: &[u8]) -> Option<usize> {
    buffer
        .iter()
        .enumerate()
        .find(|(_, byte)| **byte == SUBFIELD_DELIMITER)
        .map(|(offset, _)| offset)
}
/// Strategy yielding arbitrary byte strings strictly shorter than a leader.
fn arb_invalid_leader_truncated() -> BoxedStrategy<Vec<u8>> {
    let short_input = prop::collection::vec(any::<u8>(), 0..LEADER_LEN);
    short_input.boxed()
}
/// Emits a valid record, then overwrites one byte of the leader's record
/// length (offsets `0..5`) with a non-numeric byte.
fn arb_invalid_leader_bad_length_field() -> BoxedStrategy<Vec<u8>> {
    let inputs = (arb_record(), 0usize..5, arb_non_numeric_byte());
    inputs
        .prop_map(|(record, index, replacement)| {
            let mut bytes = emit_binary(&record);
            bytes[index] = replacement;
            bytes
        })
        .boxed()
}
/// Emits a valid record, then overwrites one byte of the leader's base
/// address (offsets `12..17`) with a non-numeric byte.
fn arb_invalid_leader_bad_base_address() -> BoxedStrategy<Vec<u8>> {
    let inputs = (arb_record(), 12usize..17, arb_non_numeric_byte());
    inputs
        .prop_map(|(record, index, replacement)| {
            let mut bytes = emit_binary(&record);
            bytes[index] = replacement;
            bytes
        })
        .boxed()
}
/// Emits a record with at least one data field and corrupts one of the two
/// indicator bytes of its first data field with a byte that is neither digit
/// nor space. Returns the corrupted bytes and the tag of the affected field.
fn arb_record_with_bad_indicator() -> BoxedStrategy<(Vec<u8>, String)> {
    let inputs = (
        arb_record_with_data_field(),
        arb_non_digit_non_space(),
        0u8..=1u8,
    );
    inputs
        .prop_map(|(record, replacement, which)| {
            let mut bytes = emit_binary(&record);
            let (tag, field_at) = first_data_field_offset(&bytes)
                .expect("arb_record_with_data_field guarantees a data field");
            // `which` is 0 for the first indicator and 1 for the second.
            let target = field_at + usize::from(which);
            bytes[target] = replacement;
            (bytes, tag)
        })
        .boxed()
}
/// Emits a fixed-shape record holding a single `020` field with one `a`
/// subfield, then replaces that subfield's code byte with a byte that is not
/// ASCII graphic.
fn arb_record_with_bad_subfield_code() -> BoxedStrategy<Vec<u8>> {
    let inputs = (arb_subfield_value(), arb_non_graphic_subfield_byte());
    inputs
        .prop_map(|(value, replacement)| {
            let only_subfield = Subfield { code: 'a', value };
            let field = Field {
                tag: "020".to_string(),
                indicator1: ' ',
                indicator2: ' ',
                subfields: SmallVec::from_vec(vec![only_subfield]),
            };
            let mut record = Record::new(Leader {
                record_length: 0,
                record_status: 'n',
                record_type: 'a',
                bibliographic_level: 'm',
                control_record_type: ' ',
                character_coding: 'a',
                indicator_count: 2,
                subfield_code_count: 2,
                data_base_address: 0,
                encoding_level: ' ',
                cataloging_form: ' ',
                multipart_level: ' ',
                reserved: "4500".to_string(),
            });
            record.add_field(field);

            let mut bytes = emit_binary(&record);
            let delimiter_at =
                first_subfield_delimiter_offset(&bytes).expect("020 field has a subfield");
            // The code byte sits immediately after the delimiter.
            bytes[delimiter_at + 1] = replacement;
            bytes
        })
        .boxed()
}
/// Emits a record with at least one data field and overwrites the byte just
/// before the base address of data (the end of the directory) with `'0'`.
fn arb_record_with_directory_violation() -> BoxedStrategy<Vec<u8>> {
    arb_record_with_data_field()
        .prop_map(|record| {
            let mut bytes = emit_binary(&record);
            let directory_end = parse_data_base_address(&bytes) - 1;
            bytes[directory_end] = b'0';
            bytes
        })
        .boxed()
}
/// Emits a record and keeps only a prefix of it: the full leader plus a
/// seed-dependent number of further bytes, taken modulo the size of the
/// remainder (with a minimum modulus of 1).
fn arb_record_truncated_after_offset() -> BoxedStrategy<Vec<u8>> {
    (arb_record(), any::<u32>())
        .prop_map(|(record, seed)| {
            let mut bytes = emit_binary(&record);
            let span = (bytes.len() - LEADER_LEN).max(1);
            let keep = LEADER_LEN + (seed as usize) % span;
            // Slice first so a too-short buffer still panics, as indexing would.
            let prefix = bytes[..keep].len();
            bytes.truncate(prefix);
            bytes
        })
        .boxed()
}
/// Emits a record and replaces its final byte with `0x00`.
fn arb_record_missing_record_terminator() -> BoxedStrategy<Vec<u8>> {
    arb_record()
        .prop_map(|record| {
            let mut bytes = emit_binary(&record);
            let final_index = bytes.len() - 1;
            if let Some(final_byte) = bytes.get_mut(final_index) {
                *final_byte = 0x00;
            }
            bytes
        })
        .boxed()
}
/// Emits a record with at least one data field and overwrites bytes `27..31`
/// with `'X'`. With a 24-byte leader and a 3-byte tag, those four bytes are
/// the length column of the first directory entry.
fn arb_malformed_recoverable_record() -> BoxedStrategy<Vec<u8>> {
    arb_record_with_data_field()
        .prop_map(|record| {
            let mut bytes = emit_binary(&record);
            bytes[27..31].fill(b'X');
            bytes
        })
        .boxed()
}
proptest! {
#![proptest_config(ProptestConfig {
cases: 64,
..ProptestConfig::default()
})]
// Input shorter than a leader must never be returned as a parsed record.
#[test]
fn truncated_leader_never_yields_record(bytes in arb_invalid_leader_truncated()) {
    let mut reader = MarcReader::new(Cursor::new(bytes.as_slice()));
    let result = reader.read_record();
    let yielded_record = matches!(&result, Ok(Some(_)));
    prop_assert!(
        !yielded_record,
        "truncated leader should not parse as a record, got {result:?}"
    );
}
// A corrupted record-length column must surface as `RecordLengthInvalid` or
// `InvalidLeader`, and the error must carry a record index and byte offset.
#[test]
fn malformed_record_length_yields_leader_error(
    bytes in arb_invalid_leader_bad_length_field(),
) {
    let mut reader = MarcReader::new(Cursor::new(bytes.as_slice()));
    match reader.read_record() {
        Err(MarcError::RecordLengthInvalid { record_index, byte_offset, .. }) => {
            let (has_index, has_offset) = (record_index.is_some(), byte_offset.is_some());
            prop_assert!(has_index, "record_index missing");
            prop_assert!(has_offset, "byte_offset missing");
        },
        Err(MarcError::InvalidLeader { record_index, byte_offset, .. }) => {
            let (has_index, has_offset) = (record_index.is_some(), byte_offset.is_some());
            prop_assert!(has_index, "record_index missing");
            prop_assert!(has_offset, "byte_offset missing");
        },
        other => prop_assert!(
            false,
            "expected RecordLengthInvalid or InvalidLeader, got {other:?}"
        ),
    }
}
// A corrupted base-address column must surface as `BaseAddressInvalid` with
// a record index and byte offset attached.
#[test]
fn malformed_base_address_yields_e003(
    bytes in arb_invalid_leader_bad_base_address(),
) {
    let mut reader = MarcReader::new(Cursor::new(bytes.as_slice()));
    match reader.read_record() {
        Err(MarcError::BaseAddressInvalid { record_index, byte_offset, .. }) => {
            let has_index = record_index.is_some();
            prop_assert!(has_index);
            let has_offset = byte_offset.is_some();
            prop_assert!(has_offset);
        },
        other => prop_assert!(false, "expected BaseAddressInvalid, got {other:?}"),
    }
}
// Under `ValidationLevel::StrictMarc`, a corrupted indicator byte must
// surface as `InvalidIndicator` naming the affected field's tag and carrying
// position and offset details.
#[test]
fn malformed_indicator_yields_e201(
    (bytes, expected_tag) in arb_record_with_bad_indicator(),
) {
    let input = Cursor::new(bytes.as_slice());
    let mut reader = MarcReader::new(input)
        .with_validation_level(ValidationLevel::StrictMarc);
    match reader.read_record() {
        Err(MarcError::InvalidIndicator {
            field_tag,
            indicator_position,
            record_index,
            byte_offset,
            record_byte_offset,
            ..
        }) => {
            let reported_tag = field_tag.as_deref();
            prop_assert_eq!(reported_tag, Some(expected_tag.as_str()));
            prop_assert!(indicator_position.is_some());
            prop_assert!(record_index.is_some());
            prop_assert!(byte_offset.is_some());
            prop_assert!(record_byte_offset.is_some());
        },
        other => prop_assert!(false, "expected InvalidIndicator, got {other:?}"),
    }
}
// Under `ValidationLevel::StrictMarc`, a corrupted subfield code must
// surface as `BadSubfieldCode` with field tag, record index and offsets set.
#[test]
fn malformed_subfield_code_yields_e202(
    bytes in arb_record_with_bad_subfield_code(),
) {
    let input = Cursor::new(bytes.as_slice());
    let mut reader = MarcReader::new(input)
        .with_validation_level(ValidationLevel::StrictMarc);
    match reader.read_record() {
        Err(MarcError::BadSubfieldCode {
            field_tag,
            record_index,
            byte_offset,
            record_byte_offset,
            ..
        }) => {
            let has_tag = field_tag.is_some();
            prop_assert!(has_tag);
            prop_assert!(record_index.is_some());
            prop_assert!(byte_offset.is_some());
            prop_assert!(record_byte_offset.is_some());
        },
        other => prop_assert!(false, "expected BadSubfieldCode, got {other:?}"),
    }
}
// Overwriting the byte at the end of the directory must surface as
// `DirectoryInvalid` with a record index and byte offset attached.
#[test]
fn directory_violation_yields_e101(
    bytes in arb_record_with_directory_violation(),
) {
    let mut reader = MarcReader::new(Cursor::new(bytes.as_slice()));
    match reader.read_record() {
        Err(MarcError::DirectoryInvalid { record_index, byte_offset, .. }) => {
            let has_index = record_index.is_some();
            prop_assert!(has_index);
            let has_offset = byte_offset.is_some();
            prop_assert!(has_offset);
        },
        other => prop_assert!(false, "expected DirectoryInvalid, got {other:?}"),
    }
}
// A record cut short somewhere after its leader must never be returned as a
// parsed record.
#[test]
fn truncated_record_never_yields_record(
    bytes in arb_record_truncated_after_offset(),
) {
    let mut reader = MarcReader::new(Cursor::new(bytes.as_slice()));
    let result = reader.read_record();
    let yielded_record = matches!(&result, Ok(Some(_)));
    prop_assert!(
        !yielded_record,
        "truncated record should not parse as a record, got {result:?}"
    );
}
// Replacing the final byte of a record must surface as `EndOfRecordNotFound`
// with a record index and byte offset attached.
#[test]
fn missing_record_terminator_yields_e006(
    bytes in arb_record_missing_record_terminator(),
) {
    let mut reader = MarcReader::new(Cursor::new(bytes.as_slice()));
    match reader.read_record() {
        Err(MarcError::EndOfRecordNotFound { record_index, byte_offset, .. }) => {
            let has_index = record_index.is_some();
            prop_assert!(has_index);
            let has_offset = byte_offset.is_some();
            prop_assert!(has_offset);
        },
        other => prop_assert!(false, "expected EndOfRecordNotFound, got {other:?}"),
    }
}
}
proptest! {
#![proptest_config(ProptestConfig {
cases: 64,
..ProptestConfig::default()
})]
// Feeding up to 2047 arbitrary bytes to a lenient reader must not panic;
// records are drained until the reader returns anything other than a record.
#[test]
fn lenient_mode_never_panics_on_arbitrary_bytes(
    bytes in prop::collection::vec(any::<u8>(), 0..2048),
) {
    let input = Cursor::new(bytes.as_slice());
    let mut reader = MarcReader::new(input).with_recovery_mode(RecoveryMode::Lenient);
    loop {
        let got_record = matches!(reader.read_record(), Ok(Some(_)));
        if !got_record {
            break;
        }
    }
}
// In permissive mode, a directory violation or a missing record terminator
// must never surface as an error: the reader may yield records and must then
// report end of input.
#[test]
fn permissive_mode_swallows_field_level_malformations(
    bytes in prop_oneof![
        arb_record_with_directory_violation(),
        arb_record_missing_record_terminator(),
    ],
) {
    let input = Cursor::new(bytes.as_slice());
    let mut reader = MarcReader::new(input).with_recovery_mode(RecoveryMode::Permissive);
    // Drain records until the reader stops producing them.
    let mut outcome = reader.read_record();
    while let Ok(Some(_)) = outcome {
        outcome = reader.read_record();
    }
    // Anything other than a clean end of input is a failure.
    if let Err(e) = outcome {
        prop_assert!(
            false,
            "permissive should swallow field-level malformations; got {e:?}"
        );
    }
}
// A lenient reader capped at `n` errors, fed 6 to 10 malformed records, must
// eventually return `FatalReaderError` reporting that cap and more than `n`
// errors seen, and must return `Ok(None)` on the read after that.
#[test]
fn recovery_cap_eventually_trips_on_pathological_inputs(
    n in 1usize..=4usize,
    records in prop::collection::vec(arb_malformed_recoverable_record(), 6..=10),
) {
    // Concatenate all malformed records into one input stream.
    let stream: Vec<u8> = records.concat();
    let mut reader = MarcReader::new(Cursor::new(stream.as_slice()))
        .with_recovery_mode(RecoveryMode::Lenient)
        .with_max_errors(n);

    let got_fatal = loop {
        match reader.read_record() {
            Ok(Some(_)) => {},
            Ok(None) => break false,
            Err(MarcError::FatalReaderError { cap, errors_seen, .. }) => {
                prop_assert_eq!(cap, n);
                prop_assert!(errors_seen > n);
                break true;
            },
            Err(e) => prop_assert!(false, "unexpected error in Lenient mode: {e:?}"),
        }
    };
    prop_assert!(
        got_fatal,
        "cap N={} should have tripped on {} malformed records",
        n, records.len()
    );
    let after_fatal = reader.read_record();
    prop_assert!(matches!(after_fatal, Ok(None)));
}
}