use crate::error::Result;
use crate::formats::FormatReader;
use crate::iso2709::{DataFieldParseConfig, ParseContext};
use crate::iso2709_skeleton::{parse_iso2709_record, Iso2709Builder};
use crate::leader::Leader;
use crate::record::{Field, Record};
use crate::recovery::{self, RecoveryCap, RecoveryMode};
use std::io::Read;
/// Streaming reader that parses [`Record`]s out of any [`Read`] source.
///
/// Every call to `read_record` passes the wrapped reader, the parse context,
/// the recovery cap and the current recovery mode to `parse_iso2709_record`.
#[derive(Debug)]
pub struct MarcReader<R: Read> {
/// Underlying byte source the records are parsed from.
reader: R,
/// Recovery mode forwarded to the parser on every read; `new` starts with
/// `RecoveryMode::Strict`.
recovery_mode: RecoveryMode,
/// Number of records handed back to the caller so far (only `Some` results count).
records_read: usize,
/// Parse context reused across reads; carries the optional source name set by
/// `with_source`.
ctx: ParseContext,
/// Error cap reused across reads; its limit is configured by `with_max_errors`.
cap: RecoveryCap,
}
impl<R: Read> MarcReader<R> {
    /// Wraps `reader` in a new `MarcReader`.
    ///
    /// The reader starts in [`RecoveryMode::Strict`], with a zero record
    /// counter, a fresh [`ParseContext`] and a fresh [`RecoveryCap`].
    pub fn new(reader: R) -> Self {
        let recovery_mode = RecoveryMode::Strict;
        let ctx = ParseContext::new();
        let cap = RecoveryCap::new();
        Self {
            reader,
            recovery_mode,
            records_read: 0,
            ctx,
            cap,
        }
    }

    /// Returns the reader with its recovery mode replaced by `mode`.
    ///
    /// The mode is forwarded to the parser on every subsequent read.
    #[must_use]
    pub fn with_recovery_mode(self, mode: RecoveryMode) -> Self {
        Self {
            recovery_mode: mode,
            ..self
        }
    }

    /// Returns the reader with `name` recorded as the source name in its
    /// parse context.
    #[must_use]
    pub fn with_source(mut self, name: impl Into<String>) -> Self {
        let source_name: String = name.into();
        self.ctx.source_name = Some(source_name);
        self
    }

    /// Returns the reader with the recovery cap's limit set to `n`
    /// (forwarded to `RecoveryCap::set_max`).
    ///
    /// The tests in this file exercise `n == 0` as "cap disabled" and show the
    /// cap not firing in strict mode; the exact rules live in `RecoveryCap`.
    #[must_use]
    pub fn with_max_errors(mut self, n: usize) -> Self {
        let cap = &mut self.cap;
        cap.set_max(n);
        self
    }
}
impl MarcReader<std::fs::File> {
    /// Opens the file at `path` and wraps it in a reader whose source name is
    /// the path's display string.
    ///
    /// # Errors
    ///
    /// Returns the [`std::io::Error`] from [`std::fs::File::open`] when the
    /// file cannot be opened.
    ///
    // NOTE(review): the `File` is handed to the reader without a `BufReader`;
    // whether the parser issues many small reads is not visible from here.
    pub fn from_path(path: impl AsRef<std::path::Path>) -> std::io::Result<Self> {
        let location = path.as_ref();
        std::fs::File::open(location)
            .map(|handle| Self::new(handle).with_source(location.display().to_string()))
    }
}
impl<R: Read> MarcReader<R> {
    /// Reads the next record from the underlying reader.
    ///
    /// Parsing is delegated to `parse_iso2709_record` with [`BibBuilder`] as
    /// the record builder. Each `Some` result bumps the `records_read`
    /// counter; `Ok(None)` is passed through untouched (the tests in this file
    /// show it being returned for exhausted input).
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `parse_iso2709_record`.
    pub fn read_record(&mut self) -> Result<Option<Record>> {
        let mode = self.recovery_mode;
        match parse_iso2709_record::<R, BibBuilder>(
            &mut self.reader,
            &mut self.ctx,
            &mut self.cap,
            mode,
        )? {
            Some(record) => {
                self.records_read += 1;
                Ok(Some(record))
            }
            None => Ok(None),
        }
    }
}
/// [`Iso2709Builder`] implementation that accumulates parsed fields into a
/// [`Record`].
struct BibBuilder {
/// Record under construction; handed back by `finalize`.
record: Record,
}
impl Iso2709Builder for BibBuilder {
    type Output = Record;

    /// Selects the bibliographic data-field parse configuration.
    #[inline]
    fn parse_config() -> DataFieldParseConfig {
        DataFieldParseConfig::BIBLIOGRAPHIC
    }

    /// Starts a builder around an empty record created from `leader`.
    #[inline]
    fn new_for(leader: Leader) -> Self {
        let record = Record::new(leader);
        Self { record }
    }

    /// Forwards a control field to the record being built.
    #[inline]
    fn add_control_field(&mut self, tag: String, value: String) {
        let target = &mut self.record;
        target.add_control_field(tag, value);
    }

    /// Appends a data field to the record being built.
    ///
    /// The separate `_tag` argument is ignored; only `field` is stored.
    #[inline]
    fn add_data_field(&mut self, _tag: String, field: Field) {
        let target = &mut self.record;
        target.add_field(field);
    }

    /// Attempts to salvage a truncated record.
    ///
    /// Always returns `Some`, wrapping whatever `recovery::try_recover_record`
    /// produces for the given leader, partial bytes, base address, recovery
    /// mode and parse context.
    fn try_recover_truncated(
        leader: Leader,
        partial_data: &[u8],
        base_address: usize,
        mode: RecoveryMode,
        ctx: &ParseContext,
    ) -> Option<Result<Self::Output>> {
        let recovered =
            recovery::try_recover_record(leader, partial_data, base_address, mode, ctx);
        Some(recovered)
    }

    /// Consumes the builder and returns the assembled record.
    #[inline]
    fn finalize(self) -> Self::Output {
        let Self { record } = self;
        record
    }
}
impl<R: Read + std::fmt::Debug> FormatReader for MarcReader<R> {
    /// Reads the next record by delegating to the inherent
    /// `MarcReader::read_record`.
    ///
    /// The type-qualified path resolves to the inherent method (inherent
    /// associated functions take precedence over trait methods), so this does
    /// not recurse into the trait method itself.
    fn read_record(&mut self) -> Result<Option<Record>> {
        MarcReader::<R>::read_record(self)
    }

    /// Reports how many records this reader has returned so far; always `Some`.
    fn records_read(&self) -> Option<usize> {
        let count = self.records_read;
        Some(count)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::iso2709::{FIELD_TERMINATOR, RECORD_TERMINATOR, SUBFIELD_DELIMITER};
    use std::io::Cursor;

    /// Builds one well-formed record containing a single `245` field with
    /// indicators `1`/`0` and subfield `a` = "Test title".
    ///
    /// Layout: 24-byte leader, one 12-byte directory entry plus terminator,
    /// the field data, then the record terminator.
    fn build_simple_record() -> Vec<u8> {
        // Field data: indicators, subfield delimiter + code, value, terminator.
        let mut field_245 = Vec::new();
        field_245.extend_from_slice(b"10");
        field_245.push(SUBFIELD_DELIMITER);
        field_245.push(b'a');
        field_245.extend_from_slice(b"Test title");
        field_245.push(FIELD_TERMINATOR);

        // Directory entry: tag (3) + field length (4) + start offset (5).
        let mut directory = Vec::new();
        directory.extend_from_slice(b"245");
        directory.extend_from_slice(format!("{:04}", field_245.len()).as_bytes());
        directory.extend_from_slice(b"00000");
        directory.push(FIELD_TERMINATOR);

        // Data starts right after the leader and the terminated directory.
        let base_address = 24 + directory.len();
        // The trailing +1 accounts for the record terminator.
        let record_length = base_address + field_245.len() + 1;

        let mut leader = Vec::new();
        leader.extend_from_slice(format!("{record_length:05}").as_bytes());
        leader.extend_from_slice(b"nam a22");
        leader.extend_from_slice(format!("{base_address:05}").as_bytes());
        leader.extend_from_slice(b"   4500");
        assert_eq!(leader.len(), 24);

        let mut out = Vec::new();
        out.extend_from_slice(&leader);
        out.extend_from_slice(&directory);
        out.extend_from_slice(&field_245);
        out.push(RECORD_TERMINATOR);
        out
    }

    /// Builds a record whose only directory entry carries the non-numeric
    /// length `ABCD` and which has no field data.
    fn build_bad_record() -> Vec<u8> {
        let mut directory = Vec::new();
        directory.extend_from_slice(b"245ABCD00000");
        directory.push(FIELD_TERMINATOR);
        let base_address = 24 + directory.len();
        let record_length = base_address + 1;
        let mut leader = Vec::new();
        leader.extend_from_slice(format!("{record_length:05}").as_bytes());
        leader.extend_from_slice(b"nam a22");
        leader.extend_from_slice(format!("{base_address:05}").as_bytes());
        leader.extend_from_slice(b" i 4500");
        assert_eq!(leader.len(), 24);
        let mut out = Vec::new();
        out.extend_from_slice(&leader);
        out.extend_from_slice(&directory);
        out.push(RECORD_TERMINATOR);
        out
    }

    /// A single well-formed record is parsed into leader, indicators and
    /// subfield values.
    #[test]
    fn test_read_simple_record() {
        let cursor = Cursor::new(build_simple_record());
        let mut reader = MarcReader::new(cursor);
        let record = reader.read_record().unwrap().unwrap();
        assert_eq!(record.leader.record_type, 'a');
        let fields = record.get_fields("245");
        assert!(fields.is_some());
        let field = &fields.unwrap()[0];
        assert_eq!(field.indicator1, '1');
        assert_eq!(field.indicator2, '0');
        let title = field.get_subfield('a');
        assert_eq!(title, Some("Test title"));
    }

    /// Empty input yields `Ok(None)` rather than an error.
    #[test]
    fn test_eof_returns_none() {
        let cursor = Cursor::new(Vec::<u8>::new());
        let mut reader = MarcReader::new(cursor);
        let result = reader.read_record().unwrap();
        assert!(result.is_none());
    }

    /// Two back-to-back records are read in turn, followed by `None`.
    #[test]
    fn test_read_multiple_records() {
        let all_bytes = build_simple_record().repeat(2);
        let cursor = Cursor::new(all_bytes);
        let mut reader = MarcReader::new(cursor);
        let record1 = reader.read_record().unwrap();
        assert!(record1.is_some());
        let record2 = reader.read_record().unwrap();
        assert!(record2.is_some());
        let record3 = reader.read_record().unwrap();
        assert!(record3.is_none());
    }

    /// `FormatReader::read_all` drains the stream and `records_read` tracks
    /// the count.
    #[test]
    fn test_format_reader_trait() {
        let all_bytes = build_simple_record().repeat(2);
        let cursor = Cursor::new(all_bytes);
        let mut reader = MarcReader::new(cursor);
        assert_eq!(reader.records_read(), Some(0));
        let records = FormatReader::read_all(&mut reader).unwrap();
        assert_eq!(records.len(), 2);
        assert_eq!(reader.records_read(), Some(2));
    }

    /// The `records()` iterator from `FormatReaderExt` yields every record.
    #[test]
    fn test_format_reader_iterator() {
        use crate::formats::FormatReaderExt;
        let all_bytes = build_simple_record().repeat(3);
        let cursor = Cursor::new(all_bytes);
        let mut reader = MarcReader::new(cursor);
        let mut count = 0;
        for result in reader.records() {
            result.unwrap();
            count += 1;
        }
        assert_eq!(count, 3);
        assert_eq!(reader.records_read(), Some(3));
    }

    /// A leader declaring a record length below 24 is rejected.
    #[test]
    fn test_malformed_leader_record_length_too_small() {
        let leader = b"00010nam a2200025 i 4500";
        let cursor = Cursor::new(leader.to_vec());
        let mut reader = MarcReader::new(cursor);
        let result = reader.read_record();
        assert!(result.is_err(), "expected Err for record_length < 24");
        let err = result.unwrap_err().to_string();
        assert!(
            err.contains("Record length must be at least 24"),
            "got: {err}"
        );
    }

    /// In lenient mode with a cap of 3, the first three malformed records are
    /// still returned, the fourth read fails with `FatalReaderError`, and
    /// later reads return `None`.
    #[test]
    fn test_max_errors_cap_trips_on_stream_of_malformed_records() {
        let stream = build_bad_record().repeat(5);
        let mut reader = MarcReader::new(Cursor::new(stream))
            .with_recovery_mode(RecoveryMode::Lenient)
            .with_max_errors(3);
        for _ in 0..3 {
            let rec = reader.read_record().unwrap();
            assert!(rec.is_some());
        }
        let err = reader.read_record().expect_err("cap should trip");
        match err {
            crate::error::MarcError::FatalReaderError {
                cap,
                errors_seen,
                record_index,
                ..
            } => {
                assert_eq!(cap, 3);
                assert_eq!(errors_seen, 4);
                assert_eq!(record_index, Some(4));
            }
            other => panic!("expected FatalReaderError, got {other:?}"),
        }
        // Once the cap has tripped the reader behaves as exhausted.
        assert!(reader.read_record().unwrap().is_none());
        assert!(reader.read_record().unwrap().is_none());
    }

    /// A cap of zero never trips: all 50 malformed records are returned.
    #[test]
    fn test_max_errors_zero_disables_cap() {
        let stream = build_bad_record().repeat(50);
        let mut reader = MarcReader::new(Cursor::new(stream))
            .with_recovery_mode(RecoveryMode::Lenient)
            .with_max_errors(0);
        let mut count = 0;
        while reader.read_record().unwrap().is_some() {
            count += 1;
        }
        assert_eq!(count, 50);
    }

    /// In strict mode a malformed record fails with its own error, never with
    /// `FatalReaderError`, even when a cap is configured.
    #[test]
    fn test_max_errors_inert_in_strict_mode() {
        let stream = build_bad_record();
        let mut reader = MarcReader::new(Cursor::new(stream))
            .with_recovery_mode(RecoveryMode::Strict)
            .with_max_errors(1);
        let err = reader.read_record().expect_err("strict mode should error");
        assert!(
            !matches!(err, crate::error::MarcError::FatalReaderError { .. }),
            "strict mode should never produce FatalReaderError, got {err:?}"
        );
    }

    /// A leader declaring a base address below 24 is rejected.
    #[test]
    fn test_malformed_leader_base_address_too_small() {
        let leader = b"00050nam a2200010 i 4500";
        let cursor = Cursor::new(leader.to_vec());
        let mut reader = MarcReader::new(cursor);
        let result = reader.read_record();
        assert!(result.is_err(), "expected Err for base_address < 24");
        let err = result.unwrap_err().to_string();
        assert!(
            err.contains("Base address of data must be at least 24"),
            "got: {err}"
        );
    }
}