Skip to main content

marc_rs/
reader.rs

1use std::path::Path;
2
3use crate::encoding::Encoding;
4use crate::error::MarcError;
5use crate::format::MarcFormat;
6use crate::raw::{BinaryReader, RawRecordView};
7use crate::record::Record;
8use crate::xml::XmlReader;
9use crate::{detect_file_format, FileFormat};
10
11/// Unified MARC reader with auto-detection of file format (binary ISO2709 or MARC-XML).
12///
13/// Internally, all data is stored as ISO2709 bytes. XML input is eagerly
14/// converted by `XmlReader::parse()`, so iteration always goes through
15/// `BinaryReader`.
16pub struct MarcReader {
17    data: Vec<u8>,
18    source_format: FileFormat,
19    /// When set, used instead of the encoding detected from the record (e.g. leader / 100$a).
20    encoding_override: Option<Encoding>,
21}
22
23impl MarcReader {
24    /// Auto-detect format from raw bytes and build the reader.
25    /// XML data is eagerly converted to ISO2709.
26    pub fn from_bytes(data: Vec<u8>) -> Result<Self, MarcError> {
27        let fmt = detect_file_format(&data);
28        match fmt {
29            FileFormat::Binary => Ok(Self {
30                data,
31                source_format: FileFormat::Binary,
32                encoding_override: None,
33            }),
34            FileFormat::Xml => Ok(Self {
35                data: XmlReader::parse(&data)?,
36                source_format: FileFormat::Xml,
37                encoding_override: None,
38            }),
39        }
40    }
41
42    /// Read a file, auto-detect its format, and build the reader.
43    pub fn from_file(path: &Path) -> Result<Self, MarcError> {
44        let data = std::fs::read(path)?;
45        Self::from_bytes(data)
46    }
47
48    /// Force binary ISO2709 format (no detection).
49    pub fn from_binary(data: Vec<u8>) -> Self {
50        Self {
51            data,
52            source_format: FileFormat::Binary,
53            encoding_override: None,
54        }
55    }
56
57    /// Force MARC-XML format (no detection). Parses XML eagerly into ISO2709.
58    pub fn from_xml(data: &[u8]) -> Result<Self, MarcError> {
59        Ok(Self {
60            data: XmlReader::parse(data)?,
61            source_format: FileFormat::Xml,
62            encoding_override: None,
63        })
64    }
65
66    /// Force the encoding used when decoding raw record data. Use when the record's
67    /// declared encoding (e.g. leader byte 9 for MARC21, 100$a for UNIMARC) is wrong.
68    pub fn with_encoding(mut self, encoding: Encoding) -> Self {
69        self.encoding_override = Some(encoding);
70        self
71    }
72
73    /// Clear any encoding override.
74    pub fn clear_encoding_override(&mut self) {
75        self.encoding_override = None;
76    }
77
78    /// Returns the original file format before conversion.
79    pub fn source_format(&self) -> FileFormat {
80        self.source_format
81    }
82
83    /// Returns the encoding override when set.
84    pub fn encoding_override(&self) -> Option<Encoding> {
85        self.encoding_override
86    }
87
88    /// Iterate over raw ISO2709 records (with optional encoding override applied when decoding).
89    pub fn iter(&self) -> BinaryReader<'_> {
90        BinaryReader::with_encoding(&self.data, self.encoding_override)
91    }
92
93    /// Raw ISO2709 bytes (useful for direct binary output).
94    pub fn as_bytes(&self) -> &[u8] {
95        &self.data
96    }
97
98    /// Parse all records into the high-level semantic model.
99    /// If `with_encoding()` was used, that encoding is applied instead of the one
100    /// detected from each record.
101    pub fn into_records(self) -> Result<Vec<Record>, MarcError> {
102        let mut records = Vec::new();
103        for view in BinaryReader::with_encoding(&self.data, self.encoding_override) {
104            let view = view?;
105            let raw = view.as_raw();
106            let format = MarcFormat::detect(raw, self.encoding_override)?;
107
108            records.push(format.to_record(raw)?);
109        }
110        Ok(records)
111    }
112}
113
114impl<'a> IntoIterator for &'a MarcReader {
115    type Item = Result<RawRecordView<'a>, MarcError>;
116    type IntoIter = BinaryReader<'a>;
117
118    fn into_iter(self) -> Self::IntoIter {
119        self.iter()
120    }
121}