Skip to main content

marc_rs/
lib.rs

1#![forbid(unsafe_code)]
2
3//! MARC21 / UNIMARC parsing and writing library.
4//!
5//! High-level model:
6//! - `raw::RawRecord<'a>`: zero-copy view over a binary ISO2709 record.
7//! - `record::Record`: semantic, serde-compatible model organised in UNIMARC-style blocks.
8//! - `format`: dictionaries for MARC21 and UNIMARC that map raw fields to semantic fields,
9//!   including positional extraction for coded fields (e.g. UNIMARC 100$a, MARC21 008).
10
11pub mod encoding;
12pub mod error;
13pub mod format;
14pub mod raw;
15pub mod reader;
16pub mod record;
17pub mod xml;
18
19pub use crate::encoding::Encoding;
20pub use crate::error::MarcError;
21pub use crate::format::MarcFormat;
22pub use crate::raw::{BinaryReader, BinaryWriter, RawRecord, RawRecordView};
23pub use crate::reader::MarcReader;
24pub use crate::record::Record;
25pub use crate::xml::{XmlReader, XmlWriter};
26
/// Detected file format (binary ISO2709 vs MARC-XML).
///
/// Derives `Hash` in addition to the equality traits so the value can be used
/// as a key in hashed collections such as `HashMap` and `HashSet`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum FileFormat {
    /// Binary ISO2709 data.
    Binary,
    /// MARC-XML data.
    Xml,
}
33
34/// Detect whether `data` is binary ISO2709 or MARC-XML by inspecting
35/// the first non-whitespace byte (after an optional UTF-8 BOM).
36pub fn detect_file_format(data: &[u8]) -> FileFormat {
37    let mut start = 0;
38    if data.len() >= 3 && data[..3] == [0xEF, 0xBB, 0xBF] {
39        start = 3;
40    }
41    while start < data.len() && data[start].is_ascii_whitespace() {
42        start += 1;
43    }
44    if start < data.len() && data[start] == b'<' {
45        FileFormat::Xml
46    } else {
47        FileFormat::Binary
48    }
49}
50
51/// Parse records from any supported format (binary ISO2709 or MARC-XML).
52/// The format is auto-detected from the data.
53pub fn parse_records(data: &[u8]) -> Result<Vec<Record>, MarcError> {
54    MarcReader::from_bytes(data.to_vec())?.into_records()
55}