Skip to main content

marc_rs/xml/
writer.rs

1use std::io::Write;
2
3use crate::encoding::Encoding;
4use crate::error::MarcError;
5use crate::raw::{RawField, RawRecord};
6use crate::{MarcFormat, Record};
7
/// Serializer that emits MARC records in the MARC-XML (MARCXML) format.
///
/// Whatever encoding the source record uses, the XML produced here is UTF-8.
pub struct XmlWriter<W>
where
    W: Write,
{
    /// Sink that receives the generated XML text.
    writer: W,
}
13
14impl<W: Write> XmlWriter<W> {
15    pub fn new(writer: W) -> Self {
16        Self { writer }
17    }
18
19    pub fn start_collection(&mut self) -> Result<(), MarcError> {
20        writeln!(self.writer, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>")?;
21        writeln!(self.writer, "<collection xmlns=\"http://www.loc.gov/MARC21/slim\">")?;
22        Ok(())
23    }
24
25    pub fn end_collection(&mut self) -> Result<(), MarcError> {
26        writeln!(self.writer, "</collection>")?;
27        Ok(())
28    }
29
30    /// Write a raw ISO2709 record as a MARC-XML `<record>` element.
31    /// Uses `encoding_override` when provided, otherwise detects from the record.
32    pub fn write_raw(&mut self, raw: &RawRecord, encoding_override: Option<Encoding>) -> Result<(), MarcError> {
33        let format = MarcFormat::detect(raw, encoding_override)?;
34
35        writeln!(self.writer, "  <record>")?;
36
37        // Leader (24 ASCII bytes)
38        let leader = raw.leader()?;
39        let leader_str = String::from_utf8_lossy(leader);
40        writeln!(self.writer, "    <leader>{}</leader>", xml_escape(&leader_str))?;
41
42        for field in raw.fields()? {
43            match field {
44                RawField::Control { tag, data } => {
45                    let tag_str = String::from_utf8_lossy(&tag);
46                    let clean = strip_field_terminator(data);
47                    let text = format.encoding().decode(clean).unwrap_or_else(|_| "".into());
48                    writeln!(self.writer, "    <controlfield tag=\"{tag_str}\">{}</controlfield>", xml_escape(&text))?;
49                }
50                RawField::Data { tag, indicators, body } => {
51                    let tag_str = String::from_utf8_lossy(&tag);
52                    let ind1 = indicators[0] as char;
53                    let ind2 = indicators[1] as char;
54                    writeln!(self.writer, "    <datafield tag=\"{tag_str}\" ind1=\"{}\" ind2=\"{}\">", xml_escape_char(ind1), xml_escape_char(ind2))?;
55
56                    let mut pos = 0;
57                    while pos < body.len() {
58                        if body[pos] == 0x1F {
59                            if pos + 1 >= body.len() {
60                                break;
61                            }
62                            let code = body[pos + 1] as char;
63                            let start = pos + 2;
64                            let mut end = start;
65                            while end < body.len() && body[end] != 0x1F && body[end] != 0x1E {
66                                end += 1;
67                            }
68                            let text = format.encoding().decode(&body[start..end]).unwrap_or_else(|_| "".into());
69                            writeln!(self.writer, "      <subfield code=\"{}\">{}</subfield>", xml_escape_char(code), xml_escape(&text))?;
70                            pos = end;
71                        } else if body[pos] == 0x1E {
72                            break;
73                        } else {
74                            pos += 1;
75                        }
76                    }
77
78                    writeln!(self.writer, "    </datafield>")?;
79                }
80            }
81        }
82
83        writeln!(self.writer, "  </record>")?;
84        Ok(())
85    }
86
87    /// Convert a semantic `Record` to ISO2709 via the given format, then write as XML.
88    pub fn write_record(&mut self, format: &MarcFormat, record: &Record) -> Result<(), MarcError> {
89        let raw_bytes = format.to_raw(record)?;
90        let raw = RawRecord(&raw_bytes.data());
91        self.write_raw(&raw, None)
92    }
93
94    pub fn flush(&mut self) -> Result<(), MarcError> {
95        self.writer.flush()?;
96        Ok(())
97    }
98}
99
/// Returns `data` without its final byte when that byte is the field
/// terminator `0x1E`; otherwise returns `data` unchanged.
///
/// At most one trailing terminator is removed.
fn strip_field_terminator(data: &[u8]) -> &[u8] {
    match data.split_last() {
        Some((&0x1E, head)) => head,
        _ => data,
    }
}
107
/// Escapes `s` for use as XML text or as a quoted attribute value.
///
/// The five markup characters (`&`, `<`, `>`, `"`, `'`) become their
/// predefined entities. Characters that XML 1.0 does not allow in a document
/// at all (C0 controls other than TAB/LF/CR, U+FFFE, U+FFFF) are replaced by
/// U+FFFD, because they cannot be represented even as character references.
/// Replacement is one-for-one, so character positions are preserved.
fn xml_escape(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&apos;"),
            // The XML 1.0 `Char` production: everything legal passes through.
            '\t'
            | '\n'
            | '\r'
            | '\u{20}'..='\u{D7FF}'
            | '\u{E000}'..='\u{FFFD}'
            | '\u{10000}'..='\u{10FFFF}' => out.push(c),
            // Anything else would make the output not well-formed.
            _ => out.push('\u{FFFD}'),
        }
    }
    out
}
122
/// Escapes a single character for use inside a quoted XML attribute value.
///
/// The five markup characters (`&`, `<`, `>`, `"`, `'`) become their
/// predefined entities. A character that XML 1.0 does not allow in a document
/// (C0 controls other than TAB/LF/CR, U+FFFE, U+FFFF) is replaced by U+FFFD so
/// that a stray control byte cannot make the output not well-formed.
fn xml_escape_char(c: char) -> String {
    match c {
        '&' => "&amp;".into(),
        '<' => "&lt;".into(),
        '>' => "&gt;".into(),
        '"' => "&quot;".into(),
        '\'' => "&apos;".into(),
        // The XML 1.0 `Char` production: legal characters are emitted as-is.
        '\t'
        | '\n'
        | '\r'
        | '\u{20}'..='\u{D7FF}'
        | '\u{E000}'..='\u{FFFD}'
        | '\u{10000}'..='\u{10FFFF}' => c.to_string(),
        _ => '\u{FFFD}'.to_string(),
    }
}