1use std::io::Write;
2
3use crate::encoding::Encoding;
4use crate::error::MarcError;
5use crate::raw::{RawField, RawRecord};
6use crate::{MarcFormat, Record};
7
/// Streaming writer that serialises MARC records as MARCXML onto any
/// [`Write`] sink.
///
/// The sink is owned by the writer; see the inherent methods for the
/// operations that emit output.
pub struct XmlWriter<W>
where
    W: Write,
{
    /// Destination that receives the generated XML text.
    writer: W,
}
13
14impl<W: Write> XmlWriter<W> {
15 pub fn new(writer: W) -> Self {
16 Self { writer }
17 }
18
19 pub fn start_collection(&mut self) -> Result<(), MarcError> {
20 writeln!(self.writer, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>")?;
21 writeln!(self.writer, "<collection xmlns=\"http://www.loc.gov/MARC21/slim\">")?;
22 Ok(())
23 }
24
25 pub fn end_collection(&mut self) -> Result<(), MarcError> {
26 writeln!(self.writer, "</collection>")?;
27 Ok(())
28 }
29
30 pub fn write_raw(&mut self, raw: &RawRecord, encoding_override: Option<Encoding>) -> Result<(), MarcError> {
33 let format = MarcFormat::detect(raw, encoding_override)?;
34
35 writeln!(self.writer, " <record>")?;
36
37 let leader = raw.leader()?;
39 let leader_str = String::from_utf8_lossy(leader);
40 writeln!(self.writer, " <leader>{}</leader>", xml_escape(&leader_str))?;
41
42 for field in raw.fields()? {
43 match field {
44 RawField::Control { tag, data } => {
45 let tag_str = String::from_utf8_lossy(&tag);
46 let clean = strip_field_terminator(data);
47 let text = format.encoding().decode(clean).unwrap_or_else(|_| "".into());
48 writeln!(self.writer, " <controlfield tag=\"{tag_str}\">{}</controlfield>", xml_escape(&text))?;
49 }
50 RawField::Data { tag, indicators, body } => {
51 let tag_str = String::from_utf8_lossy(&tag);
52 let ind1 = indicators[0] as char;
53 let ind2 = indicators[1] as char;
54 writeln!(self.writer, " <datafield tag=\"{tag_str}\" ind1=\"{}\" ind2=\"{}\">", xml_escape_char(ind1), xml_escape_char(ind2))?;
55
56 let mut pos = 0;
57 while pos < body.len() {
58 if body[pos] == 0x1F {
59 if pos + 1 >= body.len() {
60 break;
61 }
62 let code = body[pos + 1] as char;
63 let start = pos + 2;
64 let mut end = start;
65 while end < body.len() && body[end] != 0x1F && body[end] != 0x1E {
66 end += 1;
67 }
68 let text = format.encoding().decode(&body[start..end]).unwrap_or_else(|_| "".into());
69 writeln!(self.writer, " <subfield code=\"{}\">{}</subfield>", xml_escape_char(code), xml_escape(&text))?;
70 pos = end;
71 } else if body[pos] == 0x1E {
72 break;
73 } else {
74 pos += 1;
75 }
76 }
77
78 writeln!(self.writer, " </datafield>")?;
79 }
80 }
81 }
82
83 writeln!(self.writer, " </record>")?;
84 Ok(())
85 }
86
87 pub fn write_record(&mut self, format: &MarcFormat, record: &Record) -> Result<(), MarcError> {
89 let raw_bytes = format.to_raw(record)?;
90 let raw = RawRecord(&raw_bytes.data());
91 self.write_raw(&raw, None)
92 }
93
94 pub fn flush(&mut self) -> Result<(), MarcError> {
95 self.writer.flush()?;
96 Ok(())
97 }
98}
99
/// Returns `data` without its final byte when that byte is `0x1E`.
///
/// At most one trailing `0x1E` is removed; any other input, including an
/// empty slice, is returned unchanged.
fn strip_field_terminator(data: &[u8]) -> &[u8] {
    match data.split_last() {
        Some((&0x1E, head)) => head,
        _ => data,
    }
}
107
/// Returns a copy of `s` that is safe to place in XML text or in a quoted
/// attribute value.
///
/// The characters `&`, `<`, `>`, `"` and `'` are replaced by the predefined
/// entities `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&apos;`. Every other
/// character, control characters included, is copied through unchanged.
fn xml_escape(s: &str) -> String {
    // The result is at least as long as the input; escapes only make it grow.
    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => out.push_str("&amp;"),
            '<' => out.push_str("&lt;"),
            '>' => out.push_str("&gt;"),
            '"' => out.push_str("&quot;"),
            '\'' => out.push_str("&apos;"),
            _ => out.push(c),
        }
    }
    out
}
122
/// Returns the XML-safe form of a single character as an owned string.
///
/// The characters `&`, `<`, `>`, `"` and `'` map to the predefined entities
/// `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&apos;`. Any other character is
/// returned as a one-character string.
fn xml_escape_char(c: char) -> String {
    match c {
        '&' => "&amp;".into(),
        '<' => "&lt;".into(),
        '>' => "&gt;".into(),
        '"' => "&quot;".into(),
        '\'' => "&apos;".into(),
        _ => c.to_string(),
    }
}