1use std::io::BufRead;
2
3use quick_xml::events::{BytesStart, Event};
4use quick_xml::Reader;
5
6use crate::error::MarcError;
7
/// Stateless entry point for converting MARCXML documents into ISO 2709 bytes.
///
/// The type carries no data; all functionality is exposed through associated
/// functions such as `XmlReader::parse`.
#[derive(Debug, Clone, Copy, Default)]
pub struct XmlReader;
11
/// One variable field collected from a MARCXML `<record>` element.
#[derive(Debug, Clone, PartialEq, Eq)]
struct XmlField {
    /// The three bytes of the element's `tag` attribute.
    tag: [u8; 3],
    /// The field content, either control-field text or indicators plus subfields.
    data: XmlFieldData,
}

/// Content of an [`XmlField`].
#[derive(Debug, Clone, PartialEq, Eq)]
enum XmlFieldData {
    /// Text content of a `<controlfield>` element.
    Control(String),
    /// Content of a `<datafield>` element.
    Data {
        /// First indicator byte.
        ind1: u8,
        /// Second indicator byte.
        ind2: u8,
        /// `(code, value)` pairs in document order.
        subfields: Vec<(u8, String)>,
    },
}
21
22impl XmlReader {
23 pub fn parse(data: &[u8]) -> Result<Vec<u8>, MarcError> {
25 let mut reader = Reader::from_reader(data);
26 reader.config_mut().trim_text(true);
27 let mut buf = Vec::new();
28 let mut output = Vec::new();
29
30 loop {
31 match reader.read_event_into(&mut buf) {
32 Ok(Event::Start(ref e)) if local_name_eq(e, b"record") => {
33 let iso_bytes = Self::parse_record(&mut reader)?;
34 output.extend_from_slice(&iso_bytes);
35 }
36 Ok(Event::Eof) => break,
37 Err(e) => return Err(MarcError::Xml(e.to_string())),
38 _ => {}
39 }
40 buf.clear();
41 }
42
43 Ok(output)
44 }
45
46 fn parse_record<R: BufRead>(reader: &mut Reader<R>) -> Result<Vec<u8>, MarcError> {
47 let mut buf = Vec::new();
48 let mut leader = String::new();
49 let mut fields: Vec<XmlField> = Vec::new();
50
51 loop {
52 match reader.read_event_into(&mut buf) {
53 Ok(Event::Start(ref e)) => {
54 let local = e.local_name();
55 match local.as_ref() {
56 b"leader" => {
57 leader = read_element_text(reader)?;
58 }
59 b"controlfield" => {
60 let tag = parse_tag(e)?;
61 let text = read_element_text(reader)?;
62 fields.push(XmlField {
63 tag,
64 data: XmlFieldData::Control(text),
65 });
66 }
67 b"datafield" => {
68 let tag = parse_tag(e)?;
69 let ind1 = parse_indicator(e, b"ind1");
70 let ind2 = parse_indicator(e, b"ind2");
71 let subfields = Self::parse_subfields(reader)?;
72 fields.push(XmlField {
73 tag,
74 data: XmlFieldData::Data { ind1, ind2, subfields },
75 });
76 }
77 _ => {}
78 }
79 }
80 Ok(Event::End(ref e)) if e.local_name().as_ref() == b"record" => break,
81 Ok(Event::Eof) => return Err(MarcError::Xml("unexpected EOF inside <record>".into())),
82 Err(e) => return Err(MarcError::Xml(e.to_string())),
83 _ => {}
84 }
85 buf.clear();
86 }
87
88 build_iso2709_from_xml(&leader, &fields)
89 }
90
91 fn parse_subfields<R: BufRead>(reader: &mut Reader<R>) -> Result<Vec<(u8, String)>, MarcError> {
92 let mut buf = Vec::new();
93 let mut subfields = Vec::new();
94
95 loop {
96 match reader.read_event_into(&mut buf) {
97 Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"subfield" => {
98 let code = parse_subfield_code(e);
99 let text = read_element_text(reader)?;
100 subfields.push((code, text));
101 }
102 Ok(Event::Empty(ref e)) if e.local_name().as_ref() == b"subfield" => {
103 let code = parse_subfield_code(e);
104 subfields.push((code, String::new()));
105 }
106 Ok(Event::End(ref e)) if e.local_name().as_ref() == b"datafield" => break,
107 Ok(Event::Eof) => return Err(MarcError::Xml("unexpected EOF inside <datafield>".into())),
108 Err(e) => return Err(MarcError::Xml(e.to_string())),
109 _ => {}
110 }
111 buf.clear();
112 }
113
114 Ok(subfields)
115 }
116}
117
118fn local_name_eq(e: &BytesStart, name: &[u8]) -> bool {
123 e.local_name().as_ref() == name
124}
125
126fn read_element_text<R: BufRead>(reader: &mut Reader<R>) -> Result<String, MarcError> {
127 let mut buf = Vec::new();
128 let mut text = String::new();
129 loop {
130 match reader.read_event_into(&mut buf) {
131 Ok(Event::Text(e)) => {
132 text.push_str(&e.unescape().map_err(|e| MarcError::Xml(e.to_string()))?);
133 }
134 Ok(Event::CData(e)) => {
135 text.push_str(&String::from_utf8_lossy(&e));
136 }
137 Ok(Event::End(_)) => break,
138 Ok(Event::Eof) => return Err(MarcError::Xml("unexpected EOF in element text".into())),
139 Err(e) => return Err(MarcError::Xml(e.to_string())),
140 _ => {}
141 }
142 buf.clear();
143 }
144 Ok(text)
145}
146
147fn attr_value(e: &BytesStart, name: &[u8]) -> Option<String> {
148 e.attributes().flatten().find(|a| a.key.as_ref() == name).map(|a| String::from_utf8_lossy(&a.value).into_owned())
149}
150
151fn parse_tag(e: &BytesStart) -> Result<[u8; 3], MarcError> {
152 let val = attr_value(e, b"tag").ok_or_else(|| MarcError::Xml("missing tag attribute".into()))?;
153 let bytes = val.as_bytes();
154 if bytes.len() != 3 {
155 return Err(MarcError::Xml(format!("tag must be exactly 3 characters, got '{val}'")));
156 }
157 let mut tag = [0u8; 3];
158 tag.copy_from_slice(bytes);
159 Ok(tag)
160}
161
162fn parse_indicator(e: &BytesStart, name: &[u8]) -> u8 {
163 attr_value(e, name).and_then(|v| v.bytes().next()).unwrap_or(b' ')
164}
165
166fn parse_subfield_code(e: &BytesStart) -> u8 {
167 attr_value(e, b"code").and_then(|v| v.bytes().next()).unwrap_or(b'a')
168}
169
170fn build_iso2709_from_xml(leader_str: &str, fields: &[XmlField]) -> Result<Vec<u8>, MarcError> {
173 let mut directory: Vec<u8> = Vec::new();
174 let mut field_data: Vec<u8> = Vec::new();
175 let mut offset: usize = 0;
176
177 for field in fields {
178 let mut fb: Vec<u8> = Vec::new();
179 match &field.data {
180 XmlFieldData::Control(text) => {
181 fb.extend_from_slice(text.as_bytes());
182 fb.push(0x1E);
183 }
184 XmlFieldData::Data { ind1, ind2, subfields } => {
185 fb.push(*ind1);
186 fb.push(*ind2);
187 for (code, value) in subfields {
188 fb.push(0x1F);
189 fb.push(*code);
190 fb.extend_from_slice(value.as_bytes());
191 }
192 fb.push(0x1E);
193 }
194 }
195
196 let length = fb.len();
197 directory.extend_from_slice(&field.tag);
198 directory.extend_from_slice(format!("{length:0>4}").as_bytes());
199 directory.extend_from_slice(format!("{offset:0>5}").as_bytes());
200 field_data.extend_from_slice(&fb);
201 offset += length;
202 }
203
204 directory.push(0x1E);
205 let base_address = 24 + directory.len();
206
207 let mut leader_bytes = [b' '; 24];
208 let src = leader_str.as_bytes();
209 let n = src.len().min(24);
210 leader_bytes[..n].copy_from_slice(&src[..n]);
211
212 leader_bytes[9] = b'a';
213 leader_bytes[10] = b'2';
214 leader_bytes[11] = b'2';
215 leader_bytes[20..24].copy_from_slice(b"4500");
216
217 let record_length = base_address + field_data.len() + 1;
218 if record_length > 99999 {
219 return Err(MarcError::InvalidRecord("record too long for ISO2709 leader"));
220 }
221 leader_bytes[0..5].copy_from_slice(format!("{record_length:0>5}").as_bytes());
222 leader_bytes[12..17].copy_from_slice(format!("{base_address:0>5}").as_bytes());
223
224 let mut out = Vec::with_capacity(record_length);
225 out.extend_from_slice(&leader_bytes);
226 out.extend_from_slice(&directory);
227 out.extend_from_slice(&field_data);
228 out.push(0x1D);
229 Ok(out)
230}