docx_rs/reader/
xml_parser.rs1use std::collections::VecDeque;
2use std::io::{BufReader, Read};
3
4use quick_xml::encoding::Decoder;
5use quick_xml::events::{BytesEnd, BytesStart, Event};
6use quick_xml::Reader;
7
/// An owned XML name, split into an optional prefix and a local part.
///
/// Names produced by `split_qname` in this file are split at the first `:`
/// and always carry `namespace: None`; no namespace resolution happens here.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct OwnedName {
    /// The part of the name after the first `:`, or the whole name when
    /// there is no `:`.
    pub local_name: String,
    /// Namespace URI associated with the name, if any.
    pub namespace: Option<String>,
    /// The part of the name before the first `:`, if the name has one.
    pub prefix: Option<String>,
}
14
15#[derive(Clone, Debug, PartialEq, Eq)]
16pub struct OwnedAttribute {
17 pub name: OwnedName,
18 pub value: String,
19}
20
/// An ordered list of string pairs describing namespace mappings.
///
/// The list is private; it is consumed through the `IntoIterator`
/// implementation, which yields the pairs in insertion order.
// NOTE(review): pairs are presumably (prefix, URI) — nothing in this file
// populates the list, so confirm against callers.
#[derive(Debug, Default, Clone, Eq, PartialEq)]
pub struct Namespace {
    /// The stored mapping pairs.
    mappings: Vec<(String, String)>,
}
25
26impl Namespace {
27 pub fn empty() -> Self {
28 Self {
29 mappings: Vec::new(),
30 }
31 }
32}
33
34impl IntoIterator for Namespace {
35 type Item = (String, String);
36 type IntoIter = std::vec::IntoIter<(String, String)>;
37
38 fn into_iter(self) -> Self::IntoIter {
39 self.mappings.into_iter()
40 }
41}
42
43#[derive(Clone, Debug, PartialEq)]
44pub enum XmlEvent {
45 StartElement {
46 name: OwnedName,
47 attributes: Vec<OwnedAttribute>,
48 namespace: Namespace,
49 },
50 EndElement {
51 name: OwnedName,
52 },
53 Characters(String),
54 Whitespace(String),
55 EndDocument,
56}
57
58pub struct EventReader<R: Read> {
59 reader: Reader<BufReader<R>>,
60 buf: Vec<u8>,
61 pending: VecDeque<XmlEvent>,
62 finished: bool,
63}
64
65impl<R: Read> EventReader<R> {
66 pub fn new(reader: R) -> Self {
67 let mut reader = Reader::from_reader(BufReader::new(reader));
68 {
69 let config = reader.config_mut();
70 config.trim_text(false);
71 config.check_end_names = true;
72 config.expand_empty_elements = false;
73 }
74 Self {
75 reader,
76 buf: Vec::new(),
77 pending: VecDeque::new(),
78 finished: false,
79 }
80 }
81
82 pub fn next(&mut self) -> Result<XmlEvent, quick_xml::Error> {
83 self.read_next()
84 }
85
86 fn read_next(&mut self) -> Result<XmlEvent, quick_xml::Error> {
87 if let Some(event) = self.pending.pop_front() {
88 return Ok(event);
89 }
90
91 loop {
92 self.buf.clear();
93 match self.reader.read_event_into(&mut self.buf)? {
94 Event::Start(element) => {
95 let decoder = self.reader.decoder();
96 let event = Self::build_start_event(element, decoder)?;
97 return Ok(event);
98 }
99 Event::Empty(element) => {
100 let decoder = self.reader.decoder();
101 let event = Self::build_start_event(element, decoder)?;
102 if let XmlEvent::StartElement { name, .. } = &event {
103 self.pending
104 .push_back(XmlEvent::EndElement { name: name.clone() });
105 }
106 return Ok(event);
107 }
108 Event::End(element) => {
109 let name = build_name_from_end(&element)?;
110 return Ok(XmlEvent::EndElement { name });
111 }
112 Event::Text(text) => {
113 let text = text.unescape()?.into_owned();
114 if text.chars().all(char::is_whitespace) {
115 return Ok(XmlEvent::Whitespace(text));
116 } else {
117 return Ok(XmlEvent::Characters(text));
118 }
119 }
120 Event::CData(text) => {
121 let decoded = self.reader.decoder().decode(text.as_ref())?.into_owned();
122 return Ok(XmlEvent::Characters(decoded));
123 }
124 Event::Eof => {
125 self.finished = true;
126 return Ok(XmlEvent::EndDocument);
127 }
128 Event::Decl(_) | Event::PI(_) | Event::Comment(_) | Event::DocType(_) => {
129 }
131 }
132 }
133 }
134
135 fn build_start_event(
136 element: BytesStart<'_>,
137 decoder: Decoder,
138 ) -> Result<XmlEvent, quick_xml::Error> {
139 let name = build_name_from_start(&element)?;
140 let attributes = build_attributes(&element, decoder)?;
141 Ok(XmlEvent::StartElement {
142 name,
143 attributes,
144 namespace: Namespace::empty(),
145 })
146 }
147}
148
149impl<R: Read> Iterator for EventReader<R> {
150 type Item = Result<XmlEvent, quick_xml::Error>;
151
152 fn next(&mut self) -> Option<Self::Item> {
153 if self.finished {
154 return None;
155 }
156 match self.read_next() {
157 Ok(XmlEvent::EndDocument) => {
158 self.finished = true;
159 Some(Ok(XmlEvent::EndDocument))
160 }
161 Ok(event) => Some(Ok(event)),
162 Err(e) => {
163 self.finished = true;
164 Some(Err(e))
165 }
166 }
167 }
168}
169
170fn build_name_from_start(element: &BytesStart<'_>) -> Result<OwnedName, quick_xml::Error> {
171 let name = element.name();
172 Ok(split_qname(name.as_ref()))
173}
174
175fn build_name_from_end(element: &BytesEnd<'_>) -> Result<OwnedName, quick_xml::Error> {
176 let name = element.name();
177 Ok(split_qname(name.as_ref()))
178}
179
180fn split_qname(raw: &[u8]) -> OwnedName {
181 let text = String::from_utf8_lossy(raw).into_owned();
182 if let Some(idx) = text.find(':') {
183 let prefix = text[..idx].to_string();
184 let local = text[idx + 1..].to_string();
185 OwnedName {
186 local_name: local,
187 namespace: None,
188 prefix: Some(prefix),
189 }
190 } else {
191 OwnedName {
192 local_name: text,
193 namespace: None,
194 prefix: None,
195 }
196 }
197}
198
199fn build_attributes(
200 element: &BytesStart<'_>,
201 decoder: Decoder,
202) -> Result<Vec<OwnedAttribute>, quick_xml::Error> {
203 let mut attributes = Vec::new();
204 for attr_result in element.attributes().with_checks(false) {
205 let attr = attr_result.map_err(quick_xml::Error::from)?;
206 let value = attr.decode_and_unescape_value(decoder)?.into_owned();
207 let name = split_qname(attr.key.as_ref());
208 attributes.push(OwnedAttribute { name, value });
209 }
210 Ok(attributes)
211}