opcua_xml/encoding/
reader.rs

1use std::{
2    io::{BufReader, Read},
3    num::{ParseFloatError, ParseIntError},
4    str::FromStr,
5};
6
7use quick_xml::events::Event;
8use thiserror::Error;
9
/// Error produced when reading XML.
///
/// The `Xml`, `ParseInt` and `ParseFloat` variants carry `#[from]` conversions, so the
/// corresponding source errors can be propagated with `?`.
#[derive(Debug, Error)]
pub enum XmlReadError {
    /// Failed to parse XML. Displayed using the message of the wrapped `quick_xml` error.
    #[error("{0}")]
    Xml(#[from] quick_xml::Error),
    /// Unexpected EOF: the input ended before the value being read was complete.
    #[error("Unexpected EOF")]
    UnexpectedEof,
    /// Failed to parse value as integer.
    #[error("Failed to parse integer: {0}")]
    ParseInt(#[from] ParseIntError),
    /// Failed to parse value as float.
    #[error("Failed to parse float: {0}")]
    ParseFloat(#[from] ParseFloatError),
    /// Some other parse error. The contained string is included in the displayed message.
    #[error("Failed to parse value: {0}")]
    Parse(String),
}
29
/// XML stream reader specialized for working with OPC-UA XML.
///
/// Wraps a `quick_xml` pull parser over a buffered input source, together with a
/// scratch buffer that is reused for every event that is read.
pub struct XmlStreamReader<T> {
    /// Underlying `quick_xml` reader, fed from the buffered input source.
    reader: quick_xml::Reader<BufReader<T>>,
    /// Scratch buffer the most recently read event borrows from. It is cleared
    /// before each new event is read.
    buffer: Vec<u8>,
}
35
36impl<T: Read> XmlStreamReader<T> {
37    /// Create a new stream reader with an internal buffer.
38    pub fn new(reader: T) -> Self {
39        Self {
40            reader: quick_xml::Reader::from_reader(BufReader::new(reader)),
41            buffer: Vec::new(),
42        }
43    }
44
45    /// Get the next event from the stream.
46    pub fn next_event(&mut self) -> Result<quick_xml::events::Event<'_>, XmlReadError> {
47        self.buffer.clear();
48        Ok(self.reader.read_event_into(&mut self.buffer)?)
49    }
50
51    /// Skip the current value. This should be called after encountering a
52    /// `Start` event, and will skip until the corresponding `End` event is consumed.
53    ///
54    /// Note that this does not check that the document is coherent, just that
55    /// an equal number of start and end events are consumed.
56    pub fn skip_value(&mut self) -> Result<(), XmlReadError> {
57        let mut depth = 1u32;
58        loop {
59            match self.next_event()? {
60                Event::Start(_) => depth += 1,
61                Event::End(_) => {
62                    depth -= 1;
63                    if depth == 0 {
64                        return Ok(());
65                    }
66                }
67                Event::Eof => {
68                    if depth == 1 {
69                        return Ok(());
70                    } else {
71                        return Err(XmlReadError::UnexpectedEof);
72                    }
73                }
74                _ => {}
75            }
76        }
77    }
78
79    /// Consume the current event, skipping any child elements and returning the combined text
80    /// content with leading and trailing whitespace removed.
81    /// Note that if there are multiple text elements they will be concatenated, but
82    /// whitespace between these will not be removed.
83    pub fn consume_as_text(&mut self) -> Result<String, XmlReadError> {
84        let mut text: Option<String> = None;
85        let mut depth = 1u32;
86        loop {
87            match self.next_event()? {
88                Event::Start(_) => depth += 1,
89                Event::End(_) => {
90                    depth -= 1;
91                    if depth == 0 {
92                        if let Some(mut text) = text {
93                            let trimmed = text.trim_ascii_end();
94                            text.truncate(trimmed.len());
95                            return Ok(text);
96                        } else {
97                            return Ok(String::new());
98                        }
99                    }
100                }
101                Event::Text(mut e) => {
102                    if depth != 1 {
103                        continue;
104                    }
105                    if let Some(text) = text.as_mut() {
106                        text.push_str(&e.unescape()?);
107                    } else if e.inplace_trim_start() {
108                        continue;
109                    } else {
110                        text = Some(e.unescape()?.into_owned());
111                    }
112                }
113
114                Event::Eof => {
115                    if depth == 1 {
116                        if let Some(mut text) = text {
117                            let trimmed = text.trim_ascii_end();
118                            text.truncate(trimmed.len());
119                            return Ok(text);
120                        } else {
121                            return Ok(String::new());
122                        }
123                    } else {
124                        return Err(XmlReadError::UnexpectedEof);
125                    }
126                }
127                _ => continue,
128            }
129        }
130    }
131
132    /// Consume the current element as a raw array of bytes.
133    pub fn consume_raw(&mut self) -> Result<Vec<u8>, XmlReadError> {
134        let mut out = Vec::new();
135        let mut depth = 1u32;
136        // quick-xml doesn't really have a way to do this, and in fact does not capture the full event,
137        // fortunately the way it does capture each event is quite predictable, so we can reconstruct
138        // the input.
139        // We do need the parser, since we only want to read the current element.
140        loop {
141            let evt = self.next_event()?;
142            match evt {
143                Event::Start(s) => {
144                    depth += 1;
145                    out.push(b'<');
146                    out.extend_from_slice(&s);
147                    out.push(b'>');
148                }
149                Event::End(s) => {
150                    depth -= 1;
151                    if depth == 0 {
152                        return Ok(out);
153                    }
154                    out.extend_from_slice(b"</");
155                    out.extend_from_slice(&s);
156                    out.push(b'>');
157                }
158                Event::CData(s) => {
159                    out.extend_from_slice(b"<![CDATA[");
160                    out.extend_from_slice(&s);
161                    out.extend_from_slice(b"]]>");
162                }
163                Event::Comment(s) => {
164                    out.extend_from_slice(b"<!--");
165                    out.extend_from_slice(&s);
166                    out.extend_from_slice(b"-->");
167                }
168                Event::Decl(s) => {
169                    out.extend_from_slice(b"<?");
170                    out.extend_from_slice(&s);
171                    out.extend_from_slice(b"?>");
172                }
173                Event::DocType(s) => {
174                    out.extend_from_slice(b"<!DOCTYPE");
175                    out.extend_from_slice(&s);
176                    out.push(b'>');
177                }
178                Event::Empty(s) => {
179                    out.push(b'<');
180                    out.extend_from_slice(&s);
181                    out.extend_from_slice(b"/>");
182                }
183                Event::PI(s) => {
184                    out.extend_from_slice(b"<?");
185                    out.extend_from_slice(&s);
186                    out.extend_from_slice(b"?>");
187                }
188                Event::Text(s) => {
189                    out.extend_from_slice(&s);
190                }
191                Event::Eof => {
192                    if depth == 1 {
193                        return Ok(out);
194                    } else {
195                        return Err(XmlReadError::UnexpectedEof);
196                    }
197                }
198            }
199        }
200    }
201
202    /// Consume the current node as a text value and parse it as the given type.
203    pub fn consume_content<R: FromStr>(&mut self) -> Result<R, XmlReadError>
204    where
205        XmlReadError: From<<R as FromStr>::Err>,
206    {
207        let text = self.consume_as_text()?;
208        Ok(text.parse()?)
209    }
210}
211
#[cfg(test)]
mod test {
    use std::io::Cursor;

    use quick_xml::events::Event;

    /// Documents the raw event sequence produced for a document with interleaved
    /// text, child elements and comments.
    #[test]
    fn test_xml_text_comments() {
        let xml = r#"
        <Foo>
            Ho
            <Bar>
            Hello
            </Bar>
            Hello <!-- Comment --> there
        </Foo>
        "#;
        let mut cursor = Cursor::new(xml.as_bytes());
        let mut reader = super::XmlStreamReader::new(&mut cursor);
        // You can end up with text everywhere. Any loading needs to account for this.
        assert!(matches!(reader.next_event().unwrap(), Event::Text(_)));
        assert!(matches!(reader.next_event().unwrap(), Event::Start(_)));
        assert!(matches!(reader.next_event().unwrap(), Event::Text(_)));
        assert!(matches!(reader.next_event().unwrap(), Event::Start(_)));
        assert!(matches!(reader.next_event().unwrap(), Event::Text(_)));
        assert!(matches!(reader.next_event().unwrap(), Event::End(_)));
        assert!(matches!(reader.next_event().unwrap(), Event::Text(_)));
        assert!(matches!(reader.next_event().unwrap(), Event::Comment(_)));
        assert!(matches!(reader.next_event().unwrap(), Event::Text(_)));
        assert!(matches!(reader.next_event().unwrap(), Event::End(_)));
        assert!(matches!(reader.next_event().unwrap(), Event::Text(_)));
        // Once the input is exhausted, `Eof` is reported repeatedly.
        assert!(matches!(reader.next_event().unwrap(), Event::Eof));
        assert!(matches!(reader.next_event().unwrap(), Event::Eof));
    }

    /// Text of child elements and comments is skipped, and the remaining direct text is
    /// concatenated and trimmed at both ends.
    #[test]
    fn test_consume_as_text() {
        let xml = r#"<Foo>
            <Bar>
            Hello
            </Bar>
            Hello <!-- Comment -->there
        </Foo>"#;

        let mut cursor = Cursor::new(xml.as_bytes());
        let mut reader = super::XmlStreamReader::new(&mut cursor);

        assert!(matches!(reader.next_event().unwrap(), Event::Start(_)));
        assert_eq!(reader.consume_as_text().unwrap(), "Hello there");
    }

    /// An element without any text content yields an empty string.
    #[test]
    fn test_consume_as_text_empty() {
        let xml = r#"<Foo></Foo>"#;
        let mut cursor = Cursor::new(xml.as_bytes());
        let mut reader = super::XmlStreamReader::new(&mut cursor);

        assert!(matches!(reader.next_event().unwrap(), Event::Start(_)));
        assert_eq!(reader.consume_as_text().unwrap(), "");
    }

    /// The trimmed text content is parsed into the requested type.
    #[test]
    fn test_consume_content() {
        let xml = r#"<Foo>
            12345
        </Foo>"#;
        let mut cursor = Cursor::new(xml.as_bytes());
        let mut reader = super::XmlStreamReader::new(&mut cursor);

        assert!(matches!(reader.next_event().unwrap(), Event::Start(_)));
        assert_eq!(reader.consume_content::<u32>().unwrap(), 12345);
    }

    /// The raw content of an element is everything between its start and end tag.
    #[test]
    fn test_consume_raw() {
        let xml = r#"<Foo>
<Bar>
    Hello <!-- Comment here -->
    More text
</Bar>
<Bar attr = "foo" />
<? Mystery PI ?>
</Foo>"#;
        // The expected output is the document without the enclosing `Foo` tags.
        let expected = xml
            .strip_prefix("<Foo>")
            .and_then(|rest| rest.strip_suffix("</Foo>"))
            .expect("test document is wrapped in a Foo element");

        let mut cursor = Cursor::new(xml.as_bytes());
        let mut reader = super::XmlStreamReader::new(&mut cursor);
        assert!(matches!(reader.next_event().unwrap(), Event::Start(_)));
        let raw = reader.consume_raw().unwrap();
        // Compare as text so that a failure prints readable XML instead of byte arrays.
        assert_eq!(expected, String::from_utf8_lossy(&raw));
    }

    /// Skipping a value consumes the whole element, including nested children, and leaves
    /// the reader positioned at the following sibling.
    #[test]
    fn test_skip_value() {
        let xml = r#"<Foo><Bar><Baz>text</Baz><Empty /></Bar><Next>1</Next></Foo>"#;
        let mut cursor = Cursor::new(xml.as_bytes());
        let mut reader = super::XmlStreamReader::new(&mut cursor);

        // Enter `Foo`, then `Bar`, and skip the rest of `Bar`.
        assert!(matches!(reader.next_event().unwrap(), Event::Start(_)));
        assert!(matches!(reader.next_event().unwrap(), Event::Start(_)));
        reader.skip_value().unwrap();

        match reader.next_event().unwrap() {
            Event::Start(start) => assert_eq!(start.name().as_ref(), b"Next"),
            other => panic!("expected the start of `Next`, got {other:?}"),
        }
    }
}