1use quick_xml::events::{BytesCData, BytesEnd, BytesStart, BytesText, Event};
8use quick_xml::{Reader, Writer};
9use std::io::{BufRead, Write};
10
11use crate::{Error, FromXml, ParseError, Result};
12
13#[derive(Clone, Debug, PartialEq)]
18pub struct PositionedNode {
19 pub position: usize,
21 pub node: RawXmlNode,
23}
24
25impl PositionedNode {
26 pub fn new(position: usize, node: RawXmlNode) -> Self {
28 Self { position, node }
29 }
30}
31
/// An attribute name/value pair together with the position it was recorded at.
///
/// NOTE(review): the unit of `position` is not established in this file —
/// confirm against the callers.
#[derive(Clone, Debug, PartialEq)]
pub struct PositionedAttr {
    /// Position associated with the attribute.
    pub position: usize,
    /// Attribute name.
    pub name: String,
    /// Attribute value.
    pub value: String,
}

impl PositionedAttr {
    /// Builds an attribute record, converting `name` and `value` into owned strings.
    pub fn new(position: usize, name: impl Into<String>, value: impl Into<String>) -> Self {
        let name: String = name.into();
        let value: String = value.into();
        PositionedAttr {
            position,
            name,
            value,
        }
    }
}
56
57#[derive(Clone, Debug, PartialEq)]
59pub enum RawXmlNode {
60 Element(RawXmlElement),
62 Text(String),
64 CData(String),
66 Comment(String),
68}
69
70#[derive(Clone, Debug, PartialEq)]
72pub struct RawXmlElement {
73 pub name: String,
75 pub attributes: Vec<(String, String)>,
77 pub children: Vec<RawXmlNode>,
79 pub self_closing: bool,
81}
82
83impl RawXmlElement {
84 pub fn new(name: impl Into<String>) -> Self {
86 Self {
87 name: name.into(),
88 attributes: Vec::new(),
89 children: Vec::new(),
90 self_closing: false,
91 }
92 }
93
94 pub fn from_reader<R: BufRead>(reader: &mut Reader<R>, start: &BytesStart) -> Result<Self> {
98 let name = String::from_utf8_lossy(start.name().as_ref()).to_string();
99
100 let attributes = start
101 .attributes()
102 .filter_map(|a| a.ok())
103 .map(|a| {
104 (
105 String::from_utf8_lossy(a.key.as_ref()).to_string(),
106 String::from_utf8_lossy(&a.value).to_string(),
107 )
108 })
109 .collect();
110
111 let mut element = RawXmlElement {
112 name: name.clone(),
113 attributes,
114 children: Vec::new(),
115 self_closing: false,
116 };
117
118 let mut buf = Vec::new();
119 let target_name = start.name().as_ref().to_vec();
120
121 loop {
122 match reader.read_event_into(&mut buf) {
123 Ok(Event::Start(e)) => {
124 let child = RawXmlElement::from_reader(reader, &e)?;
125 element.children.push(RawXmlNode::Element(child));
126 }
127 Ok(Event::Empty(e)) => {
128 let child = RawXmlElement::from_empty(&e);
129 element.children.push(RawXmlNode::Element(child));
130 }
131 Ok(Event::Text(e)) => {
132 let text = e.decode().unwrap_or_default();
133 if !text.is_empty() {
134 element.children.push(RawXmlNode::Text(text.to_string()));
135 }
136 }
137 Ok(Event::CData(e)) => {
138 let text = String::from_utf8_lossy(&e).to_string();
139 element.children.push(RawXmlNode::CData(text));
140 }
141 Ok(Event::Comment(e)) => {
142 let text = String::from_utf8_lossy(&e).to_string();
143 element.children.push(RawXmlNode::Comment(text));
144 }
145 Ok(Event::End(e)) => {
146 if e.name().as_ref() == target_name {
147 break;
148 }
149 }
150 Ok(Event::Eof) => {
151 return Err(Error::Invalid(format!(
152 "Unexpected EOF while parsing element '{}'",
153 name
154 )));
155 }
156 Err(e) => return Err(Error::Xml(e)),
157 _ => {}
158 }
159 buf.clear();
160 }
161
162 Ok(element)
163 }
164
165 pub fn from_empty(start: &BytesStart) -> Self {
167 let name = String::from_utf8_lossy(start.name().as_ref()).to_string();
168
169 let attributes = start
170 .attributes()
171 .filter_map(|a| a.ok())
172 .map(|a| {
173 (
174 String::from_utf8_lossy(a.key.as_ref()).to_string(),
175 String::from_utf8_lossy(&a.value).to_string(),
176 )
177 })
178 .collect();
179
180 RawXmlElement {
181 name,
182 attributes,
183 children: Vec::new(),
184 self_closing: true,
185 }
186 }
187
188 pub fn parse_as<T: FromXml>(&self) -> std::result::Result<T, ParseError> {
201 let streaming_reader = RawXmlStreamReader::new(self);
202 let mut reader = Reader::from_reader(streaming_reader);
203 let mut buf = Vec::new();
204
205 loop {
206 match reader.read_event_into(&mut buf) {
207 Ok(Event::Start(e)) => {
208 return T::from_xml(&mut reader, &e, false);
209 }
210 Ok(Event::Empty(e)) => {
211 return T::from_xml(&mut reader, &e, true);
212 }
213 Ok(Event::Eof) => {
214 return Err(ParseError::UnexpectedElement(
215 "empty XML in parse_as".to_string(),
216 ));
217 }
218 Err(e) => return Err(ParseError::Xml(e)),
219 _ => {}
220 }
221 buf.clear();
222 }
223 }
224
225 pub fn write_to<W: Write>(&self, writer: &mut Writer<W>) -> Result<()> {
227 let mut start = BytesStart::new(&self.name);
228 for (key, value) in &self.attributes {
229 start.push_attribute((key.as_str(), value.as_str()));
230 }
231
232 if self.self_closing && self.children.is_empty() {
233 writer.write_event(Event::Empty(start))?;
234 } else {
235 writer.write_event(Event::Start(start))?;
236
237 for child in &self.children {
238 child.write_to(writer)?;
239 }
240
241 writer.write_event(Event::End(BytesEnd::new(&self.name)))?;
242 }
243
244 Ok(())
245 }
246}
247
248impl RawXmlNode {
249 pub fn write_to<W: Write>(&self, writer: &mut Writer<W>) -> Result<()> {
251 match self {
252 RawXmlNode::Element(elem) => elem.write_to(writer),
253 RawXmlNode::Text(text) => {
254 writer.write_event(Event::Text(BytesText::new(text)))?;
255 Ok(())
256 }
257 RawXmlNode::CData(text) => {
258 writer.write_event(Event::CData(BytesCData::new(text)))?;
259 Ok(())
260 }
261 RawXmlNode::Comment(text) => {
262 writer.write_event(Event::Comment(BytesText::new(text)))?;
263 Ok(())
264 }
265 }
266 }
267}
268
269pub struct RawXmlStreamReader<'a> {
275 stack: Vec<(&'a RawXmlElement, usize, ElementState)>,
277 buffer: Vec<u8>,
279 pos: usize,
281 done: bool,
283}
284
285#[derive(Clone, Copy, PartialEq)]
286enum ElementState {
287 Start,
289 Children,
291 End,
293}
294
295impl<'a> RawXmlStreamReader<'a> {
296 pub fn new(elem: &'a RawXmlElement) -> Self {
298 Self {
299 stack: vec![(elem, 0, ElementState::Start)],
300 buffer: Vec::with_capacity(256),
301 pos: 0,
302 done: false,
303 }
304 }
305
306 fn generate_next(&mut self) {
308 self.buffer.clear();
309 self.pos = 0;
310
311 while self.buffer.is_empty() && !self.stack.is_empty() {
312 let (elem, child_idx, state) = self.stack.pop().unwrap();
313
314 match state {
315 ElementState::Start => {
316 self.buffer.push(b'<');
318 self.buffer.extend_from_slice(elem.name.as_bytes());
319
320 for (key, value) in &elem.attributes {
321 self.buffer.push(b' ');
322 self.buffer.extend_from_slice(key.as_bytes());
323 self.buffer.extend_from_slice(b"=\"");
324 for &b in value.as_bytes() {
326 match b {
327 b'"' => self.buffer.extend_from_slice(b"""),
328 b'&' => self.buffer.extend_from_slice(b"&"),
329 b'<' => self.buffer.extend_from_slice(b"<"),
330 _ => self.buffer.push(b),
331 }
332 }
333 self.buffer.push(b'"');
334 }
335
336 if elem.self_closing && elem.children.is_empty() {
337 self.buffer.extend_from_slice(b"/>");
338 } else {
340 self.buffer.push(b'>');
341 self.stack.push((elem, 0, ElementState::Children));
343 }
344 }
345 ElementState::Children => {
346 if child_idx < elem.children.len() {
347 self.stack
349 .push((elem, child_idx + 1, ElementState::Children));
350
351 match &elem.children[child_idx] {
353 RawXmlNode::Element(child) => {
354 self.stack.push((child, 0, ElementState::Start));
355 }
356 RawXmlNode::Text(text) => {
357 for &b in text.as_bytes() {
359 match b {
360 b'&' => self.buffer.extend_from_slice(b"&"),
361 b'<' => self.buffer.extend_from_slice(b"<"),
362 b'>' => self.buffer.extend_from_slice(b">"),
363 _ => self.buffer.push(b),
364 }
365 }
366 }
367 RawXmlNode::CData(text) => {
368 self.buffer.extend_from_slice(b"<![CDATA[");
369 self.buffer.extend_from_slice(text.as_bytes());
370 self.buffer.extend_from_slice(b"]]>");
371 }
372 RawXmlNode::Comment(text) => {
373 self.buffer.extend_from_slice(b"<!--");
374 self.buffer.extend_from_slice(text.as_bytes());
375 self.buffer.extend_from_slice(b"-->");
376 }
377 }
378 } else {
379 self.stack.push((elem, 0, ElementState::End));
381 }
382 }
383 ElementState::End => {
384 self.buffer.extend_from_slice(b"</");
385 self.buffer.extend_from_slice(elem.name.as_bytes());
386 self.buffer.push(b'>');
387 }
388 }
389 }
390
391 if self.stack.is_empty() && self.buffer.is_empty() {
392 self.done = true;
393 }
394 }
395}
396
397impl<'a> std::io::Read for RawXmlStreamReader<'a> {
398 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
399 if self.pos >= self.buffer.len() {
400 if self.done {
401 return Ok(0);
402 }
403 self.generate_next();
404 if self.done && self.buffer.is_empty() {
405 return Ok(0);
406 }
407 }
408
409 let remaining = &self.buffer[self.pos..];
410 let to_copy = remaining.len().min(buf.len());
411 buf[..to_copy].copy_from_slice(&remaining[..to_copy]);
412 self.pos += to_copy;
413 Ok(to_copy)
414 }
415}
416
417impl<'a> BufRead for RawXmlStreamReader<'a> {
418 fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
419 if self.pos >= self.buffer.len() {
420 if self.done {
421 return Ok(&[]);
422 }
423 self.generate_next();
424 }
425 Ok(&self.buffer[self.pos..])
426 }
427
428 fn consume(&mut self, amt: usize) {
429 self.pos += amt;
430 }
431}
432
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Parses the root element of `xml`.
    ///
    /// Panics when the document does not open with a start tag, so a broken
    /// fixture fails the test instead of silently skipping every assertion.
    fn parse_root(xml: &str) -> RawXmlElement {
        let mut reader = Reader::from_str(xml);
        let mut buf = Vec::new();

        match reader.read_event_into(&mut buf) {
            Ok(Event::Start(e)) => RawXmlElement::from_reader(&mut reader, &e).unwrap(),
            other => panic!("expected a start tag, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_simple_element() {
        let elem = parse_root(r#"<w:test attr="value">content</w:test>"#);

        assert_eq!(elem.name, "w:test");
        assert_eq!(
            elem.attributes,
            vec![("attr".to_string(), "value".to_string())]
        );
        assert_eq!(elem.children.len(), 1);
        match &elem.children[0] {
            RawXmlNode::Text(t) => assert_eq!(t, "content"),
            _ => panic!("Expected text node"),
        }
    }

    #[test]
    fn test_parse_nested_elements() {
        let elem = parse_root(r#"<parent><child1/><child2>text</child2></parent>"#);

        assert_eq!(elem.name, "parent");
        assert_eq!(elem.children.len(), 2);
    }

    #[test]
    fn test_roundtrip() {
        let xml = r#"<w:test attr="value"><w:child>text</w:child></w:test>"#;
        let elem = parse_root(xml);

        let mut output = Vec::new();
        let mut writer = Writer::new(Cursor::new(&mut output));
        elem.write_to(&mut writer).unwrap();

        let output_str = String::from_utf8(output).unwrap();
        assert_eq!(output_str, xml);
    }

    #[test]
    fn test_streaming_reader() {
        use std::io::Read;

        let xml = r#"<parent attr="val"><child>text</child></parent>"#;
        let elem = parse_root(xml);

        let mut stream_reader = RawXmlStreamReader::new(&elem);
        let mut output = String::new();
        stream_reader.read_to_string(&mut output).unwrap();

        assert_eq!(output, xml);
    }

    #[test]
    fn test_streaming_reader_escaping() {
        use std::io::Read;

        let mut elem = RawXmlElement::new("test");
        elem.attributes
            .push(("attr".to_string(), "val\"ue".to_string()));
        elem.children
            .push(RawXmlNode::Text("a < b & c > d".to_string()));

        let mut stream_reader = RawXmlStreamReader::new(&elem);
        let mut output = String::new();
        stream_reader.read_to_string(&mut output).unwrap();

        // Markup-significant characters must come out as entities, otherwise the
        // streamed document is not well-formed.
        assert_eq!(
            output,
            r#"<test attr="val&quot;ue">a &lt; b &amp; c &gt; d</test>"#
        );
    }
}