1use quick_xml::events::{BytesCData, BytesEnd, BytesStart, BytesText, Event};
8use quick_xml::{Reader, Writer};
9use std::io::{BufRead, Write};
10
11use crate::{Error, FromXml, ParseError, Result};
12
13#[derive(Clone, Debug, PartialEq)]
18pub struct PositionedNode {
19 pub position: usize,
21 pub node: RawXmlNode,
23}
24
25impl PositionedNode {
26 pub fn new(position: usize, node: RawXmlNode) -> Self {
28 Self { position, node }
29 }
30}
31
/// An attribute (name and value) paired with a caller-supplied position.
///
/// NOTE(review): nothing in this file reads `position`; presumably it is the
/// attribute's index in its original start tag — confirm against the callers.
#[derive(Debug, Clone, PartialEq)]
pub struct PositionedAttr {
    /// Position value associated with the attribute.
    pub position: usize,
    /// Attribute name.
    pub name: String,
    /// Attribute value.
    pub value: String,
}

impl PositionedAttr {
    /// Builds a positioned attribute, converting `name` and `value` into
    /// owned strings.
    pub fn new(position: usize, name: impl Into<String>, value: impl Into<String>) -> Self {
        let name = name.into();
        let value = value.into();
        PositionedAttr {
            position,
            name,
            value,
        }
    }
}
56
57#[derive(Clone, Debug, PartialEq)]
59pub enum RawXmlNode {
60 Element(RawXmlElement),
62 Text(String),
64 CData(String),
66 Comment(String),
68}
69
70#[derive(Clone, Debug, PartialEq)]
72pub struct RawXmlElement {
73 pub name: String,
75 pub attributes: Vec<(String, String)>,
77 pub children: Vec<RawXmlNode>,
79 pub self_closing: bool,
81}
82
83impl RawXmlElement {
84 pub fn new(name: impl Into<String>) -> Self {
86 Self {
87 name: name.into(),
88 attributes: Vec::new(),
89 children: Vec::new(),
90 self_closing: false,
91 }
92 }
93
94 pub fn from_reader<R: BufRead>(reader: &mut Reader<R>, start: &BytesStart) -> Result<Self> {
98 let name = String::from_utf8_lossy(start.name().as_ref()).to_string();
99
100 let attributes = start
101 .attributes()
102 .filter_map(|a| a.ok())
103 .map(|a| {
104 (
105 String::from_utf8_lossy(a.key.as_ref()).to_string(),
106 String::from_utf8_lossy(&a.value).to_string(),
107 )
108 })
109 .collect();
110
111 let mut element = RawXmlElement {
112 name: name.clone(),
113 attributes,
114 children: Vec::new(),
115 self_closing: false,
116 };
117
118 let mut buf = Vec::new();
119 let target_name = start.name().as_ref().to_vec();
120
121 loop {
122 match reader.read_event_into(&mut buf) {
123 Ok(Event::Start(e)) => {
124 let child = RawXmlElement::from_reader(reader, &e)?;
125 element.children.push(RawXmlNode::Element(child));
126 }
127 Ok(Event::Empty(e)) => {
128 let child = RawXmlElement::from_empty(&e);
129 element.children.push(RawXmlNode::Element(child));
130 }
131 Ok(Event::Text(e)) => {
132 let text = e.decode().unwrap_or_default();
133 if !text.is_empty() {
134 if let Some(RawXmlNode::Text(last)) = element.children.last_mut() {
136 last.push_str(&text);
137 } else {
138 element.children.push(RawXmlNode::Text(text.to_string()));
139 }
140 }
141 }
142 Ok(Event::GeneralRef(e)) => {
143 let entity_name = e.decode().unwrap_or_default();
144 if let Some(resolved) = quick_xml::escape::resolve_xml_entity(&entity_name) {
145 if let Some(RawXmlNode::Text(last)) = element.children.last_mut() {
147 last.push_str(resolved);
148 } else {
149 element
150 .children
151 .push(RawXmlNode::Text(resolved.to_string()));
152 }
153 }
154 }
155 Ok(Event::CData(e)) => {
156 let text = String::from_utf8_lossy(&e).to_string();
157 element.children.push(RawXmlNode::CData(text));
158 }
159 Ok(Event::Comment(e)) => {
160 let text = String::from_utf8_lossy(&e).to_string();
161 element.children.push(RawXmlNode::Comment(text));
162 }
163 Ok(Event::End(e)) => {
164 if e.name().as_ref() == target_name {
165 break;
166 }
167 }
168 Ok(Event::Eof) => {
169 return Err(Error::Invalid(format!(
170 "Unexpected EOF while parsing element '{}'",
171 name
172 )));
173 }
174 Err(e) => return Err(Error::Xml(e)),
175 _ => {}
176 }
177 buf.clear();
178 }
179
180 Ok(element)
181 }
182
183 pub fn from_empty(start: &BytesStart) -> Self {
185 let name = String::from_utf8_lossy(start.name().as_ref()).to_string();
186
187 let attributes = start
188 .attributes()
189 .filter_map(|a| a.ok())
190 .map(|a| {
191 (
192 String::from_utf8_lossy(a.key.as_ref()).to_string(),
193 String::from_utf8_lossy(&a.value).to_string(),
194 )
195 })
196 .collect();
197
198 RawXmlElement {
199 name,
200 attributes,
201 children: Vec::new(),
202 self_closing: true,
203 }
204 }
205
206 pub fn parse_as<T: FromXml>(&self) -> std::result::Result<T, ParseError> {
219 let streaming_reader = RawXmlStreamReader::new(self);
220 let mut reader = Reader::from_reader(streaming_reader);
221 let mut buf = Vec::new();
222
223 loop {
224 match reader.read_event_into(&mut buf) {
225 Ok(Event::Start(e)) => {
226 return T::from_xml(&mut reader, &e, false);
227 }
228 Ok(Event::Empty(e)) => {
229 return T::from_xml(&mut reader, &e, true);
230 }
231 Ok(Event::Eof) => {
232 return Err(ParseError::UnexpectedElement(
233 "empty XML in parse_as".to_string(),
234 ));
235 }
236 Err(e) => return Err(ParseError::Xml(e)),
237 _ => {}
238 }
239 buf.clear();
240 }
241 }
242
243 pub fn write_to<W: Write>(&self, writer: &mut Writer<W>) -> Result<()> {
245 let mut start = BytesStart::new(&self.name);
246 for (key, value) in &self.attributes {
247 start.push_attribute((key.as_str(), value.as_str()));
248 }
249
250 if self.self_closing && self.children.is_empty() {
251 writer.write_event(Event::Empty(start))?;
252 } else {
253 writer.write_event(Event::Start(start))?;
254
255 for child in &self.children {
256 child.write_to(writer)?;
257 }
258
259 writer.write_event(Event::End(BytesEnd::new(&self.name)))?;
260 }
261
262 Ok(())
263 }
264}
265
266impl RawXmlNode {
267 pub fn write_to<W: Write>(&self, writer: &mut Writer<W>) -> Result<()> {
269 match self {
270 RawXmlNode::Element(elem) => elem.write_to(writer),
271 RawXmlNode::Text(text) => {
272 writer.write_event(Event::Text(BytesText::new(text)))?;
273 Ok(())
274 }
275 RawXmlNode::CData(text) => {
276 writer.write_event(Event::CData(BytesCData::new(text)))?;
277 Ok(())
278 }
279 RawXmlNode::Comment(text) => {
280 writer.write_event(Event::Comment(BytesText::new(text)))?;
281 Ok(())
282 }
283 }
284 }
285}
286
287pub struct RawXmlStreamReader<'a> {
293 stack: Vec<(&'a RawXmlElement, usize, ElementState)>,
295 buffer: Vec<u8>,
297 pos: usize,
299 done: bool,
301}
302
/// Phase of an element on the [`RawXmlStreamReader`] work stack.
#[derive(PartialEq, Clone, Copy)]
enum ElementState {
    /// The start tag (or empty tag) still has to be emitted.
    Start,
    /// The start tag is out; children are being emitted one by one.
    Children,
    /// All children are out; the end tag still has to be emitted.
    End,
}
312
313impl<'a> RawXmlStreamReader<'a> {
314 pub fn new(elem: &'a RawXmlElement) -> Self {
316 Self {
317 stack: vec![(elem, 0, ElementState::Start)],
318 buffer: Vec::with_capacity(256),
319 pos: 0,
320 done: false,
321 }
322 }
323
324 fn generate_next(&mut self) {
326 self.buffer.clear();
327 self.pos = 0;
328
329 while self.buffer.is_empty() && !self.stack.is_empty() {
330 let (elem, child_idx, state) = self.stack.pop().unwrap();
331
332 match state {
333 ElementState::Start => {
334 self.buffer.push(b'<');
336 self.buffer.extend_from_slice(elem.name.as_bytes());
337
338 for (key, value) in &elem.attributes {
339 self.buffer.push(b' ');
340 self.buffer.extend_from_slice(key.as_bytes());
341 self.buffer.extend_from_slice(b"=\"");
342 for &b in value.as_bytes() {
344 match b {
345 b'"' => self.buffer.extend_from_slice(b"""),
346 b'&' => self.buffer.extend_from_slice(b"&"),
347 b'<' => self.buffer.extend_from_slice(b"<"),
348 _ => self.buffer.push(b),
349 }
350 }
351 self.buffer.push(b'"');
352 }
353
354 if elem.self_closing && elem.children.is_empty() {
355 self.buffer.extend_from_slice(b"/>");
356 } else {
358 self.buffer.push(b'>');
359 self.stack.push((elem, 0, ElementState::Children));
361 }
362 }
363 ElementState::Children => {
364 if child_idx < elem.children.len() {
365 self.stack
367 .push((elem, child_idx + 1, ElementState::Children));
368
369 match &elem.children[child_idx] {
371 RawXmlNode::Element(child) => {
372 self.stack.push((child, 0, ElementState::Start));
373 }
374 RawXmlNode::Text(text) => {
375 for &b in text.as_bytes() {
377 match b {
378 b'&' => self.buffer.extend_from_slice(b"&"),
379 b'<' => self.buffer.extend_from_slice(b"<"),
380 b'>' => self.buffer.extend_from_slice(b">"),
381 _ => self.buffer.push(b),
382 }
383 }
384 }
385 RawXmlNode::CData(text) => {
386 self.buffer.extend_from_slice(b"<![CDATA[");
387 self.buffer.extend_from_slice(text.as_bytes());
388 self.buffer.extend_from_slice(b"]]>");
389 }
390 RawXmlNode::Comment(text) => {
391 self.buffer.extend_from_slice(b"<!--");
392 self.buffer.extend_from_slice(text.as_bytes());
393 self.buffer.extend_from_slice(b"-->");
394 }
395 }
396 } else {
397 self.stack.push((elem, 0, ElementState::End));
399 }
400 }
401 ElementState::End => {
402 self.buffer.extend_from_slice(b"</");
403 self.buffer.extend_from_slice(elem.name.as_bytes());
404 self.buffer.push(b'>');
405 }
406 }
407 }
408
409 if self.stack.is_empty() && self.buffer.is_empty() {
410 self.done = true;
411 }
412 }
413}
414
415impl<'a> std::io::Read for RawXmlStreamReader<'a> {
416 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
417 if self.pos >= self.buffer.len() {
418 if self.done {
419 return Ok(0);
420 }
421 self.generate_next();
422 if self.done && self.buffer.is_empty() {
423 return Ok(0);
424 }
425 }
426
427 let remaining = &self.buffer[self.pos..];
428 let to_copy = remaining.len().min(buf.len());
429 buf[..to_copy].copy_from_slice(&remaining[..to_copy]);
430 self.pos += to_copy;
431 Ok(to_copy)
432 }
433}
434
435impl<'a> BufRead for RawXmlStreamReader<'a> {
436 fn fill_buf(&mut self) -> std::io::Result<&[u8]> {
437 if self.pos >= self.buffer.len() {
438 if self.done {
439 return Ok(&[]);
440 }
441 self.generate_next();
442 }
443 Ok(&self.buffer[self.pos..])
444 }
445
446 fn consume(&mut self, amt: usize) {
447 self.pos += amt;
448 }
449}
450
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{Cursor, Read};

    /// Parses `xml` and returns its root element.
    ///
    /// Panics when the first event is not a start tag, so a test can never
    /// pass without having exercised its assertions.
    fn parse_root(xml: &str) -> RawXmlElement {
        let mut reader = Reader::from_str(xml);
        let mut buf = Vec::new();
        match reader.read_event_into(&mut buf) {
            Ok(Event::Start(e)) => RawXmlElement::from_reader(&mut reader, &e).unwrap(),
            other => panic!("expected a start tag, got {:?}", other),
        }
    }

    /// Serialises `elem` through `RawXmlStreamReader` into a string.
    fn stream_to_string(elem: &RawXmlElement) -> String {
        let mut output = String::new();
        RawXmlStreamReader::new(elem)
            .read_to_string(&mut output)
            .unwrap();
        output
    }

    #[test]
    fn test_parse_simple_element() {
        let elem = parse_root(r#"<w:test attr="value">content</w:test>"#);

        assert_eq!(elem.name, "w:test");
        assert_eq!(
            elem.attributes,
            vec![("attr".to_string(), "value".to_string())]
        );
        assert_eq!(elem.children.len(), 1);
        match &elem.children[0] {
            RawXmlNode::Text(t) => assert_eq!(t, "content"),
            _ => panic!("Expected text node"),
        }
    }

    #[test]
    fn test_parse_nested_elements() {
        let elem = parse_root(r#"<parent><child1/><child2>text</child2></parent>"#);

        assert_eq!(elem.name, "parent");
        assert_eq!(elem.children.len(), 2);
    }

    #[test]
    fn test_roundtrip() {
        let xml = r#"<w:test attr="value"><w:child>text</w:child></w:test>"#;
        let elem = parse_root(xml);

        let mut output = Vec::new();
        let mut writer = Writer::new(Cursor::new(&mut output));
        elem.write_to(&mut writer).unwrap();

        let output_str = String::from_utf8(output).unwrap();
        assert_eq!(output_str, xml);
    }

    #[test]
    fn test_streaming_reader() {
        let xml = r#"<parent attr="val"><child>text</child></parent>"#;
        let elem = parse_root(xml);

        assert_eq!(stream_to_string(&elem), xml);
    }

    #[test]
    fn test_streaming_reader_escaping() {
        let mut elem = RawXmlElement::new("test");
        elem.attributes
            .push(("attr".to_string(), "val\"ue".to_string()));
        elem.children
            .push(RawXmlNode::Text("a < b & c > d".to_string()));

        assert_eq!(
            stream_to_string(&elem),
            r#"<test attr="val&quot;ue">a &lt; b &amp; c &gt; d</test>"#
        );
    }

    #[test]
    fn test_from_reader_preserves_xml_entities() {
        let xml =
            r#"<root><t>A &amp; B &lt; C &gt; D &quot;E&quot; &apos;F&apos;</t></root>"#;
        let elem = parse_root(xml);

        // Entity references are resolved while reading and merged into a
        // single text node...
        match &elem.children[0] {
            RawXmlNode::Element(t_elem) => match t_elem.children.first() {
                Some(RawXmlNode::Text(text)) => {
                    assert_eq!(text, "A & B < C > D \"E\" 'F'");
                }
                _ => panic!("Expected text child in <t> element"),
            },
            _ => panic!("Expected element child"),
        }

        // ...and escaped again when the tree is streamed back out.
        let output = stream_to_string(&elem);
        assert!(output.contains("A &amp; B &lt; C &gt; D"));
    }
}