xmltree_parse_with_config/
lib.rs

1//! A simple library for parsing an XML file into an in-memory tree structure
2//!
3//! Not recommended for large XML files, as it will load the entire file into memory.
4//!
5//! # Example
6//!
7//! ```no_run
8//! use xmltree::Element;
9//! use std::fs::File;
10//!
11//! let data: &'static str = r##"
12//! <?xml version="1.0" encoding="utf-8" standalone="yes"?>
13//! <names>
14//!     <name first="bob" last="jones" />
15//!     <name first="elizabeth" last="smith" />
16//! </names>
17//! "##;
18//!
19//! let mut names_element = Element::parse(data.as_bytes()).unwrap();
20//!
21//! println!("{:#?}", names_element);
22//! {
23//!     // get first `name` element
24//!     let name = names_element.get_mut_child("name").expect("Can't find name element");
25//!     name.attributes.insert("suffix".to_owned(), "mr".to_owned());
26//! }
27//! names_element.write(File::create("result.xml").unwrap());
28//!
29//!
30//! ```
31extern crate xml;
32
33use std::borrow::Cow;
34#[cfg(feature = "attribute-sorted")]
35use std::collections::BTreeMap as AttributeMap;
36#[cfg(not(any(feature = "attribute-sorted", feature = "attribute-order")))]
37use std::collections::HashMap as AttributeMap;
38use std::fmt;
39use std::io::{Read, Write};
40
41#[cfg(feature = "attribute-order")]
42use indexmap::map::IndexMap as AttributeMap;
43pub use xml::namespace::Namespace;
44pub use xml::reader::ParserConfig;
45use xml::reader::{EventReader, XmlEvent};
46pub use xml::writer::{EmitterConfig, Error};
47
/// A single node of an XML tree, as stored in `Element::children`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum XMLNode {
    /// A nested element.
    Element(Element),
    /// The text of a comment.
    Comment(String),
    /// The content of a CDATA section.
    CData(String),
    /// Character data.
    Text(String),
    /// A processing instruction: its name, followed by its optional data.
    ProcessingInstruction(String, Option<String>),
}
56
/// Private helper giving every `AttributeMap` backend one constructor that
/// accepts a capacity hint.
trait AttributeMapExt {
    /// Creates an empty map. `capacity` is only a hint; an implementation is
    /// free to ignore it.
    fn allocate(capacity: usize) -> Self;
}
60
// With the "attribute-sorted" feature `AttributeMap` is a `BTreeMap`; the
// capacity hint is ignored and an empty map is returned.
#[cfg(feature = "attribute-sorted")]
impl<K: Ord, V> AttributeMapExt for AttributeMap<K, V> {
    fn allocate(_capacity: usize) -> Self {
        Self::new()
    }
}
67
// Without the "attribute-sorted" feature `AttributeMap` is a `HashMap` (or an
// `IndexMap` when "attribute-order" is enabled); the hint is forwarded to
// `with_capacity`.
#[cfg(not(feature = "attribute-sorted"))]
impl<K, V> AttributeMapExt for AttributeMap<K, V> {
    fn allocate(capacity: usize) -> Self {
        Self::with_capacity(capacity)
    }
}
74
75impl XMLNode {
76    pub fn as_element(&self) -> Option<&Element> {
77        if let XMLNode::Element(e) = self {
78            Some(e)
79        } else {
80            None
81        }
82    }
83    pub fn as_mut_element(&mut self) -> Option<&mut Element> {
84        if let XMLNode::Element(e) = self {
85            Some(e)
86        } else {
87            None
88        }
89    }
90    pub fn as_comment(&self) -> Option<&str> {
91        if let XMLNode::Comment(c) = self {
92            Some(c)
93        } else {
94            None
95        }
96    }
97    pub fn as_cdata(&self) -> Option<&str> {
98        if let XMLNode::CData(c) = self {
99            Some(c)
100        } else {
101            None
102        }
103    }
104    pub fn as_text(&self) -> Option<&str> {
105        if let XMLNode::Text(c) = self {
106            Some(c)
107        } else {
108            None
109        }
110    }
111    pub fn as_processing_instruction(&self) -> Option<(&str, Option<&str>)> {
112        if let XMLNode::ProcessingInstruction(s, o) = self {
113            Some((s, o.as_ref().map(|s| s.as_str())))
114        } else {
115            None
116        }
117    }
118}
119
/// Represents an XML element.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Element {
    /// This element's prefix, if any
    pub prefix: Option<String>,

    /// This element's namespace, if any
    pub namespace: Option<String>,

    /// The full list of namespaces, if any
    ///
    /// The `Namespace` type is exported from the `xml-rs` crate.
    pub namespaces: Option<Namespace>,

    /// The name of the Element.  Does not include any namespace info
    pub name: String,

    /// The Element attributes
    ///
    /// By default, this is a `HashMap`. If the optional "attribute-order" feature is enabled,
    /// this is an [IndexMap](https://docs.rs/indexmap/1.4.0/indexmap/), which will retain
    /// item insertion order. If the optional "attribute-sorted" feature is enabled, this is a
    /// `BTreeMap`.
    pub attributes: AttributeMap<String, String>,

    /// Children, in the order they were added
    pub children: Vec<XMLNode>,
}
147
/// Errors that can occur parsing XML
#[derive(Debug)]
pub enum ParseError {
    /// The XML is invalid; wraps the error reported by the underlying reader
    MalformedXml(xml::reader::Error),
    /// This library is unable to process this XML. This is returned when the
    /// reader yields an event the tree builder does not expect, for example
    /// an end tag whose name differs from the element being built, or a
    /// document start/end event inside an element.
    CannotParse,
}
157
158impl fmt::Display for ParseError {
159    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
160        match *self {
161            ParseError::MalformedXml(ref e) => write!(f, "Malformed XML. {}", e),
162            ParseError::CannotParse => write!(f, "Cannot parse"),
163        }
164    }
165}
166
167impl std::error::Error for ParseError {
168    fn description(&self) -> &str {
169        match *self {
170            ParseError::MalformedXml(..) => "Malformed XML",
171            ParseError::CannotParse => "Cannot parse",
172        }
173    }
174
175    fn cause(&self) -> Option<&dyn std::error::Error> {
176        match *self {
177            ParseError::MalformedXml(ref e) => Some(e),
178            ParseError::CannotParse => None,
179        }
180    }
181}
182
183fn build<B: Read>(reader: &mut EventReader<B>, mut elem: Element) -> Result<Element, ParseError> {
184    loop {
185        match reader.next() {
186            Ok(XmlEvent::EndElement { ref name }) => {
187                if name.local_name == elem.name {
188                    return Ok(elem);
189                } else {
190                    return Err(ParseError::CannotParse);
191                }
192            }
193            Ok(XmlEvent::StartElement {
194                name,
195                attributes,
196                namespace,
197            }) => {
198                let mut attr_map = AttributeMap::new();
199                for attr in attributes {
200                    attr_map.insert(attr.name.local_name, attr.value);
201                }
202
203                let new_elem = Element {
204                    prefix: name.prefix,
205                    namespace: name.namespace,
206                    namespaces: if namespace.is_essentially_empty() {
207                        None
208                    } else {
209                        Some(namespace)
210                    },
211                    name: name.local_name,
212                    attributes: attr_map,
213                    children: Vec::new(),
214                };
215                elem.children
216                    .push(XMLNode::Element(build(reader, new_elem)?));
217            }
218            Ok(XmlEvent::Characters(s)) => elem.children.push(XMLNode::Text(s)),
219            Ok(XmlEvent::Whitespace(..)) => (),
220            Ok(XmlEvent::Comment(s)) => elem.children.push(XMLNode::Comment(s)),
221            Ok(XmlEvent::CData(s)) => elem.children.push(XMLNode::Text(s)),
222            Ok(XmlEvent::ProcessingInstruction { name, data }) => elem
223                .children
224                .push(XMLNode::ProcessingInstruction(name, data)),
225            Ok(XmlEvent::StartDocument { .. }) | Ok(XmlEvent::EndDocument) => {
226                return Err(ParseError::CannotParse)
227            }
228            Err(e) => return Err(ParseError::MalformedXml(e)),
229        }
230    }
231}
232
233impl Element {
234    /// Create a new empty element with given name
235    ///
236    /// All other fields are empty
237    pub fn new(name: &str) -> Element {
238        Element {
239            name: String::from(name),
240            prefix: None,
241            namespace: None,
242            namespaces: None,
243            attributes: AttributeMap::new(),
244            children: Vec::new(),
245        }
246    }
247
248    /// Parses some data into a list of `XMLNode`s
249    ///
250    /// This is useful when you want to capture comments or processing instructions that appear
251    /// before or after the root node
252    pub fn parse_all<R: Read>(r: R) -> Result<Vec<XMLNode>, ParseError> {
253        let parser_config = ParserConfig::new().ignore_comments(false);
254        Element::parse_all_with_config(r, parser_config)
255    }
256
257    pub fn parse_all_with_config<R: Read>(r: R, parser_config: ParserConfig) -> Result<Vec<XMLNode>, ParseError> {
258        let mut reader = EventReader::new_with_config(r, parser_config);
259        let mut root_nodes = Vec::new();
260        loop {
261            match reader.next() {
262                Ok(XmlEvent::StartElement {
263                    name,
264                    attributes,
265                    namespace,
266                }) => {
267                    let mut attr_map = AttributeMap::allocate(attributes.len());
268                    for attr in attributes {
269                        attr_map.insert(attr.name.local_name, attr.value);
270                    }
271
272                    let root = Element {
273                        prefix: name.prefix,
274                        namespace: name.namespace,
275                        namespaces: if namespace.is_essentially_empty() {
276                            None
277                        } else {
278                            Some(namespace)
279                        },
280                        name: name.local_name,
281                        attributes: attr_map,
282                        children: Vec::new(),
283                    };
284                    root_nodes.push(XMLNode::Element(build(&mut reader, root)?));
285                }
286                Ok(XmlEvent::Comment(comment_string)) => {
287                    root_nodes.push(XMLNode::Comment(comment_string))
288                }
289                Ok(XmlEvent::Characters(text_string)) => {
290                    root_nodes.push(XMLNode::Text(text_string))
291                }
292                Ok(XmlEvent::CData(cdata_string)) => root_nodes.push(XMLNode::CData(cdata_string)),
293                Ok(XmlEvent::Whitespace(..)) | Ok(XmlEvent::StartDocument { .. }) => continue,
294                Ok(XmlEvent::ProcessingInstruction { name, data }) => {
295                    root_nodes.push(XMLNode::ProcessingInstruction(name, data))
296                }
297                Ok(XmlEvent::EndElement { .. }) => (),
298                Ok(XmlEvent::EndDocument) => return Ok(root_nodes),
299                Err(e) => return Err(ParseError::MalformedXml(e)),
300            }
301        }
302    }
303
304    /// Parses some data into an Element
305    pub fn parse<R: Read>(r: R) -> Result<Element, ParseError> {
306        let nodes = Element::parse_all(r)?;
307        for node in nodes {
308            if let XMLNode::Element(elem) = node { return Ok(elem) }
309        }
310        // This assume the underlying xml library throws an error on no root element
311        unreachable!();
312    }
313
314    pub fn parse_with_config<R: Read>(r: R, config: ParserConfig) -> Result<Element, ParseError> {
315        let nodes = Element::parse_all_with_config(r, config)?;
316        for node in nodes {
317            if let XMLNode::Element(elem) = node { return Ok(elem) }
318        }
319        // This assume the underlying xml library throws an error on no root element
320        unreachable!();
321    }
322
323    fn _write<B: Write>(&self, emitter: &mut xml::writer::EventWriter<B>) -> Result<(), Error> {
324        use xml::attribute::Attribute;
325        use xml::name::Name;
326        use xml::writer::events::XmlEvent;
327
328        let mut name = Name::local(&self.name);
329        if let Some(ref ns) = self.namespace {
330            name.namespace = Some(ns);
331        }
332        if let Some(ref p) = self.prefix {
333            name.prefix = Some(p);
334        }
335
336        let mut attributes = Vec::with_capacity(self.attributes.len());
337        for (k, v) in &self.attributes {
338            attributes.push(Attribute {
339                name: Name::local(k),
340                value: v,
341            });
342        }
343
344        let empty_ns = Namespace::empty();
345        let namespace = if let Some(ref ns) = self.namespaces {
346            Cow::Borrowed(ns)
347        } else {
348            Cow::Borrowed(&empty_ns)
349        };
350
351        emitter.write(XmlEvent::StartElement {
352            name,
353            attributes: Cow::Owned(attributes),
354            namespace,
355        })?;
356        for node in &self.children {
357            match node {
358                XMLNode::Element(elem) => elem._write(emitter)?,
359                XMLNode::Text(text) => emitter.write(XmlEvent::Characters(text))?,
360                XMLNode::Comment(comment) => emitter.write(XmlEvent::Comment(comment))?,
361                XMLNode::CData(comment) => emitter.write(XmlEvent::CData(comment))?,
362                XMLNode::ProcessingInstruction(name, data) => match data.to_owned() {
363                    Some(string) => emitter.write(XmlEvent::ProcessingInstruction {
364                        name,
365                        data: Some(&string),
366                    })?,
367                    None => emitter.write(XmlEvent::ProcessingInstruction { name, data: None })?,
368                },
369            }
370            // elem._write(emitter)?;
371        }
372        emitter.write(XmlEvent::EndElement { name: Some(name) })?;
373
374        Ok(())
375    }
376
377    /// Writes out this element as the root element in an new XML document
378    pub fn write<W: Write>(&self, w: W) -> Result<(), Error> {
379        self.write_with_config(w, EmitterConfig::new())
380    }
381
382    /// Writes out this element as the root element in a new XML document using the provided configuration
383    pub fn write_with_config<W: Write>(&self, w: W, config: EmitterConfig) -> Result<(), Error> {
384        use xml::common::XmlVersion;
385        use xml::writer::events::XmlEvent;
386        use xml::writer::EventWriter;
387
388        let write_document_declaration = config.write_document_declaration;
389        let mut emitter = EventWriter::new_with_config(w, config);
390        if write_document_declaration {
391            emitter.write(XmlEvent::StartDocument {
392                version: XmlVersion::Version10,
393                encoding: None,
394                standalone: None,
395            })?;
396        }
397        self._write(&mut emitter)
398    }
399
400    /// Find a child element with the given name and return a reference to it.
401    ///
402    /// Both `&str` and `String` implement `ElementPredicate` and can be used to search for child
403    /// elements that match the given element name with `.get_child("element_name")`.  You can also
404    /// search by `("element_name", "tag_name")` tuple.
405    ///
406    ///
407    /// Note: this will only return Elements.  To get other nodes (like comments), iterate through
408    /// the `children` field.
409    pub fn get_child<P: ElementPredicate>(&self, k: P) -> Option<&Element> {
410        self.children
411            .iter()
412            .filter_map(|e| match e {
413                XMLNode::Element(elem) => Some(elem),
414                _ => None,
415            })
416            .find(|e| k.match_element(e))
417    }
418
419    /// Find a child element with the given name and return a mutable reference to it.
420    pub fn get_mut_child<P: ElementPredicate>(&mut self, k: P) -> Option<&mut Element> {
421        self.children
422            .iter_mut()
423            .filter_map(|e| match e {
424                XMLNode::Element(elem) => Some(elem),
425                _ => None,
426            })
427            .find(|e| k.match_element(e))
428    }
429
430    /// Find a child element with the given name, remove and return it.
431    pub fn take_child<P: ElementPredicate>(&mut self, k: P) -> Option<Element> {
432        let index = self.children.iter().position(|e| match e {
433            XMLNode::Element(elem) => k.match_element(elem),
434            _ => false,
435        });
436        match index {
437            Some(index) => match self.children.remove(index) {
438                XMLNode::Element(elem) => Some(elem),
439                _ => None,
440            },
441            None => None,
442        }
443    }
444
445    /// Returns the inner text/cdata of this element, if any.
446    ///
447    /// If there are multiple text/cdata nodes, they will be all concatenated into one string.
448    pub fn get_text<'a>(&'a self) -> Option<Cow<'a, str>> {
449        let text_nodes: Vec<&'a str> = self
450            .children
451            .iter()
452            .filter_map(|node| node.as_text().or_else(|| node.as_cdata()))
453            .collect();
454        if text_nodes.is_empty() {
455            None
456        } else if text_nodes.len() == 1 {
457            Some(Cow::Borrowed(text_nodes[0]))
458        } else {
459            let mut full_text = String::new();
460            for text in text_nodes {
461                full_text.push_str(text);
462            }
463            Some(Cow::Owned(full_text))
464        }
465    }
466}
467
/// A predicate for matching elements.
///
/// The default implementations allow you to match by tag name or a tuple of
/// tag name and namespace.
pub trait ElementPredicate {
    /// Returns `true` if `e` satisfies this predicate.
    fn match_element(&self, e: &Element) -> bool;
}
475
// Unfortunately,
// `impl<TN> ElementPredicate for TN where String: PartialEq<TN>` and
// `impl<TN, NS> ElementPredicate for (TN, NS) where String: PartialEq<TN>, String: PartialEq<NS>`
// are conflicting implementations, even though we know that there is no
// implementation for tuples. We just manually implement `ElementPredicate` for
// all `PartialEq` impls of `String` and forward them to the 1-tuple version.
//
// This can probably be fixed once specialization is stable.
impl<TN> ElementPredicate for (TN,)
where
    String: PartialEq<TN>,
{
    /// Search by tag name: matches when the element's `name` equals the
    /// tuple's only field.
    fn match_element(&self, e: &Element) -> bool {
        e.name == self.0
    }
}
492
493impl<'a> ElementPredicate for &'a str {
494    /// Search by tag name
495    fn match_element(&self, e: &Element) -> bool {
496        (*self,).match_element(e)
497    }
498}
499
500impl<'a> ElementPredicate for Cow<'a, str> {
501    /// Search by tag name
502    fn match_element(&self, e: &Element) -> bool {
503        (&**self,).match_element(e)
504    }
505}
506
507impl ElementPredicate for String {
508    /// Search by tag name
509    fn match_element(&self, e: &Element) -> bool {
510        (&**self,).match_element(e)
511    }
512}
513
514impl<TN, NS> ElementPredicate for (TN, NS)
515where
516    String: PartialEq<TN>,
517    String: PartialEq<NS>,
518{
519    /// Search by a tuple of (tagname, namespace)
520    fn match_element(&self, e: &Element) -> bool {
521        e.name == self.0
522            && e.namespace
523                .as_ref()
524                .map(|ns| ns == &self.1)
525                .unwrap_or(false)
526    }
527}