xmltree/
lib.rs

1//! A simple library for parsing an XML file into an in-memory tree structure
2//!
3//! Not recommended for large XML files, as it will load the entire file into memory.
4//!
5//! # Example
6//!
7//! ```no_run
8//! use xmltree::Element;
9//! use std::fs::File;
10//!
11//! let data: &'static str = r##"
12//! <?xml version="1.0" encoding="utf-8" standalone="yes"?>
13//! <names>
14//!     <name first="bob" last="jones" />
15//!     <name first="elizabeth" last="smith" />
16//! </names>
17//! "##;
18//!
19//! let mut names_element = Element::parse(data.as_bytes()).unwrap();
20//!
21//! println!("{:#?}", names_element);
22//! {
23//!     // get first `name` element
24//!     let name = names_element.get_mut_child("name").expect("Can't find name element");
25//!     name.attributes.insert("suffix".to_owned(), "mr".to_owned());
26//! }
27//! names_element.write(File::create("result.xml").unwrap());
28//!
29//!
30//! ```
31
#[cfg(all(feature = "attribute-order", not(feature = "attribute-sorted")))]
/// The type used to store element attributes.
///
/// With only the "attribute-order" feature enabled this is an `IndexMap`.
pub type AttributeMap<K, V> = indexmap::map::IndexMap<K, V>;
#[cfg(all(feature = "attribute-sorted", not(feature = "attribute-order")))]
/// The type used to store element attributes.
///
/// With only the "attribute-sorted" feature enabled this is a `BTreeMap`.
pub type AttributeMap<K, V> = std::collections::BTreeMap<K, V>;
// When neither feature or both features are enabled, fall back to `HashMap` so that the alias
// always exists and unrelated "unresolved type" compiler errors don't appear.
#[cfg(any(
    not(any(feature = "attribute-sorted", feature = "attribute-order")),
    all(feature = "attribute-order", feature = "attribute-sorted")
))]
/// The type used to store element attributes.
///
/// By default this is a `HashMap`, but this can be changed with the "attribute-sorted" or
/// "attribute-order" features.
pub type AttributeMap<K, V> = std::collections::HashMap<K, V>;
// Enabling both features is still rejected: thanks to the fallback alias above, this
// `compile_error!` is the only diagnostic the user sees for that invalid combination.
#[cfg(all(feature = "attribute-order", feature = "attribute-sorted"))]
compile_error!("`attribute-order` and `attribute-sorted` are mutually exclusive — pick one");
52
53use std::borrow::Cow;
54use std::fmt;
55use std::io::{Read, Write};
56
57pub use xml::namespace::Namespace;
58pub use xml::reader::ParserConfig;
59use xml::reader::{EventReader, XmlEvent};
60pub use xml::writer::{EmitterConfig, Error};
61
/// A single node of the parsed XML tree.
///
/// Nodes of this type are stored in [`Element::children`] and are also what
/// [`Element::parse_all`] returns for the top level of a document.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum XMLNode {
    /// A nested element, together with its own attributes and children.
    Element(Element),
    /// The text of a comment.
    Comment(String),
    /// The content of a CDATA section.
    CData(String),
    /// Character data.
    Text(String),
    /// A processing instruction: its name, followed by its optional data.
    ProcessingInstruction(String, Option<String>),
}
70
/// Crate-private constructor shim so callers can request a pre-sized attribute map without
/// caring which concrete map type `AttributeMap` is aliased to.
trait AttributeMapExt {
    /// Creates an empty map, using `capacity` as a sizing hint where the map type supports one.
    fn allocate(capacity: usize) -> Self;
}

// With "attribute-sorted" the capacity hint is ignored and a plain empty map is created
// (`BTreeMap`, the alias selected by that feature, has no `with_capacity` constructor).
#[cfg(feature = "attribute-sorted")]
impl<K: Ord, V> AttributeMapExt for AttributeMap<K, V> {
    fn allocate(_capacity: usize) -> Self {
        Self::new()
    }
}

// Without "attribute-sorted" the map is created with room for `capacity` entries up front.
#[cfg(not(feature = "attribute-sorted"))]
impl<K, V> AttributeMapExt for AttributeMap<K, V> {
    fn allocate(capacity: usize) -> Self {
        Self::with_capacity(capacity)
    }
}
88
89impl XMLNode {
90    pub fn as_element(&self) -> Option<&Element> {
91        if let XMLNode::Element(e) = self {
92            Some(e)
93        } else {
94            None
95        }
96    }
97    pub fn as_mut_element(&mut self) -> Option<&mut Element> {
98        if let XMLNode::Element(e) = self {
99            Some(e)
100        } else {
101            None
102        }
103    }
104    pub fn as_comment(&self) -> Option<&str> {
105        if let XMLNode::Comment(c) = self {
106            Some(c)
107        } else {
108            None
109        }
110    }
111    pub fn as_cdata(&self) -> Option<&str> {
112        if let XMLNode::CData(c) = self {
113            Some(c)
114        } else {
115            None
116        }
117    }
118    pub fn as_text(&self) -> Option<&str> {
119        if let XMLNode::Text(c) = self {
120            Some(c)
121        } else {
122            None
123        }
124    }
125    pub fn as_processing_instruction(&self) -> Option<(&str, Option<&str>)> {
126        if let XMLNode::ProcessingInstruction(s, o) = self {
127            Some((s, o.as_ref().map(|s| s.as_str())))
128        } else {
129            None
130        }
131    }
132}
133
/// Represents an XML element.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Element {
    /// This element's prefix, if any
    pub prefix: Option<String>,

    /// This element's namespace, if any
    pub namespace: Option<String>,

    /// The full list of namespaces, if any
    ///
    /// The `Namespace` type is exported from the `xml-rs` crate.
    pub namespaces: Option<Namespace>,

    /// The name of the element. Does not include any namespace info
    pub name: String,

    /// The element's attributes
    ///
    /// By default, this is a `HashMap`, but there are two optional features that can change this:
    ///
    /// * If the "attribute-order" feature is enabled, then this is an [IndexMap](https://docs.rs/indexmap/2/indexmap/),
    ///   which will retain item insertion order.
    /// * If the "attribute-sorted" feature is enabled, then this is a [`std::collections::BTreeMap`], which maintains keys in sorted order.
    pub attributes: AttributeMap<String, String>,

    /// Child nodes (elements, text, comments, CDATA and processing instructions)
    pub children: Vec<XMLNode>,
}
163
/// Errors that can occur parsing XML
#[derive(Debug)]
pub enum ParseError {
    /// The XML is invalid; wraps the error reported by the underlying XML reader.
    MalformedXml(xml::reader::Error),
    /// This library is unable to assemble the parser's events into a tree.
    /// Within this file that happens when a closing tag's local name does not
    /// match the element currently being built, or when a document start/end
    /// event arrives while an element is still open.
    CannotParse,
}
173
174impl fmt::Display for ParseError {
175    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
176        match *self {
177            ParseError::MalformedXml(ref e) => write!(f, "Malformed XML. {}", e),
178            ParseError::CannotParse => write!(f, "Cannot parse"),
179        }
180    }
181}
182
183impl std::error::Error for ParseError {
184    fn description(&self) -> &str {
185        match *self {
186            ParseError::MalformedXml(..) => "Malformed XML",
187            ParseError::CannotParse => "Cannot parse",
188        }
189    }
190
191    fn cause(&self) -> Option<&dyn std::error::Error> {
192        match *self {
193            ParseError::MalformedXml(ref e) => Some(e),
194            ParseError::CannotParse => None,
195        }
196    }
197}
198
199fn build<B: Read>(reader: &mut EventReader<B>, mut elem: Element) -> Result<Element, ParseError> {
200    loop {
201        match reader.next() {
202            Ok(XmlEvent::EndElement { ref name }) => {
203                if name.local_name == elem.name {
204                    return Ok(elem);
205                } else {
206                    return Err(ParseError::CannotParse);
207                }
208            }
209            Ok(XmlEvent::StartElement {
210                name,
211                attributes,
212                namespace,
213            }) => {
214                let mut attr_map = AttributeMap::new();
215                for attr in attributes {
216                    attr_map.insert(attr.name.local_name, attr.value);
217                }
218
219                let new_elem = Element {
220                    prefix: name.prefix,
221                    namespace: name.namespace,
222                    namespaces: if namespace.is_essentially_empty() {
223                        None
224                    } else {
225                        Some(namespace)
226                    },
227                    name: name.local_name,
228                    attributes: attr_map,
229                    children: Vec::new(),
230                };
231                elem.children
232                    .push(XMLNode::Element(build(reader, new_elem)?));
233            }
234            Ok(XmlEvent::Characters(s)) => elem.children.push(XMLNode::Text(s)),
235            Ok(XmlEvent::Whitespace(..)) => (),
236            Ok(XmlEvent::Comment(s)) => elem.children.push(XMLNode::Comment(s)),
237            Ok(XmlEvent::CData(s)) => elem.children.push(XMLNode::CData(s)),
238            Ok(XmlEvent::ProcessingInstruction { name, data }) => elem
239                .children
240                .push(XMLNode::ProcessingInstruction(name, data)),
241            Ok(XmlEvent::StartDocument { .. }) | Ok(XmlEvent::EndDocument) => {
242                return Err(ParseError::CannotParse)
243            }
244            Ok(XmlEvent::Doctype { .. }) => (),
245            Err(e) => return Err(ParseError::MalformedXml(e)),
246        }
247    }
248}
249
250impl Element {
251    /// Create a new empty element with given name
252    ///
253    /// All other fields are empty
254    pub fn new(name: &str) -> Element {
255        Element {
256            name: String::from(name),
257            prefix: None,
258            namespace: None,
259            namespaces: None,
260            attributes: AttributeMap::new(),
261            children: Vec::new(),
262        }
263    }
264
265    /// Parses some data into a list of `XMLNode`s
266    ///
267    /// This is useful when you want to capture comments or processing instructions that appear
268    /// before or after the root node
269    pub fn parse_all<R: Read>(r: R) -> Result<Vec<XMLNode>, ParseError> {
270        let parser_config = ParserConfig::new().ignore_comments(false);
271        Element::parse_all_with_config(r, parser_config)
272    }
273
274    pub fn parse_all_with_config<R: Read>(
275        r: R,
276        parser_config: ParserConfig,
277    ) -> Result<Vec<XMLNode>, ParseError> {
278        let mut reader = EventReader::new_with_config(r, parser_config);
279        let mut root_nodes = Vec::new();
280        loop {
281            match reader.next() {
282                Ok(XmlEvent::StartElement {
283                    name,
284                    attributes,
285                    namespace,
286                }) => {
287                    let mut attr_map = AttributeMap::allocate(attributes.len());
288                    for attr in attributes {
289                        attr_map.insert(attr.name.local_name, attr.value);
290                    }
291
292                    let root = Element {
293                        prefix: name.prefix,
294                        namespace: name.namespace,
295                        namespaces: if namespace.is_essentially_empty() {
296                            None
297                        } else {
298                            Some(namespace)
299                        },
300                        name: name.local_name,
301                        attributes: attr_map,
302                        children: Vec::new(),
303                    };
304                    root_nodes.push(XMLNode::Element(build(&mut reader, root)?));
305                }
306                Ok(XmlEvent::Comment(comment_string)) => {
307                    root_nodes.push(XMLNode::Comment(comment_string))
308                }
309                Ok(XmlEvent::Characters(text_string)) => {
310                    root_nodes.push(XMLNode::Text(text_string))
311                }
312                Ok(XmlEvent::CData(cdata_string)) => root_nodes.push(XMLNode::CData(cdata_string)),
313                Ok(XmlEvent::Whitespace(..)) | Ok(XmlEvent::StartDocument { .. }) => continue,
314                Ok(XmlEvent::ProcessingInstruction { name, data }) => {
315                    root_nodes.push(XMLNode::ProcessingInstruction(name, data))
316                }
317                Ok(XmlEvent::EndElement { .. }) => (),
318                Ok(XmlEvent::EndDocument) => return Ok(root_nodes),
319                Ok(XmlEvent::Doctype { .. }) => (),
320                Err(e) => return Err(ParseError::MalformedXml(e)),
321            }
322        }
323    }
324
325    /// Parses some data into an Element
326    pub fn parse<R: Read>(r: R) -> Result<Element, ParseError> {
327        let nodes = Element::parse_all(r)?;
328        for node in nodes {
329            if let XMLNode::Element(elem) = node {
330                return Ok(elem);
331            }
332        }
333        // This assume the underlying xml library throws an error on no root element
334        unreachable!();
335    }
336
337    pub fn parse_with_config<R: Read>(r: R, config: ParserConfig) -> Result<Element, ParseError> {
338        let nodes = Element::parse_all_with_config(r, config)?;
339        for node in nodes {
340            if let XMLNode::Element(elem) = node {
341                return Ok(elem);
342            }
343        }
344        // This assume the underlying xml library throws an error on no root element
345        unreachable!();
346    }
347
348    fn _write<B: Write>(&self, emitter: &mut xml::writer::EventWriter<B>) -> Result<(), Error> {
349        use xml::attribute::Attribute;
350        use xml::name::Name;
351        use xml::writer::events::XmlEvent;
352
353        let mut name = Name::local(&self.name);
354        if let Some(ref ns) = self.namespace {
355            name.namespace = Some(ns);
356        }
357        if let Some(ref p) = self.prefix {
358            name.prefix = Some(p);
359        }
360
361        let mut attributes = Vec::with_capacity(self.attributes.len());
362        for (k, v) in &self.attributes {
363            attributes.push(Attribute {
364                name: Name::local(k),
365                value: v,
366            });
367        }
368
369        let empty_ns = Namespace::empty();
370        let namespace = if let Some(ref ns) = self.namespaces {
371            Cow::Borrowed(ns)
372        } else {
373            Cow::Borrowed(&empty_ns)
374        };
375
376        emitter.write(XmlEvent::StartElement {
377            name,
378            attributes: Cow::Owned(attributes),
379            namespace,
380        })?;
381        for node in &self.children {
382            match node {
383                XMLNode::Element(elem) => elem._write(emitter)?,
384                XMLNode::Text(text) => emitter.write(XmlEvent::Characters(text))?,
385                XMLNode::Comment(comment) => emitter.write(XmlEvent::Comment(comment))?,
386                XMLNode::CData(comment) => emitter.write(XmlEvent::CData(comment))?,
387                XMLNode::ProcessingInstruction(name, data) => match data.to_owned() {
388                    Some(string) => emitter.write(XmlEvent::ProcessingInstruction {
389                        name,
390                        data: Some(&string),
391                    })?,
392                    None => emitter.write(XmlEvent::ProcessingInstruction { name, data: None })?,
393                },
394            }
395            // elem._write(emitter)?;
396        }
397        emitter.write(XmlEvent::EndElement { name: Some(name) })?;
398
399        Ok(())
400    }
401
402    /// Writes out this element as the root element in an new XML document
403    pub fn write<W: Write>(&self, w: W) -> Result<(), Error> {
404        self.write_with_config(w, EmitterConfig::new())
405    }
406
407    /// Writes out this element as the root element in a new XML document using the provided configuration
408    pub fn write_with_config<W: Write>(&self, w: W, config: EmitterConfig) -> Result<(), Error> {
409        use xml::common::XmlVersion;
410        use xml::writer::events::XmlEvent;
411        use xml::writer::EventWriter;
412
413        let write_document_declaration = config.write_document_declaration;
414        let mut emitter = EventWriter::new_with_config(w, config);
415        if write_document_declaration {
416            emitter.write(XmlEvent::StartDocument {
417                version: XmlVersion::Version10,
418                encoding: None,
419                standalone: None,
420            })?;
421        }
422        self._write(&mut emitter)
423    }
424
425    /// Find a child element with the given name and return a reference to it.
426    ///
427    /// Both `&str` and `String` implement `ElementPredicate` and can be used to search for child
428    /// elements that match the given element name with `.get_child("element_name")`.  You can also
429    /// search by `("element_name", "tag_name")` tuple.
430    ///
431    ///
432    /// Note: this will only return Elements.  To get other nodes (like comments), iterate through
433    /// the `children` field.
434    pub fn get_child<P: ElementPredicate>(&self, k: P) -> Option<&Element> {
435        self.children
436            .iter()
437            .filter_map(|e| match e {
438                XMLNode::Element(elem) => Some(elem),
439                _ => None,
440            })
441            .find(|e| k.match_element(e))
442    }
443
444    /// Find a child element with the given name and return a mutable reference to it.
445    pub fn get_mut_child<P: ElementPredicate>(&mut self, k: P) -> Option<&mut Element> {
446        self.children
447            .iter_mut()
448            .filter_map(|e| match e {
449                XMLNode::Element(elem) => Some(elem),
450                _ => None,
451            })
452            .find(|e| k.match_element(e))
453    }
454
455    /// Find a child element with the given name, remove and return it.
456    pub fn take_child<P: ElementPredicate>(&mut self, k: P) -> Option<Element> {
457        let index = self.children.iter().position(|e| match e {
458            XMLNode::Element(elem) => k.match_element(elem),
459            _ => false,
460        });
461        match index {
462            Some(index) => match self.children.remove(index) {
463                XMLNode::Element(elem) => Some(elem),
464                _ => None,
465            },
466            None => None,
467        }
468    }
469
470    /// Returns the inner text/cdata of this element, if any.
471    ///
472    /// If there are multiple text/cdata nodes, they will be all concatenated into one string.
473    pub fn get_text<'a>(&'a self) -> Option<Cow<'a, str>> {
474        let text_nodes: Vec<&'a str> = self
475            .children
476            .iter()
477            .filter_map(|node| node.as_text().or_else(|| node.as_cdata()))
478            .collect();
479        if text_nodes.is_empty() {
480            None
481        } else if text_nodes.len() == 1 {
482            Some(Cow::Borrowed(text_nodes[0]))
483        } else {
484            let mut full_text = String::new();
485            for text in text_nodes {
486                full_text.push_str(text);
487            }
488            Some(Cow::Owned(full_text))
489        }
490    }
491
492    /// Checks if this element matches the predicate.
493    pub fn matches<P: ElementPredicate>(&self, k: P) -> bool {
494        k.match_element(self)
495    }
496}
497
/// A predicate for matching elements.
///
/// The default implementations allow you to match by tag name or a tuple of
/// tag name and namespace.
pub trait ElementPredicate {
    /// Returns `true` if the element `e` satisfies this predicate.
    fn match_element(&self, e: &Element) -> bool;
}
505
506// Unfortunately,
507// `impl<TN> ElementPredicate for TN where String: PartialEq<TN>` and
508// `impl<TN, NS> ElementPredicate for (TN, NS) where String: PartialEq<TN>, String: PartialEq<NS>`
509// are conflicting implementations, even though we know that there is no
510// implementation for tuples. We just manually implement `ElementPredicate` for
511// all `PartialEq` impls of `String` and forward them to the 1-tuple version.
512//
513// This can probably be fixed once specialization is stable.
514impl<TN> ElementPredicate for (TN,)
515where
516    String: PartialEq<TN>,
517{
518    fn match_element(&self, e: &Element) -> bool {
519        e.name == self.0
520    }
521}
522
523impl<'a> ElementPredicate for &'a str {
524    /// Search by tag name
525    fn match_element(&self, e: &Element) -> bool {
526        (*self,).match_element(e)
527    }
528}
529
530impl<'a> ElementPredicate for Cow<'a, str> {
531    /// Search by tag name
532    fn match_element(&self, e: &Element) -> bool {
533        (&**self,).match_element(e)
534    }
535}
536
537impl ElementPredicate for String {
538    /// Search by tag name
539    fn match_element(&self, e: &Element) -> bool {
540        (&**self,).match_element(e)
541    }
542}
543
544impl<TN, NS> ElementPredicate for (TN, NS)
545where
546    String: PartialEq<TN>,
547    String: PartialEq<NS>,
548{
549    /// Search by a tuple of (tagname, namespace)
550    fn match_element(&self, e: &Element) -> bool {
551        e.name == self.0
552            && e.namespace
553                .as_ref()
554                .map(|ns| ns == &self.1)
555                .unwrap_or(false)
556    }
557}