xmltree_ns/
lib.rs

1//! A simple library for parsing an XML file into an in-memory tree structure
2//!
3//! Not recommended for large XML files, as it will load the entire file into memory.
4//!
5//! # Example
6//!
7//! ```no_run
8//! use xmltree_ns::{AttributeName, Element};
9//! use std::fs::File;
10//!
11//! let data: &'static str = r##"
12//! <?xml version="1.0" encoding="utf-8" standalone="yes"?>
13//! <names>
14//!     <name first="bob" last="jones" />
15//!     <name first="elizabeth" last="smith" />
16//! </names>
17//! "##;
18//!
19//! let mut names_element = Element::parse(data.as_bytes()).unwrap();
20//!
21//! println!("{:#?}", names_element);
22//! {
23//!     // get first `name` element
24//!     let name = names_element.get_mut_child("name").expect("Can't find name element");
25//!     name.attributes.insert(AttributeName::local("suffix"), "mr".to_owned());
26//! }
27//! names_element.write(File::create("result.xml").unwrap());
28//!
29//!
30//! ```
31
// Exactly one `AttributeMap` alias is compiled in; which one is decided by the
// `attribute-order` and `attribute-sorted` cargo features.
#[cfg(all(feature = "attribute-order", not(feature = "attribute-sorted")))]
/// The type used to store element attributes.
///
/// With only the "attribute-order" feature enabled this is an `IndexMap`.
pub type AttributeMap<K, V> = indexmap::map::IndexMap<K, V>;
#[cfg(all(feature = "attribute-sorted", not(feature = "attribute-order")))]
/// The type used to store element attributes.
///
/// With only the "attribute-sorted" feature enabled this is a `BTreeMap`.
pub type AttributeMap<K, V> = std::collections::BTreeMap<K, V>;
// When neither feature or both features are enabled, fall back to `HashMap` so the alias always
// exists and irrelevant compiler errors don't appear.
#[cfg(any(
    not(any(feature = "attribute-sorted", feature = "attribute-order")),
    all(feature = "attribute-order", feature = "attribute-sorted")
))]
/// The type used to store element attributes.
///
/// By default this is a `HashMap`, but this can be changed with the "attribute-sorted" or
/// "attribute-order" features.
pub type AttributeMap<K, V> = std::collections::HashMap<K, V>;
// Enabling both features is still rejected. Because of the fallback alias above, this is the
// only compiler error the user will see in that case.
#[cfg(all(feature = "attribute-order", feature = "attribute-sorted"))]
compile_error!("`attribute-order` and `attribute-sorted` are mutually exclusive — pick one");
52
53use std::borrow::Cow;
54use std::fmt;
55use std::io::{Read, Write};
56
57pub use xml::name::OwnedName as AttributeName;
58pub use xml::namespace::Namespace;
59pub use xml::reader::ParserConfig;
60use xml::reader::{EventReader, XmlEvent};
61pub use xml::writer::{EmitterConfig, Error};
62
/// A single node in the parsed XML tree.
///
/// An [`Element`] stores its content as a `Vec<XMLNode>` in its `children` field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum XMLNode {
    /// A nested element, with its own attributes and children.
    Element(Element),
    /// The text of a comment.
    Comment(String),
    /// The text of a CDATA section.
    CData(String),
    /// Character data.
    Text(String),
    /// A processing instruction: its name followed by its optional data.
    ProcessingInstruction(String, Option<String>),
}
71
/// Private helper that gives every `AttributeMap` backing type a uniform
/// "create an empty map, given a size hint" constructor.
trait AttributeMapExt {
    /// Creates an empty map; `capacity` is the number of entries the caller expects to insert.
    fn allocate(capacity: usize) -> Self;
}

// Used whenever the "attribute-sorted" feature is on: the capacity hint is ignored and the map
// is created with `new()`.
#[cfg(feature = "attribute-sorted")]
impl<K: Ord, V> AttributeMapExt for AttributeMap<K, V> {
    fn allocate(_capacity: usize) -> Self {
        Self::new()
    }
}

// Used when "attribute-sorted" is off: the hint is forwarded to `with_capacity`.
#[cfg(not(feature = "attribute-sorted"))]
impl<K, V> AttributeMapExt for AttributeMap<K, V> {
    fn allocate(capacity: usize) -> Self {
        Self::with_capacity(capacity)
    }
}
89
90impl XMLNode {
91    pub fn as_element(&self) -> Option<&Element> {
92        if let XMLNode::Element(e) = self {
93            Some(e)
94        } else {
95            None
96        }
97    }
98    pub fn as_mut_element(&mut self) -> Option<&mut Element> {
99        if let XMLNode::Element(e) = self {
100            Some(e)
101        } else {
102            None
103        }
104    }
105    pub fn as_comment(&self) -> Option<&str> {
106        if let XMLNode::Comment(c) = self {
107            Some(c)
108        } else {
109            None
110        }
111    }
112    pub fn as_cdata(&self) -> Option<&str> {
113        if let XMLNode::CData(c) = self {
114            Some(c)
115        } else {
116            None
117        }
118    }
119    pub fn as_text(&self) -> Option<&str> {
120        if let XMLNode::Text(c) = self {
121            Some(c)
122        } else {
123            None
124        }
125    }
126    pub fn as_processing_instruction(&self) -> Option<(&str, Option<&str>)> {
127        if let XMLNode::ProcessingInstruction(s, o) = self {
128            Some((s, o.as_ref().map(|s| s.as_str())))
129        } else {
130            None
131        }
132    }
133}
134
/// Represents an XML element.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Element {
    /// This element's prefix, if any
    pub prefix: Option<String>,

    /// This element's namespace, if any
    pub namespace: Option<String>,

    /// The full list of namespaces, if any
    ///
    /// The `Namespace` type is exported from the `xml-rs` crate.
    pub namespaces: Option<Namespace>,

    /// The name of the element. Does not include any namespace info.
    pub name: String,

    /// The element's attributes
    ///
    /// By default, this is a `HashMap`, but there are two optional features that can change this:
    ///
    /// * If the "attribute-order" feature is enabled, then this is an [IndexMap](https://docs.rs/indexmap/2/indexmap/),
    ///   which will retain item insertion order.
    /// * If the "attribute-sorted" feature is enabled, then this is a [`std::collections::BTreeMap`], which maintains keys in sorted order.
    pub attributes: AttributeMap<AttributeName, String>,

    /// Child nodes, in the order they were added
    pub children: Vec<XMLNode>,
}
164
165/// Errors that can occur parsing XML
166#[derive(Debug)]
167pub enum ParseError {
168    /// The XML is invalid
169    MalformedXml(xml::reader::Error),
170    /// This library is unable to process this XML. This can occur if, for
171    /// example, the XML contains processing instructions.
172    CannotParse,
173}
174
175impl fmt::Display for ParseError {
176    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
177        match *self {
178            ParseError::MalformedXml(ref e) => write!(f, "Malformed XML. {}", e),
179            ParseError::CannotParse => write!(f, "Cannot parse"),
180        }
181    }
182}
183
184impl std::error::Error for ParseError {
185    fn description(&self) -> &str {
186        match *self {
187            ParseError::MalformedXml(..) => "Malformed XML",
188            ParseError::CannotParse => "Cannot parse",
189        }
190    }
191
192    fn cause(&self) -> Option<&dyn std::error::Error> {
193        match *self {
194            ParseError::MalformedXml(ref e) => Some(e),
195            ParseError::CannotParse => None,
196        }
197    }
198}
199
200fn build<B: Read>(reader: &mut EventReader<B>, mut elem: Element) -> Result<Element, ParseError> {
201    loop {
202        match reader.next() {
203            Ok(XmlEvent::EndElement { ref name }) => {
204                if name.local_name == elem.name {
205                    return Ok(elem);
206                } else {
207                    return Err(ParseError::CannotParse);
208                }
209            }
210            Ok(XmlEvent::StartElement {
211                name,
212                attributes,
213                namespace,
214            }) => {
215                let mut attr_map = AttributeMap::new();
216                for attr in attributes {
217                    attr_map.insert(attr.name, attr.value);
218                }
219
220                let new_elem = Element {
221                    prefix: name.prefix,
222                    namespace: name.namespace,
223                    namespaces: if namespace.is_essentially_empty() {
224                        None
225                    } else {
226                        Some(namespace)
227                    },
228                    name: name.local_name,
229                    attributes: attr_map,
230                    children: Vec::new(),
231                };
232                elem.children
233                    .push(XMLNode::Element(build(reader, new_elem)?));
234            }
235            Ok(XmlEvent::Characters(s)) => elem.children.push(XMLNode::Text(s)),
236            Ok(XmlEvent::Whitespace(..)) => (),
237            Ok(XmlEvent::Comment(s)) => elem.children.push(XMLNode::Comment(s)),
238            Ok(XmlEvent::CData(s)) => elem.children.push(XMLNode::CData(s)),
239            Ok(XmlEvent::ProcessingInstruction { name, data }) => elem
240                .children
241                .push(XMLNode::ProcessingInstruction(name, data)),
242            Ok(XmlEvent::StartDocument { .. }) | Ok(XmlEvent::EndDocument) => {
243                return Err(ParseError::CannotParse)
244            }
245            Ok(XmlEvent::Doctype { .. }) => (),
246            Err(e) => return Err(ParseError::MalformedXml(e)),
247        }
248    }
249}
250
251impl Element {
252    /// Create a new empty element with given name
253    ///
254    /// All other fields are empty
255    pub fn new(name: &str) -> Element {
256        Element {
257            name: String::from(name),
258            prefix: None,
259            namespace: None,
260            namespaces: None,
261            attributes: AttributeMap::new(),
262            children: Vec::new(),
263        }
264    }
265
266    /// Parses some data into a list of `XMLNode`s
267    ///
268    /// This is useful when you want to capture comments or processing instructions that appear
269    /// before or after the root node
270    pub fn parse_all<R: Read>(r: R) -> Result<Vec<XMLNode>, ParseError> {
271        let parser_config = ParserConfig::new().ignore_comments(false);
272        Element::parse_all_with_config(r, parser_config)
273    }
274
275    pub fn parse_all_with_config<R: Read>(
276        r: R,
277        parser_config: ParserConfig,
278    ) -> Result<Vec<XMLNode>, ParseError> {
279        let mut reader = EventReader::new_with_config(r, parser_config);
280        let mut root_nodes = Vec::new();
281        loop {
282            match reader.next() {
283                Ok(XmlEvent::StartElement {
284                    name,
285                    attributes,
286                    namespace,
287                }) => {
288                    let mut attr_map = AttributeMap::allocate(attributes.len());
289                    for attr in attributes {
290                        attr_map.insert(attr.name, attr.value);
291                    }
292
293                    let root = Element {
294                        prefix: name.prefix,
295                        namespace: name.namespace,
296                        namespaces: if namespace.is_essentially_empty() {
297                            None
298                        } else {
299                            Some(namespace)
300                        },
301                        name: name.local_name,
302                        attributes: attr_map,
303                        children: Vec::new(),
304                    };
305                    root_nodes.push(XMLNode::Element(build(&mut reader, root)?));
306                }
307                Ok(XmlEvent::Comment(comment_string)) => {
308                    root_nodes.push(XMLNode::Comment(comment_string))
309                }
310                Ok(XmlEvent::Characters(text_string)) => {
311                    root_nodes.push(XMLNode::Text(text_string))
312                }
313                Ok(XmlEvent::CData(cdata_string)) => root_nodes.push(XMLNode::CData(cdata_string)),
314                Ok(XmlEvent::Whitespace(..)) | Ok(XmlEvent::StartDocument { .. }) => continue,
315                Ok(XmlEvent::ProcessingInstruction { name, data }) => {
316                    root_nodes.push(XMLNode::ProcessingInstruction(name, data))
317                }
318                Ok(XmlEvent::EndElement { .. }) => (),
319                Ok(XmlEvent::EndDocument) => return Ok(root_nodes),
320                Ok(XmlEvent::Doctype { .. }) => (),
321                Err(e) => return Err(ParseError::MalformedXml(e)),
322            }
323        }
324    }
325
326    /// Parses some data into an Element
327    pub fn parse<R: Read>(r: R) -> Result<Element, ParseError> {
328        let nodes = Element::parse_all(r)?;
329        for node in nodes {
330            if let XMLNode::Element(elem) = node {
331                return Ok(elem);
332            }
333        }
334        // This assume the underlying xml library throws an error on no root element
335        unreachable!();
336    }
337
338    pub fn parse_with_config<R: Read>(r: R, config: ParserConfig) -> Result<Element, ParseError> {
339        let nodes = Element::parse_all_with_config(r, config)?;
340        for node in nodes {
341            if let XMLNode::Element(elem) = node {
342                return Ok(elem);
343            }
344        }
345        // This assume the underlying xml library throws an error on no root element
346        unreachable!();
347    }
348
349    fn _write<B: Write>(&self, emitter: &mut xml::writer::EventWriter<B>) -> Result<(), Error> {
350        use xml::attribute::Attribute;
351        use xml::name::Name;
352        use xml::writer::events::XmlEvent;
353
354        let mut name = Name::local(&self.name);
355        if let Some(ref ns) = self.namespace {
356            name.namespace = Some(ns);
357        }
358        if let Some(ref p) = self.prefix {
359            name.prefix = Some(p);
360        }
361
362        let mut attributes = Vec::with_capacity(self.attributes.len());
363        for (k, v) in &self.attributes {
364            attributes.push(Attribute {
365                name: k.borrow(),
366                value: v,
367            });
368        }
369
370        let empty_ns = Namespace::empty();
371        let namespace = if let Some(ref ns) = self.namespaces {
372            Cow::Borrowed(ns)
373        } else {
374            Cow::Borrowed(&empty_ns)
375        };
376
377        emitter.write(XmlEvent::StartElement {
378            name,
379            attributes: Cow::Owned(attributes),
380            namespace,
381        })?;
382        for node in &self.children {
383            match node {
384                XMLNode::Element(elem) => elem._write(emitter)?,
385                XMLNode::Text(text) => emitter.write(XmlEvent::Characters(text))?,
386                XMLNode::Comment(comment) => emitter.write(XmlEvent::Comment(comment))?,
387                XMLNode::CData(comment) => emitter.write(XmlEvent::CData(comment))?,
388                XMLNode::ProcessingInstruction(name, data) => match data.to_owned() {
389                    Some(string) => emitter.write(XmlEvent::ProcessingInstruction {
390                        name,
391                        data: Some(&string),
392                    })?,
393                    None => emitter.write(XmlEvent::ProcessingInstruction { name, data: None })?,
394                },
395            }
396            // elem._write(emitter)?;
397        }
398        emitter.write(XmlEvent::EndElement { name: Some(name) })?;
399
400        Ok(())
401    }
402
403    /// Writes out this element as the root element in an new XML document
404    pub fn write<W: Write>(&self, w: W) -> Result<(), Error> {
405        self.write_with_config(w, EmitterConfig::new())
406    }
407
408    /// Writes out this element as the root element in a new XML document using the provided configuration
409    pub fn write_with_config<W: Write>(&self, w: W, config: EmitterConfig) -> Result<(), Error> {
410        use xml::common::XmlVersion;
411        use xml::writer::events::XmlEvent;
412        use xml::writer::EventWriter;
413
414        let write_document_declaration = config.write_document_declaration;
415        let mut emitter = EventWriter::new_with_config(w, config);
416        if write_document_declaration {
417            emitter.write(XmlEvent::StartDocument {
418                version: XmlVersion::Version10,
419                encoding: None,
420                standalone: None,
421            })?;
422        }
423        self._write(&mut emitter)
424    }
425
426    /// Find a child element with the given name and return a reference to it.
427    ///
428    /// Both `&str` and `String` implement `ElementPredicate` and can be used to search for child
429    /// elements that match the given element name with `.get_child("element_name")`.  You can also
430    /// search by `("element_name", "tag_name")` tuple.
431    ///
432    ///
433    /// Note: this will only return Elements.  To get other nodes (like comments), iterate through
434    /// the `children` field.
435    pub fn get_child<P: ElementPredicate>(&self, k: P) -> Option<&Element> {
436        self.children
437            .iter()
438            .filter_map(|e| match e {
439                XMLNode::Element(elem) => Some(elem),
440                _ => None,
441            })
442            .find(|e| k.match_element(e))
443    }
444
445    /// Find a child element with the given name and return a mutable reference to it.
446    pub fn get_mut_child<P: ElementPredicate>(&mut self, k: P) -> Option<&mut Element> {
447        self.children
448            .iter_mut()
449            .filter_map(|e| match e {
450                XMLNode::Element(elem) => Some(elem),
451                _ => None,
452            })
453            .find(|e| k.match_element(e))
454    }
455
456    /// Find a child element with the given name, remove and return it.
457    pub fn take_child<P: ElementPredicate>(&mut self, k: P) -> Option<Element> {
458        let index = self.children.iter().position(|e| match e {
459            XMLNode::Element(elem) => k.match_element(elem),
460            _ => false,
461        });
462        match index {
463            Some(index) => match self.children.remove(index) {
464                XMLNode::Element(elem) => Some(elem),
465                _ => None,
466            },
467            None => None,
468        }
469    }
470
471    /// Returns the inner text/cdata of this element, if any.
472    ///
473    /// If there are multiple text/cdata nodes, they will be all concatenated into one string.
474    pub fn get_text<'a>(&'a self) -> Option<Cow<'a, str>> {
475        let text_nodes: Vec<&'a str> = self
476            .children
477            .iter()
478            .filter_map(|node| node.as_text().or_else(|| node.as_cdata()))
479            .collect();
480        if text_nodes.is_empty() {
481            None
482        } else if text_nodes.len() == 1 {
483            Some(Cow::Borrowed(text_nodes[0]))
484        } else {
485            let mut full_text = String::new();
486            for text in text_nodes {
487                full_text.push_str(text);
488            }
489            Some(Cow::Owned(full_text))
490        }
491    }
492    /// Checks if this element matches the predicate.
493    pub fn matches<P: ElementPredicate>(&self, k: P) -> bool {
494        k.match_element(self)
495    }
496
497    /// Get a reference to the value of an attribute matching the provided predicate.
498    ///
499    /// Note that this will be slower than searching `attributes` directly as
500    /// it iterates over the entries in the map.
501    pub fn get_attribute<P: AttributePredicate>(&self, k: P) -> Option<&String> {
502        self.attributes
503            .iter()
504            .find(|pair| k.match_attribute(pair.0))
505            .map(|pair| pair.1)
506    }
507
508    /// Get a &mut to the value of an attribute matching the provided predicate.
509    ///
510    /// Note that this will be slower than searching `attributes` directly as
511    /// it iterates over the entries in the map.
512    pub fn get_mut_attribute<P: AttributePredicate>(&mut self, k: P) -> Option<&mut String> {
513        self.attributes
514            .iter_mut()
515            .find(|pair| k.match_attribute(pair.0))
516            .map(|pair| pair.1)
517    }
518
519    /// Find an attribute matching the provided predicate, remove, and return its value.
520    ///
521    /// Note that this will be slower than operating on `attributes` directly as
522    /// it iterates over the entries in the map.
523    pub fn take_attribute<P: AttributePredicate>(&mut self, k: P) -> Option<String> {
524        if let Some(key) = self
525            .attributes
526            .keys()
527            .find(|name| k.match_attribute(name))
528            .cloned()
529        {
530            return self.attributes.remove(&key);
531        }
532        None
533    }
534}
535
536/// A predicate for matching elements.
537///
538/// The default implementations allow you to match by tag name or a tuple of
539/// tag name and namespace.
540pub trait ElementPredicate {
541    fn match_element(&self, e: &Element) -> bool;
542}
543
544// Unfortunately,
545// `impl<TN> ElementPredicate for TN where String: PartialEq<TN>` and
546// `impl<TN, NS> ElementPredicate for (TN, NS) where String: PartialEq<TN>, String: PartialEq<NS>`
547// are conflicting implementations, even though we know that there is no
548// implementation for tuples. We just manually implement `ElementPredicate` for
549// all `PartialEq` impls of `String` and forward them to the 1-tuple version.
550//
551// This can probably be fixed once specialization is stable.
552impl<TN> ElementPredicate for (TN,)
553where
554    String: PartialEq<TN>,
555{
556    fn match_element(&self, e: &Element) -> bool {
557        e.name == self.0
558    }
559}
560
561impl ElementPredicate for &str {
562    /// Search by tag name
563    fn match_element(&self, e: &Element) -> bool {
564        (*self,).match_element(e)
565    }
566}
567
568impl<'a> ElementPredicate for Cow<'a, str> {
569    /// Search by tag name
570    fn match_element(&self, e: &Element) -> bool {
571        (&**self,).match_element(e)
572    }
573}
574
575impl ElementPredicate for String {
576    /// Search by tag name
577    fn match_element(&self, e: &Element) -> bool {
578        (&**self,).match_element(e)
579    }
580}
581
582impl<TN, NS> ElementPredicate for (TN, NS)
583where
584    String: PartialEq<TN>,
585    String: PartialEq<NS>,
586{
587    /// Search by a tuple of (tagname, namespace)
588    fn match_element(&self, e: &Element) -> bool {
589        e.name == self.0
590            && e.namespace
591                .as_ref()
592                .map(|ns| ns == &self.1)
593                .unwrap_or(false)
594    }
595}
596
597/// A predicate for matching attributes.
598///
599/// The default implementations allow you to match by attribute name or a tuple of
600/// attribute name and namespace.
601pub trait AttributePredicate {
602    fn match_attribute(&self, n: &AttributeName) -> bool;
603}
604
605impl AttributePredicate for (&str, Option<&str>) {
606    fn match_attribute(&self, n: &AttributeName) -> bool {
607        n.local_name == self.0
608            && match (&n.namespace, &self.1) {
609                (None, None) => true,
610                (Some(ns1), Some(ns2)) => ns1 == ns2,
611                _ => false,
612            }
613    }
614}
615
616impl AttributePredicate for &str {
617    /// Search by attribute name
618    fn match_attribute(&self, n: &AttributeName) -> bool {
619        n.local_name == *self
620    }
621}