elementtree/
lib.rs

1//! A simple library for parsing an XML file into an in-memory tree structure
2//!
3//! Not recommended for large XML files, as it will load the entire file into memory.
4//!
5//! ## Reading
6//!
7//! For reading XML you can use the `Element::from_reader` method which will
8//! parse from a given reader.  Afterwards you end up with a fancy element
9//! tree that can be accessed in various different ways.
10//!
11//! You can use ``("ns", "tag")`` or ``{ns}tag`` to refer to fully qualified
12//! elements.
13//!
14//! ```rust
15//! # use elementtree::Element;
16//! let root = Element::from_reader(r#"<?xml version="1.0"?>
17//! <root xmlns="tag:myns" xmlns:foo="tag:otherns">
18//!     <list a="1" b="2" c="3">
19//!         <item foo:attr="foo1"/>
20//!         <item foo:attr="foo2"/>
21//!         <item foo:attr="foo3"/>
22//!     </list>
23//! </root>
24//! "#.as_bytes()).unwrap();
25//! let list = root.find("{tag:myns}list").unwrap();
26//! for child in list.find_all("{tag:myns}item") {
27//!     println!("attribute: {}", child.get_attr("{tag:otherns}attr").unwrap());
28//! }
29//! ```
30//!
31//! ## Writing
32//!
//! Writing is easy as well, but if you work with namespaces you will need to
//! register them with the root.  Namespaces that have not been registered yet
//! are otherwise registered on the element itself with an empty prefix (or a
//! generated prefix once the empty one is taken), which will blow up the XML size.
37//!
38//! Most methods for modification support chaining in one form or another which
39//! makes modifications slightly more ergonomic.
40//!
41//! ```
42//! # use elementtree::Element;
43//! let ns = "http://example.invalid/#myns";
44//! let other_ns = "http://example.invalid/#otherns";
45//!
46//! let mut root = Element::new((ns, "mydoc"));
47//! root.set_namespace_prefix(other_ns, "other");
48//!
49//! {
50//!     let mut list = root.append_new_child((ns, "list"));
51//!     for x in 0..3 {
52//!         list.append_new_child((ns, "item"))
53//!             .set_text(format!("Item {}", x))
54//!             .set_attr((other_ns, "id"), x.to_string());
55//!     }
56//! }
57//! ```
58//!
59//! ## Design Notes
60//!
61//! This library largely follows the ideas of Python's ElementTree but it has some
62//! specific changes that simplify the model for Rust.  In particular nodes do not
63//! know about their parents or siblings.  While this obviously reduces a lot of
64//! what would be possible with the library it significantly simplifies memory
65//! management and the external API.
66//!
67//! If you are coming from a DOM environment the following differences are the
68//! most striking:
69//!
70//! *   There are no text nodes, instead text is stored either in the `text`
71//!     attribute of a node or in the `tail` of a child node.  This means that
72//!     for most situations just working with the `text` is what you want and
73//!     you can ignore the existence of the `tail`.
74//! *   tags and attributes are implemented through a `QName` abstraction that
//!     simplifies working with namespaces.  Most APIs just accept strings and
76//!     will create `QName`s automatically.
77//! *   namespace prefixes never play a role and are in fact not really exposed.
78//!     Instead all namespaces are managed through their canonical identifier.
79//!
80//! ## Notes on Namespaces
81//!
82//! Namespaces are internally tracked in a shared map attached to elements.  The
83//! map is not exposed but when an element is created another element can be passed
84//! in and the namespace map is copied over.  Internally a copy on write mechanism
85//! is used so when changes are performed on the namespace the namespaces will be
86//! copied and the writer will emit them accordingly.
87//!
88//! Namespaces need to be registered or the XML generated will be malformed.
89#![allow(clippy::wrong_self_convention)]
90
91use std::borrow::Cow;
92use std::cmp::Ord;
93use std::cmp::Ordering;
94use std::collections::btree_map::Iter as BTreeMapIter;
95use std::collections::BTreeMap;
96use std::fmt;
97use std::hash::{Hash, Hasher};
98use std::io;
99use std::io::{Read, Write};
100use std::mem;
101use std::ops::Deref;
102use std::rc::Rc;
103use std::str::Utf8Error;
104
105use string_cache::DefaultAtom as Atom;
106
107mod xml;
108
109use crate::xml::attribute::{Attribute, OwnedAttribute};
110use crate::xml::common::Position as XmlPosition;
111use crate::xml::common::XmlVersion;
112use crate::xml::name::{Name, OwnedName};
113use crate::xml::namespace::{Namespace as XmlNamespaceMap, NS_EMPTY_URI, NS_XMLNS_URI, NS_XML_URI};
114use crate::xml::reader::{
115    Error as XmlReadError, ErrorKind as XmlReadErrorKind, EventReader, ParserConfig, XmlEvent,
116};
117use crate::xml::writer::{Error as XmlWriteError, EventWriter, XmlEvent as XmlWriteEvent};
118use crate::xml::EmitterConfig;
119
120enum XmlAtom<'a> {
121    Shared(Atom),
122    Borrowed(&'a str),
123}
124
125impl<'a> Deref for XmlAtom<'a> {
126    type Target = str;
127
128    #[inline(always)]
129    fn deref(&self) -> &str {
130        match *self {
131            XmlAtom::Shared(ref atom) => atom.deref(),
132            XmlAtom::Borrowed(s) => s,
133        }
134    }
135}
136
137impl<'a> XmlAtom<'a> {
138    #[inline(always)]
139    pub fn borrow(&self) -> &str {
140        self
141    }
142}
143
144impl<'a> fmt::Debug for XmlAtom<'a> {
145    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
146        write!(f, "{:?}", self.borrow())
147    }
148}
149
150impl<'a> Clone for XmlAtom<'a> {
151    fn clone(&self) -> XmlAtom<'a> {
152        XmlAtom::Shared(Atom::from(self.borrow()))
153    }
154}
155
156impl<'a> PartialEq for XmlAtom<'a> {
157    fn eq(&self, other: &XmlAtom<'a>) -> bool {
158        self.borrow().eq(other.borrow())
159    }
160}
161
162impl<'a> Eq for XmlAtom<'a> {}
163
164impl<'a> PartialOrd for XmlAtom<'a> {
165    fn partial_cmp(&self, other: &XmlAtom<'a>) -> Option<Ordering> {
166        self.borrow().partial_cmp(other.borrow())
167    }
168}
169
170impl<'a> Ord for XmlAtom<'a> {
171    fn cmp(&self, other: &XmlAtom<'a>) -> Ordering {
172        self.borrow().cmp(other.borrow())
173    }
174}
175
176/// Convenience trait to get a `QName` from an object.
177///
178/// This is used for the accessor interface on elements.
179pub trait AsQName<'a> {
180    /// Returns a Cow'ed `QName` from the given object.
181    fn as_qname(&self) -> Cow<'a, QName<'a>>;
182}
183
184impl<'a> AsQName<'a> for &'a QName<'a> {
185    #[inline(always)]
186    fn as_qname(&self) -> Cow<'a, QName<'a>> {
187        Cow::Borrowed(self)
188    }
189}
190
191impl<'a> AsQName<'a> for &'a str {
192    #[inline(always)]
193    fn as_qname(&self) -> Cow<'a, QName<'a>> {
194        Cow::Owned(QName::from(self))
195    }
196}
197
198impl<'a> AsQName<'a> for (&'a str, &'a str) {
199    #[inline(always)]
200    fn as_qname(&self) -> Cow<'a, QName<'a>> {
201        Cow::Owned(QName::from_ns_name(Some(self.0), self.1))
202    }
203}
204
205/// A `QName` represents a qualified name.
206///
207/// A qualified name is a tag or attribute name that has a namespace and a
208/// local name.  If the namespace is empty no namespace is assumed.  It
209/// can be constructed from a qualified name string with the ``from``
210/// method.
211///
212/// ## Notes on Memory Management
213///
214/// Qualified names that are user constructed for comparison purposes
215/// usually have a static lifetime because they are created from static
216/// strings.  Creating qualified names from other strings might make
217/// memory management harder which is why `share()` exists which moves
218/// the `QName` internal strings to shared storage in which the lifetime
219/// changes to `'static`.
220///
221/// Common usage examples:
222///
223/// ```no_run
224/// # use elementtree::QName;
225/// let href = QName::from_name("href");
226/// let a = QName::from("{http://www.w3.org/1999/xhtml}a");
227/// ```
228#[derive(Clone)]
229pub struct QName<'a> {
230    ns: Option<XmlAtom<'a>>,
231    name: XmlAtom<'a>,
232}
233
234impl<'a> QName<'a> {
235    /// Creates a qualified name from a given string.
236    ///
237    /// Two formats are supported ``{namespace}tag`` or just ``tag``.
238    ///
239    /// ```
240    /// # use elementtree::QName;
241    /// let a = QName::from("{http://www.w3.org/1999/xhtml}a");
242    /// ```
243    pub fn from(s: &'a str) -> QName<'a> {
244        let mut ns = None;
245        let mut name = None;
246        if s.starts_with('{') {
247            if let Some(index) = s.find('}') {
248                if index > 1 {
249                    ns = Some(XmlAtom::Borrowed(&s[1..index]));
250                }
251                name = Some(XmlAtom::Borrowed(&s[index + 1..]));
252            }
253        }
254
255        QName {
256            ns,
257            name: name.unwrap_or(XmlAtom::Borrowed(s)),
258        }
259    }
260
261    /// Creates a qualified name from a given string without namespace.
262    ///
263    /// This is slightly faster than using ``from()``.
264    pub fn from_name(name: &'a str) -> QName<'a> {
265        QName {
266            ns: None,
267            name: XmlAtom::Borrowed(name),
268        }
269    }
270
271    /// Creates a qualified name from a namespace and name.
272    pub fn from_ns_name(ns: Option<&'a str>, name: &'a str) -> QName<'a> {
273        QName {
274            ns: ns.map(XmlAtom::Borrowed),
275            name: XmlAtom::Borrowed(name),
276        }
277    }
278
279    /// Returns the name portion of the qualified name.  This is the local
280    /// tag or attribute name.
281    pub fn name(&self) -> &str {
282        &self.name
283    }
284
285    /// Returns the optional namespace of this element.  This is the URL of
286    /// the namespace and not the prefix.  The information about the latter
287    /// is not retained.
288    pub fn ns(&self) -> Option<&str> {
289        self.ns.as_ref().map(|x| x.borrow())
290    }
291
292    /// Creates a shared `QName` with static lifetime from an already
293    /// existing `QName`.  The internal strings are interned and might
294    /// be shared with other instances.
295    pub fn share(&self) -> QName<'static> {
296        QName {
297            name: XmlAtom::Shared(Atom::from(self.name.borrow())),
298            ns: self
299                .ns
300                .as_ref()
301                .map(|x| XmlAtom::Shared(Atom::from(x.borrow()))),
302        }
303    }
304
305    fn from_owned_name(name: OwnedName) -> QName<'static> {
306        QName {
307            name: XmlAtom::Shared(Atom::from(name.local_name)),
308            ns: match name.namespace {
309                Some(ns) => {
310                    if !ns.is_empty() {
311                        Some(XmlAtom::Shared(Atom::from(ns)))
312                    } else {
313                        None
314                    }
315                }
316                _ => None,
317            },
318        }
319    }
320}
321
322impl<'a> PartialEq for QName<'a> {
323    fn eq(&self, other: &QName<'a>) -> bool {
324        self.name() == other.name() && self.ns() == other.ns()
325    }
326}
327
328impl<'a> fmt::Debug for QName<'a> {
329    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
330        write!(f, "QName(\"{}\")", self)
331    }
332}
333
334impl<'a> fmt::Display for QName<'a> {
335    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
336        if let Some(ref ns) = self.ns {
337            write!(f, "{{{}}}", ns.borrow())?;
338        }
339        write!(f, "{}", self.name.borrow())
340    }
341}
342
343impl<'a> Eq for QName<'a> {}
344
345impl<'a> Hash for QName<'a> {
346    fn hash<H: Hasher>(&self, state: &mut H) {
347        self.name.hash(state);
348        if let Some(ref ns) = self.ns {
349            ns.hash(state);
350        }
351    }
352}
353
354impl<'a> PartialOrd for QName<'a> {
355    fn partial_cmp(&self, other: &QName<'a>) -> Option<Ordering> {
356        (self.name(), self.ns()).partial_cmp(&(other.name(), other.ns()))
357    }
358}
359
360impl<'a> Ord for QName<'a> {
361    fn cmp(&self, other: &QName<'a>) -> Ordering {
362        (self.name(), self.ns()).cmp(&(other.name(), other.ns()))
363    }
364}
365
366#[derive(Debug, Clone)]
367struct NamespaceMap {
368    prefix_to_ns: BTreeMap<XmlAtom<'static>, XmlAtom<'static>>,
369    ns_to_prefix: BTreeMap<XmlAtom<'static>, XmlAtom<'static>>,
370}
371
372impl NamespaceMap {
373    pub fn new() -> NamespaceMap {
374        NamespaceMap {
375            prefix_to_ns: BTreeMap::new(),
376            ns_to_prefix: BTreeMap::new(),
377        }
378    }
379
380    pub fn get_prefix(&self, url: &str) -> Option<&str> {
381        // same shit as with remove_attr below for the explanation.
382        let atom = XmlAtom::Borrowed(url);
383        let static_atom: &XmlAtom<'static> = unsafe { mem::transmute(&atom) };
384        self.ns_to_prefix.get(static_atom).map(|x| x.borrow())
385    }
386
387    pub fn set_prefix(&mut self, url: &str, prefix: &str) -> Result<(), Error> {
388        let prefix = XmlAtom::Shared(Atom::from(prefix));
389        if self.prefix_to_ns.contains_key(&prefix) {
390            return Err(Error::DuplicateNamespacePrefix);
391        }
392
393        let url = XmlAtom::Shared(Atom::from(url));
394        if let Some(old_prefix) = self.ns_to_prefix.remove(&url) {
395            self.prefix_to_ns.remove(&old_prefix);
396        }
397
398        self.ns_to_prefix.insert(url.clone(), prefix.clone());
399        self.prefix_to_ns.insert(prefix.clone(), url.clone());
400
401        Ok(())
402    }
403
404    fn generate_prefix(&self) -> XmlAtom<'static> {
405        let mut i = 1;
406        loop {
407            let random_prefix = format!("ns{}", i);
408            if !self
409                .prefix_to_ns
410                .contains_key(&XmlAtom::Borrowed(&random_prefix))
411            {
412                return XmlAtom::Shared(Atom::from(random_prefix));
413            }
414            i += 1;
415        }
416    }
417
418    pub fn register_if_missing(&mut self, url: &str, prefix: Option<&str>) -> bool {
419        if self.get_prefix(url).is_some() {
420            return false;
421        }
422
423        let stored_prefix = if let Some(prefix) = prefix {
424            let prefix = XmlAtom::Borrowed(prefix);
425            if self.prefix_to_ns.get(&prefix).is_some() {
426                self.generate_prefix()
427            } else {
428                XmlAtom::Shared(Atom::from(prefix.borrow()))
429            }
430        } else {
431            self.generate_prefix()
432        };
433
434        let url = XmlAtom::Shared(Atom::from(url));
435        self.prefix_to_ns.insert(stored_prefix.clone(), url.clone());
436        self.ns_to_prefix.insert(url, stored_prefix);
437        true
438    }
439}
440
/// Represents an XML element.
///
/// An element is usually created by parsing or through one of the
/// constructors.  It is a node in a tree: it owns its children and does not
/// hold a reference to its parent.
///
/// Imagine a structure like this:
///
/// ```xml
/// <p>Hello <strong>World</strong>!</p>
/// ```
///
/// In this case the structure is more or less represented like this:
///
/// ```ignore
/// Element {
///   tag: "p",
///   text: "Hello ",
///   tail: None,
///   children: [
///     Element {
///       tag: "strong",
///       text: "World",
///       tail: Some("!")
///     }
///   ]
/// }
/// ```
///
/// Namespaces are internally managed and inherited downwards when an
/// element is created.
#[derive(Debug, Clone)]
pub struct Element {
    /// Qualified tag name of the element.
    tag: QName<'static>,
    /// Attribute values keyed by qualified attribute name.
    attributes: BTreeMap<QName<'static>, String>,
    /// Child elements in document order.
    children: Vec<Element>,
    /// Namespace map of this element, if any; the `Rc` lets several
    /// elements share one map.
    nsmap: Option<Rc<NamespaceMap>>,
    /// When set, the writer emits the prefix declarations of `nsmap` on
    /// this element.
    emit_nsmap: bool,
    /// Text inside the element before its first child.
    text: String,
    /// Text that follows the element inside its parent.
    tail: String,
}
482
/// An iterator over children of an element.
///
/// Yields shared references to the children in the order in which they are
/// stored.
pub struct Children<'a> {
    /// Index of the child that is returned next.
    idx: usize,
    /// The element whose children are iterated.
    element: &'a Element,
}
488
/// A mutable iterator over children of an element.
///
/// Yields mutable references to the children in the order in which they are
/// stored.
pub struct ChildrenMut<'a> {
    /// Mutable slice iterator over the child elements.
    iter: ::std::slice::IterMut<'a, Element>,
}
493
/// An iterator over attributes of an element.
///
/// Yields `(name, value)` pairs.
pub struct Attrs<'a> {
    /// Iterator over the underlying attribute map.
    iter: BTreeMapIter<'a, QName<'a>, String>,
}
498
/// An iterator over matching children.
///
/// Yields only those children whose tag equals `tag`.
pub struct FindChildren<'a> {
    /// The tag a child has to have in order to be yielded.
    tag: Cow<'a, QName<'a>>,
    /// Iterator over all children that is being filtered.
    child_iter: Children<'a>,
}
504
/// A mutable iterator over matching children.
///
/// Yields mutable references to those children whose tag equals `tag`.
pub struct FindChildrenMut<'a> {
    /// The tag a child has to have in order to be yielded.
    tag: Cow<'a, QName<'a>>,
    /// Mutable iterator over all children that is being filtered.
    child_iter: ChildrenMut<'a>,
}
510
511/// Represents a position in the source.
512#[derive(Debug, Clone, Copy, Eq, PartialEq, Ord, PartialOrd)]
513pub struct Position {
514    line: u64,
515    column: u64,
516}
517
518impl Position {
519    /// Creates a new position.
520    pub fn new(line: u64, column: u64) -> Position {
521        Position { line, column }
522    }
523
524    fn from_xml_position(pos: &dyn XmlPosition) -> Position {
525        let pos = pos.position();
526        Position::new(pos.row, pos.column)
527    }
528
529    /// Returns the line number of the position
530    pub fn line(&self) -> u64 {
531        self.line
532    }
533    /// Returns the column of the position
534    pub fn column(&self) -> u64 {
535        self.column
536    }
537}
538
539impl fmt::Display for Position {
540    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
541        write!(f, "{}:{}", self.line, self.column)
542    }
543}
544
/// Errors reported by this library.
#[derive(Debug)]
pub enum Error {
    /// The XML is invalid
    MalformedXml {
        /// Description of the syntax problem as reported by the XML reader.
        msg: Cow<'static, str>,
        /// Position in the source at which the problem was detected.
        pos: Position,
    },
    /// An IO Error
    Io(io::Error),
    /// A UTF-8 Error
    Utf8(Utf8Error),
    /// This library is unable to process this XML.  This is returned when
    /// the reader produces an event the tree builder does not handle, or an
    /// end tag that does not match the element being parsed.  Processing
    /// instructions do not cause this error; they are skipped.
    UnexpectedEvent {
        /// Description of the unexpected event.
        msg: Cow<'static, str>,
        /// Position of the reader when the event was encountered.
        pos: Position,
    },
    /// A namespace prefix was already used
    DuplicateNamespacePrefix,
}
566
567impl Error {
568    /// Returns the position of the error if known
569    pub fn position(&self) -> Option<Position> {
570        match self {
571            Error::MalformedXml { pos, .. } => Some(*pos),
572            Error::UnexpectedEvent { pos, .. } => Some(*pos),
573            _ => None,
574        }
575    }
576
577    /// Returns the line number of the error or 0 if unknown
578    pub fn line(&self) -> u64 {
579        self.position().map(|x| x.line()).unwrap_or(0)
580    }
581
582    /// Returns the column of the error or 0 if unknown
583    pub fn column(&self) -> u64 {
584        self.position().map(|x| x.column()).unwrap_or(0)
585    }
586}
587
588impl fmt::Display for Error {
589    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
590        match *self {
591            Error::MalformedXml { ref pos, ref msg } => {
592                write!(f, "Malformed XML: {} ({})", msg, pos)
593            }
594            Error::Io(ref e) => write!(f, "{}", e),
595            Error::Utf8(ref e) => write!(f, "{}", e),
596            Error::UnexpectedEvent { ref msg, .. } => write!(f, "Unexpected XML event: {}", msg),
597            Error::DuplicateNamespacePrefix => {
598                write!(f, "Encountered duplicated namespace prefix")
599            }
600        }
601    }
602}
603
604impl std::error::Error for Error {
605    fn cause(&self) -> Option<&dyn std::error::Error> {
606        match *self {
607            Error::Io(ref e) => Some(e),
608            Error::Utf8(ref e) => Some(e),
609            _ => None,
610        }
611    }
612}
613
614impl From<XmlReadError> for Error {
615    fn from(err: XmlReadError) -> Error {
616        match *err.kind() {
617            XmlReadErrorKind::Io(ref err) => Error::Io(io::Error::new(err.kind(), err.to_string())),
618            XmlReadErrorKind::Utf8(err) => Error::Utf8(err),
619            XmlReadErrorKind::UnexpectedEof => Error::Io(io::Error::new(
620                io::ErrorKind::UnexpectedEof,
621                "Encountered unexpected eof",
622            )),
623            XmlReadErrorKind::Syntax(ref msg) => Error::MalformedXml {
624                msg: msg.clone(),
625                pos: Position::from_xml_position(&err),
626            },
627        }
628    }
629}
630
631impl From<XmlWriteError> for Error {
632    fn from(err: XmlWriteError) -> Error {
633        match err {
634            XmlWriteError::Io(err) => Error::Io(err),
635            err => Err(err).unwrap(),
636        }
637    }
638}
639
640impl<'a> Iterator for Children<'a> {
641    type Item = &'a Element;
642
643    fn next(&mut self) -> Option<&'a Element> {
644        if self.idx < self.element.children.len() {
645            let rv = &self.element.children[self.idx];
646            self.idx += 1;
647            Some(rv)
648        } else {
649            None
650        }
651    }
652}
653
654impl<'a> Iterator for ChildrenMut<'a> {
655    type Item = &'a mut Element;
656
657    fn next(&mut self) -> Option<&'a mut Element> {
658        self.iter.next()
659    }
660}
661
662impl<'a> Iterator for Attrs<'a> {
663    type Item = (&'a QName<'a>, &'a str);
664
665    fn next(&mut self) -> Option<(&'a QName<'a>, &'a str)> {
666        if let Some((k, v)) = self.iter.next() {
667            Some((k, v.as_str()))
668        } else {
669            None
670        }
671    }
672}
673
674impl<'a> Iterator for FindChildren<'a> {
675    type Item = &'a Element;
676
677    fn next(&mut self) -> Option<&'a Element> {
678        use std::borrow::Borrow;
679        loop {
680            if let Some(child) = self.child_iter.next() {
681                if child.tag() == self.tag.borrow() {
682                    return Some(child);
683                }
684            } else {
685                return None;
686            }
687        }
688    }
689}
690
691impl<'a> Iterator for FindChildrenMut<'a> {
692    type Item = &'a mut Element;
693
694    fn next(&mut self) -> Option<&'a mut Element> {
695        use std::borrow::Borrow;
696        let tag: &QName = self.tag.borrow();
697        self.child_iter.find(|x| x.tag() == tag)
698    }
699}
700
701impl Element {
702    /// Creates a new element without any children but a given tag.
703    ///
704    /// This can be used at all times to create a new element however when you
705    /// work with namespaces it's recommended to only use this for the root
706    /// element and then create further children through `new_with_namespaces`
707    /// as otherwise namespaces will not be propagaged downwards properly.
708    pub fn new<'a, Q: AsQName<'a>>(tag: Q) -> Element {
709        Element::new_with_nsmap(&tag.as_qname(), None)
710    }
711
712    /// Creates a new element without any children but inheriting the
713    /// namespaces from another element.
714    ///
715    /// This has the advantage that internally the map will be shared
716    /// across elements for as long as no further modifications are
717    /// taking place.
718    pub fn new_with_namespaces<'a, Q: AsQName<'a>>(tag: Q, reference: &Element) -> Element {
719        Element::new_with_nsmap(&tag.as_qname(), reference.nsmap.clone())
720    }
721
722    fn new_with_nsmap(tag: &QName<'_>, nsmap: Option<Rc<NamespaceMap>>) -> Element {
723        let mut rv = Element {
724            tag: tag.share(),
725            attributes: BTreeMap::new(),
726            nsmap,
727            emit_nsmap: false,
728            children: vec![],
729            text: String::new(),
730            tail: String::new(),
731        };
732        if let Some(url) = tag.ns() {
733            let prefix = rv.get_namespace_prefix(url).unwrap_or("").to_string();
734            rv.register_namespace(url, Some(&prefix));
735        }
736        rv
737    }
738
739    /// Parses some XML data into an `Element` from a reader.
740    pub fn from_reader<R: Read>(r: R) -> Result<Element, Error> {
741        let cfg = ParserConfig::new();
742        let mut reader = cfg.create_reader(r);
743        loop {
744            match reader.next_event() {
745                Ok(XmlEvent::StartElement {
746                    name,
747                    attributes,
748                    namespace,
749                }) => {
750                    return Element::from_start_element(
751                        name,
752                        attributes,
753                        namespace,
754                        None,
755                        &mut reader,
756                    );
757                }
758                Ok(XmlEvent::StartDocument { .. }) | Ok(XmlEvent::ProcessingInstruction { .. }) => {
759                    continue;
760                }
761                Ok(_) => {
762                    return Err(Error::UnexpectedEvent {
763                        msg: Cow::Borrowed("xml construct"),
764                        pos: Position::from_xml_position(&reader),
765                    })
766                }
767                Err(e) => return Err(e.into()),
768            }
769        }
770    }
771
772    /// Dump an element as XML document into a writer.
773    ///
774    /// This will create an XML document with a processing instruction
775    /// to start it.  There is currently no API to only serialize a non
776    /// standalone element.
777    ///
778    /// Currently the writer has no way to customize what is generated
779    /// in particular there is no support yet for automatically indenting
780    /// elements.  The reason for this is that there is no way to ignore
781    /// this information automatically in the absence of DTD support which
782    /// is not really planned.
783    pub fn to_writer<W: Write>(&self, w: W) -> Result<(), Error> {
784        self.to_writer_with_options(w, WriteOptions::new())
785    }
786
787    /// Dump an element as XML document into a writer with option.
788    ///
789    /// This will create an XML document with a processing instruction
790    /// to start it.  There is currently no API to only serialize a non
791    /// standalone element.
792    ///
793    /// Currently the writer has no way to customize what is generated
794    /// in particular there is no support yet for automatically indenting
795    /// elements.  The reason for this is that there is no way to ignore
796    /// this information automatically in the absence of DTD support which
797    /// is not really planned.
798    pub fn to_writer_with_options<W: Write>(
799        &self,
800        w: W,
801        options: WriteOptions,
802    ) -> Result<(), Error> {
803        let mut writer = options.emitter_config.clone().create_writer(w);
804
805        if options.xml_prolog.is_some() {
806            writer.write(XmlWriteEvent::StartDocument {
807                version: match options.xml_prolog.unwrap() {
808                    XmlProlog::Version10 => XmlVersion::Version10,
809                    XmlProlog::Version11 => XmlVersion::Version11,
810                },
811                encoding: Some("utf-8"),
812                standalone: None,
813            })?;
814        }
815
816        self.dump_into_writer(&mut writer)
817    }
818
819    /// Dump an element as XML document into a string
820    pub fn to_string(&self) -> Result<String, Error> {
821        let mut out: Vec<u8> = Vec::new();
822        self.to_writer(&mut out)?;
823        Ok(String::from_utf8(out).unwrap())
824    }
825
826    fn get_xml_name<'a>(&'a self, qname: &'a QName<'a>) -> Name<'a> {
827        let mut name = Name::local(qname.name());
828        if let Some(url) = qname.ns() {
829            name.namespace = Some(url);
830            if let Some(prefix) = self.get_namespace_prefix(url) {
831                if !prefix.is_empty() {
832                    name.prefix = Some(prefix);
833                }
834            }
835        }
836        name
837    }
838
839    fn dump_into_writer<W: Write>(&self, w: &mut EventWriter<W>) -> Result<(), Error> {
840        let name = self.get_xml_name(&self.tag);
841
842        let mut attributes = Vec::with_capacity(self.attributes.len());
843        for (k, v) in self.attributes.iter() {
844            attributes.push(Attribute {
845                name: self.get_xml_name(k),
846                value: v,
847            });
848        }
849
850        let mut namespace = XmlNamespaceMap::empty();
851        if self.emit_nsmap {
852            if let Some(ref nsmap) = self.nsmap {
853                for (prefix, url) in &nsmap.prefix_to_ns {
854                    namespace.put(prefix.borrow(), url.borrow());
855                }
856            }
857        }
858
859        w.write(XmlWriteEvent::StartElement {
860            name,
861            attributes: Cow::Owned(attributes),
862            namespace: Cow::Owned(namespace),
863        })?;
864
865        let text = self.text();
866        if !text.is_empty() {
867            w.write(XmlWriteEvent::Characters(text))?;
868        }
869
870        for elem in &self.children {
871            elem.dump_into_writer(w)?;
872            let text = elem.tail();
873            if !text.is_empty() {
874                w.write(XmlWriteEvent::Characters(text))?;
875            }
876        }
877
878        w.write(XmlWriteEvent::EndElement { name: Some(name) })?;
879
880        Ok(())
881    }
882
883    fn from_start_element<R: Read>(
884        name: OwnedName,
885        attributes: Vec<OwnedAttribute>,
886        namespace: XmlNamespaceMap,
887        parent_nsmap: Option<Rc<NamespaceMap>>,
888        reader: &mut EventReader<R>,
889    ) -> Result<Element, Error> {
890        let mut root = Element {
891            tag: QName::from_owned_name(name),
892            attributes: BTreeMap::new(),
893            nsmap: parent_nsmap,
894            emit_nsmap: false,
895            children: vec![],
896            text: String::new(),
897            tail: String::new(),
898        };
899        for attr in attributes {
900            root.attributes
901                .insert(QName::from_owned_name(attr.name), attr.value);
902        }
903
904        if !namespace.is_essentially_empty() {
905            for (prefix, url) in namespace.0.iter() {
906                root.register_namespace(url, Some(prefix));
907            }
908        };
909
910        root.parse_children(reader)?;
911        Ok(root)
912    }
913
914    fn parse_children<R: Read>(&mut self, reader: &mut EventReader<R>) -> Result<(), Error> {
915        loop {
916            match reader.next_event() {
917                Ok(XmlEvent::EndElement { ref name }) => {
918                    if name.local_name == self.tag.name()
919                        && name.namespace.as_deref() == self.tag.ns()
920                    {
921                        return Ok(());
922                    } else {
923                        return Err(Error::UnexpectedEvent {
924                            msg: Cow::Owned(format!("Unexpected end element {}", &name.local_name)),
925                            pos: Position::from_xml_position(reader),
926                        });
927                    }
928                }
929                Ok(XmlEvent::StartElement {
930                    name,
931                    attributes,
932                    namespace,
933                }) => {
934                    self.children.push(Element::from_start_element(
935                        name,
936                        attributes,
937                        namespace,
938                        self.nsmap.clone(),
939                        reader,
940                    )?);
941                }
942                Ok(XmlEvent::Characters(s)) => {
943                    let child_count = self.children.len();
944                    if child_count > 0 {
945                        self.children[child_count - 1].tail = s;
946                    } else {
947                        self.text = s;
948                    }
949                }
950                Ok(XmlEvent::StartDocument { .. }) | Ok(XmlEvent::ProcessingInstruction { .. }) => {
951                    continue;
952                }
953                Ok(_) => {
954                    return Err(Error::UnexpectedEvent {
955                        msg: Cow::Borrowed("unknown element"),
956                        pos: Position::from_xml_position(reader),
957                    })
958                }
959                Err(e) => {
960                    return Err(e.into());
961                }
962            }
963        }
964    }
965
966    /// Returns the text of a tag.
967    ///
968    /// Note that this does not trim or modify whitespace so the return
969    /// value might contain structural information from the XML file.
970    pub fn text(&self) -> &str {
971        &self.text
972    }
973
974    /// Sets a new text value for the tag.
975    pub fn set_text<S: Into<String>>(&mut self, value: S) -> &mut Element {
976        self.text = value.into();
977        self
978    }
979
980    /// Returns the tail text of a tag.
981    ///
982    /// The tail is the text following an element.
983    pub fn tail(&self) -> &str {
984        &self.tail
985    }
986
987    /// Sets a new tail text value for the tag.
988    pub fn set_tail<S: Into<String>>(&mut self, value: S) -> &mut Element {
989        self.tail = value.into();
990        self
991    }
992
993    /// The tag of the element as qualified name.
994    ///
995    /// Use the `QName` functionality to extract the information from the
996    /// tag name you care about (like the local name).
997    pub fn tag(&self) -> &QName {
998        &self.tag
999    }
1000
1001    /// Sets a new tag for the element.
1002    pub fn set_tag<'a>(&mut self, tag: &QName<'a>) -> &mut Element {
1003        self.tag = tag.share();
1004        self
1005    }
1006
1007    /// Returns the number of children
1008    pub fn child_count(&self) -> usize {
1009        self.children.len()
1010    }
1011
1012    /// Returns the nth child.
1013    pub fn get_child(&self, idx: usize) -> Option<&Element> {
1014        self.children.get(idx)
1015    }
1016
1017    /// Returns the nth child as a mutable reference.
1018    pub fn get_child_mut(&mut self, idx: usize) -> Option<&mut Element> {
1019        self.children.get_mut(idx)
1020    }
1021
1022    /// Removes a child.
1023    ///
1024    /// This returns the element if it was removed or None if the
1025    /// index was out of bounds.
1026    pub fn remove_child(&mut self, idx: usize) -> Option<Element> {
1027        if self.children.len() > idx {
1028            Some(self.children.remove(idx))
1029        } else {
1030            None
1031        }
1032    }
1033
1034    /// Removes all children that don't match a predicate.
1035    pub fn retain_children<F>(&mut self, f: F)
1036    where
1037        F: FnMut(&Element) -> bool,
1038    {
1039        self.children.retain(f);
1040    }
1041
1042    /// Removes all children that don't match a predicate. The predicate is passed a mutable reference to each element.
1043    pub fn retain_children_mut<F>(&mut self, mut f: F)
1044    where
1045        F: FnMut(&mut Element) -> bool,
1046    {
1047        // TODO: change to retain_mut once MSRV moves up to 1.61
1048        let old_children = std::mem::take(&mut self.children);
1049        self.children
1050            .extend(old_children.into_iter().filter_map(|mut item| {
1051                if f(&mut item) {
1052                    Some(item)
1053                } else {
1054                    None
1055                }
1056            }));
1057    }
1058
1059    /// Appends a new child and returns a reference to self.
1060    pub fn append_child(&mut self, child: Element) -> &mut Element {
1061        self.children.push(child);
1062        self
1063    }
1064
1065    /// Appends a new child to the element and returns a reference to it.
1066    ///
1067    /// This uses ``Element::new_with_namespaces`` internally and can
1068    /// then be used like this:
1069    ///
1070    /// ```
1071    /// use elementtree::Element;
1072    ///
1073    /// let ns = "http://example.invalid/#ns";
1074    /// let mut root = Element::new((ns, "mydoc"));
1075    ///
1076    /// {
1077    ///     let mut list = root.append_new_child((ns, "list"));
1078    ///     for x in 0..3 {
1079    ///         list.append_new_child((ns, "item")).set_text(format!("Item {}", x));
1080    ///     }
1081    /// }
1082    /// ```
1083    pub fn append_new_child<'a, Q: AsQName<'a>>(&'a mut self, tag: Q) -> &'a mut Element {
1084        let child = Element::new_with_namespaces(tag, self);
1085        self.append_child(child);
1086        let idx = self.children.len() - 1;
1087        &mut self.children[idx]
1088    }
1089
1090    /// Returns an iterator over all children.
1091    pub fn children(&self) -> Children<'_> {
1092        Children {
1093            idx: 0,
1094            element: self,
1095        }
1096    }
1097
1098    /// Returns a mutable iterator over all children.
1099    pub fn children_mut(&mut self) -> ChildrenMut<'_> {
1100        ChildrenMut {
1101            iter: self.children.iter_mut(),
1102        }
1103    }
1104
1105    /// Returns all children with the given name.
1106    pub fn find_all<'a, Q: AsQName<'a>>(&'a self, tag: Q) -> FindChildren<'a> {
1107        FindChildren {
1108            tag: tag.as_qname(),
1109            child_iter: self.children(),
1110        }
1111    }
1112
1113    /// Returns all children with the given name.
1114    pub fn find_all_mut<'a, Q: AsQName<'a>>(&'a mut self, tag: Q) -> FindChildrenMut<'a> {
1115        FindChildrenMut {
1116            tag: tag.as_qname(),
1117            child_iter: self.children_mut(),
1118        }
1119    }
1120
1121    /// Finds the first matching child
1122    pub fn find<'a, Q: AsQName<'a>>(&'a self, tag: Q) -> Option<&'a Element> {
1123        use std::borrow::Borrow;
1124        let tag = tag.as_qname();
1125
1126        for child in self.children() {
1127            if child.tag() == tag.borrow() {
1128                return Some(child);
1129            }
1130        }
1131        None
1132    }
1133
1134    /// Finds the first matching child and returns a mut ref
1135    pub fn find_mut<'a, Q: AsQName<'a>>(&'a mut self, tag: Q) -> Option<&'a mut Element> {
1136        self.find_all_mut(tag).next()
1137    }
1138
1139    /// Look up an attribute by qualified name.
1140    pub fn get_attr<'a, Q: AsQName<'a>>(&'a self, name: Q) -> Option<&'a str> {
1141        self.attributes.get(&name.as_qname()).map(|x| x.as_str())
1142    }
1143
1144    /// Sets a new attribute.
1145    ///
1146    /// This returns a reference to the element so you can chain the calls.
1147    pub fn set_attr<'a, Q: AsQName<'a>, S: Into<String>>(
1148        &'a mut self,
1149        name: Q,
1150        value: S,
1151    ) -> &'a mut Element {
1152        self.attributes
1153            .insert(name.as_qname().share(), value.into());
1154        self
1155    }
1156
1157    /// Removes an attribute and returns the stored string.
1158    pub fn remove_attr<'a, Q: AsQName<'a>>(&'a mut self, name: Q) -> Option<String> {
1159        // so this requires some explanation.  We store internally QName<'static>
1160        // which means the QName has a global lifetime.  This works because we
1161        // move the internal string storage into a global string cache or we are
1162        // pointing to static memory in the binary.
1163        //
1164        // However while Rust can coerce our BTreeMap from QName<'static> to
1165        // QName<'a> when reading, we can't do the same when writing.  This is
1166        // to prevent us from stashing a QName<'a> into the btreemap.  However on
1167        // remove that restriction makes no sense so we can unsafely transmute it
1168        // away.  I wish there was a better way though.
1169        use std::borrow::Borrow;
1170        let name = name.as_qname();
1171        let name_ref: &QName<'a> = name.borrow();
1172        let name_ref_static: &QName<'static> = unsafe { mem::transmute(name_ref) };
1173        self.attributes.remove(name_ref_static)
1174    }
1175
1176    /// Returns an iterator over all attributes
1177    pub fn attrs(&self) -> Attrs<'_> {
1178        Attrs {
1179            iter: self.attributes.iter(),
1180        }
1181    }
1182
1183    /// Count the attributes
1184    pub fn attr_count(&self) -> usize {
1185        self.attributes.len()
1186    }
1187
1188    fn get_nsmap_mut(&mut self) -> &mut NamespaceMap {
1189        let new_map = match self.nsmap {
1190            Some(ref mut nsmap) if Rc::strong_count(nsmap) == 1 => None,
1191            Some(ref mut nsmap) => Some(Rc::new((**nsmap).clone())),
1192            None => Some(Rc::new(NamespaceMap::new())),
1193        };
1194        if let Some(nsmap) = new_map {
1195            self.nsmap = Some(nsmap);
1196        }
1197        Rc::get_mut(self.nsmap.as_mut().unwrap()).unwrap()
1198    }
1199
1200    /// Registers a namespace with the internal namespace map.
1201    ///
1202    /// Note that there is no API to remove namespaces from an element once
1203    /// the namespace has been set so be careful with modifying this!
1204    ///
1205    /// This optionally also registers a specific prefix however if that prefix
1206    /// is already used a random one is used instead.
1207    pub fn register_namespace(&mut self, url: &str, prefix: Option<&str>) {
1208        if self.get_namespace_prefix(url).is_none()
1209            && self.get_nsmap_mut().register_if_missing(url, prefix)
1210        {
1211            self.emit_nsmap = true;
1212        }
1213    }
1214
1215    /// Sets a specific namespace prefix.  This will also register the
1216    /// namespace if it was unknown so far.
1217    ///
1218    /// In case a prefix is set that is already set elsewhere an error is
1219    /// returned.  It's recommended that this method is only used on the
1220    /// root node before other prefixes are added.
1221    pub fn set_namespace_prefix(&mut self, url: &str, prefix: &str) -> Result<(), Error> {
1222        if self.get_namespace_prefix(url) == Some(prefix) {
1223            Ok(())
1224        } else {
1225            self.get_nsmap_mut().set_prefix(url, prefix)
1226        }
1227    }
1228
1229    /// Returns the assigned prefix for a namespace.
1230    pub fn get_namespace_prefix(&self, url: &str) -> Option<&str> {
1231        match url {
1232            NS_EMPTY_URI => Some(""),
1233            NS_XML_URI => Some("xml"),
1234            NS_XMLNS_URI => Some("xmlns"),
1235            _ => {
1236                if let Some(ref nsmap) = self.nsmap {
1237                    nsmap.get_prefix(url)
1238                } else {
1239                    None
1240                }
1241            }
1242        }
1243    }
1244
1245    /// Finds the first element that match a given path downwards
1246    pub fn navigate<'a, Q: AsQName<'a>>(&'a self, path: &[Q]) -> Option<&'a Element> {
1247        use std::borrow::Borrow;
1248        let mut node = self;
1249
1250        'outer: for piece in path {
1251            let reftag = piece.as_qname();
1252            for child in node.children() {
1253                if child.tag() == reftag.borrow() {
1254                    node = child;
1255                    continue 'outer;
1256                }
1257            }
1258            return None;
1259        }
1260
1261        Some(node)
1262    }
1263}
1264
/// XML prolog versions handled by elementtree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum XmlProlog {
    /// Prolog for XML version 1.0.
    Version10,
    /// Prolog for XML version 1.1.
    Version11,
}
1270
1271/// A struct that define write options.
1272pub struct WriteOptions {
1273    emitter_config: EmitterConfig,
1274    xml_prolog: Option<XmlProlog>,
1275}
1276
1277impl Default for WriteOptions {
1278    fn default() -> WriteOptions {
1279        WriteOptions {
1280            emitter_config: EmitterConfig::new(),
1281            xml_prolog: Some(XmlProlog::Version10),
1282        }
1283    }
1284}
1285
1286impl WriteOptions {
1287    pub fn new() -> WriteOptions {
1288        WriteOptions {
1289            ..WriteOptions::default()
1290        }
1291    }
1292
1293    /// Define which xml prolog will be displayed when rendering an Element.
1294    ///
1295    /// Note that prolog is optional, an XML document with a missing prolog is well-formed but not valid.
1296    ///
1297    /// See RFC: [W3C XML 26 November 2008](https://www.w3.org/TR/xml/#sec-prolog-dtd)
1298    pub fn set_xml_prolog(mut self, prolog: Option<XmlProlog>) -> Self {
1299        self.emitter_config = self
1300            .emitter_config
1301            .write_document_declaration(prolog.is_some());
1302        self.xml_prolog = prolog;
1303
1304        self
1305    }
1306
1307    /// Line separator used to separate lines in formatted output. Default is `"\n"`.
1308    pub fn set_line_separator<I: Into<Cow<'static, str>>>(mut self, sep: I) -> Self {
1309        self.emitter_config = self.emitter_config.line_separator(sep.into());
1310        self
1311    }
1312
1313    /// A string which will be used for a single level of indentation. Default is `"  "`
1314    /// (two spaces).
1315    pub fn set_indent_string<I: Into<Cow<'static, str>>>(mut self, indent: I) -> Self {
1316        self.emitter_config = self.emitter_config.indent_string(indent.into());
1317        self
1318    }
1319
1320    /// Whether or not the emitted document should be indented. Default is false.
1321    ///
1322    /// The emitter is capable to perform automatic indentation of the emitted XML document.
1323    /// It is done in stream-like fashion and does not require the knowledge of the whole
1324    /// document in advance.
1325    ///
1326    /// Sometimes, however, automatic indentation is undesirable, e.g. when you want to keep
1327    /// existing layout when processing an existing XML document. Also the indentiation algorithm
1328    /// is not thoroughly tested. Hence by default it is disabled.
1329    pub fn set_perform_indent(mut self, yes: bool) -> Self {
1330        self.emitter_config = self.emitter_config.perform_indent(yes);
1331        self
1332    }
1333
1334    /// Whether or not to convert elements with empty content to empty elements. Default is true.
1335    ///
1336    /// This option allows turning elements like `<a></a>` (an element with empty content)
1337    /// into `<a />` (an empty element).
1338    pub fn set_normalize_empty_elements(mut self, yes: bool) -> Self {
1339        self.emitter_config = self.emitter_config.normalize_empty_elements(yes);
1340        self
1341    }
1342
1343    /// Whether or not to emit CDATA events as plain characters. Default is false.
1344    ///
1345    /// This option forces the emitter to convert CDATA events into regular character events,
1346    /// performing all the necessary escaping beforehand. This may be occasionally useful
1347    /// for feeding the document into incorrect parsers which do not support CDATA.
1348    pub fn set_cdata_to_characters(mut self, yes: bool) -> Self {
1349        self.emitter_config = self.emitter_config.cdata_to_characters(yes);
1350        self
1351    }
1352
1353    /// Whether or not to automatically insert spaces before the trailing `/>` in self-closing
1354    /// elements. Default is true.
1355    ///
1356    /// This option is only meaningful if `normalize_empty_elements` is true. For example, the
1357    /// element `<a></a>` would be unaffected. When `normalize_empty_elements` is true, then when
1358    /// this option is also true, the same element would appear `<a />`. If this option is false,
1359    /// then the same element would appear `<a/>`.
1360    pub fn set_pad_self_closing(mut self, yes: bool) -> Self {
1361        self.emitter_config = self.emitter_config.pad_self_closing(yes);
1362        self
1363    }
1364
1365    /// Whether or not to automatically insert leading and trailing spaces in emitted comments,
1366    /// if necessary. Default is true.
1367    ///
1368    /// This is a convenience option in order for the user not to append spaces before and after
1369    /// comments text in order to get more pretty comments: `<!-- something -->` instead of
1370    /// `<!--something-->`.
1371    pub fn set_autopad_comments(mut self, yes: bool) -> Self {
1372        self.emitter_config = self.emitter_config.autopad_comments(yes);
1373        self
1374    }
1375}