xml/
lib.rs

1// RustyXML
2// Copyright 2013-2016 RustyXML developers
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10#![crate_name = "xml"]
11#![crate_type = "lib"]
12#![forbid(non_camel_case_types)]
13#![warn(missing_docs)]
14// Required for benchmarks
15#![cfg_attr(feature = "bench", feature(test))]
16
17/*!
18 * An XML parsing library
19 */
20
21pub use crate::element::ChildElements;
22pub use crate::element::Element;
23pub use crate::element_builder::BuilderError;
24pub use crate::element_builder::ElementBuilder;
25pub use crate::parser::Event;
26pub use crate::parser::Parser;
27pub use crate::parser::ParserError;
28
29use std::char;
30use std::fmt;
31
32#[cfg(feature = "ordered_attrs")]
33pub use indexmap::IndexMap as AttrMap;
34#[cfg(not(feature = "ordered_attrs"))]
35pub use std::collections::HashMap as AttrMap;
36
37mod element;
38mod element_builder;
39mod parser;
40
41// General functions
42
/// Replaces the five XML-special characters with their predefined entities.
///
/// `&` becomes `&amp;`, `<` becomes `&lt;`, `>` becomes `&gt;`, `'` becomes
/// `&apos;` and `"` becomes `&quot;`. Every other character is copied through
/// unchanged.
#[inline]
pub fn escape(input: &str) -> String {
    // The output is at least as long as the input, so start from that size.
    let mut escaped = String::with_capacity(input.len());

    for ch in input.chars() {
        let entity = match ch {
            '&' => "&amp;",
            '<' => "&lt;",
            '>' => "&gt;",
            '\'' => "&apos;",
            '"' => "&quot;",
            _ => {
                // Not a special character: copy it verbatim.
                escaped.push(ch);
                continue;
            }
        };
        escaped.push_str(entity);
    }

    escaped
}
60
/// Unescapes all valid XML entities in a string.
///
/// Recognized are the five predefined entities (`&quot;`, `&apos;`, `&gt;`,
/// `&lt;`, `&amp;`) and numeric character references in decimal (`&#38;`) or
/// hexadecimal (`&#x26;`) form.
///
/// # Errors
///
/// Returns the first invalid entity on failure:
///
/// * an unknown entity name, or a numeric reference that is empty, contains
///   anything other than digits (including a `+`/`-` sign), or does not name a
///   valid `char`, yields `"&<entity>;"`;
/// * an `&` that is not followed by a `;` before the next `&` (or the end of
///   the input) yields `&` plus the text that follows it.
#[inline]
pub fn unescape(input: &str) -> Result<String, String> {
    // Returns the numeric value of a character reference body such as `#38`
    // or `#x26` (the text between '&' and ';'), or `None` if it is malformed.
    fn char_ref_value(ent: &str) -> Option<u32> {
        let (digits, radix) = if ent.starts_with("#x") {
            (&ent[2..], 16)
        } else if ent.starts_with('#') {
            (&ent[1..], 10)
        } else {
            return None;
        };
        // `from_str_radix` tolerates a leading '+', but a character reference
        // consists of digits only, so a sign has to be rejected up front.
        if digits.starts_with('+') {
            return None;
        }
        u32::from_str_radix(digits, radix).ok()
    }

    let mut result = String::with_capacity(input.len());

    let mut it = input.split('&');

    // Push everything before the first '&'
    if let Some(sub) = it.next() {
        result.push_str(sub);
    }

    // Every remaining piece started right after an '&', so it has to begin
    // with an entity terminated by ';'.
    for sub in it {
        let idx = match sub.find(';') {
            Some(idx) => idx,
            None => return Err("&".to_owned() + sub),
        };
        let ent = &sub[..idx];
        match ent {
            "quot" => result.push('"'),
            "apos" => result.push('\''),
            "gt" => result.push('>'),
            "lt" => result.push('<'),
            "amp" => result.push('&'),
            _ => match char_ref_value(ent).and_then(char::from_u32) {
                Some(c) => result.push(c),
                None => return Err(format!("&{};", ent)),
            },
        }
        // Plain text following the entity, up to the next '&'.
        result.push_str(&sub[idx + 1..]);
    }
    Ok(result)
}
105
106// General types
107#[derive(Clone, PartialEq, Debug)]
108/// An Enum describing a XML Node
109pub enum Xml {
110    /// An XML Element
111    ElementNode(Element),
112    /// Character Data
113    CharacterNode(String),
114    /// CDATA
115    CDATANode(String),
116    /// A XML Comment
117    CommentNode(String),
118    /// Processing Information
119    PINode(String),
120}
121
/// The data carried by an opening tag.
#[derive(Debug, PartialEq, Eq)]
pub struct StartTag {
    /// Name of the tag.
    pub name: String,
    /// Namespace of the tag, if it has one.
    pub ns: Option<String>,
    /// Prefix of the tag, if it has one.
    pub prefix: Option<String>,
    /// Attributes of the tag, mapping a `(String, Option<String>)` key to the
    /// attribute value. The key is presumably (attribute name, attribute
    /// namespace) — confirm against the parser.
    pub attributes: AttrMap<(String, Option<String>), String>,
}
134
/// The data carried by a closing tag.
#[derive(Debug, PartialEq, Eq)]
pub struct EndTag {
    /// Name of the tag.
    pub name: String,
    /// Namespace of the tag, if it has one.
    pub ns: Option<String>,
    /// Prefix of the tag, if it has one.
    pub prefix: Option<String>,
}
145
146impl fmt::Display for Xml {
147    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
148        match *self {
149            Xml::ElementNode(ref elem) => elem.fmt(f),
150            Xml::CharacterNode(ref data) => write!(f, "{}", escape(data)),
151            Xml::CDATANode(ref data) => write!(f, "<![CDATA[{}]]>", data),
152            Xml::CommentNode(ref data) => write!(f, "<!--{}-->", data),
153            Xml::PINode(ref data) => write!(f, "<?{}?>", data),
154        }
155    }
156}
157
#[cfg(test)]
mod lib_tests {
    use super::{escape, unescape, Element, Xml};

    /// Builds an element called `name` with no namespace, attributes or children.
    fn bare(name: &str) -> Element {
        Element::new(String::from(name), None, vec![])
    }

    /// Builds an `a` element in namespace `ns` with a single `href` attribute.
    fn anchor_with_href(ns: Option<String>) -> Element {
        Element::new(
            String::from("a"),
            ns,
            vec![(
                String::from("href"),
                None,
                String::from("http://rust-lang.org"),
            )],
        )
    }

    #[test]
    fn test_escape() {
        assert_eq!(escape("&<>'\""), "&amp;&lt;&gt;&apos;&quot;");
    }

    #[test]
    fn test_unescape() {
        let decoded = unescape("&amp;lt;&lt;&gt;&apos;&quot;&#x201c;&#x201d;&#38;&#34;");
        assert_eq!(
            decoded,
            Ok(String::from("&lt;<>'\"\u{201c}\u{201d}&\"")),
        );
    }

    #[test]
    fn test_unescape_invalid() {
        // `&amp;` is fine; `&nbsp;` is the first entity that is not recognized.
        assert_eq!(unescape("&amp;&nbsp;"), Err(String::from("&nbsp;")));
    }

    #[test]
    fn test_show_element() {
        // Empty element.
        assert_eq!(bare("a").to_string(), "<a/>");

        // Attribute only.
        assert_eq!(
            anchor_with_href(None).to_string(),
            "<a href='http://rust-lang.org'/>",
        );

        // Child only.
        let mut parent = bare("a");
        parent.tag(bare("b"));
        assert_eq!(parent.to_string(), "<a><b/></a>");

        // Attribute and child.
        let mut parent = anchor_with_href(None);
        parent.tag(bare("b"));
        assert_eq!(
            parent.to_string(),
            "<a href='http://rust-lang.org'><b/></a>",
        );
    }

    #[test]
    fn test_show_element_xmlns() {
        // Parsed documents must print back exactly as they were written.
        let round_trips = [
            "<a xmlns='urn:test'/>",
            "<a xmlns='urn:test'><b xmlns='urn:toast'/></a>",
        ];
        for markup in &round_trips {
            let parsed: Element = markup.parse().unwrap();
            assert_eq!(parsed.to_string(), *markup);
        }

        let namespaced = anchor_with_href(Some(String::from("urn:test")));
        assert_eq!(
            namespaced.to_string(),
            "<a xmlns='urn:test' href='http://rust-lang.org'/>",
        );
    }

    #[test]
    fn test_show_characters() {
        let node = Xml::CharacterNode(String::from("some text"));
        assert_eq!(node.to_string(), "some text");
    }

    #[test]
    fn test_show_cdata() {
        let node = Xml::CDATANode(String::from("some text"));
        assert_eq!(node.to_string(), "<![CDATA[some text]]>");
    }

    #[test]
    fn test_show_comment() {
        let node = Xml::CommentNode(String::from("some text"));
        assert_eq!(node.to_string(), "<!--some text-->");
    }

    #[test]
    fn test_show_pi() {
        let node = Xml::PINode(String::from("xml version='1.0'"));
        assert_eq!(node.to_string(), "<?xml version='1.0'?>");
    }

    #[test]
    fn test_content_str() {
        let mut root = bare("a");
        root.pi(String::from("processing information"))
            .cdata(String::from("<hello/>"))
            .tag_stay(bare("b"))
            .text(String::from("World"))
            .comment(String::from("Nothing to see"));
        // Only the CDATA and text children are expected in the content string.
        assert_eq!(root.content_str(), "<hello/>World");
    }
}
270
#[cfg(test)]
#[cfg(feature = "bench")]
mod lib_bench {
    extern crate test;

    use self::test::Bencher;
    use super::{escape, unescape};

    #[bench]
    fn bench_escape(bh: &mut Bencher) {
        // 100 copies of every character that `escape` has to rewrite.
        let sample = "&<>'\"".repeat(100);
        bh.iter(|| escape(&sample));
        // Report throughput relative to the size of the input.
        bh.bytes = sample.len() as u64;
    }

    #[bench]
    fn bench_unescape(bh: &mut Bencher) {
        // 50 copies of each predefined entity.
        let sample = "&amp;&lt;&gt;&apos;&quot;".repeat(50);
        bh.iter(|| unescape(&sample));
        // Report throughput relative to the size of the input.
        bh.bytes = sample.len() as u64;
    }
}
293}