simple_xml/
lib.rs

1//! XML parser and writer
2//! This crate can load xml from a file or string and parse it into memory
//! XML can also be manipulated or created and then written to a file
4//! ## Loading xml from a file
5//! ```
6//! fn load_message() -> Result<(), simple_xml::Error> {
7//!     let root = simple_xml::from_file("examples/message.xml")?;
//!     // Since there can be multiple nodes/tags with the same name, we need to index twice
9//!     let heading = &root["heading"][0];
10//!     println!("Heading: {}", heading.content);
11//!     // Access attributes
12//!     let lang = root.get_attribute("lang").expect("Missing lang attribute");
13//!     println!("Language: {}", lang);
14//!     Ok(())
15//! }
16//! ```
17//! ## Creating xml structures
18//! ```
19//! let name = String::from("Tim Roberts");
20//! let health = 50;
21//!
22//! let mut player = simple_xml::new("player", String::new());
23//! player.add_new_node("health", health.to_string());
24//! player.add_new_node("name", name);
25//! // Save to file
26//! player.save_to_file("./player.xml");
27//! ```
//! For more examples, see the tests
29
30use std::collections::HashMap;
31use std::fs::File;
32use std::io;
33use std::io::Write;
34use std::path::Path;
35use std::{fmt, ops};
36
37mod split_unquoted;
38use split_unquoted::SplitUnquoted;
39
40pub mod error;
41pub use error::Error;
42pub use error::ParseError;
43
/// A single XML element together with its attributes, text content and
/// child elements.
///
/// The default value is a node with an empty tag, no attributes, no children
/// and no content.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Node {
    /// Name of the element's tag
    pub tag: String,
    /// Attributes of the element, keyed by attribute name
    pub attributes: HashMap<String, String>,
    /// Child elements grouped by tag name; several children may share a tag,
    /// hence the `Vec` per key
    nodes: HashMap<String, Vec<Node>>,
    /// Text content of the element
    pub content: String,
}
51
/// Intermediate result produced when one element is parsed out of a slice
struct Payload<'a> {
    /// Trimmed text that preceded the element's opening `<`
    prolog: &'a str,
    /// The parsed element, or `None` when the slice contained no tag
    node: Option<Node>,
    /// Unparsed text that follows the element
    remaining: &'a str,
}
57
58fn validate_root(root: Result<Payload, Error>) -> Result<Node, Error> {
59    match root {
60        Ok(v) if v.prolog.len() != 0 => Err(Error::ContentOutsideRoot),
61        Ok(v) => Ok(v.node.unwrap_or(Node {
62            tag: String::new(),
63            content: String::new(),
64            nodes: HashMap::new(),
65            attributes: HashMap::new(),
66        })),
67        Err(e) => Err(e),
68    }
69}
70
71/// Loads an xml structure from a file and returns appropriate errors
72pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Node, Error> {
73    validate_root(load_from_slice(&std::fs::read_to_string(path)?))
74}
75
76/// Loads an xml structure from a string and returns appropriate errors
77pub fn from_string(string: &str) -> Result<Node, Error> {
78    validate_root(load_from_slice(string))
79}
80
81/// Creates a new empty node
82/// Nodes and attributes can be added later
83/// Content is taken owned as to avoid large copy
84/// Tag is not taken owned as it is most often a string literal
85pub fn new(tag: &str, content: String) -> Node {
86    Node {
87        attributes: HashMap::new(),
88        content,
89        tag: tag.to_owned(),
90        nodes: HashMap::new(),
91    }
92}
93
94/// Creates a new node with given tag, attributes content, and child nodes
95pub fn new_filled(
96    tag: &str,
97    attributes: HashMap<String, String>,
98    content: String,
99    nodes: HashMap<String, Vec<Node>>,
100) -> Node {
101    Node {
102        tag: tag.to_owned(),
103        attributes,
104        nodes,
105        content,
106    }
107}
108
/// Counts how many newline characters ('\n') occur in a slice
fn newlines_in_slice(string: &str) -> usize {
    string.matches('\n').count()
}
113
114/// Loads a xml structure from a slice
115/// Ok variant contains a payload with the child node, name prolog, and remaining stringtuple with (prolog, tag_name, tag_data, remaining_from_in)
116fn load_from_slice(string: &str) -> Result<Payload, Error> {
117    let opening_del = match string.find("<") {
118        Some(v) => v,
119        None => {
120            return Ok(Payload {
121                prolog: "",
122                node: None,
123                remaining: string,
124            });
125        }
126    };
127
128    let closing_del = match string.find(">") {
129        Some(v) => v,
130        None => {
131            return Err(Error::ParseError(
132                ParseError::MissingClosingDelimiter,
133                newlines_in_slice(&string[..opening_del]),
134            ))
135        }
136    };
137
138    let mut tag_parts =
139        SplitUnquoted::split(&string[opening_del + 1..closing_del], |c| c.is_whitespace());
140
141    let tag_name = tag_parts.next().unwrap().trim();
142
143    // Collect the prolog as everything before opening tag excluding whitespace
144    let prolog = string[..opening_del].trim();
145
146    // Is a comment
147    // Attempt to read past comment
148    if &tag_name[0..1] == "?" {
149        return load_from_slice(&string[closing_del + 1..]);
150    }
151
152    let mut attributes = HashMap::new();
153    for part in tag_parts {
154        // Last closing of empty node
155        if part == "/" {
156            break;
157        }
158
159        let equal_sign = match part.find("=") {
160            Some(v) => v,
161            None => {
162                return Err(Error::ParseError(
163                    ParseError::MissingAttributeValue(part.to_owned()),
164                    newlines_in_slice(&string[..closing_del]),
165                ))
166            }
167        };
168
169        // Get key and value from attribute
170        let (k, v) = part.split_at(equal_sign);
171
172        // Remove quotes from value
173        let v = if &v[1..2] == "\"" && (&v[v.len() - 1..] == "\"" || v.ends_with("\"/")) {
174            &v[2..v.len() - 1]
175        } else {
176            return Err(Error::ParseError(
177                ParseError::MissingQuotes(part.to_owned()),
178                newlines_in_slice(&string[..closing_del]),
179            ));
180        };
181        attributes.insert(k.to_owned(), v.to_owned());
182    }
183
184    // Empty but valid node
185    if string[opening_del + 1..closing_del].ends_with("/") {
186        return Ok(Payload {
187            prolog,
188            node: Some(Node {
189                tag: tag_name.to_owned(),
190                nodes: HashMap::new(),
191                attributes,
192                content: String::new(),
193            }),
194            remaining: &string[closing_del + 1..],
195        });
196    }
197
198    // Find the closing tag index
199    let closing_tag = match string.find(&format!("</{}>", tag_name)) {
200        Some(v) => v,
201        None => {
202            return Err(Error::ParseError(
203                ParseError::MissingClosingTag(tag_name.to_owned()),
204                newlines_in_slice(&string[..closing_del]),
205            ))
206        }
207    };
208
209    let mut content = String::with_capacity(512);
210    let mut nodes = HashMap::new();
211
212    // Load the inside contents and nodes
213    let mut buf = &string[closing_del + 1..closing_tag];
214    let mut offset = closing_del;
215    while buf.len() != 0 {
216        let payload = load_from_slice(buf).map_err(|e| match e {
217            Error::ParseError(e, ln) => {
218                Error::ParseError(e, ln + newlines_in_slice(&string[..offset]))
219            }
220            e => e,
221        })?;
222
223        if let Some(node) = payload.node {
224            let v = nodes
225                .entry(node.tag.clone())
226                .or_insert(Vec::with_capacity(1));
227            v.push(node);
228        }
229
230        // Nothing was read by node, no more nodes
231        if payload.remaining.as_ptr() == buf.as_ptr() {
232            break;
233        }
234
235        // Put what was before the next tag into the content of the parent tag
236        content.push_str(&payload.prolog);
237        offset += buf.len() - payload.remaining.len();
238        buf = payload.remaining;
239    }
240
241    // Add the remaining inside content to content after no more nodes where found
242    content.push_str(buf);
243
244    let remaining = &string[closing_tag + tag_name.len() + 3..];
245
246    Ok(Payload {
247        prolog,
248        node: Some(Node {
249            tag: tag_name.to_owned(),
250            attributes,
251            nodes,
252            content: content.trim().into(),
253        }),
254        remaining,
255    })
256}
257
258impl Node {
259    /// Returns a list of all nodes with the specified tag
260    /// If no nodes with the specified tag exists, None is returned
261    pub fn get_nodes(&self, tag: &str) -> Option<&Vec<Node>> {
262        self.nodes.get(tag)
263    }
264
265    /// Returns a list of all nodes with the specified tag
266    /// If no nodes with the specified tag exists, an Err of TagNotFound is returned containing the parent name and requested node name
267    /// Otherwise, works exactly like get_nodes but can be chained with ? (try operator)
268    pub fn try_get_nodes(&self, tag: &str) -> Result<&Vec<Node>, Error> {
269        match self.nodes.get(tag) {
270            Some(v) => Ok(v),
271            None => Err(Error::TagNotFound(self.tag.to_owned(), tag.to_owned())),
272        }
273    }
274
275    /// Adds or updates an attribute
276    /// If an attribute with that key already exists it is returned
277    pub fn add_attribute(&mut self, key: &str, val: &str) -> Option<String> {
278        self.attributes.insert(key.to_owned(), val.to_owned())
279    }
280
281    // Gets an attribute by name or returns None if it doesn't exist
282    pub fn get_attribute(&self, key: &str) -> Option<&String> {
283        self.attributes.get(key)
284    }
285
286    /// Gets an attribute by name or returns an Err of AttributeNotFound containing the parent tag and the requested key
287    /// Otherwise, works exactly like get_nodes but can be chained with ? (try operator)
288    pub fn try_get_attribute(&self, key: &str) -> Result<&String, Error> {
289        match self.attributes.get(key) {
290            Some(v) => Ok(v),
291            None => Err(Error::AttributeNotFound(
292                self.tag.to_owned(),
293                key.to_owned(),
294            )),
295        }
296    }
297
298    /// Inserts a new node node with the name of the node field
299    pub fn add_node(&mut self, node: Node) {
300        let v = self
301            .nodes
302            .entry(node.tag.clone())
303            .or_insert(Vec::with_capacity(1));
304        v.push(node);
305    }
306
307    /// Inserts a new node into the xml structure
308    /// Does the same thing as node.add_node(simple_xml::new(tag, content));
309    pub fn add_new_node(&mut self, tag: &str, content: String) {
310        self.add_node(new(tag, content));
311    }
312
313    /// This writes an xml structure to a file specified by path
314    /// Uses the non-pretty to_string formatting
315    pub fn save_to_file<P: AsRef<Path>>(&self, path: P) -> io::Result<()> {
316        let mut file = File::create(path)?;
317        file.write_all(self.to_string().as_bytes())?;
318
319        Ok(())
320    }
321
322    /// This writes an xml structure to a file specified by path
323    /// Uses the pretty to_string_pretty formatting
324    pub fn save_to_file_pretty<P: AsRef<Path>>(&self, path: P) -> io::Result<()> {
325        let mut file = File::create(path)?;
326        file.write_all(self.to_string_pretty().as_bytes())?;
327
328        Ok(())
329    }
330
331    // Converts an xml structure to a string with whitespace formatting
332    pub fn to_string_pretty(&self) -> String {
333        fn internal(node: &Node, depth: usize) -> String {
334            if node.tag == "" {
335                return "".to_owned();
336            }
337
338            match node.nodes.len() + node.content.len() {
339                0 => format!(
340                    "{indent}<{}{}/>\n",
341                    node.tag,
342                    node.attributes
343                        .iter()
344                        .map(|(k, v)| format!(" {}=\"{}\"", k, v))
345                        .collect::<String>(),
346                    indent = " ".repeat(depth * 4)
347                ),
348                _ => format!(
349                    "{indent}<{tag}{attr}>{beg}{nodes}{content}{end}</{tag}>\n",
350                    tag = node.tag,
351                    attr = node
352                        .attributes
353                        .iter()
354                        .map(|(k, v)| format!(" {}=\"{}\"", k, v))
355                        .collect::<String>(),
356                    nodes = node
357                        .nodes
358                        .iter()
359                        .flat_map(|(_, nodes)| nodes.iter())
360                        .map(|node| internal(node, depth + 1))
361                        .collect::<String>(),
362                    beg = match node.nodes.len() {
363                        0 => "",
364                        _ => "\n",
365                    },
366                    end = match node.nodes.len() {
367                        0 => "".to_owned(),
368                        _ => " ".repeat(depth * 4),
369                    },
370                    content = node.content,
371                    indent = " ".repeat(depth * 4),
372                ),
373            }
374        }
375        internal(&self, 0)
376    }
377}
378
379impl std::fmt::Display for Node {
380    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> fmt::Result {
381        if self.tag == "" {
382            return write!(f, "");
383        }
384
385        match self.nodes.len() + self.content.len() {
386            0 => write!(
387                f,
388                "<{}{}/>",
389                self.tag,
390                self.attributes
391                    .iter()
392                    .map(|(k, v)| format!(" {}=\"{}\"", k, v))
393                    .collect::<String>(),
394            ),
395            _ => write!(
396                f,
397                "<{tag}{attr}>{nodes}{content}</{tag}>",
398                tag = self.tag,
399                attr = self
400                    .attributes
401                    .iter()
402                    .map(|(k, v)| format!(" {}=\"{}\"", k, v))
403                    .collect::<String>(),
404                nodes = self
405                    .nodes
406                    .iter()
407                    .flat_map(|(_, nodes)| nodes.iter())
408                    .map(|node| node.to_string())
409                    .collect::<String>(),
410                content = self.content,
411            ),
412        }
413    }
414}
415
416/// Returns a slice of all node nodes with the specified tag
417/// If no nodes with the specified tag exists, an empty slice is returned
418impl ops::Index<&str> for Node {
419    type Output = [Node];
420    fn index(&self, tag: &str) -> &Self::Output {
421        match self.nodes.get(tag) {
422            Some(v) => &v[..],
423            None => &[],
424        }
425    }
426}