szl_simple_xml/
lib.rs

1//! XML parser and writer
2//! This crate can load xml from a file or string and parse it into memory
//! XML can also be manipulated or created and then written to a file
4//! ## Loading xml from a file
5//! ```
6//! fn load_message() -> Result<(), simple_xml::Error> {
7//!     let root = simple_xml::from_file("examples/message.xml")?;
//!     // Since there can be multiple nodes/tags with the same name, we need to index twice
9//!     let heading = &root["heading"][0];
10//!     println!("Heading: {}", heading.content);
11//!     // Access attributes
12//!     let lang = root.get_attribute("lang").expect("Missing lang attribute");
13//!     println!("Language: {}", lang);
14//!     Ok(())
15//! }
16//! ```
17//! ## Creating xml structures
18//! ```
19//! let name = String::from("Tim Roberts");
20//! let health = 50;
21//!
22//! let mut player = simple_xml::new("player", String::new());
23//! player.add_new_node("health", health.to_string());
24//! player.add_new_node("name", name);
25//! // Save to file
26//! player.save_to_file("./player.xml");
27//! ```
28//! ## Editing xml structures
29//! ```
//! let mut file =
//!     szl_simple_xml::from_file("./examples/note.xml").expect("Failed to parse simple_xml");
//! let resources =
//!     &mut file.get_mut_nodes("resources").unwrap()[0].get_mut_nodes("resource").unwrap()[0];
//!
//! let href = String::from("page1.html");
//! let mut new_file_node = szl_simple_xml::new("file", String::new());
//! new_file_node.add_attribute("href", &href);
//!
//! resources.add_node(new_file_node);
//! let write_file = file.save_to_file_pretty("./test.xml");
41//! ```
//! For more examples, see the tests
43
44
45use std::collections::HashMap;
46use std::fs::File;
47use std::io;
48use std::io::Write;
49use std::path::Path;
50use std::{fmt, ops};
51
52mod split_unquoted;
53use split_unquoted::SplitUnquoted;
54
55pub mod error;
56pub use error::Error;
57pub use error::ParseError;
58
/// A single XML element: its tag name, attributes, child elements and text content.
///
/// `Clone`, `PartialEq`/`Eq` and `Default` are derived so that subtrees can be duplicated,
/// compared in assertions, and created empty without spelling out every field.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Node {
    /// Name of the element's tag.
    pub tag: String,
    /// Attribute values keyed by attribute name.
    pub attributes: HashMap<String, String>,
    /// Child elements grouped by their tag name; each list keeps insertion order.
    nodes: HashMap<String, Vec<Node>>,
    /// Text content of the element.
    pub content: String,
}
66
/// Intermediate result produced by one `load_from_slice` call.
struct Payload<'a> {
    /// Trimmed text that preceded the parsed tag (empty when no tag was found).
    prolog: &'a str,
    /// The parsed element, or `None` when the input contained no `<`.
    node: Option<Node>,
    /// Input left over after the parsed element (the whole input when no tag was found).
    remaining: &'a str,
}
72
73fn validate_root(root: Result<Payload, Error>) -> Result<Node, Error> {
74    match root {
75        Ok(v) if !v.prolog.is_empty() => Err(Error::ContentOutsideRoot),
76        Ok(v) => Ok(v.node.unwrap_or(Node {
77            tag: String::new(),
78            content: String::new(),
79            nodes: HashMap::new(),
80            attributes: HashMap::new(),
81        })),
82        Err(e) => Err(e),
83    }
84}
85
86/// Loads an xml structure from a file and returns appropriate errors
87pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Node, Error> {
88    validate_root(load_from_slice(&std::fs::read_to_string(path)?))
89}
90
91/// Loads an xml structure from a string and returns appropriate errors
92pub fn from_string(string: &str) -> Result<Node, Error> {
93    validate_root(load_from_slice(string))
94}
95
96/// Creates a new empty node
97/// Nodes and attributes can be added later
98/// Content is taken owned as to avoid large copy
99/// Tag is not taken owned as it is most often a string literal
100pub fn new(tag: &str, content: String) -> Node {
101    Node {
102        attributes: HashMap::new(),
103        content,
104        tag: tag.to_owned(),
105        nodes: HashMap::new(),
106    }
107}
108
109/// Creates a new node with given tag, attributes content, and child nodes
110pub fn new_filled(
111    tag: &str,
112    attributes: HashMap<String, String>,
113    content: String,
114    nodes: HashMap<String, Vec<Node>>,
115) -> Node {
116    Node {
117        tag: tag.to_owned(),
118        attributes,
119        nodes,
120        content,
121    }
122}
123
/// Counts the line feeds (`'\n'`) contained in `string`.
fn newlines_in_slice(string: &str) -> usize {
    // '\n' is a single byte that never occurs inside a multi-byte UTF-8 sequence,
    // so scanning bytes gives the same count as scanning chars.
    string.bytes().filter(|&byte| byte == b'\n').count()
}
128
129/// Loads a xml structure from a slice
130/// Ok variant contains a payload with the child node, name prolog, and remaining stringtuple with (prolog, tag_name, tag_data, remaining_from_in)
131fn load_from_slice(string: &str) -> Result<Payload, Error> {
132    let opening_del = match string.find('<') {
133        Some(v) => v,
134        None => {
135            return Ok(Payload {
136                prolog: "",
137                node: None,
138                remaining: string,
139            });
140        }
141    };
142
143    let closing_del = match string.find('>') {
144        Some(v) => v,
145        None => {
146            return Err(Error::ParseError(
147                ParseError::MissingClosingDelimiter,
148                newlines_in_slice(&string[..opening_del]),
149            ))
150        }
151    };
152
153    // Do not consider / of empty as a part
154    let attr_end = if &string[closing_del - 1..closing_del] == "/" {
155        closing_del - 1
156    } else {
157        closing_del
158    };
159
160    let mut tag_parts =
161        SplitUnquoted::split(&string[opening_del + 1..attr_end], |c| c.is_whitespace());
162
163    let tag_name = tag_parts.next().unwrap().trim();
164
165    // Collect the prolog as everything before opening tag excluding whitespace
166    let prolog = string[..opening_del].trim();
167
168    // Is a comment
169    // Attempt to read past comment
170    if &tag_name[0..1] == "?" {
171        return load_from_slice(&string[closing_del + 1..]);
172    }
173
174    let mut attributes = HashMap::new();
175    for part in tag_parts {
176        let equal_sign = match part.find('=') {
177            Some(v) => v,
178            None => {
179                return Err(Error::ParseError(
180                    ParseError::MissingAttributeValue(part.to_owned()),
181                    newlines_in_slice(&string[..closing_del]),
182                ))
183            }
184        };
185
186        // Get key and value from attribute
187        let (k, v) = part.split_at(equal_sign);
188
189        // Remove quotes from value
190        let v = if &v[1..2] == "\"" && (&v[v.len() - 1..] == "\"" || v.ends_with("\"/")) {
191            &v[2..v.len() - 1]
192        } else {
193            return Err(Error::ParseError(
194                ParseError::MissingQuotes(part.to_owned()),
195                newlines_in_slice(&string[..closing_del]),
196            ));
197        };
198        attributes.insert(k.to_owned(), v.to_owned());
199    }
200
201    // Empty but valid node
202    if string[opening_del + 1..closing_del].ends_with('/') {
203        return Ok(Payload {
204            prolog,
205            node: Some(Node {
206                tag: tag_name.to_owned(),
207                nodes: HashMap::new(),
208                attributes,
209                content: String::new(),
210            }),
211            remaining: &string[closing_del + 1..],
212        });
213    }
214
215    // Find the closing tag index
216    let closing_tag = match string.find(&format!("</{}>", tag_name)) {
217        Some(v) => v,
218        None => {
219            return Err(Error::ParseError(
220                ParseError::MissingClosingTag(tag_name.to_owned()),
221                newlines_in_slice(&string[..closing_del]),
222            ))
223        }
224    };
225
226    let mut content = String::with_capacity(512);
227    let mut nodes = HashMap::new();
228
229    // Load the inside contents and nodes
230    let mut buf = &string[closing_del + 1..closing_tag];
231    let mut offset = closing_del;
232    while !buf.is_empty() {
233        let payload = load_from_slice(buf).map_err(|e| match e {
234            Error::ParseError(e, ln) => {
235                Error::ParseError(e, ln + newlines_in_slice(&string[..offset]))
236            }
237            e => e,
238        })?;
239
240        if let Some(node) = payload.node {
241            let v: &mut Vec<_> = nodes.entry(node.tag.clone()).or_default();
242            v.push(node);
243        }
244
245        // Nothing was read by node, no more nodes
246        if payload.remaining.as_ptr() == buf.as_ptr() {
247            break;
248        }
249
250        // Put what was before the next tag into the content of the parent tag
251        content.push_str(payload.prolog);
252        offset += buf.len() - payload.remaining.len();
253        buf = payload.remaining;
254    }
255
256    // Add the remaining inside content to content after no more nodes where found
257    content.push_str(buf);
258
259    let remaining = &string[closing_tag + tag_name.len() + 3..];
260
261    Ok(Payload {
262        prolog,
263        node: Some(Node {
264            tag: tag_name.to_owned(),
265            attributes,
266            nodes,
267            content: content.trim().into(),
268        }),
269        remaining,
270    })
271}
272
273impl Node {
274
275    /// Returns a mutable list of nodes 
276    /// If no nodes with the specified tag exists, None is returned
277    pub fn get_mut_nodes(&mut self, tag: &str) -> Option<&mut Vec<Node>> {
278        self.nodes.get_mut(tag)
279    }
280
281    /// Returns a list of all nodes with the specified tag
282    /// If no nodes with the specified tag exists, None is returned
283    pub fn get_nodes(&self, tag: &str) -> Option<&Vec<Node>> {
284        self.nodes.get(tag)
285    }
286
287    /// Returns a list of all nodes with the specified tag
288    /// If no nodes with the specified tag exists, an Err of TagNotFound is returned containing the parent name and requested node name
289    /// Otherwise, works exactly like get_nodes but can be chained with ? (try operator)
290    pub fn try_get_nodes(&self, tag: &str) -> Result<&Vec<Node>, Error> {
291        match self.nodes.get(tag) {
292            Some(v) => Ok(v),
293            None => Err(Error::TagNotFound(self.tag.to_owned(), tag.to_owned())),
294        }
295    }
296
297    /// Adds or updates an attribute
298    /// If an attribute with that key already exists it is returned
299    pub fn add_attribute(&mut self, key: &str, val: &str) -> Option<String> {
300        self.attributes.insert(key.to_owned(), val.to_owned())
301    }
302
303    // Gets an attribute by name or returns None if it doesn't exist
304    pub fn get_attribute(&self, key: &str) -> Option<&String> {
305        self.attributes.get(key)
306    }
307
308    /// Gets an attribute by name or returns an Err of AttributeNotFound containing the parent tag and the requested key
309    /// Otherwise, works exactly like get_nodes but can be chained with ? (try operator)
310    pub fn try_get_attribute(&self, key: &str) -> Result<&String, Error> {
311        match self.attributes.get(key) {
312            Some(v) => Ok(v),
313            None => Err(Error::AttributeNotFound(
314                self.tag.to_owned(),
315                key.to_owned(),
316            )),
317        }
318    }
319
320    /// Inserts a new node node with the name of the node field
321    pub fn add_node(&mut self, node: Node) {
322        let v = self.nodes.entry(node.tag.clone()).or_default();
323        v.push(node);
324    }
325
326    /// Inserts a new node into the xml structure
327    /// Does the same thing as node.add_node(simple_xml::new(tag, content));
328    pub fn add_new_node(&mut self, tag: &str, content: String) {
329        self.add_node(new(tag, content));
330    }
331
332    /// This writes an xml structure to a file specified by path
333    /// Uses the non-pretty to_string formatting
334    pub fn save_to_file<P: AsRef<Path>>(&self, path: P) -> io::Result<()> {
335        let mut file = File::create(path)?;
336        file.write_all(self.to_string().as_bytes())?;
337
338        Ok(())
339    }
340
341    /// This writes an xml structure to a file specified by path
342    /// Uses the pretty to_string_pretty formatting
343    pub fn save_to_file_pretty<P: AsRef<Path>>(&self, path: P) -> io::Result<()> {
344        let mut file = File::create(path)?;
345        file.write_all(self.to_string_pretty().as_bytes())?;
346
347        Ok(())
348    }
349
350    // Converts an xml structure to a string with whitespace formatting
351    pub fn to_string_pretty(&self) -> String {
352        fn internal(node: &Node, depth: usize) -> String {
353            if node.tag.is_empty() {
354                return "".to_owned();
355            }
356
357            match node.nodes.len() + node.content.len() {
358                0 => format!(
359                    "{indent}<{}{}/>\n",
360                    node.tag,
361                    node.attributes
362                        .iter()
363                        .map(|(k, v)| format!(" {}=\"{}\"", k, v))
364                        .collect::<String>(),
365                    indent = " ".repeat(depth * 4)
366                ),
367                _ => format!(
368                    "{indent}<{tag}{attr}>{beg}{nodes}{content}{end}</{tag}>\n",
369                    tag = node.tag,
370                    attr = node
371                        .attributes
372                        .iter()
373                        .map(|(k, v)| format!(" {}=\"{}\"", k, v))
374                        .collect::<String>(),
375                    nodes = node
376                        .nodes
377                        .iter()
378                        .flat_map(|(_, nodes)| nodes.iter())
379                        .map(|node| internal(node, depth + 1))
380                        .collect::<String>(),
381                    beg = match node.nodes.len() {
382                        0 => "",
383                        _ => "\n",
384                    },
385                    end = match node.nodes.len() {
386                        0 => "".to_owned(),
387                        _ => " ".repeat(depth * 4),
388                    },
389                    content = node.content,
390                    indent = " ".repeat(depth * 4),
391                ),
392            }
393        }
394        internal(self, 0)
395    }
396}
397
398impl std::fmt::Display for Node {
399    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> fmt::Result {
400        if self.tag.is_empty() {
401            return write!(f, "");
402        }
403
404        match self.nodes.len() + self.content.len() {
405            0 => write!(
406                f,
407                "<{}{}/>",
408                self.tag,
409                self.attributes
410                    .iter()
411                    .map(|(k, v)| format!(" {}=\"{}\"", k, v))
412                    .collect::<String>(),
413            ),
414            _ => write!(
415                f,
416                "<{tag}{attr}>{nodes}{content}</{tag}>",
417                tag = self.tag,
418                attr = self
419                    .attributes
420                    .iter()
421                    .map(|(k, v)| format!(" {}=\"{}\"", k, v))
422                    .collect::<String>(),
423                nodes = self
424                    .nodes
425                    .iter()
426                    .flat_map(|(_, nodes)| nodes.iter())
427                    .map(|node| node.to_string())
428                    .collect::<String>(),
429                content = self.content,
430            ),
431        }
432    }
433}
434
435/// Returns a slice of all node nodes with the specified tag
436/// If no nodes with the specified tag exists, an empty slice is returned
437impl ops::Index<&str> for Node {
438    type Output = [Node];
439    fn index(&self, tag: &str) -> &Self::Output {
440        match self.nodes.get(tag) {
441            Some(v) => &v[..],
442            None => &[],
443        }
444    }
445}