Skip to main content

docx_rs/xml_json/
mod.rs

1// Licensed under either of
2//
3// Apache License, Version 2.0, (LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0)
4// MIT license (LICENSE-MIT or http://opensource.org/licenses/MIT)
5// at your option.
6//
7// Contribution
8// Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
9// use serde::Serialize;
10use serde::Serialize;
11use std::fmt::{Display, Formatter, Write};
12use std::io::prelude::*;
13use std::io::Cursor;
14use std::str::FromStr;
15
16use crate::reader::{EventReader, Namespace, OwnedAttribute, OwnedName, XmlEvent};
17
18/// An XML Document
19#[derive(Debug, Clone)]
20pub struct XmlDocument {
21    /// Data contained within the parsed XML Document
22    pub data: Vec<XmlData>,
23}
24
25impl Display for XmlDocument {
26    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
27        for item in self.data.iter() {
28            item.fmt(f)?;
29        }
30        Ok(())
31    }
32}
33
34/// An XML Tag
35///
36/// For example:
37///
38/// ```XML
39/// <foo bar="baz">
40///     test text
41///     <sub></sub>
42/// </foo>
43/// ```
44#[derive(Debug, Clone, Serialize)]
45pub struct XmlData {
46    /// Name of the tag (i.e. "foo")
47    pub name: String,
48    /// Key-value pairs of the attributes (i.e. ("bar", "baz"))
49    pub attributes: Vec<(String, String)>,
50    /// Data (i.e. "test text")
51    pub data: Option<String>,
52    /// Sub elements (i.e. an XML element of "sub")
53    pub children: Vec<XmlData>,
54}
55
56impl XmlData {
57    /// Format the XML data as a string
58    fn format(self: &XmlData, f: &mut Formatter, _depth: usize) -> std::fmt::Result {
59        write!(f, "<{}", self.name)?;
60
61        for (key, val) in self.attributes.iter() {
62            write!(f, r#" {}="{}""#, key, val)?;
63        }
64
65        f.write_char('>')?;
66
67        if let Some(ref data) = self.data {
68            write!(f, "{}", data)?
69        }
70
71        for child in self.children.iter() {
72            child.format(f, _depth + 1)?;
73        }
74
75        write!(f, "</{}>", self.name)
76    }
77}
78
79impl Display for XmlData {
80    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
81        self.format(f, 0)
82    }
83}
84
85/// Get the XML attributes as a string
86fn map_owned_attributes(attrs: Vec<OwnedAttribute>) -> Vec<(String, String)> {
87    attrs
88        .into_iter()
89        .map(|attr| {
90            let fmt_name = if attr.name.prefix.is_some() {
91                if !attr.name.local_name.is_empty() {
92                    format!("{}:{}", attr.name.prefix.unwrap(), attr.name.local_name)
93                } else {
94                    attr.name.prefix.unwrap()
95                }
96            } else {
97                attr.name.local_name.clone()
98            };
99            (fmt_name, attr.value)
100        })
101        .collect()
102}
103
104fn parse(
105    mut data: Vec<XmlEvent>,
106    current: Option<XmlData>,
107    mut current_vec: Vec<XmlData>,
108    trim: bool,
109    current_namespace: Namespace,
110) -> Result<(Vec<XmlData>, Vec<XmlEvent>), String> {
111    if let Some(elmt) = data.pop() {
112        match elmt {
113            XmlEvent::StartElement {
114                name,
115                attributes,
116                namespace,
117            } => {
118                let fmt_name = if name.prefix.is_some() {
119                    if !name.local_name.is_empty() {
120                        format!("{}:{}", name.prefix.unwrap(), name.local_name)
121                    } else {
122                        name.prefix.unwrap()
123                    }
124                } else {
125                    name.local_name
126                };
127
128                let attributes = if namespace == current_namespace {
129                    attributes
130                } else {
131                    let mut attributes = attributes;
132                    let n = namespace.clone();
133                    let ns = n
134                        .into_iter()
135                        .filter(|(_k, v)| {
136                            (v != "")
137                                && (v != "http://www.w3.org/2000/xmlns/")
138                                && (v != "http://www.w3.org/XML/1998/namespace")
139                        })
140                        .map(|(k, v)| OwnedAttribute {
141                            name: OwnedName {
142                                local_name: k.to_string(),
143                                namespace: if v.is_empty() {
144                                    None
145                                } else {
146                                    Some(v.to_string())
147                                },
148                                prefix: Some("xmlns".to_string()),
149                            },
150                            value: v.to_string(),
151                        });
152                    attributes.extend(ns);
153                    attributes
154                };
155
156                let inner = XmlData {
157                    name: fmt_name,
158                    attributes: map_owned_attributes(attributes),
159                    data: None,
160                    children: Vec::new(),
161                };
162
163                let (inner, rest) = parse(data, Some(inner), Vec::new(), trim, namespace.clone())?;
164
165                if let Some(mut crnt) = current {
166                    crnt.children.extend(inner);
167                    parse(rest, Some(crnt), current_vec, trim, namespace)
168                } else {
169                    current_vec.extend(inner);
170                    parse(rest, None, current_vec, trim, namespace)
171                }
172            }
173            XmlEvent::Characters(chr) => {
174                let chr = if trim { chr.trim().to_string() } else { chr };
175                if let Some(mut crnt) = current {
176                    crnt.data = Some(chr);
177                    parse(data, Some(crnt), current_vec, trim, current_namespace)
178                } else {
179                    Err("Invalid form of XML doc".to_string())
180                }
181            }
182            XmlEvent::EndElement { name } => {
183                let fmt_name = if name.prefix.is_some() {
184                    if !name.local_name.is_empty() {
185                        format!("{}:{}", name.prefix.unwrap(), name.local_name)
186                    } else {
187                        name.prefix.unwrap()
188                    }
189                } else {
190                    name.local_name.clone()
191                };
192                if let Some(crnt) = current {
193                    if crnt.name == fmt_name {
194                        current_vec.push(crnt);
195                        Ok((current_vec, data))
196                    } else {
197                        Err(format!(
198                            "Invalid end tag: expected {}, got {}",
199                            crnt.name, name.local_name
200                        ))
201                    }
202                } else {
203                    Err(format!("Invalid end tag: {}", name.local_name))
204                }
205            }
206            _ => parse(data, current, current_vec, trim, current_namespace),
207        }
208    } else if let Some(_current) = current {
209        Err("Invalid end tag".to_string())
210    } else {
211        Ok((current_vec, Vec::new()))
212    }
213}
214
215impl XmlDocument {
216    pub fn from_reader<R>(source: R, trim: bool) -> Result<Self, ParseXmlError>
217    where
218        R: Read,
219    {
220        let parser = EventReader::new(source);
221        let mut events: Vec<XmlEvent> = parser.into_iter().map(|x| x.unwrap()).collect();
222        events.reverse();
223
224        parse(events, None, Vec::new(), trim, Namespace::empty())
225            .map(|(data, _)| XmlDocument { data })
226            .map_err(ParseXmlError)
227    }
228}
229
/// Error returned when XML input cannot be turned into an `XmlDocument`.
///
/// Wraps a human-readable description of what went wrong.
#[derive(Debug, Clone, PartialEq)]
pub struct ParseXmlError(String);

/// Renders the error as a fixed prefix followed by the wrapped description.
impl Display for ParseXmlError {
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
        write!(f, "Could not parse string to XML: {}", self.0)
    }
}
239
240// Generate an XML document from a string
241impl FromStr for XmlDocument {
242    type Err = ParseXmlError;
243
244    fn from_str(s: &str) -> Result<XmlDocument, ParseXmlError> {
245        XmlDocument::from_reader(Cursor::new(s.to_string().into_bytes()), true)
246    }
247}