docx_rs/xml_json/
mod.rs

1// Licensed under either of
2//
3// Apache License, Version 2.0, (LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0)
4// MIT license (LICENSE-MIT or http://opensource.org/licenses/MIT)
5// at your option.
6//
7// Contribution
8// Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
9// use serde::Serialize;
10use serde::Serialize;
11use std::fmt::{Display, Formatter, Write};
12use std::io::prelude::*;
13use std::io::Cursor;
14use std::str::FromStr;
15use xml::attribute::OwnedAttribute;
16use xml::name::OwnedName;
17use xml::namespace::{self, Namespace};
18use xml::reader::{EventReader, XmlEvent};
19
20/// An XML Document
21#[derive(Debug, Clone)]
22pub struct XmlDocument {
23    /// Data contained within the parsed XML Document
24    pub data: Vec<XmlData>,
25}
26
27impl Display for XmlDocument {
28    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
29        for item in self.data.iter() {
30            item.fmt(f)?;
31        }
32        Ok(())
33    }
34}
35
/// An XML Tag
///
/// One element of the document tree: its name, its attributes, its text and
/// its child elements.
///
/// For example:
///
/// ```xml
/// <foo bar="baz">
///     test text
///     <sub></sub>
/// </foo>
/// ```
#[derive(Debug, Clone, Serialize)]
pub struct XmlData {
    /// Name of the tag (i.e. "foo"). When the parser reports a namespace
    /// prefix, the name is stored as `prefix:local`.
    pub name: String,
    /// Key-value pairs of the attributes (i.e. ("bar", "baz")), with prefixed
    /// names stored as `prefix:local`. The parser may also append namespace
    /// declarations here as `xmlns:prefix` entries.
    pub attributes: Vec<(String, String)>,
    /// Data (i.e. "test text"). `None` when the element has no character
    /// data. The parser overwrites this field on every character event, so
    /// only the last run of text directly inside the element is kept.
    pub data: Option<String>,
    /// Sub elements (i.e. an XML element of "sub")
    pub children: Vec<XmlData>,
}
57
58impl XmlData {
59    /// Format the XML data as a string
60    fn format(self: &XmlData, f: &mut Formatter, _depth: usize) -> std::fmt::Result {
61        write!(f, "<{}", self.name)?;
62
63        for (key, val) in self.attributes.iter() {
64            write!(f, r#" {}="{}""#, key, val)?;
65        }
66
67        f.write_char('>')?;
68
69        if let Some(ref data) = self.data {
70            write!(f, "{}", data)?
71        }
72
73        for child in self.children.iter() {
74            child.format(f, _depth + 1)?;
75        }
76
77        write!(f, "</{}>", self.name)
78    }
79}
80
81impl Display for XmlData {
82    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
83        self.format(f, 0)
84    }
85}
86
87/// Get the XML attributes as a string
88fn map_owned_attributes(attrs: Vec<xml::attribute::OwnedAttribute>) -> Vec<(String, String)> {
89    attrs
90        .into_iter()
91        .map(|attr| {
92            let fmt_name = if attr.name.prefix.is_some() {
93                if !attr.name.local_name.is_empty() {
94                    format!("{}:{}", attr.name.prefix.unwrap(), attr.name.local_name)
95                } else {
96                    attr.name.prefix.unwrap()
97                }
98            } else {
99                attr.name.local_name.clone()
100            };
101            (fmt_name, attr.value)
102        })
103        .collect()
104}
105
106fn parse(
107    mut data: Vec<XmlEvent>,
108    current: Option<XmlData>,
109    mut current_vec: Vec<XmlData>,
110    trim: bool,
111    current_namespace: Namespace,
112) -> Result<(Vec<XmlData>, Vec<XmlEvent>), String> {
113    if let Some(elmt) = data.pop() {
114        match elmt {
115            XmlEvent::StartElement {
116                name,
117                attributes,
118                namespace,
119            } => {
120                let fmt_name = if name.prefix.is_some() {
121                    if !name.local_name.is_empty() {
122                        format!("{}:{}", name.prefix.unwrap(), name.local_name)
123                    } else {
124                        name.prefix.unwrap()
125                    }
126                } else {
127                    name.local_name
128                };
129
130                let attributes = if namespace == current_namespace {
131                    attributes
132                } else {
133                    let mut attributes = attributes;
134                    let n = namespace.clone();
135                    let ns = n
136                        .into_iter()
137                        .filter(|&(_k, v)| {
138                            (v != namespace::NS_EMPTY_URI)
139                                && (v != namespace::NS_XMLNS_URI)
140                                && (v != namespace::NS_XML_URI)
141                        })
142                        .map(|(k, v)| OwnedAttribute {
143                            name: OwnedName {
144                                local_name: k.to_string(),
145                                namespace: if v == namespace::NS_NO_PREFIX {
146                                    None
147                                } else {
148                                    Some(v.to_string())
149                                },
150                                prefix: Some("xmlns".to_string()),
151                            },
152                            value: v.to_string(),
153                        });
154                    attributes.extend(ns);
155                    attributes
156                };
157
158                let inner = XmlData {
159                    name: fmt_name,
160                    attributes: map_owned_attributes(attributes),
161                    data: None,
162                    children: Vec::new(),
163                };
164
165                let (inner, rest) = parse(data, Some(inner), Vec::new(), trim, namespace.clone())?;
166
167                if let Some(mut crnt) = current {
168                    crnt.children.extend(inner);
169                    parse(rest, Some(crnt), current_vec, trim, namespace)
170                } else {
171                    current_vec.extend(inner);
172                    parse(rest, None, current_vec, trim, namespace)
173                }
174            }
175            XmlEvent::Characters(chr) => {
176                let chr = if trim { chr.trim().to_string() } else { chr };
177                if let Some(mut crnt) = current {
178                    crnt.data = Some(chr);
179                    parse(data, Some(crnt), current_vec, trim, current_namespace)
180                } else {
181                    Err("Invalid form of XML doc".to_string())
182                }
183            }
184            XmlEvent::EndElement { name } => {
185                let fmt_name = if name.prefix.is_some() {
186                    if !name.local_name.is_empty() {
187                        format!("{}:{}", name.prefix.unwrap(), name.local_name)
188                    } else {
189                        name.prefix.unwrap()
190                    }
191                } else {
192                    name.local_name.clone()
193                };
194                if let Some(crnt) = current {
195                    if crnt.name == fmt_name {
196                        current_vec.push(crnt);
197                        Ok((current_vec, data))
198                    } else {
199                        Err(format!(
200                            "Invalid end tag: expected {}, got {}",
201                            crnt.name, name.local_name
202                        ))
203                    }
204                } else {
205                    Err(format!("Invalid end tag: {}", name.local_name))
206                }
207            }
208            _ => parse(data, current, current_vec, trim, current_namespace),
209        }
210    } else if let Some(_current) = current {
211        Err("Invalid end tag".to_string())
212    } else {
213        Ok((current_vec, Vec::new()))
214    }
215}
216
217impl XmlDocument {
218    pub fn from_reader<R>(source: R, trim: bool) -> Result<Self, ParseXmlError>
219    where
220        R: Read,
221    {
222        let parser = EventReader::new(source);
223        let mut events: Vec<XmlEvent> = parser.into_iter().map(|x| x.unwrap()).collect();
224        events.reverse();
225
226        parse(events, None, Vec::new(), trim, Namespace::empty())
227            .map(|(data, _)| XmlDocument { data })
228            .map_err(ParseXmlError)
229    }
230}
231
/// Error when parsing XML
///
/// Wraps a human-readable description of what went wrong.
#[derive(Debug, Clone, PartialEq)]
pub struct ParseXmlError(String);

impl Display for ParseXmlError {
    /// Writes the wrapped description after a fixed prefix.
    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
        write!(f, "Could not parse string to XML: {}", self.0)
    }
}
241
242// Generate an XML document from a string
243impl FromStr for XmlDocument {
244    type Err = ParseXmlError;
245
246    fn from_str(s: &str) -> Result<XmlDocument, ParseXmlError> {
247        XmlDocument::from_reader(Cursor::new(s.to_string().into_bytes()), true)
248    }
249}