xml_to_json/
lib.rs

1use std::{
2  fmt,
3  io::{Cursor, Read},
4};
5
6use serde_json::{self, map::Map};
7use xml::{
8  attribute::OwnedAttribute,
9  name::OwnedName,
10  namespace::{self, Namespace},
11  reader::{EventReader, XmlEvent},
12};
13
/// Error returned when an input cannot be turned into an [`XmlDocument`].
///
/// Wraps a human-readable description of the failure; the description is
/// appended to a fixed prefix when the error is displayed.
#[derive(Debug, Clone, PartialEq)]
pub struct ParseErr(String);

impl fmt::Display for ParseErr {
  /// Writes the fixed prefix followed by the wrapped description.
  fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
    write!(
      formatter,
      "Failed to parse string to XML document: {}",
      self.0
    )
  }
}

// Lets callers box the error as `dyn Error` and propagate it with `?` into
// generic error types; `Debug` and `Display` above satisfy the requirements.
impl std::error::Error for ParseErr {}
26
27#[derive(Debug, Clone)]
28pub struct XmlDocument {
29  pub root: Vec<XmlNode>,
30}
31
32impl fmt::Display for XmlDocument {
33  fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
34    self
35      .root
36      .iter()
37      .fold(String::new(), |acc_string, xml_node| {
38        format!("{}{}", acc_string, xml_node)
39      })
40      .fmt(formatter)
41  }
42}
43
44impl XmlDocument {
45  pub fn from_reader<R>(source: R) -> Result<Self, ParseErr>
46  where
47    R: Read,
48  {
49    let reader = EventReader::new(source);
50    let mut xml_events: Vec<XmlEvent> = Vec::new();
51
52    for event in reader.into_iter() {
53      match event {
54        Ok(event_type) => {
55          xml_events.push(event_type);
56        }
57        Err(error) => {
58          return Err(ParseErr(format!("{}", error)));
59        }
60      }
61    }
62
63    xml_events.reverse();
64
65    Self::parse(xml_events, None, Vec::new(), Namespace::empty())
66      .map(|(root, _)| XmlDocument { root })
67      .map_err(|error| ParseErr(error))
68  }
69
70  pub fn from_string(string: &str) -> Result<XmlDocument, ParseErr> {
71    Self::from_reader(Cursor::new(string.to_owned().into_bytes()))
72  }
73
74  pub fn to_object(&self) -> Map<String, serde_json::Value> {
75    let mut map: Map<String, serde_json::Value> = Map::new();
76
77    for (key, value) in self.root.iter().map(|xml_node| xml_node.to_key_value()) {
78      map.insert(key, value);
79    }
80
81    map
82  }
83
84  pub fn to_json(&self) -> serde_json::Result<String> {
85    serde_json::to_string(&self.to_object())
86  }
87
88  fn parse_xml_attributes(
89    xml_attributes: Vec<xml::attribute::OwnedAttribute>,
90  ) -> Vec<(String, String)> {
91    xml_attributes
92      .into_iter()
93      .map(|xml_attribute| {
94        let parsed_attribute_name = match xml_attribute.name.prefix {
95          Some(prefix) => format!("{}:{}", prefix, xml_attribute.name.local_name),
96          None => xml_attribute.name.local_name.clone(),
97        };
98        (parsed_attribute_name, xml_attribute.value)
99      })
100      .collect()
101  }
102
103  fn parse(
104    mut xml_events: Vec<XmlEvent>,
105    current_node: Option<XmlNode>,
106    mut current_node_group: Vec<XmlNode>,
107    current_namespace: Namespace,
108  ) -> Result<(Vec<XmlNode>, Vec<XmlEvent>), String> {
109    match xml_events.pop() {
110      Some(xml_event) => match xml_event {
111        XmlEvent::StartElement {
112          name,
113          attributes,
114          namespace,
115        } => {
116          let formatted_node_name = match name.prefix {
117            Some(prefix) => format!("{}:{}", prefix, name.local_name),
118            None => name.local_name,
119          };
120
121          let current_attributes = if namespace == current_namespace {
122            attributes
123          } else {
124            let mut attributes = attributes;
125            let cloned_namespace = namespace.clone();
126            let namespace_attributes = cloned_namespace
127              .into_iter()
128              .filter(|&(key, value)| {
129                key != namespace::NS_NO_PREFIX && value != namespace::NS_EMPTY_URI
130              })
131              .map(|(key, value)| OwnedAttribute {
132                name: OwnedName {
133                  local_name: key.to_owned(),
134                  namespace: Some(value.to_owned()),
135                  prefix: Some("xmlns".to_owned()),
136                },
137                value: value.to_owned(),
138              });
139            attributes.extend(namespace_attributes);
140            attributes
141          };
142
143          let child_node = XmlNode {
144            name: formatted_node_name,
145            attributes: Self::parse_xml_attributes(current_attributes),
146            content: None,
147            children: Vec::new(),
148          };
149
150          let (parsed_child_node, remaining_xml_events) =
151            Self::parse(xml_events, Some(child_node), Vec::new(), namespace.clone())?;
152
153          match current_node {
154            Some(mut current_xml_node) => {
155              current_xml_node.children.extend(parsed_child_node);
156              Self::parse(
157                remaining_xml_events,
158                Some(current_xml_node),
159                current_node_group,
160                namespace,
161              )
162            }
163            None => {
164              current_node_group.extend(parsed_child_node);
165              Self::parse(remaining_xml_events, None, current_node_group, namespace)
166            }
167          }
168        }
169        XmlEvent::Characters(raw_content) | XmlEvent::CData(raw_content) => {
170          let parsed_content = raw_content.trim().to_owned();
171
172          match current_node {
173            Some(mut current_xml_node) => {
174              current_xml_node.content = Some(parsed_content);
175              Self::parse(
176                xml_events,
177                Some(current_xml_node),
178                current_node_group,
179                current_namespace,
180              )
181            }
182            None => Err("Invalid form of XML doc".to_owned()),
183          }
184        }
185        XmlEvent::EndElement { name } => {
186          let formatted_node_name = match name.prefix {
187            Some(prefix) => format!("{}:{}", prefix, name.local_name),
188            None => name.local_name.clone(),
189          };
190
191          match current_node {
192            Some(current_xml_node) => {
193              if current_xml_node.name == formatted_node_name {
194                current_node_group.push(current_xml_node);
195                return Ok((current_node_group, xml_events));
196              }
197
198              Err(format!(
199                "Invalid end tag: expected {}, got {}",
200                current_xml_node.name, name.local_name
201              ))
202            }
203            None => Err(format!("Invalid end tag: {}", name.local_name)),
204          }
205        }
206        _ => Self::parse(
207          xml_events,
208          current_node,
209          current_node_group,
210          current_namespace,
211        ),
212      },
213      None => match current_node {
214        Some(_) => Err("Invalid end tag".to_owned()),
215        None => Ok((current_node_group, Vec::new())),
216      },
217    }
218  }
219}
220
/// A single XML element together with everything nested inside it.
///
/// Implements `PartialEq`/`Eq` so parsed trees can be compared directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct XmlNode {
  /// Element name, written as `prefix:local_name` when the element has a
  /// prefix.
  pub name: String,
  /// Attribute `(name, value)` pairs in the order they were collected.
  pub attributes: Vec<(String, String)>,
  /// Trimmed character data found directly inside the element, if any.
  pub content: Option<String>,
  /// Child elements in document order.
  pub children: Vec<XmlNode>,
}
228
229impl fmt::Display for XmlNode {
230  fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
231    write!(formatter, "{}", self.format(0))
232  }
233}
234
235impl XmlNode {
236  pub fn format(&self, depth: usize) -> String {
237    let formatted_children = if self.children.is_empty() {
238      String::new()
239    } else {
240      self
241        .children
242        .iter()
243        .fold("\n".to_owned(), |acc_string, child_node| {
244          format!("{}{}", acc_string, child_node.format(depth + 1))
245        })
246    };
247
248    let indentation = Self::indent(depth);
249
250    let formatted_content = match self.content {
251      Some(ref content) => format!("\n{}{}", Self::indent(depth + 1), content),
252      None => "".to_owned(),
253    };
254
255    format!(
256      "{}<{}{}>{}{}\n{}</{}>\n",
257      indentation,
258      self.name,
259      Self::attributes_to_string(&self.attributes),
260      formatted_content,
261      formatted_children,
262      indentation,
263      self.name
264    )
265  }
266
267  pub fn to_key_value(&self) -> (String, serde_json::Value) {
268    let mut map: Map<String, serde_json::Value> = Map::new();
269
270    if self.content.is_some() {
271      map.insert(
272        "_".to_owned(),
273        serde_json::Value::String(self.content.clone().unwrap()),
274      );
275    }
276
277    let children_entries = self
278      .children
279      .iter()
280      .map(|child_node| child_node.to_key_value());
281
282    for (key, value) in children_entries {
283      let found = match map.get_mut(&key) {
284        Some(thing) => match thing {
285          &mut serde_json::Value::Array(ref mut vector) => {
286            vector.push(value.clone());
287            true
288          }
289          json_value => {
290            let vector = vec![json_value.clone(), value.clone()];
291            *json_value = serde_json::Value::Array(vector);
292            true
293          }
294        },
295        None => false,
296      };
297
298      if !found {
299        map.insert(key.clone(), value.clone());
300      }
301    }
302
303    let mut attributes_map: Map<String, serde_json::Value> = Map::new();
304
305    for (key, value) in self.attributes.iter() {
306      let found = match attributes_map.get_mut(key) {
307        Some(thing) => match thing {
308          &mut serde_json::Value::Array(ref mut vector) => {
309            vector.push(serde_json::Value::String(value.clone()));
310            true
311          }
312          json_value => {
313            let vector = vec![json_value.clone(), serde_json::Value::String(value.clone())];
314            *json_value = serde_json::Value::Array(vector);
315            true
316          }
317        },
318        None => false,
319      };
320
321      if !found {
322        attributes_map.insert(key.clone(), serde_json::Value::String(value.clone()));
323      }
324    }
325
326    if !attributes_map.is_empty() {
327      map.insert("$".to_owned(), serde_json::Value::Object(attributes_map));
328    }
329
330    (self.name.clone(), serde_json::Value::Object(map))
331  }
332
333  fn indent(size: usize) -> String {
334    const INDENT: &'static str = "  ";
335    (0..size).map(|_| INDENT).fold(
336      String::with_capacity(size * INDENT.len()),
337      |acc_string, string| acc_string + string,
338    )
339  }
340
341  fn attributes_to_string(attributes: &[(String, String)]) -> String {
342    attributes.iter().fold(String::new(), |acc, (key, value)| {
343      format!("{} {}=\"{}\"", acc, key, value)
344    })
345  }
346}