xml2json_rs/
xml.rs

1use quick_xml::{events::*, Writer};
2
3use std::{cell::RefCell, convert::TryFrom, io::Cursor, ops::DerefMut, rc::Rc};
4
5use crate::{
6  error::{Error, ErrorKind},
7  utils
8};
9
10use serde_json::{Map as JsonMap, Value as JsonValue};
11
/// XML [Declaration] encoding.
///
/// For now only UTF-8 is supported
///
/// [Declaration]: struct.Declaration.html
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Encoding {
  /// UTF-8
  UTF8 // see https://www.w3resource.com/xml/declarations.php
}

impl Encoding {
  /// Serialize `Encoding` as a `&'static str`
  ///
  /// The returned name is the text used for the declaration's encoding value.
  pub fn to_string(&self) -> &'static str {
    match *self {
      Encoding::UTF8 => "UTF-8"
    }
  }
}
31
32impl TryFrom<&str> for Encoding {
33  type Error = Error;
34
35  /// Try to initialize an `Encoding` from a `&str`.
36  fn try_from(s: &str) -> Result<Self, Self::Error> {
37    match s {
38      "UTF-8" | "UTF8" => Ok(Encoding::UTF8),
39      _ => Err(Error::new(ErrorKind::Encoding, format!("Cannot convert from {} to Encoding.", s)))
40    }
41  }
42}
43
/// XML [Declaration] version.
///
/// Setting this in a [Declaration] will not alter the output of the XML apart from writing the
/// version number to the declaration.
///
/// [Declaration]: struct.Declaration.html
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Version {
  /// 1.0
  XML10,
  /// 1.1
  XML11
}

impl Version {
  /// Serialize `Version` as a `&'static str`
  ///
  /// Yields `"1.0"` for `XML10` and `"1.1"` for `XML11`.
  pub fn to_string(&self) -> &'static str {
    match *self {
      Version::XML10 => "1.0",
      Version::XML11 => "1.1"
    }
  }
}
67
68impl TryFrom<&str> for Version {
69  type Error = Error;
70
71  /// Try to initialize a `Version` from `&str`
72  fn try_from(s: &str) -> Result<Self, Self::Error> {
73    match s {
74      "1.0" => Ok(Version::XML10),
75      "1.1" => Ok(Version::XML11),
76      _ => Err(Error::new(ErrorKind::Unknown, format!("Cannot convert from {} to Version.", s)))
77    }
78  }
79}
80
#[derive(Clone)]
/// XML Declaration
///
/// Holds the values that are handed to quick-xml's `BytesDecl` when the builder writes the
/// declaration at the start of a document.
pub struct Declaration {
  /// XML version written to the declaration
  version:    Version,
  /// Optional encoding; `None` passes no encoding to the declaration
  encoding:   Option<Encoding>,
  /// Optional standalone flag, rendered as "yes" / "no"; `None` passes no standalone value
  standalone: Option<bool>
}
88
89impl Default for Declaration {
90  fn default() -> Declaration {
91    Declaration {
92      version:    Version::XML10,
93      encoding:   None,
94      standalone: None
95    }
96  }
97}
98
99impl Declaration {
100  /// Initialize  a Declaration
101  pub fn new(version: Version, encoding: Option<Encoding>, standalone: Option<bool>) -> Declaration {
102    Declaration {
103      version,
104      encoding,
105      standalone
106    }
107  }
108
109  fn as_bytes_decl(&self) -> BytesDecl {
110    let version = self.version.to_string().as_bytes();
111    let encoding = self.encoding.as_ref().map(|v| v.to_string().as_bytes());
112    let standalone = self
113      .standalone
114      .as_ref()
115      .map(|v| if *v { "yes".as_bytes() } else { "no".as_bytes() });
116
117    BytesDecl::new(version, encoding, standalone)
118  }
119}
120
/// XML Indentation rendering options
#[derive(Clone, Debug)]
pub struct Indentation {
  // Byte repeated to form one level of indentation
  indent_char: u8,
  // Number of `indent_char` bytes per indentation level
  indent_size: usize
}

/// Optional indentation rendering
impl Indentation {
  /// Create an Indentation from the indent byte and the per-level width.
  ///
  /// Pass the result to `XmlConfig::rendering` to get XML output with line-breaks and
  /// indentation.
  pub fn new(indent_char: u8, indent_size: usize) -> Indentation {
    Self { indent_char, indent_size }
  }
}

impl Default for Indentation {
  /// Two spaces per indentation level.
  fn default() -> Self {
    Self::new(b' ', 2)
  }
}
148
/// XmlBuilder configuration options
///
/// Every option is optional; unset options are replaced by their defaults in `finalize`.
pub struct XmlConfig {
  /// JSON key holding XML attributes (`"$"` when unset)
  attrkey:   Option<String>,
  /// JSON key holding XML character data (`"_"` when unset)
  charkey:   Option<String>,
  /// Name of the root element (`"root"` when unset)
  root_name: Option<String>,
  /// XML declaration (`Declaration::default()` when unset)
  decl:      Option<Declaration>,
  /// Indentation options; no indentation is configured when unset
  rendering: Option<Indentation>
}
157
158impl Default for XmlConfig {
159  fn default() -> Self {
160    Self::new()
161  }
162}
163
164impl XmlConfig {
165  /// Initialze a new XmlConfig instance.
166  ///
167  /// This uses the builder pattern. All options are initialized to `None` and can be set using
168  /// `self`s methods. Any options not set will use their defaults upon call to `finalize`.
169  pub fn new() -> XmlConfig {
170    XmlConfig {
171      root_name: None,
172      attrkey:   None,
173      charkey:   None,
174      decl:      None,
175      rendering: None
176    }
177  }
178
179  /// Root key name to contain produced JSON object.
180  ///
181  /// When this is set to its default value "root", the output will not be wrapped in `root_name`'s
182  /// value. This is to conform to match the behavior of node-xml2js.
183  ///
184  /// (`"root"` by default)
185  pub fn root_name<T: Into<String>>(&mut self, key: T) -> &mut XmlConfig {
186    self.root_name = Some(key.into());
187    self
188  }
189
190  /// Attribute key
191  ///
192  /// The value of the JSON key used to store XML attributes under.
193  ///
194  /// (`"$"` by default)
195  pub fn attrkey<T: Into<String>>(&mut self, key: T) -> &mut XmlConfig {
196    self.attrkey = Some(key.into());
197    self
198  }
199
200  /// Char data key
201  ///
202  /// The value of the JSON key used to store XML character data under.
203  ///
204  /// (`"_"` by default)
205  pub fn charkey<T: Into<String>>(&mut self, key: T) -> &mut XmlConfig {
206    self.charkey = Some(key.into());
207    self
208  }
209
210  /// XML Declaration
211  ///
212  /// ([Declaration::default()] by default)
213  /// [Declaration::default()]: Declaration::default
214  pub fn decl(&mut self, decl: Declaration) -> &mut XmlConfig {
215    self.decl = Some(decl);
216    self
217  }
218
219  /// Rendering indentation options
220  ///
221  /// (`None` by default)
222  pub fn rendering(&mut self, indentation: Indentation) -> &mut XmlConfig {
223    self.rendering = Some(indentation);
224    self
225  }
226
227  /// Finalize configuration options and build an XmlBuilder instance
228  pub fn finalize(&self) -> XmlBuilder {
229    let writer = if let Some(indentation) = &self.rendering {
230      Writer::new_with_indent(Cursor::new(Vec::new()), indentation.indent_char, indentation.indent_size)
231    } else {
232      Writer::new(Cursor::new(Vec::new()))
233    };
234
235    let decl = self.decl.clone().unwrap_or_default();
236
237    XmlBuilder {
238      root_name: self.root_name.clone().unwrap_or_else(|| "root".to_owned()),
239      attrkey: self.attrkey.clone().unwrap_or_else(|| "$".to_owned()),
240      charkey: self.charkey.clone().unwrap_or_else(|| "_".to_owned()),
241      decl,
242      writer: Rc::new(RefCell::new(writer)),
243      indent: self.rendering.clone()
244    }
245  }
246}
247
/// XML builder
///
/// Converts JSON values into an XML string. Instances are created through `XmlConfig::finalize`
/// or `XmlBuilder::default`.
pub struct XmlBuilder {
  /// JSON key holding XML attributes
  attrkey:   String,
  /// JSON key holding XML character data
  charkey:   String,
  /// Name of the root element
  root_name: String,
  /// Declaration written at the start of the document
  decl:      Declaration,
  /// quick-xml writer over an in-memory byte buffer, behind `Rc<RefCell<..>>`
  writer:    Rc<RefCell<Writer<Cursor<Vec<u8>>>>>,
  /// Indentation options the builder was configured with, if any
  indent:    Option<Indentation>
}
257
258impl Default for XmlBuilder {
259  fn default() -> XmlBuilder {
260    XmlBuilder {
261      root_name: "root".to_owned(),
262      attrkey:   "$".to_owned(),
263      charkey:   "_".to_owned(),
264      decl:      Declaration::default(),
265      writer:    Rc::new(RefCell::new(Writer::new(Cursor::new(Vec::new())))),
266      indent:    None
267    }
268  }
269}
270
/// Tag attributes type: a vector of (name, value) tuples, each part a byte slice borrowed for `'a`.
type TagAttrs<'a> = Vec<(&'a [u8], &'a [u8])>;
273
274impl XmlBuilder {
275  // Check if key is an attribute key
276  fn is_attrkey(&self, key: &str) -> bool {
277    self.attrkey == *key
278  }
279
280  // Check if key is a character key
281  fn is_charkey(&self, key: &str) -> bool {
282    self.charkey == *key
283  }
284
285  // Get all a attributes at node. If successful, returns a vector of (name, value) attributes
286  fn tag_attributes<'a>(&self, node: &'a JsonValue) -> Result<TagAttrs<'a>, Error> {
287    // Node should either be an object {} or a wrapped object [{}]
288    // If it's an array, unwrap it and call self recursively
289    if let Some(array) = node.as_array() {
290      if array.len() == 1 {
291        let child = array.iter().next().unwrap_or(&JsonValue::Null);
292        self.tag_attributes(child)
293      } else {
294        Ok(Vec::new())
295      }
296    } else {
297      let mut attrs = Vec::new();
298      if let Some(attrs_value) = node.get(&self.attrkey) {
299        if let Some(object) = attrs_value.as_object() {
300          for (name, value) in object {
301            let attr = value
302              .as_str()
303              .ok_or_else(|| Error::new(ErrorKind::Syntax, "Expected attribute to be a string."))?;
304            attrs.push((name.as_bytes(), attr.as_bytes()));
305          }
306        }
307      }
308      Ok(attrs)
309    }
310  }
311
312  // Self closing tags are elements that don't contain text or other elements
313  fn is_empty_tag(&self, node: &JsonValue) -> bool {
314    // Check if child is an empty string, if not recursively check it's child
315    if let Some(array) = node.as_array() {
316      if let Some(child) = array.iter().next() {
317        return child.is_string() && utils::json_is_empty(child) || self.is_empty_tag(child);
318      }
319    } else if let Some(object) = node.as_object() {
320      for (k, v) in object.iter() {
321        if !self.is_attrkey(k) && !utils::json_is_empty(v) {
322          return false;
323        }
324      }
325      return true;
326    }
327    utils::json_is_empty(node)
328  }
329
330  // Write XML declaration
331  fn write_xml_decl(&mut self) -> Result<(), Error> {
332    let mut writer_ref = self.writer.borrow_mut();
333    let writer = writer_ref.deref_mut();
334    writer.write_event(Event::Decl(self.decl.as_bytes_decl())).map_err(|e| e.into())
335  }
336
337  // Write element's start tag including any attributes
338  fn write_start_tag(&mut self, key: &str, node: &JsonValue) -> Result<(), Error> {
339    let mut writer_ref = self.writer.borrow_mut();
340    let writer = writer_ref.deref_mut();
341
342    // Initialize the tag with key value
343    let mut tag = BytesStart::owned(key.to_owned(), key.len());
344
345    // Write any attributes
346    let attributes = self.tag_attributes(node)?;
347    for attr in attributes {
348      tag.push_attribute(attr);
349    }
350
351    // Write the tag as either empty / self-closing (<element />) or as a start tag (<element>)
352    let tag_is_empty = self.is_empty_tag(node);
353    if tag_is_empty {
354      writer.write_event(Event::Empty(tag)).map_err(|e| e.into())
355    } else {
356      writer.write_event(Event::Start(tag)).map_err(|e| e.into())
357    }
358  }
359
360  // Write text
361  fn write_text(&mut self, text: &str) -> Result<(), Error> {
362    let mut writer_ref = self.writer.borrow_mut();
363    let writer = writer_ref.deref_mut();
364    let text_content = BytesText::from_plain_str(text);
365    writer.write_event(Event::Text(text_content)).map_err(|e| e.into())
366  }
367
368  // Write element's end tag if the element wasn't self-closing
369  fn write_end_tag(&mut self, key: &str, node: &JsonValue) -> Result<(), Error> {
370    // If the tag was self-closing, do not write an end tag
371    if self.is_empty_tag(node) {
372      return Ok(());
373    }
374
375    let mut writer_ref = self.writer.borrow_mut();
376    let writer = writer_ref.deref_mut();
377    let tag_end = BytesEnd::owned(key.as_bytes().into());
378    writer.write_event(Event::End(tag_end)).map_err(|e| e.into())
379  }
380
381  // Write a string without triggering any indentation heuristics
382  fn write_raw(&mut self, value: &str) -> Result<(), Error> {
383    let mut writer_ref = self.writer.borrow_mut();
384    let writer = writer_ref.deref_mut();
385    writer.write(value.as_bytes()).map_err(|e| e.into())
386  }
387
388  // Write an indentation. Used when quick-xml's indentation heuristic doesn't
389  // have the context to properly indent
390  fn write_indent(&mut self) -> Result<(), Error> {
391    let mut writer_ref = self.writer.borrow_mut();
392    let writer = writer_ref.deref_mut();
393    writer.write_indent().map_err(|e| e.into())
394  }
395
396  /// Write end of file
397  fn write_eof(&mut self) -> Result<(), Error> {
398    let mut writer_ref = self.writer.borrow_mut();
399    let writer = writer_ref.deref_mut();
400    writer.write_event(Event::Eof).map_err(|e| e.into())
401  }
402
403  // A leaf node is an object that contains no keys apart from attrkey or charkey
404  fn is_leaf_node(&self, node: &JsonMap<String, JsonValue>) -> bool {
405    let normal_keys: Vec<&str> = node
406      .iter()
407      .filter(|&(k, _)| !self.is_charkey(k) && !self.is_attrkey(k))
408      .map(|(k, _)| k.as_ref())
409      .collect();
410    if normal_keys.is_empty() {
411      return true;
412    }
413    false
414  }
415
416  // Recursively traverse JSON while writing XML
417  fn traverse(&mut self, node: &JsonValue, parent_key: Option<String>) -> Result<(), Error> {
418    if let Some(object) = node.as_object() {
419      // Iterate over child object elements
420      for (key, child) in object {
421        // Traverse if the parent is not an attribute and not a character key
422        let pk = parent_key.clone().unwrap_or_else(|| "".to_owned());
423        if !self.is_attrkey(&pk) && !self.is_charkey(&pk) {
424          if self.is_charkey(&key) {
425            if self.indent.is_some() && !self.is_leaf_node(object) {
426              if let Some(s) = child.as_str() {
427                // Write indentation for a case quick-xml's auto-indent heuristic doesn't cover
428                self.write_indent()?;
429                self.write_raw(s)?;
430              }
431            } else {
432              self.traverse(child, Some(key.to_owned()))?;
433            }
434          }
435          // If we're not at an attribute and child is an object, write start tag, traverse and continue
436          else if !self.is_attrkey(&key) {
437            if !child.is_array() {
438              self.write_start_tag(key, child)?;
439              self.traverse(child, None)?;
440              self.write_end_tag(key, child)?;
441            } else {
442              self.traverse(child, Some(key.to_owned()))?;
443            }
444            continue;
445          } else {
446            self.traverse(child, Some(key.to_owned()))?;
447          }
448        }
449      }
450    } else if let Some(array) = node.as_array() {
451      // Iterate over child array elements
452      for child in array {
453        if let Some(ref pk) = parent_key.as_ref() {
454          self.write_start_tag(pk, child)?;
455          self.traverse(child, None)?;
456          self.write_end_tag(pk, child)?;
457        } else {
458          self.traverse(child, None)?;
459        }
460      }
461    } else {
462      let node_s = utils::to_string_raw(node);
463      if !node_s.is_empty() {
464        self.write_text(&node_s)?;
465      }
466    }
467
468    Ok(())
469  }
470
471  /// Build XML from a JSON value
472  pub fn build_from_json(&mut self, root: &JsonValue) -> Result<String, Error> {
473    // As per node-xml2js - if the root name "root" is used, then it is not added to the produced xml
474    // document. It's unclear if this is a bug or not. Keeping this behavior for now for parity reasons
475    let explicit_root = self.root_name != *"root" || utils::json_object_key_len(root) > 1;
476    let root_name = self.root_name.clone();
477
478    self.write_xml_decl()?;
479
480    // If an explicit root is set, write that before the root defined in JSON
481    if explicit_root {
482      self.write_start_tag(&root_name, root)?;
483    }
484
485    self.traverse(root, Some(root_name.clone()))?;
486
487    if explicit_root {
488      self.write_end_tag(&root_name, root)?;
489    }
490
491    self.write_eof()?;
492
493    let mut writer_guard = self.writer.borrow_mut();
494    let writer_ref = writer_guard.deref_mut();
495    let result = writer_ref.inner().get_ref();
496    String::from_utf8(result.to_vec()).map_err(|e| e.into())
497  }
498
499  /// Build XML from a JSON string
500  pub fn build_from_json_string(&mut self, json_s: &str) -> Result<String, Error> {
501    let root = serde_json::from_str(json_s)?;
502    self.build_from_json(&root)
503  }
504}
505
#[cfg(test)]
mod tests {
  use super::*;

  use pretty_assertions::assert_eq;
  use serde_json::json;

  // A single key/value pair becomes one element after the default declaration
  #[test]
  fn build_simple() {
    let mut builder = XmlBuilder::default();
    let xml = builder.build_from_json_string(r#"{"foo":"bar"}"#).unwrap();
    assert_eq!(xml, r#"<?xml version="1.0"?><foo>bar</foo>"#);
  }

  // An object without any keys is a leaf
  #[test]
  fn leaf_node1() {
    let builder = XmlBuilder::default();
    let node = json!({});
    let is_leaf = builder.is_leaf_node(&node.as_object().unwrap());
    assert!(is_leaf);
  }

  // An object holding only the attribute and character keys is a leaf
  #[test]
  fn leaf_node2() {
    let builder = XmlBuilder::default();
    let node = serde_json::from_str(r#"{"$": {}, "_": {}}"#).unwrap();
    let is_leaf = builder.is_leaf_node(&node);
    assert!(is_leaf);
  }

  // A regular key makes the object a non-leaf
  #[test]
  fn leaf_node3() {
    let builder = XmlBuilder::default();
    let node = serde_json::from_str(r#"{"a": {}}"#).unwrap();
    let is_leaf = builder.is_leaf_node(&node);
    assert!(!is_leaf);
  }

  // A regular key next to the attribute and character keys still makes a non-leaf
  #[test]
  fn leaf_node4() {
    let builder = XmlBuilder::default();
    let node = serde_json::from_str(r#"{"$": {}, "_": {}, "a": "b"}"#).unwrap();
    let is_leaf = builder.is_leaf_node(&node);
    assert!(!is_leaf);
  }

  // An empty object is an empty tag
  #[test]
  fn empty_tag1() {
    let builder = XmlBuilder::default();
    let node = json!({});
    let is_empty = builder.is_empty_tag(&node);
    assert!(is_empty);
  }

  // A wrapped object that only carries attributes is an empty tag
  #[test]
  fn empty_tag2() {
    let builder = XmlBuilder::default();
    let node = serde_json::from_str(r#"[{"$":{"desc":"nodata"}}]"#).unwrap();
    let is_empty = builder.is_empty_tag(&node);
    assert!(is_empty);
  }

  // Attributes plus empty character data is still an empty tag
  #[test]
  fn empty_tag3() {
    let builder = XmlBuilder::default();
    let node = serde_json::from_str(r#"{"$":{"desc":"nodata"},"_":""}"#).unwrap();
    let is_empty = builder.is_empty_tag(&node);
    assert!(is_empty);
  }

  // Non-empty character data means the tag is not empty
  #[test]
  fn empty_tag4() {
    let builder = XmlBuilder::default();
    let node = serde_json::from_str(r#"[{"$":{"desc":"nodata"},"_":"A"}]"#).unwrap();
    let is_empty = builder.is_empty_tag(&node);
    assert!(!is_empty);
  }

  // A child element means the tag is not empty
  #[test]
  fn empty_tag5() {
    let builder = XmlBuilder::default();
    let node = serde_json::from_str(r#"[{"$":{"desc":"nodata"},"A":{"B":"C"}}]"#).unwrap();
    let is_empty = builder.is_empty_tag(&node);
    assert!(!is_empty);
  }

  // An object without an attribute key yields no attributes
  #[test]
  fn tag_attributes1() {
    let builder = XmlBuilder::default();
    let node = json!({});
    let attrs = builder.tag_attributes(&node).unwrap();
    assert!(attrs.is_empty());
  }

  // Attributes under the default attribute key come back as (name, value) pairs, in order
  #[test]
  fn tag_attributes2() {
    let builder = XmlBuilder::default();
    let node = serde_json::from_str(r#"{"$":{"A":"B","C":"D"}}"#).unwrap();
    let attrs = builder.tag_attributes(&node).unwrap();
    assert!(!attrs.is_empty());
    let mut expected = vec![("A", "B"), ("C", "D")];
    expected.reverse(); // lazy alternative to a VecDeque
    for attr in attrs {
      let (k, v) = attr;
      let a_key = std::str::from_utf8(k).unwrap();
      let a_val = std::str::from_utf8(v).unwrap();
      let (e_key, e_val) = expected.pop().unwrap();
      assert_eq!(e_key, a_key);
      assert_eq!(e_val, a_val);
    }
  }

  // Same as above, but with a custom attribute key
  #[test]
  fn tag_attributes3() {
    let builder = XmlConfig::new().attrkey("^").finalize();
    let node = serde_json::from_str(r#"{"^":{"A":"B","C":"D"}}"#).unwrap();
    let attrs = builder.tag_attributes(&node).unwrap();
    assert!(!attrs.is_empty());
    let mut expected = vec![("A", "B"), ("C", "D")];
    expected.reverse(); // lazy alternative to a VecDeque
    for attr in attrs {
      let (k, v) = attr;
      let a_key = std::str::from_utf8(k).unwrap();
      let a_val = std::str::from_utf8(v).unwrap();
      let (e_key, e_val) = expected.pop().unwrap();
      assert_eq!(e_key, a_key);
      assert_eq!(e_val, a_val);
    }
  }

  // The default attribute key is "$"
  #[test]
  fn attrkey1() {
    let builder = XmlBuilder::default();
    let is_key = builder.is_attrkey(&"$".to_owned());
    assert!(is_key);
  }

  // A custom attribute key replaces the default one
  #[test]
  fn attrkey2() {
    let builder = XmlConfig::new().attrkey("^").finalize();
    assert!(builder.is_attrkey(&"^".to_owned()));
    assert!(!builder.is_attrkey(&"$".to_owned()));
  }

  // The default character key is "_"
  #[test]
  fn charkey1() {
    let builder = XmlBuilder::default();
    let is_key = builder.is_charkey(&"_".to_owned());
    assert!(is_key);
  }

  // A custom character key replaces the default one
  #[test]
  fn charkey2() {
    let builder = XmlConfig::new().charkey("^").finalize();
    assert!(builder.is_charkey(&"^".to_owned()));
    assert!(!builder.is_charkey(&"_".to_owned()));
  }
}
663}