xml2json_rs/
json.rs

1use crate::{
2  error::{Error, ErrorKind},
3  utils
4};
5
6use serde_json::{json, Value as JsonValue};
7
8use quick_xml::{events::*, Reader};
9use regex::{NoExpand, Regex};
10
11lazy_static! {
12  static ref WHITESPACE_RE: Regex = Regex::new(r"^\s*$").unwrap();
13  static ref TWO_OR_MORE_WHITESPACE_RE: Regex = Regex::new(r"\s{2,}").unwrap();
14}
15
/// Configuration options for [`JsonBuilder`].
///
/// Every option is stored as an `Option`; a value of `None` means "not set", in which case the
/// documented default is applied when the configuration is turned into a [`JsonBuilder`].
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct JsonConfig {
  // Key under which character content is stored.
  charkey:          Option<String>,
  // Key of the object that holds a tag's attributes.
  attrkey:          Option<String>,
  // Value emitted for empty nodes.
  empty_tag:        Option<String>,
  // Whether the root node is kept inside the resulting object.
  explicit_root:    Option<bool>,
  // Whether text nodes are trimmed.
  trim:             Option<bool>,
  // Whether attributes are skipped entirely.
  ignore_attrs:     Option<bool>,
  // Whether attributes are merged into the element object instead of being keyed under `attrkey`.
  merge_attrs:      Option<bool>,
  // Whether runs of whitespace inside text nodes are collapsed.
  normalize_text:   Option<bool>,
  // Whether tag names are converted to lowercase.
  lowercase_tags:   Option<bool>,
  // Whether child nodes are always wrapped in an array.
  explicit_array:   Option<bool>,
  // Whether character data is always stored under `charkey`.
  explicit_charkey: Option<bool>
}
31
32/// JsonBuilder configuration options
33impl JsonConfig {
34  /// Initialze a new JsonConfig instance.
35  ///
36  /// This uses the builder pattern. All options are initialized to `None` and can be set using
37  /// `self`s methods. Any options not set will use their defaults upon call to `finalize`.
38  pub fn new() -> JsonConfig {
39    JsonConfig {
40      charkey:          None,
41      attrkey:          None,
42      empty_tag:        None,
43      explicit_root:    None,
44      trim:             None,
45      ignore_attrs:     None,
46      merge_attrs:      None,
47      normalize_text:   None,
48      lowercase_tags:   None,
49      explicit_array:   None,
50      explicit_charkey: None
51    }
52  }
53
54  /// Key to store character content under.
55  ///
56  /// (`"_"` by default)
57  pub fn charkey<T: Into<String>>(&mut self, key: T) -> &mut JsonConfig {
58    self.charkey = Some(key.into());
59    self
60  }
61
62  /// Key to outer object containing tag attributes.
63  ///
64  /// (`"$"` by default)
65  pub fn attrkey<T: Into<String>>(&mut self, key: T) -> &mut JsonConfig {
66    self.attrkey = Some(key.into());
67    self
68  }
69
70  /// The value of empty nodes.
71  ///
72  /// Can be used if you want to specify a value other than `""` for empty nodes.
73  /// (`""` by default)
74  pub fn empty_tag<T: Into<String>>(&mut self, key: T) -> &mut JsonConfig {
75    self.empty_tag = Some(key.into());
76    self
77  }
78
79  /// Sets the root node inside the resulting object.
80  ///
81  /// (`true` by default)
82  pub fn explicit_root(&mut self, flag: bool) -> &mut JsonConfig {
83    self.explicit_root = Some(flag);
84    self
85  }
86
87  /// Trim whitespace at the beginning and end of text nodes.
88  ///
89  /// (`false` by default)
90  pub fn trim(&mut self, flag: bool) -> &mut JsonConfig {
91    self.trim = Some(flag);
92    self
93  }
94
95  /// Ingore attributes.
96  ///
97  /// Setting this to true will skip adding element attributes and create text nodes.
98  ///
99  /// (`false` by default)
100  pub fn ignore_attrs(&mut self, flag: bool) -> &mut JsonConfig {
101    self.ignore_attrs = Some(flag);
102    self
103  }
104
105  /// Merge attributes.
106  ///
107  /// Merge all XML attributes and child elements as properties of the parent, instead of keying
108  /// attributes off of the child attribute object. This option will be ignored if `ignore_attrs`
109  /// is set.
110  ///
111  /// (`false` by default)
112  pub fn merge_attrs(&mut self, flag: bool) -> &mut JsonConfig {
113    self.merge_attrs = Some(flag);
114    self
115  }
116
117  /// Removes whitespace character data in text nodes.
118  ///
119  /// This option will result in behavior that is a superset of [`trim`]. Whitespace at the
120  /// beginning and end of text nodes will be trimmed. In addition, blank space (`/s`) between
121  /// other text data will be converted to a single space (`" "`). Corresponds to the `normalize`
122  /// option in node-xmlj2.
123  ///
124  /// (`false` by default)
125  ///
126  /// [`trim`]: struct.JsonConfig.html#method.trim
127  pub fn normalize_text(&mut self, flag: bool) -> &mut JsonConfig {
128    self.normalize_text = Some(flag);
129    self
130  }
131
132  /// Normalize all tags by converting them to lowercase.
133  ///
134  /// Corresponds to the `normalizeTags` option in node-xml2js.
135  ///
136  /// (`false` by default)
137  pub fn lowercase_tags(&mut self, flag: bool) -> &mut JsonConfig {
138    self.lowercase_tags = Some(flag);
139    self
140  }
141
142  /// Always put the child nodes in an array, otherwise an array is only created if there is more
143  /// than one child.
144  ///
145  /// (`true` by default)
146  pub fn explicit_array(&mut self, flag: bool) -> &mut JsonConfig {
147    self.explicit_array = Some(flag);
148    self
149  }
150
151  /// Always store character data under `charkey` even if there are are no other text elements
152  /// stored inside the tag.
153  ///
154  /// (`false` by default)
155  pub fn explicit_charkey(&mut self, flag: bool) -> &mut JsonConfig {
156    self.explicit_charkey = Some(flag);
157    self
158  }
159
160  /// Finalize configuration options and build a JsonBuilder instance
161  pub fn finalize(&self) -> JsonBuilder {
162    JsonBuilder {
163      charkey:          self.charkey.clone().unwrap_or_else(|| "_".to_owned()),
164      attrkey:          self.attrkey.clone().unwrap_or_else(|| "$".to_owned()),
165      empty_tag:        self.empty_tag.clone().unwrap_or_else(|| "".to_owned()),
166      explicit_root:    self.explicit_root.clone().unwrap_or(true),
167      trim:             self.trim.clone().unwrap_or(false),
168      ignore_attrs:     self.ignore_attrs.clone().unwrap_or(false),
169      merge_attrs:      self.merge_attrs.clone().unwrap_or(false),
170      normalize_text:   self.normalize_text.clone().unwrap_or(false),
171      lowercase_tags:   self.lowercase_tags.clone().unwrap_or(false),
172      explicit_array:   self.explicit_array.clone().unwrap_or(true),
173      explicit_charkey: self.explicit_charkey.clone().unwrap_or(false)
174    }
175  }
176}
177
// Text storage with state to distinguish between text in elements and text in CDATA sections.
// CDATA (literal) text will be added to JSON even when it is whitespace.
//
// The derived `Default` yields empty `data` and `literal == false`.
#[derive(Default)]
struct Text {
  // Character data accumulated so far for the current element
  data:    String,
  // Set once any of the accumulated data came from a CDATA section
  literal: bool
}
193
194// Stores state for the current and previous levels in the XML tree.
195struct Node {
196  value: JsonValue,
197  text:  Text
198}
199
200impl Node {
201  fn new() -> Node {
202    Node {
203      value: json!({}),
204      text:  Text::default()
205    }
206  }
207}
208
/// JSON builder struct for building JSON from XML
///
/// Holds the finalized (fully resolved) conversion options.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JsonBuilder {
  // Key under which character content is stored
  charkey:          String,
  // Key of the object that holds a tag's attributes
  attrkey:          String,
  // Value used for empty nodes; an empty string means "no replacement"
  empty_tag:        String,
  // Wrap the result in an object keyed by the root tag name
  explicit_root:    bool,
  // Trim whitespace at the beginning and end of text nodes
  trim:             bool,
  // Skip attributes entirely
  ignore_attrs:     bool,
  // Merge attributes into the element object instead of keying them under `attrkey`
  merge_attrs:      bool,
  // Collapse runs of whitespace inside text nodes and trim the result
  normalize_text:   bool,
  // Convert tag names to lowercase
  lowercase_tags:   bool,
  // Always wrap child nodes in an array
  explicit_array:   bool,
  // Always store character data under `charkey`
  explicit_charkey: bool
}
223
224impl Default for JsonBuilder {
225  fn default() -> JsonBuilder {
226    JsonBuilder {
227      charkey:          "_".to_owned(),
228      attrkey:          "$".to_owned(),
229      empty_tag:        "".to_owned(),
230      explicit_root:    true,
231      trim:             false,
232      ignore_attrs:     false,
233      merge_attrs:      false,
234      normalize_text:   false,
235      lowercase_tags:   false,
236      explicit_array:   true,
237      explicit_charkey: false
238    }
239  }
240}
241
242impl JsonBuilder {
243  // If text matches only newlines, spaces and tabs
244  fn is_whitespace(&self, value: &str) -> bool {
245    WHITESPACE_RE.is_match(value)
246  }
247
248  // This function is used to build out the JSON object.
249  // the behavior depends on the `explicit_array` setting. When this value is
250  // - true: an array will be created at `key` if it doesn't exist and new values will be pushed
251  // - false: `value` is assigned at `key` and converted into an array if there are multiple values
252  // at that key
253  fn assign_or_push(&self, object: &mut JsonValue, key: &str, value: JsonValue) {
254    if object.get(key).is_none() {
255      if self.explicit_array {
256        object[key] = json!([value]);
257      } else {
258        object[key] = value;
259      }
260    } else {
261      // Wrap object[key] in an array if it isn't one already
262      if !object[key].is_array() {
263        let current = object[key].take();
264        object[key] = json!([current]);
265      }
266      if let Some(array) = object[key].as_array_mut() {
267        array.push(value);
268      }
269    }
270  }
271
272  // Process start tag
273  fn process_start(&self, event: &BytesStart, stack: &mut Vec<Node>, reader: &mut Reader<&[u8]>) -> Result<(), Error> {
274    let mut node = Node::new();
275
276    // Add any attributes
277    if !self.ignore_attrs {
278      // Initialize attribute object
279      if event.attributes().peekable().peek().is_some() && node.value.get(&self.attrkey).is_none() && !self.merge_attrs {
280        node.value[&self.attrkey] = json!({});
281      }
282
283      for attr in event.attributes() {
284        if let Ok(attr) = attr {
285          let value = attr.unescape_and_decode_value(&reader)?;
286          let key = std::str::from_utf8(attr.key)?;
287          if self.merge_attrs {
288            self.assign_or_push(&mut node.value, key, value.into());
289          } else {
290            node.value[&self.attrkey][key] = value.into();
291          }
292        }
293      }
294    }
295
296    stack.push(node);
297    Ok(())
298  }
299
300  // Process text
301  fn process_text(&self, event: &BytesText, stack: &mut Vec<Node>, reader: &mut Reader<&[u8]>) -> Result<(), Error> {
302    let cdata = event.unescape_and_decode(&reader)?;
303
304    if let Some(last_node) = stack.last_mut() {
305      let text = &mut last_node.text.data;
306      // Setting reader.trim_text will remove all whitespaces in char data. To preserve
307      // compatibility with node-xml2js two or more consecutive whitespace characters will be
308      // replaced with a single space and then the resulting string will be trimmed
309      if self.normalize_text && !text.is_empty() {
310        let normalized = TWO_OR_MORE_WHITESPACE_RE.replace_all(text, NoExpand(" ")).into_owned();
311        text.clear();
312        text.push_str(&normalized);
313        let _ = text.trim();
314      }
315      text.push_str(&cdata);
316    }
317
318    Ok(())
319  }
320
321  // Process end, takes a `tag` rather than an `event` since an Event::Empty(e) uses this function as
322  // well
323  fn process_end(&self, tag: &[u8], stack: &mut Vec<Node>) -> Result<Option<JsonValue>, Error> {
324    let close_tag = if self.lowercase_tags {
325      std::str::from_utf8(tag)?.to_lowercase()
326    } else {
327      std::str::from_utf8(tag)?.to_owned()
328    };
329    // The JSON value that which will be nested inside of `outer` (unless we are at EOF)
330    let mut inner = match stack.pop() {
331      Some(j) => j,
332      None => return Err(Error::new(ErrorKind::Unknown, "Expected stack item at close tag."))
333    };
334    let stack_len = stack.len();
335    let outer = stack.last_mut();
336
337    // This can grow to contain other whitespace characters ('\s')
338    let mut whitespace = "".to_owned();
339    let mut text = inner.text.data.as_ref();
340
341    if self.is_whitespace(text) && !inner.text.literal {
342      whitespace.push_str(text);
343    } else {
344      if self.trim {
345        text = text.trim();
346      }
347
348      // FIXME: warning for unused `normalized` can this be restructured in a better way?
349      let mut _normalized = String::new();
350      if self.normalize_text {
351        _normalized = TWO_OR_MORE_WHITESPACE_RE.replace_all(text, NoExpand(" ")).into_owned();
352        text = _normalized.trim().as_ref();
353      }
354
355      if utils::json_is_empty(&inner.value) && !self.explicit_charkey {
356        inner.value = JsonValue::String(text.to_owned());
357      } else {
358        inner.value[&self.charkey] = text.into();
359      }
360    }
361
362    if utils::json_is_empty(&inner.value) {
363      if !self.empty_tag.is_empty() {
364        inner.value = JsonValue::String(self.empty_tag.clone());
365      } else {
366        inner.value = JsonValue::String(whitespace);
367      }
368    }
369
370    // Check if we have closed all open tags
371    if stack_len > 0 {
372      if let Some(outer) = outer {
373        self.assign_or_push(&mut outer.value, &close_tag, inner.value);
374      }
375    } else {
376      // At EOF - either wrap result in an explicit root or return inner's value
377      let output = if self.explicit_root {
378        let output = json!({
379          close_tag: inner.value
380        });
381        output
382      } else {
383        inner.value
384      };
385      return Ok(Some(output));
386    }
387    Ok(None)
388  }
389
390  // Process empty
391  fn process_empty(&self, event: &BytesStart, stack: &mut Vec<Node>, reader: &mut Reader<&[u8]>) -> Result<Option<JsonValue>, Error> {
392    self.process_start(event, stack, reader)?;
393    self.process_end(event.name(), stack)
394  }
395
396  // Process XML CDATA
397  fn process_cdata(&self, event: &BytesCData, stack: &mut Vec<Node>, reader: &mut Reader<&[u8]>) -> Result<(), Error> {
398    self.process_text(&event.clone().escape(), stack, reader)?;
399
400    if let Some(mut last_node) = stack.last_mut() {
401      last_node.text.literal = true;
402    }
403    Ok(())
404  }
405
406  /// Build JSON from xml
407  pub fn build_from_xml(&self, xml: &str) -> Result<JsonValue, Error> {
408    let mut reader = Reader::from_str(xml);
409    let mut buffer = Vec::new();
410    let mut output = JsonValue::Null;
411    let mut stack = Vec::new();
412
413    loop {
414      match reader.read_event(&mut buffer) {
415        Ok(Event::Start(ref e)) => self.process_start(e, &mut stack, &mut reader)?,
416
417        Ok(Event::Text(ref e)) => self.process_text(e, &mut stack, &mut reader)?,
418
419        Ok(Event::End(ref e)) => {
420          if let Some(o) = self.process_end(e.name(), &mut stack)? {
421            output = o;
422          }
423        },
424
425        Ok(Event::CData(ref e)) => self.process_cdata(e, &mut stack, &mut reader)?,
426
427        Ok(Event::Empty(ref e)) => {
428          if let Some(o) = self.process_empty(e, &mut stack, &mut reader)? {
429            output = o;
430          }
431        },
432
433        Ok(Event::Eof) => {
434          break;
435        },
436
437        // Skip over everything else
438        Ok(_) => (),
439
440        Err(e) => {
441          return Err(Error::new(
442            ErrorKind::Syntax,
443            format!("Error at position {}: {:?}", reader.buffer_position(), e)
444          ))
445        },
446      }
447
448      buffer.clear();
449    }
450
451    Ok(output)
452  }
453
454  /// Build JSON string from xml
455  pub fn build_string_from_xml(&self, xml: &str) -> Result<String, Error> {
456    let object = self.build_from_xml(xml)?;
457    serde_json::to_string(&object).map_err(|e| e.into())
458  }
459
460  /// Build pretty JSON string from xml
461  pub fn build_pretty_string_from_xml(&self, xml: &str) -> Result<String, Error> {
462    let object = self.build_from_xml(xml)?;
463    serde_json::to_string_pretty(&object).map_err(|e| e.into())
464  }
465}
466
#[cfg(test)]
mod tests {
  use super::*;

  use pretty_assertions::assert_eq;

  // Mismatched opening and closing tags have to be reported as a syntax error
  #[test]
  fn invalid_xml() {
    let result = JsonBuilder::default().build_from_xml("<foo>bar</baz>");
    let err = result.unwrap_err();
    assert_eq!(err.kind(), ErrorKind::Syntax)
  }

  // Spaces, tabs and newlines only
  #[test]
  fn is_whitespace1() {
    let input = " \t \n ";
    assert!(JsonBuilder::default().is_whitespace(input));
  }

  // Whitespace surrounding other characters
  #[test]
  fn is_whitespace2() {
    let input = " \t A \n ";
    assert!(!JsonBuilder::default().is_whitespace(input));
  }

  // With `explicit_array` (the default) even a single value ends up in an array
  #[test]
  fn assign_or_push1() {
    let builder = JsonBuilder::default();
    let expected: JsonValue = serde_json::from_str(r#"{"A":["B"],"C":["D","E"]}"#).unwrap();

    let mut actual = json!({});
    builder.assign_or_push(&mut actual, "A", "B".into());
    builder.assign_or_push(&mut actual, "C", "D".into());
    builder.assign_or_push(&mut actual, "C", "E".into());

    assert_eq!(actual, expected);
  }

  // Without `explicit_array` an array is only created once a key receives a second value
  #[test]
  fn assign_or_push2() {
    let builder = JsonConfig::new().explicit_array(false).finalize();
    let expected: JsonValue = serde_json::from_str(r#"{"A":"B","C":["D","E"]}"#).unwrap();

    let mut actual = json!({});
    builder.assign_or_push(&mut actual, "A", "B".into());
    builder.assign_or_push(&mut actual, "C", "D".into());
    builder.assign_or_push(&mut actual, "C", "E".into());

    assert_eq!(actual, expected);
  }
}