quickxml_to_serde/lib.rs
1#![allow(clippy::items_after_test_module)]
2#![allow(clippy::single_match)]
3#![allow(clippy::single_char_pattern)]
4#![allow(clippy::needless_borrow)]
5#![allow(clippy::ptr_arg)]
6//! # quickxml_to_serde
7//! Fast and flexible conversion from XML to JSON using [quick-xml](https://github.com/tafia/quick-xml)
8//! and [serde](https://github.com/serde-rs/json). Inspired by [node2object](https://github.com/vorot93/node2object).
9//!
10//! This crate converts XML elements, attributes and text nodes directly into corresponding JSON structures.
11//! Some common usage scenarios would be converting XML into JSON for loading into No-SQL databases
12//! or sending it to the front end application.
13//!
14//! Because of the richness and flexibility of XML some conversion behavior is configurable:
15//! - attribute name prefixes
16//! - naming of text nodes
17//! - number format conversion
18//!
19//! ## Usage example
20//! ```
21//! extern crate quickxml_to_serde;
22//! use quickxml_to_serde::{xml_string_to_json, Config, NullValue};
23//!
24//! fn main() {
25//! let xml = r#"<a attr1="1"><b><c attr2="001">some text</c></b></a>"#;
26//! let conf = Config::new_with_defaults();
27//! let json = xml_string_to_json(xml.to_owned(), &conf);
28//! println!("{}", json.expect("Malformed XML").to_string());
29//!
30//! let conf = Config::new_with_custom_values(true, "", "txt", NullValue::Null);
31//! let json = xml_string_to_json(xml.to_owned(), &conf);
32//! println!("{}", json.expect("Malformed XML").to_string());
33//! }
34//! ```
35//! * **Output with the default config:** `{"a":{"@attr1":1,"b":{"c":{"#text":"some text","@attr2":1}}}}`
36//! * **Output with a custom config:** `{"a":{"attr1":1,"b":{"c":{"attr2":"001","txt":"some text"}}}}`
37//!
38//! ## Additional features
39//! Use `quickxml_to_serde = { version = "0.4", features = ["json_types"] }` to enable support for enforcing JSON types
40//! for some XML nodes using xPath-like notations. Example for enforcing attribute `attr2` from the snippet above
41//! as JSON String regardless of its contents:
42//! ```
43//! use quickxml_to_serde::{Config, JsonArray, JsonType};
44//!
45//! #[cfg(feature = "json_types")]
46//! let conf = Config::new_with_defaults()
47//! .add_json_type_override("/a/b/c/@attr2", JsonArray::Infer(JsonType::AlwaysString));
48//! ```
49//!
50//! ## Detailed documentation
51//! See [README](https://github.com/AlecTroemel/quickxml_to_serde) in the source repo for more examples, limitations and detailed behavior description.
52//!
53//! ## Testing your XML files
54//!
//! If you want to see how your XML files are converted into JSON, place them into the `./test_xml_files` directory
//! and run `cargo test`. They will be converted into JSON and saved in the same directory.
57
58extern crate minidom;
59extern crate serde_json;
60
61#[cfg(feature = "regex_path")]
62extern crate regex;
63
64use minidom::{Element, Error};
65use serde_json::{Map, Number, Value};
66#[cfg(feature = "json_types")]
67use std::collections::HashMap;
68use std::str::FromStr;
69
70#[cfg(feature = "regex_path")]
71use regex::Regex;
72
73#[cfg(test)]
74mod tests;
75
/// Defines how empty elements like `<x />` should be handled.
///
/// `EmptyObject` is the default option and is how empty elements were handled prior to v.0.4.
/// Using `Ignore` on an XML document with an empty root element falls back to the `Null` option.
/// E.g. both `<a><x/></a>` and `<a/>` are converted into `{"a":null}`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NullValue {
    /// Exclude the empty element from the JSON output.
    Ignore,
    /// Convert the empty element into JSON `null`, e.g. `"x":null`.
    Null,
    /// Convert the empty element into an empty JSON object, e.g. `"x":{}`.
    EmptyObject,
}
87
88/// Defines how the values of this Node should be converted into a JSON array with the underlying types.
89/// * `Infer` - the nodes are converted into a JSON array only if there are multiple identical elements.
90/// E.g. `<a><b>1</b></a>` becomes a map `{"a": {"b": 1 }}` and `<a><b>1</b><b>2</b><b>3</b></a>` becomes
91/// an array `{"a": {"b": [1, 2, 3] }}`
92/// * `Always` - the nodes are converted into a JSON array regardless of how many there are.
93/// E.g. `<a><b>1</b></a>` becomes an array with a single value `{"a": {"b": [1] }}` and
94/// `<a><b>1</b><b>2</b><b>3</b></a>` also becomes an array `{"a": {"b": [1, 2, 3] }}`
95#[derive(Debug)]
96pub enum JsonArray {
97 /// Convert the nodes into a JSON array even if there is only one element
98 Always(JsonType),
99 /// Convert the nodes into a JSON array only if there are multiple identical elements
100 Infer(JsonType),
101}
102
/// The path argument of `Config.add_json_type_override`: tells the converter how an XML path
/// has to be matched for a JSON type override to apply. Having a dedicated type lets that single
/// function accept several kinds of path matching rules.
#[derive(Debug)]
pub enum PathMatcher {
    /// An absolute path starting with a leading slash (`/`). E.g. `/a/b/c/@d`.
    /// A `&str` converts into this variant; the leading slash is added if it is missing.
    Absolute(String),
    /// A regex that is checked against the XML path. E.g. `(\w/)*c$`.
    /// A `regex::Regex` converts into this variant.
    #[cfg(feature = "regex_path")]
    Regex(Regex),
}

// Kept for backward compatibility and brevity: a plain string is treated as an absolute path.
impl From<&str> for PathMatcher {
    fn from(value: &str) -> Self {
        // room for the path plus the slash that may have to be prepended
        let mut absolute = String::with_capacity(value.len() + 1);
        if !value.starts_with('/') {
            absolute.push('/');
        }
        absolute.push_str(value);

        Self::Absolute(absolute)
    }
}

// A compiled regex is treated as a regex path.
#[cfg(feature = "regex_path")]
impl From<Regex> for PathMatcher {
    fn from(value: Regex) -> Self {
        Self::Regex(value)
    }
}
137
/// Defines which data type to apply in JSON format for consistency of output.
/// E.g., the range of XML values for the same node type may be `1234`, `001234`, `AB1234`.
/// It is impossible to guess with 100% consistency which data type to apply without seeing
/// the entire range of values. Use this enum to tell the converter which data type should
/// be applied.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum JsonType {
    /// Do not try to infer the type and convert the value to JSON string.
    /// E.g. convert `<a>1234</a>` into `{"a":"1234"}` or `<a>true</a>` into `{"a":"true"}`
    AlwaysString,
    /// Convert values included in this member into JSON bool `true` and any other value into `false`.
    /// E.g. `Bool(vec!["True", "true", "TRUE"])` will result in any of these values becoming JSON bool `true`.
    Bool(Vec<&'static str>),
    /// Attempt to infer the type by looking at the single value of the node being converted.
    /// Not guaranteed to be consistent across multiple nodes.
    /// E.g. convert `<a>1234</a>` and `<a>001234</a>` into `{"a":1234}`, or `<a>true</a>` into `{"a":true}`
    /// Check if your values comply with JSON data types (case, range, format) to produce the expected result.
    Infer,
}
157
/// Tells the converter how to perform certain conversions.
/// See docs for individual fields for more info.
#[derive(Debug)]
pub struct Config {
    /// When `true`, integer values starting with 0 are kept as JSON strings instead of numbers.
    /// E.g. `<agent>007</agent>` becomes `"agent":"007"` when set and `"agent":7` otherwise.
    /// A single `0` is always converted into the number 0.
    /// Floats with a leading zero that is not followed by a decimal point (e.g. `007.5`)
    /// are always kept as strings, regardless of this setting.
    /// Defaults to `false`.
    pub leading_zero_as_string: bool,
    /// Prefix XML attribute names with this value to distinguish them from XML elements.
    /// E.g. set it to `@` for `<x a="Hello!" />` to become `{"x": {"@a":"Hello!"}}`
    /// or set it to a blank string for `{"x": {"a":"Hello!"}}`
    /// Defaults to `@`.
    pub xml_attr_prefix: String,
    /// A property name for XML text nodes.
    /// E.g. set it to `text` for `<x a="Hello!">Goodbye!</x>` to become `{"x": {"@a":"Hello!", "text":"Goodbye!"}}`
    /// XML nodes with text only and no attributes are converted into JSON properties with the
    /// name of the element. E.g. `<x>Goodbye!</x>` becomes `{"x":"Goodbye!"}`
    /// Defaults to `#text`
    pub xml_text_node_prop_name: String,
    /// Defines how empty elements like `<x />` should be handled.
    /// Defaults to `NullValue::EmptyObject`.
    pub empty_element_handling: NullValue,
    /// A map of absolute XML paths with their `JsonArray` overrides. A node whose path is not in the
    /// map is treated as `JsonArray::Infer(JsonType::Infer)`.
    /// The path syntax is based on xPath: literal element names and attribute names prefixed with `@`.
    /// Keys are compared verbatim against paths that always start with a leading `/`, so a key without
    /// it never matches. `add_json_type_override` adds the leading slash to `&str` paths automatically.
    /// # Example
    /// - **XML**: `<a><b c="123">007</b></a>`
    /// - path for `c`: `/a/b/@c`
    /// - path for `b` text node (007): `/a/b`
    #[cfg(feature = "json_types")]
    pub json_type_overrides: HashMap<String, JsonArray>,
    /// A list of pairs of regex and `JsonArray` overrides. They are checked in insertion order, the first
    /// matching regex wins, and they take precedence over the `json_type_overrides` property.
    /// The regex is matched against the same xPath-like paths that `json_type_overrides` uses.
    #[cfg(feature = "regex_path")]
    pub json_regex_type_overrides: Vec<(Regex, JsonArray)>,
}
194
195impl Config {
196 /// Numbers with leading zero will be treated as numbers.
197 /// Prefix XML Attribute names with `@`
198 /// Name XML text nodes `#text` for XML Elements with other children
199 pub fn new_with_defaults() -> Self {
200 Config {
201 leading_zero_as_string: false,
202 xml_attr_prefix: "@".to_owned(),
203 xml_text_node_prop_name: "#text".to_owned(),
204 empty_element_handling: NullValue::EmptyObject,
205 #[cfg(feature = "json_types")]
206 json_type_overrides: HashMap::new(),
207 #[cfg(feature = "regex_path")]
208 json_regex_type_overrides: Vec::new(),
209 }
210 }
211
212 /// Create a Config object with non-default values. See the `Config` struct docs for more info.
213 pub fn new_with_custom_values(
214 leading_zero_as_string: bool,
215 xml_attr_prefix: &str,
216 xml_text_node_prop_name: &str,
217 empty_element_handling: NullValue,
218 ) -> Self {
219 Config {
220 leading_zero_as_string,
221 xml_attr_prefix: xml_attr_prefix.to_owned(),
222 xml_text_node_prop_name: xml_text_node_prop_name.to_owned(),
223 empty_element_handling,
224 #[cfg(feature = "json_types")]
225 json_type_overrides: HashMap::new(),
226 #[cfg(feature = "regex_path")]
227 json_regex_type_overrides: Vec::new(),
228 }
229 }
230
231 /// Adds a single JSON Type override rule to the current config.
232 /// # Example
233 /// - **XML**: `<a><b c="123">007</b></a>`
234 /// - path for `c`: `/a/b/@c`
235 /// - path for `b` text node (007): `/a/b`
236 /// - regex path for any `element` node: `(\w/)*element$` [requires `regex_path` feature]
237 #[cfg(feature = "json_types")]
238 pub fn add_json_type_override<P>(self, path: P, json_type: JsonArray) -> Self
239 where
240 P: Into<PathMatcher>
241 {
242 let mut conf = self;
243
244 match path.into() {
245 PathMatcher::Absolute(path) => {
246 conf.json_type_overrides.insert(path, json_type);
247 }
248 #[cfg(feature = "regex_path")]
249 PathMatcher::Regex(regex) => {
250 conf.json_regex_type_overrides.push((
251 regex,
252 json_type
253 ));
254 }
255 }
256
257 conf
258 }
259}
260
261impl Default for Config {
262 fn default() -> Self {
263 Config::new_with_defaults()
264 }
265}
266
267/// Returns the text as one of `serde::Value` types: int, float, bool or string.
268fn parse_text(text: &str, leading_zero_as_string: bool, json_type: &JsonType) -> Value {
269 let text = text.trim();
270
271 // enforce JSON String data type regardless of the underlying type
272 if json_type == &JsonType::AlwaysString {
273 return Value::String(text.into());
274 }
275
276 // enforce JSON Bool data type
277 #[cfg(feature = "json_types")]
278 if let JsonType::Bool(true_values) = json_type {
279 if true_values.contains(&text) {
280 // any values matching the `true` list are bool/true
281 return Value::Bool(true);
282 } else {
283 // anything else is false
284 return Value::Bool(false);
285 }
286 }
287
288 // ints
289 if let Ok(v) = text.parse::<u64>() {
290 // don't parse octal numbers and those with leading 0
291 // `text` value "0" will always be converted into number 0, "0000" may be converted
292 // into 0 or "0000" depending on `leading_zero_as_string`
293 if leading_zero_as_string && text.starts_with("0") && (v != 0 || text.len() > 1) {
294 return Value::String(text.into());
295 }
296 return Value::Number(Number::from(v));
297 }
298
299 // floats
300 if let Ok(v) = text.parse::<f64>() {
301 if text.starts_with("0") && !text.starts_with("0.") {
302 return Value::String(text.into());
303 }
304 if let Some(val) = Number::from_f64(v) {
305 return Value::Number(val);
306 }
307 }
308
309 // booleans
310 if let Ok(v) = text.parse::<bool>() {
311 return Value::Bool(v);
312 }
313
314 Value::String(text.into())
315}
316
317/// Converts an XML Element into a JSON property
318fn convert_node(el: &Element, config: &Config, path: &String) -> Option<Value> {
319 // add the current node to the path
320 #[cfg(feature = "json_types")]
321 let path = [path, "/", el.name()].concat();
322
323 // get the json_type for this node
324 let (_, json_type_value) = get_json_type(config, &path);
325
326 // is it an element with text?
327 if el.text().trim() != "" {
328 // process node's attributes, if present
329 if el.attrs().count() > 0 {
330 Some(Value::Object(
331 el.attrs()
332 .map(|(k, v)| {
333 // add the current node to the path
334 #[cfg(feature = "json_types")]
335 let path = [path.clone(), "/@".to_owned(), k.to_owned()].concat();
336 // get the json_type for this node
337 #[cfg(feature = "json_types")]
338 let (_, json_type_value) = get_json_type(config, &path);
339 (
340 [config.xml_attr_prefix.clone(), k.to_owned()].concat(),
341 parse_text(&v, config.leading_zero_as_string, &json_type_value),
342 )
343 })
344 .chain(vec![(
345 config.xml_text_node_prop_name.clone(),
346 parse_text(
347 &el.text()[..],
348 config.leading_zero_as_string,
349 &json_type_value,
350 ),
351 )])
352 .collect(),
353 ))
354 } else {
355 Some(parse_text(
356 &el.text()[..],
357 config.leading_zero_as_string,
358 &json_type_value,
359 ))
360 }
361 } else {
362 // this element has no text, but may have other child nodes
363 let mut data = Map::new();
364
365 for (k, v) in el.attrs() {
366 // add the current node to the path
367 #[cfg(feature = "json_types")]
368 let path = [path.clone(), "/@".to_owned(), k.to_owned()].concat();
369 // get the json_type for this node
370 #[cfg(feature = "json_types")]
371 let (_, json_type_value) = get_json_type(config, &path);
372 data.insert(
373 [config.xml_attr_prefix.clone(), k.to_owned()].concat(),
374 parse_text(&v, config.leading_zero_as_string, &json_type_value),
375 );
376 }
377
378 // process child element recursively
379 for child in el.children() {
380 match convert_node(child, config, &path) {
381 Some(val) => {
382 let name = &child.name().to_string();
383
384 #[cfg(feature = "json_types")]
385 let path = [path.clone(), "/".to_owned(), name.clone()].concat();
386 let (json_type_array, _) = get_json_type(config, &path);
387 // does it have to be an array?
388 if json_type_array || data.contains_key(name) {
389 // was this property converted to an array earlier?
390 if data.get(name).unwrap_or(&Value::Null).is_array() {
391 // add the new value to an existing array
392 data.get_mut(name)
393 .unwrap()
394 .as_array_mut()
395 .unwrap()
396 .push(val);
397 } else {
398 // convert the property to an array with the existing and the new values
399 let new_val = match data.remove(name) {
400 None => vec![val],
401 Some(temp) => vec![temp, val],
402 };
403 data.insert(name.clone(), Value::Array(new_val));
404 }
405 } else {
406 // this is the first time this property is encountered and it doesn't
407 // have to be an array, so add it as-is
408 data.insert(name.clone(), val);
409 }
410 }
411 _ => (),
412 }
413 }
414
415 // return the JSON object if it's not empty
416 if !data.is_empty() {
417 return Some(Value::Object(data));
418 }
419
420 // empty objects are treated according to config rules set by the caller
421 match config.empty_element_handling {
422 NullValue::Null => Some(Value::Null),
423 NullValue::EmptyObject => Some(Value::Object(data)),
424 NullValue::Ignore => None,
425 }
426 }
427}
428
429fn xml_to_map(e: &Element, config: &Config) -> Value {
430 let mut data = Map::new();
431 data.insert(
432 e.name().to_string(),
433 convert_node(&e, &config, &String::new()).unwrap_or(Value::Null),
434 );
435 Value::Object(data)
436}
437
438/// Converts the given XML string into `serde::Value` using settings from `Config` struct.
439pub fn xml_str_to_json(xml: &str, config: &Config) -> Result<Value, Error> {
440 let root = Element::from_str(xml)?;
441 Ok(xml_to_map(&root, config))
442}
443
444/// Converts the given XML string into `serde::Value` using settings from `Config` struct.
445pub fn xml_string_to_json(xml: String, config: &Config) -> Result<Value, Error> {
446 xml_str_to_json(xml.as_str(), config)
447}
448
/// Looks up the override registered for the exact `path` in `config.json_type_overrides`.
///
/// Returns a tuple of the array enforcement flag (`true` for `JsonArray::Always`) and the JSON type
/// to apply to the value, or `(false, &JsonType::Infer)` if there is no override for the path.
#[cfg(feature = "json_types")]
#[inline]
fn get_json_type_with_absolute_path<'conf>(config: &'conf Config, path: &String) -> (bool, &'conf JsonType) {
    match config.json_type_overrides.get(path) {
        Some(JsonArray::Always(json_type)) => (true, json_type),
        Some(JsonArray::Infer(json_type)) => (false, json_type),
        None => (false, &JsonType::Infer),
    }
}
464
/// Returns the array enforcement flag and the JSON type for the node at `path`.
/// Without the `regex_path` feature only absolute paths are registered, so this is
/// just `get_json_type_with_absolute_path`.
#[cfg(all(feature = "json_types", not(feature = "regex_path")))]
#[inline]
fn get_json_type<'conf>(config: &'conf Config, path: &String) -> (bool, &'conf JsonType) {
    get_json_type_with_absolute_path(config, path)
}
472
/// Returns the array enforcement flag and the JSON type for the node at `path`.
///
/// Regex overrides are checked first, in the order they were added, and the first match wins.
/// If no regex matches, the absolute path overrides are consulted, which yield
/// `(false, &JsonType::Infer)` when the path has no override at all.
#[cfg(all(feature = "json_types", feature = "regex_path"))]
#[inline]
fn get_json_type<'conf>(config: &'conf Config, path: &String) -> (bool, &'conf JsonType) {
    config
        .json_regex_type_overrides
        .iter()
        .find(|(regex, _)| regex.is_match(path))
        .map(|(_, json_array)| match json_array {
            JsonArray::Always(json_type) => (true, json_type),
            JsonArray::Infer(json_type) => (false, json_type),
        })
        .unwrap_or_else(|| get_json_type_with_absolute_path(config, path))
}
491
492/// Always returns `(false, JsonArray::Infer(JsonType::Infer)` if `json_types` feature is not enabled.
493#[cfg(not(feature = "json_types"))]
494#[inline]
495fn get_json_type<'conf>(_config: &'conf Config, _path: &String) -> (bool, &'conf JsonType) {
496 (false, &JsonType::Infer)
497}