1use crate::{
2 error::{Error, ErrorKind},
3 utils
4};
5
6use serde_json::{json, Value as JsonValue};
7
8use quick_xml::{events::*, Reader};
9use regex::{NoExpand, Regex};
10
lazy_static! {
    // Matches text that is empty or made up solely of whitespace characters.
    static ref WHITESPACE_RE: Regex = Regex::new(r"^\s*$").unwrap();
    // Matches any run of two or more consecutive whitespace characters.
    static ref TWO_OR_MORE_WHITESPACE_RE: Regex = Regex::new(r"\s{2,}").unwrap();
}
15
16#[derive(Default)]
18pub struct JsonConfig {
19 charkey: Option<String>,
20 attrkey: Option<String>,
21 empty_tag: Option<String>,
22 explicit_root: Option<bool>,
23 trim: Option<bool>,
24 ignore_attrs: Option<bool>,
25 merge_attrs: Option<bool>,
26 normalize_text: Option<bool>,
27 lowercase_tags: Option<bool>,
28 explicit_array: Option<bool>,
29 explicit_charkey: Option<bool>
30}
31
32impl JsonConfig {
34 pub fn new() -> JsonConfig {
39 JsonConfig {
40 charkey: None,
41 attrkey: None,
42 empty_tag: None,
43 explicit_root: None,
44 trim: None,
45 ignore_attrs: None,
46 merge_attrs: None,
47 normalize_text: None,
48 lowercase_tags: None,
49 explicit_array: None,
50 explicit_charkey: None
51 }
52 }
53
54 pub fn charkey<T: Into<String>>(&mut self, key: T) -> &mut JsonConfig {
58 self.charkey = Some(key.into());
59 self
60 }
61
62 pub fn attrkey<T: Into<String>>(&mut self, key: T) -> &mut JsonConfig {
66 self.attrkey = Some(key.into());
67 self
68 }
69
70 pub fn empty_tag<T: Into<String>>(&mut self, key: T) -> &mut JsonConfig {
75 self.empty_tag = Some(key.into());
76 self
77 }
78
79 pub fn explicit_root(&mut self, flag: bool) -> &mut JsonConfig {
83 self.explicit_root = Some(flag);
84 self
85 }
86
87 pub fn trim(&mut self, flag: bool) -> &mut JsonConfig {
91 self.trim = Some(flag);
92 self
93 }
94
95 pub fn ignore_attrs(&mut self, flag: bool) -> &mut JsonConfig {
101 self.ignore_attrs = Some(flag);
102 self
103 }
104
105 pub fn merge_attrs(&mut self, flag: bool) -> &mut JsonConfig {
113 self.merge_attrs = Some(flag);
114 self
115 }
116
117 pub fn normalize_text(&mut self, flag: bool) -> &mut JsonConfig {
128 self.normalize_text = Some(flag);
129 self
130 }
131
132 pub fn lowercase_tags(&mut self, flag: bool) -> &mut JsonConfig {
138 self.lowercase_tags = Some(flag);
139 self
140 }
141
142 pub fn explicit_array(&mut self, flag: bool) -> &mut JsonConfig {
147 self.explicit_array = Some(flag);
148 self
149 }
150
151 pub fn explicit_charkey(&mut self, flag: bool) -> &mut JsonConfig {
156 self.explicit_charkey = Some(flag);
157 self
158 }
159
160 pub fn finalize(&self) -> JsonBuilder {
162 JsonBuilder {
163 charkey: self.charkey.clone().unwrap_or_else(|| "_".to_owned()),
164 attrkey: self.attrkey.clone().unwrap_or_else(|| "$".to_owned()),
165 empty_tag: self.empty_tag.clone().unwrap_or_else(|| "".to_owned()),
166 explicit_root: self.explicit_root.clone().unwrap_or(true),
167 trim: self.trim.clone().unwrap_or(false),
168 ignore_attrs: self.ignore_attrs.clone().unwrap_or(false),
169 merge_attrs: self.merge_attrs.clone().unwrap_or(false),
170 normalize_text: self.normalize_text.clone().unwrap_or(false),
171 lowercase_tags: self.lowercase_tags.clone().unwrap_or(false),
172 explicit_array: self.explicit_array.clone().unwrap_or(true),
173 explicit_charkey: self.explicit_charkey.clone().unwrap_or(false)
174 }
175 }
176}
177
/// Character data collected for an element while it is being parsed.
///
/// The derived `Default` yields empty `data` and `literal == false`.
#[derive(Default)]
struct Text {
    /// Text accumulated so far.
    data: String,
    /// Set once a CDATA section has contributed to `data`; literal text is
    /// kept even when it consists only of whitespace.
    literal: bool
}
193
194struct Node {
196 value: JsonValue,
197 text: Text
198}
199
200impl Node {
201 fn new() -> Node {
202 Node {
203 value: json!({}),
204 text: Text::default()
205 }
206 }
207}
208
/// Converts XML into JSON according to a fixed set of options.
pub struct JsonBuilder {
    /// Key under which element text is stored when it is not the whole value.
    charkey: String,
    /// Key of the object that holds an element's attributes.
    attrkey: String,
    /// Value given to empty elements; an empty string disables the substitution.
    empty_tag: String,
    /// Wrap the result in an object keyed by the root tag name.
    explicit_root: bool,
    /// Trim leading and trailing whitespace from element text.
    trim: bool,
    /// Leave attributes out of the output.
    ignore_attrs: bool,
    /// Store attributes directly on the element's object.
    merge_attrs: bool,
    /// Collapse runs of whitespace in element text.
    normalize_text: bool,
    /// Convert tag names to lowercase.
    lowercase_tags: bool,
    /// Wrap a value in an array the first time its key is seen.
    explicit_array: bool,
    /// Always store element text under `charkey`.
    explicit_charkey: bool
}

impl Default for JsonBuilder {
    /// Returns a builder with `"_"` as the character key, `"$"` as the
    /// attribute key, no empty-tag substitution, `explicit_root` and
    /// `explicit_array` enabled, and every other flag disabled.
    fn default() -> Self {
        Self {
            charkey: String::from("_"),
            attrkey: String::from("$"),
            empty_tag: String::new(),
            explicit_root: true,
            trim: false,
            ignore_attrs: false,
            merge_attrs: false,
            normalize_text: false,
            lowercase_tags: false,
            explicit_array: true,
            explicit_charkey: false
        }
    }
}
241
242impl JsonBuilder {
243 fn is_whitespace(&self, value: &str) -> bool {
245 WHITESPACE_RE.is_match(value)
246 }
247
248 fn assign_or_push(&self, object: &mut JsonValue, key: &str, value: JsonValue) {
254 if object.get(key).is_none() {
255 if self.explicit_array {
256 object[key] = json!([value]);
257 } else {
258 object[key] = value;
259 }
260 } else {
261 if !object[key].is_array() {
263 let current = object[key].take();
264 object[key] = json!([current]);
265 }
266 if let Some(array) = object[key].as_array_mut() {
267 array.push(value);
268 }
269 }
270 }
271
272 fn process_start(&self, event: &BytesStart, stack: &mut Vec<Node>, reader: &mut Reader<&[u8]>) -> Result<(), Error> {
274 let mut node = Node::new();
275
276 if !self.ignore_attrs {
278 if event.attributes().peekable().peek().is_some() && node.value.get(&self.attrkey).is_none() && !self.merge_attrs {
280 node.value[&self.attrkey] = json!({});
281 }
282
283 for attr in event.attributes() {
284 if let Ok(attr) = attr {
285 let value = attr.unescape_and_decode_value(&reader)?;
286 let key = std::str::from_utf8(attr.key)?;
287 if self.merge_attrs {
288 self.assign_or_push(&mut node.value, key, value.into());
289 } else {
290 node.value[&self.attrkey][key] = value.into();
291 }
292 }
293 }
294 }
295
296 stack.push(node);
297 Ok(())
298 }
299
300 fn process_text(&self, event: &BytesText, stack: &mut Vec<Node>, reader: &mut Reader<&[u8]>) -> Result<(), Error> {
302 let cdata = event.unescape_and_decode(&reader)?;
303
304 if let Some(last_node) = stack.last_mut() {
305 let text = &mut last_node.text.data;
306 if self.normalize_text && !text.is_empty() {
310 let normalized = TWO_OR_MORE_WHITESPACE_RE.replace_all(text, NoExpand(" ")).into_owned();
311 text.clear();
312 text.push_str(&normalized);
313 let _ = text.trim();
314 }
315 text.push_str(&cdata);
316 }
317
318 Ok(())
319 }
320
321 fn process_end(&self, tag: &[u8], stack: &mut Vec<Node>) -> Result<Option<JsonValue>, Error> {
324 let close_tag = if self.lowercase_tags {
325 std::str::from_utf8(tag)?.to_lowercase()
326 } else {
327 std::str::from_utf8(tag)?.to_owned()
328 };
329 let mut inner = match stack.pop() {
331 Some(j) => j,
332 None => return Err(Error::new(ErrorKind::Unknown, "Expected stack item at close tag."))
333 };
334 let stack_len = stack.len();
335 let outer = stack.last_mut();
336
337 let mut whitespace = "".to_owned();
339 let mut text = inner.text.data.as_ref();
340
341 if self.is_whitespace(text) && !inner.text.literal {
342 whitespace.push_str(text);
343 } else {
344 if self.trim {
345 text = text.trim();
346 }
347
348 let mut _normalized = String::new();
350 if self.normalize_text {
351 _normalized = TWO_OR_MORE_WHITESPACE_RE.replace_all(text, NoExpand(" ")).into_owned();
352 text = _normalized.trim().as_ref();
353 }
354
355 if utils::json_is_empty(&inner.value) && !self.explicit_charkey {
356 inner.value = JsonValue::String(text.to_owned());
357 } else {
358 inner.value[&self.charkey] = text.into();
359 }
360 }
361
362 if utils::json_is_empty(&inner.value) {
363 if !self.empty_tag.is_empty() {
364 inner.value = JsonValue::String(self.empty_tag.clone());
365 } else {
366 inner.value = JsonValue::String(whitespace);
367 }
368 }
369
370 if stack_len > 0 {
372 if let Some(outer) = outer {
373 self.assign_or_push(&mut outer.value, &close_tag, inner.value);
374 }
375 } else {
376 let output = if self.explicit_root {
378 let output = json!({
379 close_tag: inner.value
380 });
381 output
382 } else {
383 inner.value
384 };
385 return Ok(Some(output));
386 }
387 Ok(None)
388 }
389
390 fn process_empty(&self, event: &BytesStart, stack: &mut Vec<Node>, reader: &mut Reader<&[u8]>) -> Result<Option<JsonValue>, Error> {
392 self.process_start(event, stack, reader)?;
393 self.process_end(event.name(), stack)
394 }
395
396 fn process_cdata(&self, event: &BytesCData, stack: &mut Vec<Node>, reader: &mut Reader<&[u8]>) -> Result<(), Error> {
398 self.process_text(&event.clone().escape(), stack, reader)?;
399
400 if let Some(mut last_node) = stack.last_mut() {
401 last_node.text.literal = true;
402 }
403 Ok(())
404 }
405
406 pub fn build_from_xml(&self, xml: &str) -> Result<JsonValue, Error> {
408 let mut reader = Reader::from_str(xml);
409 let mut buffer = Vec::new();
410 let mut output = JsonValue::Null;
411 let mut stack = Vec::new();
412
413 loop {
414 match reader.read_event(&mut buffer) {
415 Ok(Event::Start(ref e)) => self.process_start(e, &mut stack, &mut reader)?,
416
417 Ok(Event::Text(ref e)) => self.process_text(e, &mut stack, &mut reader)?,
418
419 Ok(Event::End(ref e)) => {
420 if let Some(o) = self.process_end(e.name(), &mut stack)? {
421 output = o;
422 }
423 },
424
425 Ok(Event::CData(ref e)) => self.process_cdata(e, &mut stack, &mut reader)?,
426
427 Ok(Event::Empty(ref e)) => {
428 if let Some(o) = self.process_empty(e, &mut stack, &mut reader)? {
429 output = o;
430 }
431 },
432
433 Ok(Event::Eof) => {
434 break;
435 },
436
437 Ok(_) => (),
439
440 Err(e) => {
441 return Err(Error::new(
442 ErrorKind::Syntax,
443 format!("Error at position {}: {:?}", reader.buffer_position(), e)
444 ))
445 },
446 }
447
448 buffer.clear();
449 }
450
451 Ok(output)
452 }
453
454 pub fn build_string_from_xml(&self, xml: &str) -> Result<String, Error> {
456 let object = self.build_from_xml(xml)?;
457 serde_json::to_string(&object).map_err(|e| e.into())
458 }
459
460 pub fn build_pretty_string_from_xml(&self, xml: &str) -> Result<String, Error> {
462 let object = self.build_from_xml(xml)?;
463 serde_json::to_string_pretty(&object).map_err(|e| e.into())
464 }
465}
466
#[cfg(test)]
mod tests {
    use super::*;

    use pretty_assertions::assert_eq;

    /// A closing tag that does not match the open element is a syntax error.
    #[test]
    fn invalid_xml() {
        let result = JsonBuilder::default().build_from_xml("<foo>bar</baz>");
        let err = result.unwrap_err();
        assert_eq!(err.kind(), ErrorKind::Syntax)
    }

    /// Text made up only of spaces, tabs and newlines counts as whitespace.
    #[test]
    fn is_whitespace1() {
        let blank = " \t \n ";
        assert!(JsonBuilder::default().is_whitespace(blank));
    }

    /// Text containing any non-whitespace character does not count as whitespace.
    #[test]
    fn is_whitespace2() {
        let not_blank = " \t A \n ";
        assert!(!JsonBuilder::default().is_whitespace(not_blank));
    }

    /// With the default `explicit_array`, every value ends up in an array.
    #[test]
    fn assign_or_push1() {
        let builder = JsonBuilder::default();
        let mut actual = json!({});
        for &(key, value) in &[("A", "B"), ("C", "D"), ("C", "E")] {
            builder.assign_or_push(&mut actual, key, value.into());
        }
        let expected = json!({ "A": ["B"], "C": ["D", "E"] });
        assert_eq!(actual, expected);
    }

    /// Without `explicit_array`, only repeated keys are turned into arrays.
    #[test]
    fn assign_or_push2() {
        let builder = JsonConfig::new().explicit_array(false).finalize();
        let mut actual = json!({});
        for &(key, value) in &[("A", "B"), ("C", "D"), ("C", "E")] {
            builder.assign_or_push(&mut actual, key, value.into());
        }
        let expected = json!({ "A": "B", "C": ["D", "E"] });
        assert_eq!(actual, expected);
    }
}