1use flatten_json_object::ArrayFormatting;
151use serde_json::{Deserializer, Value};
152use std::collections::BTreeMap;
153use std::collections::BTreeSet;
154use std::io::Seek;
155use std::io::SeekFrom;
156use std::io::{BufReader, BufWriter};
157use std::io::{Read, Write};
158use tempfile::tempfile;
159
160pub use csv;
161pub use error::Error;
162pub use flatten_json_object;
163
164mod error;
165
166#[derive(Clone, Debug, Eq, PartialEq)]
169pub struct Json2Csv {
170 flattener: flatten_json_object::Flattener,
172 original_flattener: flatten_json_object::Flattener,
174}
175
176impl Json2Csv {
177 #[must_use]
179 pub fn new(flattener: flatten_json_object::Flattener) -> Self {
180 let key_sep = "␝";
184 let array_start = "␞";
185 let array_end = "␟";
186 Json2Csv {
187 flattener: match flattener.array_formatting() {
188 ArrayFormatting::Plain => flattener.clone().set_key_separator(key_sep),
189 ArrayFormatting::Surrounded { start: _, end: _ } => flattener
190 .clone()
191 .set_key_separator(key_sep)
192 .set_array_formatting(ArrayFormatting::Surrounded {
193 start: array_start.to_string(),
194 end: array_end.to_string(),
195 }),
196 },
197 original_flattener: flattener,
198 }
199 }
200
201 fn transform_key(&self, key: &str) -> String {
205 let key = key.replace(
206 self.flattener.key_separator(),
207 self.original_flattener.key_separator(),
208 );
209
210 match self.original_flattener.array_formatting() {
211 ArrayFormatting::Plain => key,
212 ArrayFormatting::Surrounded { start: os, end: oe } => {
213 match self.flattener.array_formatting() {
214 ArrayFormatting::Surrounded { start: s, end: e } => {
215 key.replace(e, oe).replace(s, os)
216 }
217 ArrayFormatting::Plain => {
218 unreachable!(
219 "We cloned the original flattener so both should have the same \
220 array formatting enum variant"
221 )
222 }
223 }
224 }
225 }
226 }
227
228 pub fn convert_from_array(
239 self,
240 objects: &[Value],
241 mut csv_writer: csv::Writer<impl Write>,
242 ) -> Result<(), error::Error> {
243 let mut orig_flat_maps = Vec::<serde_json::value::Map<String, Value>>::new();
245
246 for obj in objects {
247 let obj = self.flattener.flatten(obj)?;
248 if let Value::Object(map) = obj {
249 orig_flat_maps.push(map);
250 } else {
251 unreachable!("Flattening a JSON object always produces a JSON object");
252 }
253 }
254 let orig_flat_maps = orig_flat_maps;
255
256 let mut flat_maps = Vec::<serde_json::value::Map<String, Value>>::new();
257
258 let mut orig_headers = BTreeSet::<String>::new();
261 let mut headers = BTreeSet::<String>::new();
262 for orig_map in orig_flat_maps {
263 let mut map = serde_json::value::Map::new();
264 for (orig_key, value) in orig_map {
265 let key = self.transform_key(&orig_key);
266 map.insert(key.clone(), value);
267 orig_headers.insert(orig_key);
268 headers.insert(key);
269 }
270 flat_maps.push(map);
271 }
272
273 if headers.is_empty() {
275 return Ok(());
276 }
277
278 if headers.len() != orig_headers.len() {
280 return Err(Error::FlattenedKeysCollision);
281 }
282
283 csv_writer.write_record(&headers)?;
284 for map in flat_maps {
285 csv_writer.write_record(build_record(&headers, map))?;
286 }
287
288 Ok(())
289 }
290
291 pub fn convert_from_reader(
303 self,
304 reader: impl Read,
305 mut csv_writer: csv::Writer<impl Write>,
306 ) -> Result<(), error::Error> {
307 let mut tmp_file = BufWriter::new(tempfile()?);
311
312 let mut orig_headers = BTreeSet::<String>::new();
315 let mut headers = BTreeSet::<String>::new();
316
317 for obj in Deserializer::from_reader(reader).into_iter::<Value>() {
318 let obj = obj?; let obj = self.flattener.flatten(&obj)?;
320
321 let orig_map = match obj {
322 Value::Object(map) => map,
323 _ => unreachable!("Flattening a JSON object always produces a JSON object"),
324 };
325
326 let mut map = BTreeMap::new();
327 for (orig_key, value) in orig_map {
328 let key = self.transform_key(&orig_key);
329 map.insert(key.clone(), value);
330 orig_headers.insert(orig_key);
331 headers.insert(key);
332 }
333 serde_json::to_writer(&mut tmp_file, &map)?;
334 }
335
336 if headers.is_empty() {
338 return Ok(());
339 }
340
341 if headers.len() != orig_headers.len() {
343 return Err(Error::FlattenedKeysCollision);
344 }
345
346 tmp_file.seek(SeekFrom::Start(0))?;
347 let tmp_file = BufReader::new(tmp_file.into_inner()?);
348
349 csv_writer.write_record(&headers)?;
350 for obj in Deserializer::from_reader(tmp_file).into_iter::<Value>() {
351 let map = match obj? {
352 Value::Object(map) => map,
353 _ => unreachable!("Flattening a JSON object always produces a JSON object"),
354 };
355 csv_writer.write_record(build_record(&headers, map))?;
356 }
357
358 Ok(())
359 }
360}
361
362fn build_record(
363 headers: &BTreeSet<String>,
364 mut map: serde_json::Map<String, Value>,
365) -> Vec<String> {
366 let mut record: Vec<String> = vec![];
367 for header in headers {
368 if let Some(val) = map.remove(header) {
369 match val {
370 Value::String(s) => record.push(s),
371 Value::Bool(_) | Value::Number(_) => record.push(val.to_string()),
373 Value::Null | Value::Array(_) | Value::Object(_) => record.push("".to_string()),
378 }
379 } else {
380 record.push("".to_string());
381 }
382 }
383 record
384}
385
#[cfg(test)]
mod tests {
    use super::*;
    use error::Error;
    use flatten_json_object::{ArrayFormatting, Flattener};
    use rstest::rstest;
    use std::str;

    /// Parsed input documents together with the CSV text produced from them.
    struct ExecutionResult {
        input: Vec<Value>,
        output: String,
    }

    /// Builds a flattener that joins keys with `.` and uses plain array formatting.
    fn dotted_plain_flattener(
        preserve_empty_arrays: bool,
        preserve_empty_objects: bool,
    ) -> Flattener {
        Flattener::new()
            .set_key_separator(".")
            .set_array_formatting(ArrayFormatting::Plain)
            .set_preserve_empty_arrays(preserve_empty_arrays)
            .set_preserve_empty_objects(preserve_empty_objects)
    }

    /// Joins the expected CSV lines, terminating every line (including the last) with `\n`.
    fn csv_text(lines: &[&str]) -> String {
        lines.join("\n") + "\n"
    }

    /// Parses `input` as a sequence of concatenated JSON documents, panicking on invalid JSON.
    fn parse_documents(input: &str) -> Vec<Value> {
        let documents: Result<Vec<_>, _> =
            Deserializer::from_str(input).into_iter::<Value>().collect();
        documents.unwrap()
    }

    /// Runs `convert_from_reader` on `input` and returns its result with the bytes written.
    fn run_from_reader(input: &str, flattener: &Flattener) -> (Result<(), Error>, Vec<u8>) {
        let mut output = Vec::<u8>::new();
        let csv_writer = csv::WriterBuilder::new()
            .delimiter(b',')
            .from_writer(&mut output);
        let result =
            Json2Csv::new(flattener.clone()).convert_from_reader(input.as_bytes(), csv_writer);
        (result, output)
    }

    /// Runs `convert_from_array` on `objects` and returns its result with the bytes written.
    fn run_from_array(objects: &[Value], flattener: &Flattener) -> (Result<(), Error>, Vec<u8>) {
        let mut output = Vec::<u8>::new();
        let csv_writer = csv::WriterBuilder::new()
            .delimiter(b',')
            .from_writer(&mut output);
        let result = Json2Csv::new(flattener.clone()).convert_from_array(objects, csv_writer);
        (result, output)
    }

    /// Converts `input` through both entry points, expecting both to fail, and returns the errors
    /// (reader first, array second).
    fn execute_expect_err(input: &str, flattener: &Flattener) -> Vec<error::Error> {
        let (result_from_file, _) = run_from_reader(input, flattener);

        let input_from_array = parse_documents(input);
        let (result_from_array, _) = run_from_array(&input_from_array, flattener);

        vec![
            result_from_file.expect_err("conversion from a reader should have failed"),
            result_from_array.expect_err("conversion from an array should have failed"),
        ]
    }

    /// Converts `input` through both entry points, checks that they produce the same CSV and
    /// returns that CSV along with the parsed input.
    fn execute(input: &str, flattener: &Flattener) -> ExecutionResult {
        let (result_from_file, output_from_file) = run_from_reader(input, flattener);
        result_from_file.unwrap();

        let input_from_array = parse_documents(input);
        let (result_from_array, output_from_array) = run_from_array(&input_from_array, flattener);
        result_from_array.unwrap();

        let output_from_file = str::from_utf8(&output_from_file).unwrap();
        let output_from_array = str::from_utf8(&output_from_array).unwrap();

        assert_eq!(output_from_file, output_from_array);

        ExecutionResult {
            input: input_from_array,
            output: output_from_array.to_string(),
        }
    }

    #[rstest]
    #[case::nesting_and_array(r#"{"a": {"b": 1}}{"c": [2]}"#, &["a.b,c.0", "1,", ",2"])]
    #[case::spaces_end(r#"{"a": {"b": 1}}{"c": [2]} "#, &["a.b,c.0", "1,", ",2"])]
    #[case::spaces_begin(r#" {"a": {"b": 1}}{"c": [2]}"#, &["a.b,c.0", "1,", ",2"])]
    #[case::key_repeats_consistently(r#"{"a": 3}{"a": 4}{"a": 5}"#, &["a", "3", "4", "5"])]
    #[case::reordering(r#"{"b": 3, "a": 1}{"a": 4, "b": 2}"#, &["a,b", "1,3", "4,2"])]
    #[case::reordering_with_empty_array(r#"{"b": 3, "a": 1, "c": 0}{"c": [], "a": 4, "b": 2}"#, &["a,b,c", "1,3,0", "4,2,"])]
    #[case::reordering_with_empty_object(r#"{"b": 3, "a": 1, "c": 0}{"c": {}, "a": 4, "b": 2}"#, &["a,b,c", "1,3,0", "4,2,"])]
    #[case::reordering_with_missing(r#"{"b": 3, "a": 1, "c": 0}{"a": 4, "b": 2}"#, &["a,b,c", "1,3,0", "4,2,"])]
    fn simple_input(
        #[case] input: &str,
        #[case] expected: &[&str],
        #[values(true, false)] preserve_empty_arrays: bool,
        #[values(true, false)] preserve_empty_objects: bool,
    ) {
        let flattener = dotted_plain_flattener(preserve_empty_arrays, preserve_empty_objects);
        let result = execute(input, &flattener);
        assert_eq!(result.output, csv_text(expected));
    }

    #[test]
    fn duplicated_keys_last_wins() {
        let flattener = dotted_plain_flattener(true, true);
        let result = execute(
            r#"{"a": [1,2,3], "a": {"b": 2}, "c": 1, "c": 2}"#,
            &flattener,
        );
        assert_eq!(result.output, csv_text(&["a.b,c", "2,2"]));
    }

    #[test]
    fn no_reordering_on_non_default_separators() {
        let flattener = Flattener::new()
            .set_key_separator("]")
            .set_array_formatting(ArrayFormatting::Surrounded {
                start: ".".to_string(),
                end: "".to_string(),
            })
            .set_preserve_empty_arrays(true)
            .set_preserve_empty_objects(true);
        let result = execute(r#"{"a": [1,2,3]} {"a": {"b": 2}}"#, &flattener);
        assert_eq!(
            result.output,
            csv_text(&["a.0,a.1,a.2,a]b", "1,2,3,", ",,,2"])
        );
    }

    #[rstest]
    #[case::in_one_object(r#"{"a": {"b": 1}, "a.b": 2}"#)]
    #[case::in_different_objects(r#"{"a": {"b": 1}}{"a.b": 2}"#)]
    fn error_on_collision(#[case] input: &str) {
        let flattener = dotted_plain_flattener(false, false);
        for err in execute_expect_err(input, &flattener) {
            assert!(
                matches!(err, Error::FlattenedKeysCollision),
                "Unexpected error: {}",
                err
            );
        }
    }

    #[rstest]
    #[case::in_one_object(r#"{"a[0]": 1, "a": [2]}"#, "[", "]")]
    #[case::in_different_objects(r#"{"a[0]": 1} {"a": [2]}"#, "[", "]")]
    fn error_on_collision_array_formatting(
        #[case] input: &str,
        #[case] start: &str,
        #[case] end: &str,
    ) {
        let flattener = Flattener::new()
            .set_key_separator(".")
            .set_array_formatting(ArrayFormatting::Surrounded {
                start: start.to_string(),
                end: end.to_string(),
            })
            .set_preserve_empty_arrays(false)
            .set_preserve_empty_objects(false);
        for err in execute_expect_err(input, &flattener) {
            assert!(
                matches!(err, Error::FlattenedKeysCollision),
                "Unexpected error: {}",
                err
            );
        }
    }

    #[rstest]
    #[case::empty_string("")]
    #[case::empty_json_doc("{}")]
    #[case::multiple_empty_json_docs("{}{}{}{}")]
    #[case::empty_array(r#"{"a": []}"#)]
    #[case::empty_obj(r#"{"b": {}}"#)]
    #[case::empty_array_obj_and_json_doc(r#"{"a": []} {"b": {}} {}"#)]
    fn empty_csv_when_no_headers(#[case] input: &str) {
        let expected = "";
        let flattener = dotted_plain_flattener(false, false);
        let result = execute(input, &flattener);
        assert_eq!(result.output, expected);
    }

    #[rstest]
    #[case::empty_array(r#"{"a": []}"#)]
    #[case::empty_array_extra_obj(r#"{"a": []} {} {}"#)]
    #[case::empty_obj(r#"{"a": {}}"#)]
    #[case::empty_obj_extra_obj(r#"{"a": {}} {}"#)]
    fn preserved_empty(#[case] input: &str) {
        let flattener = dotted_plain_flattener(true, true);
        let result = execute(input, &flattener);

        // One header line followed by one line per input document; every such line holds a
        // single empty field, which is expected to appear in the output as `""`.
        let mut expected = vec!["a"];
        expected.extend(vec![r#""""#; result.input.len()]);

        assert_eq!(result.output, csv_text(&expected));
    }

    #[rstest]
    #[case::empty_array(r#"{"a": [], "b": 3}"#, &["b", "3"])]
    #[case::empty_array_extra_obj(r#"{"a": [], "b": 3} {} {}"#, &["b", "3", r#""""#, r#""""#])]
    #[case::empty_obj(r#"{"a": {}, "b": 3}"#, &["b", "3"])]
    #[case::empty_obj_extra_obj(r#"{"a": {}} {} {"b": 3} {}"#, &["b", r#""""#, r#""""#, "3", r#""""#])]
    #[case::empty_obj_extra_objs_with_keys(r#"{"a": {}} {} {"b": 3} {"c": 4}"#, &["b,c", ",", ",", "3,", ",4"])]
    fn not_preserved_empty(#[case] input: &str, #[case] expected: &[&str]) {
        let flattener = dotted_plain_flattener(false, false);
        let result = execute(input, &flattener);

        assert_eq!(result.output, csv_text(expected));
    }
}