use flatten_json_object::ArrayFormatting;
use serde_json::{Deserializer, Value};
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::io::Seek;
use std::io::SeekFrom;
use std::io::{BufReader, BufWriter};
use std::io::{Read, Write};
use tempfile::tempfile;
pub use csv;
pub use error::Error;
pub use flatten_json_object;
mod error;
/// Converter that flattens JSON objects and writes them as rows of a CSV document.
///
/// It keeps two flatteners: the one supplied by the caller and a copy of it that
/// `Json2Csv::new` reconfigures with internal separators.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Json2Csv {
// Copy of the caller's flattener whose key separator (and array delimiters, when the
// `Surrounded` array formatting is used) are replaced by `Json2Csv::new` with internal
// placeholder strings. This is the flattener actually used to flatten the input.
flattener: flatten_json_object::Flattener,
// The flattener exactly as passed to `Json2Csv::new`. Its separators are the ones that
// `transform_key` substitutes back into the keys before they become CSV headers.
original_flattener: flatten_json_object::Flattener,
}
impl Json2Csv {
#[must_use]
pub fn new(flattener: flatten_json_object::Flattener) -> Self {
let key_sep = "␝";
let array_start = "␞";
let array_end = "␟";
Json2Csv {
flattener: match flattener.array_formatting() {
ArrayFormatting::Plain => flattener.clone().set_key_separator(key_sep),
ArrayFormatting::Surrounded { start: _, end: _ } => flattener
.clone()
.set_key_separator(key_sep)
.set_array_formatting(ArrayFormatting::Surrounded {
start: array_start.to_string(),
end: array_end.to_string(),
}),
},
original_flattener: flattener,
}
}
fn transform_key(&self, key: &str) -> String {
let key = key.replace(
self.flattener.key_separator(),
self.original_flattener.key_separator(),
);
match self.original_flattener.array_formatting() {
ArrayFormatting::Plain => key,
ArrayFormatting::Surrounded { start: os, end: oe } => {
match self.flattener.array_formatting() {
ArrayFormatting::Surrounded { start: s, end: e } => {
key.replace(e, oe).replace(s, os)
}
ArrayFormatting::Plain => {
unreachable!(
"We cloned the original flattener so both should have the same \
array formatting enum variant"
)
}
}
}
}
}
pub fn convert_from_array(
self,
objects: &[Value],
mut csv_writer: csv::Writer<impl Write>,
) -> Result<(), error::Error> {
let mut orig_flat_maps = Vec::<serde_json::value::Map<String, Value>>::new();
for obj in objects {
let obj = self.flattener.flatten(obj)?;
if let Value::Object(map) = obj {
orig_flat_maps.push(map);
} else {
unreachable!("Flattening a JSON object always produces a JSON object");
}
}
let orig_flat_maps = orig_flat_maps;
let mut flat_maps = Vec::<serde_json::value::Map<String, Value>>::new();
let mut orig_headers = BTreeSet::<String>::new();
let mut headers = BTreeSet::<String>::new();
for orig_map in orig_flat_maps {
let mut map = serde_json::value::Map::new();
for (orig_key, value) in orig_map {
let key = self.transform_key(&orig_key);
map.insert(key.clone(), value);
orig_headers.insert(orig_key);
headers.insert(key);
}
flat_maps.push(map);
}
if headers.is_empty() {
return Ok(());
}
if headers.len() != orig_headers.len() {
return Err(Error::FlattenedKeysCollision);
}
csv_writer.write_record(&headers)?;
for map in flat_maps {
csv_writer.write_record(build_record(&headers, map))?;
}
Ok(())
}
pub fn convert_from_reader(
self,
reader: impl Read,
mut csv_writer: csv::Writer<impl Write>,
) -> Result<(), error::Error> {
let mut tmp_file = BufWriter::new(tempfile()?);
let mut orig_headers = BTreeSet::<String>::new();
let mut headers = BTreeSet::<String>::new();
for obj in Deserializer::from_reader(reader).into_iter::<Value>() {
let obj = obj?; let obj = self.flattener.flatten(&obj)?;
let orig_map = match obj {
Value::Object(map) => map,
_ => unreachable!("Flattening a JSON object always produces a JSON object"),
};
let mut map = BTreeMap::new();
for (orig_key, value) in orig_map {
let key = self.transform_key(&orig_key);
map.insert(key.clone(), value);
orig_headers.insert(orig_key);
headers.insert(key);
}
serde_json::to_writer(&mut tmp_file, &map)?;
}
if headers.is_empty() {
return Ok(());
}
if headers.len() != orig_headers.len() {
return Err(Error::FlattenedKeysCollision);
}
tmp_file.seek(SeekFrom::Start(0))?;
let tmp_file = BufReader::new(tmp_file.into_inner()?);
csv_writer.write_record(&headers)?;
for obj in Deserializer::from_reader(tmp_file).into_iter::<Value>() {
let map = match obj? {
Value::Object(map) => map,
_ => unreachable!("Flattening a JSON object always produces a JSON object"),
};
csv_writer.write_record(build_record(&headers, map))?;
}
Ok(())
}
}
/// Builds the CSV row for one flattened object.
///
/// The row has one cell per entry of `headers`, in the iteration order of the set. Strings
/// are emitted verbatim, booleans and numbers through their JSON text representation, and
/// every other case (missing key, `null`, array or object value) yields an empty cell.
fn build_record(
    headers: &BTreeSet<String>,
    mut map: serde_json::Map<String, Value>,
) -> Vec<String> {
    headers
        .iter()
        .map(|header| match map.remove(header) {
            Some(Value::String(text)) => text,
            Some(scalar @ Value::Bool(_)) | Some(scalar @ Value::Number(_)) => scalar.to_string(),
            Some(Value::Null) | Some(Value::Array(_)) | Some(Value::Object(_)) | None => {
                String::new()
            }
        })
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;
    use error::Error;
    use flatten_json_object::{ArrayFormatting, Flattener};
    use rstest::rstest;
    use std::str;

    /// The parsed input documents together with the CSV text produced for them.
    struct ExecutionResult {
        input: Vec<Value>,
        output: String,
    }

    /// Builds a flattener with `.` as key separator and plain array formatting.
    fn plain_flattener(preserve_empty_arrays: bool, preserve_empty_objects: bool) -> Flattener {
        Flattener::new()
            .set_key_separator(".")
            .set_array_formatting(ArrayFormatting::Plain)
            .set_preserve_empty_arrays(preserve_empty_arrays)
            .set_preserve_empty_objects(preserve_empty_objects)
    }

    /// Parses `input` as a stream of JSON documents, panicking on malformed input.
    fn parse_documents(input: &str) -> Vec<Value> {
        let documents: Result<Vec<_>, _> =
            Deserializer::from_str(input).into_iter::<Value>().collect();
        documents.unwrap()
    }

    /// Runs `convert_from_reader` on `input`, returning its result and the bytes written.
    fn run_from_reader(input: &str, flattener: &Flattener) -> (Result<(), Error>, Vec<u8>) {
        let mut output = Vec::<u8>::new();
        let csv_writer = csv::WriterBuilder::new()
            .delimiter(b',')
            .from_writer(&mut output);
        let result =
            Json2Csv::new(flattener.clone()).convert_from_reader(input.as_bytes(), csv_writer);
        (result, output)
    }

    /// Runs `convert_from_array` on `objects`, returning its result and the bytes written.
    fn run_from_array(objects: &[Value], flattener: &Flattener) -> (Result<(), Error>, Vec<u8>) {
        let mut output = Vec::<u8>::new();
        let csv_writer = csv::WriterBuilder::new()
            .delimiter(b',')
            .from_writer(&mut output);
        let result = Json2Csv::new(flattener.clone()).convert_from_array(objects, csv_writer);
        (result, output)
    }

    /// Runs both conversion entry points on `input`, expecting each of them to fail, and
    /// returns the two errors (reader-based first, array-based second).
    fn execute_expect_err(input: &str, flattener: &Flattener) -> Vec<error::Error> {
        let (result_from_file, _) = run_from_reader(input, flattener);
        let input_from_array = parse_documents(input);
        let (result_from_array, _) = run_from_array(&input_from_array, flattener);

        let error_from_file =
            result_from_file.expect_err("convert_from_reader should have failed");
        let error_from_array =
            result_from_array.expect_err("convert_from_array should have failed");
        vec![error_from_file, error_from_array]
    }

    /// Runs both conversion entry points on `input`, checks that they produce the same CSV
    /// and returns it together with the parsed input.
    fn execute(input: &str, flattener: &Flattener) -> ExecutionResult {
        let (result_from_file, output_from_file) = run_from_reader(input, flattener);
        result_from_file.unwrap();

        let input_from_array = parse_documents(input);
        let (result_from_array, output_from_array) =
            run_from_array(&input_from_array, flattener);
        result_from_array.unwrap();

        let output_from_file = str::from_utf8(&output_from_file).unwrap();
        let output_from_array = str::from_utf8(&output_from_array).unwrap();
        assert_eq!(output_from_file, output_from_array);

        ExecutionResult {
            input: input_from_array,
            output: output_from_array.to_string(),
        }
    }

    /// Asserts that both conversion entry points fail on `input` with a key collision.
    fn assert_keys_collide(input: &str, flattener: &Flattener) {
        for err in execute_expect_err(input, flattener) {
            assert!(
                matches!(err, Error::FlattenedKeysCollision),
                "Unexpected error: {}",
                err
            );
        }
    }

    #[rstest]
    #[case::nesting_and_array(r#"{"a": {"b": 1}}{"c": [2]}"#, &["a.b,c.0", "1,", ",2"])]
    #[case::spaces_end(r#"{"a": {"b": 1}}{"c": [2]} "#, &["a.b,c.0", "1,", ",2"])]
    #[case::spaces_begin(r#" {"a": {"b": 1}}{"c": [2]}"#, &["a.b,c.0", "1,", ",2"])]
    #[case::key_repeats_consistently(r#"{"a": 3}{"a": 4}{"a": 5}"#, &["a", "3", "4", "5"])]
    #[case::reordering(r#"{"b": 3, "a": 1}{"a": 4, "b": 2}"#, &["a,b", "1,3", "4,2"])]
    #[case::reordering_with_empty_array(
        r#"{"b": 3, "a": 1, "c": 0}{"c": [], "a": 4, "b": 2}"#,
        &["a,b,c", "1,3,0", "4,2,"]
    )]
    #[case::reordering_with_empty_object(
        r#"{"b": 3, "a": 1, "c": 0}{"c": {}, "a": 4, "b": 2}"#,
        &["a,b,c", "1,3,0", "4,2,"]
    )]
    #[case::reordering_with_missing(
        r#"{"b": 3, "a": 1, "c": 0}{"a": 4, "b": 2}"#,
        &["a,b,c", "1,3,0", "4,2,"]
    )]
    fn simple_input(
        #[case] input: &str,
        #[case] expected: &[&str],
        #[values(true, false)] preserve_empty_arrays: bool,
        #[values(true, false)] preserve_empty_objects: bool,
    ) {
        let flattener = plain_flattener(preserve_empty_arrays, preserve_empty_objects);
        let result = execute(input, &flattener);
        assert_eq!(result.output, expected.join("\n") + "\n");
    }

    #[test]
    fn duplicated_keys_last_wins() {
        let flattener = plain_flattener(true, true);
        let result = execute(
            r#"{"a": [1,2,3], "a": {"b": 2}, "c": 1, "c": 2}"#,
            &flattener,
        );
        let expected = &["a.b,c", "2,2"];
        assert_eq!(result.output, expected.join("\n") + "\n");
    }

    #[test]
    fn no_reordering_on_non_default_separators() {
        let flattener = Flattener::new()
            .set_key_separator("]")
            .set_array_formatting(ArrayFormatting::Surrounded {
                start: ".".to_string(),
                end: "".to_string(),
            })
            .set_preserve_empty_arrays(true)
            .set_preserve_empty_objects(true);
        let result = execute(r#"{"a": [1,2,3]} {"a": {"b": 2}}"#, &flattener);
        let expected = &["a.0,a.1,a.2,a]b", "1,2,3,", ",,,2"];
        assert_eq!(result.output, expected.join("\n") + "\n");
    }

    #[rstest]
    #[case::in_one_object(r#"{"a": {"b": 1}, "a.b": 2}"#)]
    #[case::in_different_objects(r#"{"a": {"b": 1}}{"a.b": 2}"#)]
    fn error_on_collision(#[case] input: &str) {
        let flattener = plain_flattener(false, false);
        assert_keys_collide(input, &flattener);
    }

    #[rstest]
    #[case::in_one_object(r#"{"a[0]": 1, "a": [2]}"#, "[", "]")]
    #[case::in_different_objects(r#"{"a[0]": 1} {"a": [2]}"#, "[", "]")]
    fn error_on_collision_array_formatting(
        #[case] input: &str,
        #[case] start: &str,
        #[case] end: &str,
    ) {
        let flattener = Flattener::new()
            .set_key_separator(".")
            .set_array_formatting(ArrayFormatting::Surrounded {
                start: start.to_string(),
                end: end.to_string(),
            })
            .set_preserve_empty_arrays(false)
            .set_preserve_empty_objects(false);
        assert_keys_collide(input, &flattener);
    }

    #[rstest]
    #[case::empty_string("")]
    #[case::empty_json_doc("{}")]
    #[case::multiple_empty_json_docs("{}{}{}{}")]
    #[case::empty_array(r#"{"a": []}"#)]
    #[case::empty_obj(r#"{"b": {}}"#)]
    #[case::empty_array_obj_and_json_doc(r#"{"a": []} {"b": {}} {}"#)]
    fn empty_csv_when_no_headers(#[case] input: &str) {
        let expected = "";
        let flattener = plain_flattener(false, false);
        let result = execute(input, &flattener);
        assert_eq!(result.output, expected);
    }

    #[rstest]
    #[case::empty_array(r#"{"a": []}"#)]
    #[case::empty_array_extra_obj(r#"{"a": []} {} {}"#)]
    #[case::empty_obj(r#"{"a": {}}"#)]
    #[case::empty_obj_extra_obj(r#"{"a": {}} {}"#)]
    fn preserved_empty(#[case] input: &str) {
        let flattener = plain_flattener(true, true);
        let result = execute(input, &flattener);
        // One header row followed by one quoted empty cell per input document.
        let mut expected = vec!["a"];
        expected.extend(vec![r#""""#; result.input.len()]);
        assert_eq!(result.output, expected.join("\n") + "\n");
    }

    #[rstest]
    #[case::empty_array(r#"{"a": [], "b": 3}"#, &["b", "3"])]
    #[case::empty_array_extra_obj(
        r#"{"a": [], "b": 3} {} {}"#,
        &["b", "3", r#""""#, r#""""#]
    )]
    #[case::empty_obj(r#"{"a": {}, "b": 3}"#, &["b", "3"])]
    #[case::empty_obj_extra_obj(
        r#"{"a": {}} {} {"b": 3} {}"#,
        &["b", r#""""#, r#""""#, "3", r#""""#]
    )]
    #[case::empty_obj_extra_obj_two_columns(
        r#"{"a": {}} {} {"b": 3} {"c": 4}"#,
        &["b,c", ",", ",", "3,", ",4"]
    )]
    fn not_preserved_empty(#[case] input: &str, #[case] expected: &[&str]) {
        let flattener = plain_flattener(false, false);
        let result = execute(input, &flattener);
        assert_eq!(result.output, expected.join("\n") + "\n");
    }
}