use crate::{Encoding, Result, key::expand_keys, parsers::gron};
use hcl::eval::Evaluate;
use regex::Regex;
use serde::Deserialize;
use serde_json::{Map, Value};
/// Options for the [`Deserializer`]. Each option only has an effect for the
/// encoding it is named after; other encodings ignore it.
#[derive(Clone, Debug, Default)]
pub struct DeserializeOptions {
    /// Treat the first CSV line as a data row instead of a header row.
    pub csv_without_headers: bool,
    /// Use the fields of the first CSV row as object keys for every following row.
    pub csv_headers_as_keys: bool,
    /// Field delimiter for CSV input. A comma is used when this is `None`.
    pub csv_delimiter: Option<u8>,
    /// Pattern used to split plain text input. A newline is used when this is `None`.
    pub text_split_pattern: Option<Regex>,
    /// For HCL input, attempt to evaluate the parsed body before converting it.
    pub simplify: bool,
}
impl DeserializeOptions {
    /// Creates new `DeserializeOptions` with every option at its default value.
    pub fn new() -> Self {
        Default::default()
    }
}
/// A builder that collects [`DeserializeOptions`] and creates [`Deserializer`]s from them.
#[derive(Clone, Debug, Default)]
pub struct DeserializerBuilder {
    /// Options handed (as a copy) to every deserializer the builder creates.
    opts: DeserializeOptions,
}
impl DeserializerBuilder {
    /// Creates a new `DeserializerBuilder` with all options at their defaults.
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets whether the first CSV line is treated as a data row instead of a
    /// header row.
    pub fn csv_without_headers(&mut self, yes: bool) -> &mut Self {
        self.opts.csv_without_headers = yes;
        self
    }

    /// Sets whether the fields of the first CSV row are used as object keys
    /// for the following rows.
    pub fn csv_headers_as_keys(&mut self, yes: bool) -> &mut Self {
        self.opts.csv_headers_as_keys = yes;
        self
    }

    /// Sets the field delimiter used for CSV input.
    pub fn csv_delimiter(&mut self, delim: u8) -> &mut Self {
        self.opts.csv_delimiter = Some(delim);
        self
    }

    /// Sets the pattern used to split plain text input.
    pub fn text_split_pattern(&mut self, pattern: Regex) -> &mut Self {
        self.opts.text_split_pattern = Some(pattern);
        self
    }

    /// Sets the `simplify` option, which is consulted when deserializing HCL.
    pub fn simplify(&mut self, yes: bool) -> &mut Self {
        self.opts.simplify = yes;
        self
    }

    /// Misspelled alias of [`simplify`](Self::simplify).
    ///
    /// Kept so that existing callers continue to compile; new code should
    /// call `simplify` instead.
    pub fn simplifiy(&mut self, yes: bool) -> &mut Self {
        self.simplify(yes)
    }

    /// Builds a [`Deserializer`] for `reader`.
    ///
    /// The deserializer receives a clone of the options, so the builder can
    /// be reused and modified afterwards without affecting it.
    pub fn build<R>(&self, reader: R) -> Deserializer<R>
    where
        R: std::io::Read,
    {
        Deserializer::with_options(reader, self.opts.clone())
    }
}
/// Reads encoded input from a reader and deserializes it into a
/// `serde_json::Value`.
pub struct Deserializer<R> {
    /// Source the encoded input is read from.
    reader: R,
    /// Encoding-specific options consulted while deserializing.
    opts: DeserializeOptions,
}
impl<R> Deserializer<R>
where
    R: std::io::Read,
{
    /// Creates a new `Deserializer` for `reader` with default options.
    pub fn new(reader: R) -> Self {
        Self::with_options(reader, Default::default())
    }

    /// Creates a new `Deserializer` for `reader` with the given options.
    pub fn with_options(reader: R, opts: DeserializeOptions) -> Self {
        Self { reader, opts }
    }

    /// Reads input from the reader and deserializes it from the given
    /// `Encoding` into a `Value`.
    ///
    /// # Errors
    ///
    /// Returns an error if reading from the reader fails or if the
    /// encoding-specific parser rejects the input.
    pub fn deserialize(&mut self, encoding: Encoding) -> Result<Value> {
        match encoding {
            Encoding::Yaml => self.deserialize_yaml(),
            Encoding::Json => self.deserialize_json(),
            Encoding::Toml => self.deserialize_toml(),
            Encoding::Json5 => self.deserialize_json5(),
            Encoding::Csv => self.deserialize_csv(),
            Encoding::QueryString => self.deserialize_query_string(),
            Encoding::Xml => self.deserialize_xml(),
            Encoding::Text => self.deserialize_text(),
            Encoding::Gron => self.deserialize_gron(),
            Encoding::Hcl => self.deserialize_hcl(),
        }
    }

    /// Deserializes YAML. A stream with exactly one document yields that
    /// document; any other number of documents yields an array of them.
    fn deserialize_yaml(&mut self) -> Result<Value> {
        let mut values = serde_yaml::Deserializer::from_reader(&mut self.reader)
            .map(Value::deserialize)
            .collect::<Result<Vec<_>, _>>()?;

        if values.len() == 1 {
            Ok(values.swap_remove(0))
        } else {
            Ok(Value::Array(values))
        }
    }

    /// Deserializes JSON directly from the reader.
    fn deserialize_json(&mut self) -> Result<Value> {
        Ok(serde_json::from_reader(&mut self.reader)?)
    }

    /// Deserializes TOML after reading the whole input into a string.
    fn deserialize_toml(&mut self) -> Result<Value> {
        let mut s = String::new();
        self.reader.read_to_string(&mut s)?;
        Ok(toml::from_str(&s)?)
    }

    /// Deserializes JSON5 after reading the whole input into a string.
    fn deserialize_json5(&mut self) -> Result<Value> {
        let mut s = String::new();
        self.reader.read_to_string(&mut s)?;
        Ok(json5::from_str(&s)?)
    }

    /// Deserializes CSV into an array with one element per row.
    ///
    /// Rows become arrays of fields. If `csv_headers_as_keys` is set, the
    /// first row provides the keys and every following row becomes an object
    /// instead.
    fn deserialize_csv(&mut self) -> Result<Value> {
        // With either option set, the first line has to be yielded as a
        // regular record instead of being consumed as the header row.
        let keep_first_line = self.opts.csv_without_headers || self.opts.csv_headers_as_keys;

        let mut csv_reader = csv::ReaderBuilder::new()
            .trim(csv::Trim::All)
            .has_headers(!keep_first_line)
            .delimiter(self.opts.csv_delimiter.unwrap_or(b','))
            .from_reader(&mut self.reader);

        let mut iter = csv_reader.deserialize();

        let value = if self.opts.csv_headers_as_keys {
            match iter.next() {
                Some(headers) => {
                    let headers: Vec<String> = headers?;

                    // Pair every remaining row with the header names.
                    Value::Array(
                        iter.map(|record| {
                            Ok(headers.iter().cloned().zip(record?.into_iter()).collect())
                        })
                        .collect::<Result<_>>()?,
                    )
                }
                // No rows at all: nothing to use as headers.
                None => Value::Array(Vec::new()),
            }
        } else {
            Value::Array(
                iter.map(|v| Ok(serde_json::to_value(v?)?))
                    .collect::<Result<_>>()?,
            )
        };

        Ok(value)
    }

    /// Deserializes a URL query string into an object.
    fn deserialize_query_string(&mut self) -> Result<Value> {
        let mut s = String::new();
        self.reader.read_to_string(&mut s)?;
        Ok(Value::Object(serde_qs::from_str(&s)?))
    }

    /// Deserializes XML directly from the reader.
    fn deserialize_xml(&mut self) -> Result<Value> {
        Ok(serde_xml_rs::from_reader(&mut self.reader)?)
    }

    /// Deserializes plain text into an array of strings by splitting it on
    /// `text_split_pattern`, or on newlines if no pattern is configured.
    ///
    /// Empty segments are kept, so input ending in the separator yields a
    /// trailing empty string and empty input yields a single empty string.
    fn deserialize_text(&mut self) -> Result<Value> {
        let mut s = String::new();
        self.reader.read_to_string(&mut s)?;

        // Borrow the configured pattern instead of cloning it, and split on
        // the newline character directly instead of compiling a regex for
        // the default on every call. `&str` converts into `Value` infallibly.
        let parts: Vec<Value> = match &self.opts.text_split_pattern {
            Some(pattern) => pattern.split(&s).map(Value::from).collect(),
            None => s.split('\n').map(Value::from).collect(),
        };

        Ok(Value::Array(parts))
    }

    /// Deserializes gron statements: every statement's value is parsed as
    /// JSON and stored under the statement's path, then the flat map is
    /// passed through `expand_keys`.
    fn deserialize_gron(&mut self) -> Result<Value> {
        let mut s = String::new();
        self.reader.read_to_string(&mut s)?;

        let map = gron::parse(&s)?
            .iter()
            .map(|statement| {
                Ok((
                    statement.path().to_owned(),
                    serde_json::from_str(statement.value())?,
                ))
            })
            .collect::<Result<Map<_, _>>>()?;

        Ok(expand_keys(Value::Object(map)))
    }

    /// Deserializes HCL. With the `simplify` option set, the parsed body is
    /// first evaluated in an empty evaluation context.
    fn deserialize_hcl(&mut self) -> Result<Value> {
        let value = if self.opts.simplify {
            let mut body: hcl::Body = hcl::from_reader(&mut self.reader)?;
            let ctx = hcl::eval::Context::new();
            // The evaluation result is discarded on purpose: a failed
            // evaluation does not abort deserialization of the body.
            let _ = body.evaluate_in_place(&ctx);
            hcl::from_body(body)?
        } else {
            hcl::from_reader(&mut self.reader)?
        };

        Ok(value)
    }
}
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;
    use serde_json::json;

    /// Builds a deserializer from `builder` over `input` and asserts that
    /// deserializing it as `encoding` produces `expected`.
    #[track_caller]
    fn assert_builder_deserializes_to(
        builder: &mut DeserializerBuilder,
        encoding: Encoding,
        input: &str,
        expected: Value,
    ) {
        let actual = builder
            .build(input.as_bytes())
            .deserialize(encoding)
            .unwrap();

        assert_eq!(actual, expected);
    }

    /// Same as `assert_builder_deserializes_to`, using a builder with
    /// default options.
    #[track_caller]
    fn assert_deserializes_to(encoding: Encoding, input: &str, expected: Value) {
        let mut default_builder = DeserializerBuilder::new();
        assert_builder_deserializes_to(&mut default_builder, encoding, input, expected);
    }

    #[test]
    fn test_deserialize_yaml() {
        // A single document is returned as-is.
        assert_deserializes_to(Encoding::Yaml, "---\nfoo: bar", json!({"foo": "bar"}));

        // Multiple documents are collected into an array.
        assert_deserializes_to(
            Encoding::Yaml,
            "---\nfoo: bar\n---\nbaz: qux",
            json!([{"foo": "bar"}, {"baz": "qux"}]),
        );
    }

    #[test]
    fn test_deserialize_csv() {
        // By default the first line is a header row and is not part of the output.
        assert_deserializes_to(
            Encoding::Csv,
            "header1,header2\ncol1,col2",
            json!([["col1", "col2"]]),
        );

        // Without headers every line is a data row.
        assert_builder_deserializes_to(
            DeserializerBuilder::new().csv_without_headers(true),
            Encoding::Csv,
            "row1col1,row1col2\nrow2col1,row2col2",
            json!([["row1col1", "row1col2"], ["row2col1", "row2col2"]]),
        );

        // Headers as keys turn every data row into an object.
        assert_builder_deserializes_to(
            DeserializerBuilder::new().csv_headers_as_keys(true),
            Encoding::Csv,
            "header1,header2\nrow1col1,row1col2\nrow2col1,row2col2",
            json!([{"header1":"row1col1", "header2":"row1col2"}, {"header1":"row2col1", "header2":"row2col2"}]),
        );

        // A custom delimiter replaces the comma.
        assert_builder_deserializes_to(
            DeserializerBuilder::new().csv_delimiter(b'|'),
            Encoding::Csv,
            "header1|header2\ncol1|col2",
            json!([["col1", "col2"]]),
        );
    }

    #[test]
    fn test_deserialize_text() {
        // A trailing newline produces a trailing empty string.
        assert_deserializes_to(
            Encoding::Text,
            "one\ntwo\nthree\n",
            json!(["one", "two", "three", ""]),
        );

        // Empty input produces a single empty string.
        assert_deserializes_to(Encoding::Text, "", json!([""]));
    }
}