dts_core/
de.rs

1//! This module provides a `Deserializer` which supports deserializing input data with various
2//! encodings into a `Value`.
3
4use crate::{key::expand_keys, parsers::gron, Encoding, Result};
5use regex::Regex;
6use serde::Deserialize;
7use serde_json::{Map, Value};
8
9/// Options for the `Deserializer`. The options are context specific and may only be honored when
10/// deserializing from a certain `Encoding`.
11#[derive(Debug, Default, Clone)]
12pub struct DeserializeOptions {
13    /// Indicates that an input CSV does not include a header line. If `false`, the first line is
14    /// discarded.
15    pub csv_without_headers: bool,
16    /// Indicates that the header fields of an input CSV should be used as keys for each row's
17    /// columns. This means that the deserialized row data will be of type object. Otherwise row
18    /// data will be of type array.
19    pub csv_headers_as_keys: bool,
20    /// Optional custom delimiter for CSV input.
21    pub csv_delimiter: Option<u8>,
22    /// Optional regex pattern to split text input at.
23    pub text_split_pattern: Option<Regex>,
24}
25
26impl DeserializeOptions {
27    /// Creates new `DeserializeOptions`.
28    pub fn new() -> Self {
29        Self::default()
30    }
31}
32
33/// A `DeserializerBuilder` can be used to build a `Deserializer` with certain
34/// `DeserializeOptions`.
35///
36/// ## Example
37///
38/// ```
39/// use dts_core::{de::DeserializerBuilder, Encoding};
40///
41/// let buf = r#"["foo"]"#.as_bytes();
42///
43/// let deserializer = DeserializerBuilder::new()
44///     .csv_delimiter(b'\t')
45///     .build(buf);
46/// ```
47#[derive(Debug, Default, Clone)]
48pub struct DeserializerBuilder {
49    opts: DeserializeOptions,
50}
51
52impl DeserializerBuilder {
53    /// Creates a new `DeserializerBuilder`.
54    pub fn new() -> Self {
55        Self::default()
56    }
57
58    /// Indicates that an input CSV does not include a header line. If `false`, the first line is
59    /// discarded.
60    pub fn csv_without_headers(&mut self, yes: bool) -> &mut Self {
61        self.opts.csv_without_headers = yes;
62        self
63    }
64
65    /// Indicates that the header fields of an input CSV should be used as keys for each row's
66    /// columns. This means that the deserialized row data will be of type object. Otherwise row
67    /// data will be of type array.
68    pub fn csv_headers_as_keys(&mut self, yes: bool) -> &mut Self {
69        self.opts.csv_headers_as_keys = yes;
70        self
71    }
72
73    /// Sets a custom CSV delimiter.
74    pub fn csv_delimiter(&mut self, delim: u8) -> &mut Self {
75        self.opts.csv_delimiter = Some(delim);
76        self
77    }
78
79    /// Sets regex pattern to split text at.
80    pub fn text_split_pattern(&mut self, pattern: Regex) -> &mut Self {
81        self.opts.text_split_pattern = Some(pattern);
82        self
83    }
84
85    /// Builds the `Deserializer` for the given reader.
86    pub fn build<R>(&self, reader: R) -> Deserializer<R>
87    where
88        R: std::io::Read,
89    {
90        Deserializer::with_options(reader, self.opts.clone())
91    }
92}
93
94/// A `Deserializer` can deserialize input data from a reader into a `Value`.
95pub struct Deserializer<R> {
96    reader: R,
97    opts: DeserializeOptions,
98}
99
100impl<R> Deserializer<R>
101where
102    R: std::io::Read,
103{
104    /// Creates a new `Deserializer` for reader with default options.
105    pub fn new(reader: R) -> Self {
106        Self::with_options(reader, Default::default())
107    }
108
109    /// Creates a new `Deserializer` for reader with options.
110    pub fn with_options(reader: R, opts: DeserializeOptions) -> Self {
111        Self { reader, opts }
112    }
113
114    /// Reads input data from the given reader and deserializes it in a `Value`.
115    ///
116    /// ## Example
117    ///
118    /// ```
119    /// use dts_core::{de::DeserializerBuilder, Encoding};
120    /// use serde_json::json;
121    /// # use std::error::Error;
122    /// #
123    /// # fn main() -> Result<(), Box<dyn Error>> {
124    /// let buf = r#"["foo"]"#.as_bytes();
125    ///
126    /// let mut de = DeserializerBuilder::new().build(buf);
127    /// let value = de.deserialize(Encoding::Json)?;
128    ///
129    /// assert_eq!(value, json!(["foo"]));
130    /// #     Ok(())
131    /// # }
132    /// ```
133    pub fn deserialize(&mut self, encoding: Encoding) -> Result<Value> {
134        match encoding {
135            Encoding::Yaml => self.deserialize_yaml(),
136            Encoding::Json => self.deserialize_json(),
137            Encoding::Toml => self.deserialize_toml(),
138            Encoding::Json5 => self.deserialize_json5(),
139            Encoding::Csv => self.deserialize_csv(),
140            Encoding::QueryString => self.deserialize_query_string(),
141            Encoding::Xml => self.deserialize_xml(),
142            Encoding::Text => self.deserialize_text(),
143            Encoding::Gron => self.deserialize_gron(),
144            Encoding::Hcl => self.deserialize_hcl(),
145        }
146    }
147
148    fn deserialize_yaml(&mut self) -> Result<Value> {
149        let mut values = serde_yaml::Deserializer::from_reader(&mut self.reader)
150            .map(Value::deserialize)
151            .collect::<Result<Vec<_>, _>>()?;
152
153        // If this was not multi-document YAML, just take the first document's value without
154        // wrapping it into an array.
155        if values.len() == 1 {
156            Ok(values.swap_remove(0))
157        } else {
158            Ok(Value::Array(values))
159        }
160    }
161
162    fn deserialize_json(&mut self) -> Result<Value> {
163        Ok(serde_json::from_reader(&mut self.reader)?)
164    }
165
166    fn deserialize_toml(&mut self) -> Result<Value> {
167        let mut buf = Vec::new();
168        self.reader.read_to_end(&mut buf)?;
169        Ok(toml::de::from_slice(&buf)?)
170    }
171
172    fn deserialize_json5(&mut self) -> Result<Value> {
173        let mut s = String::new();
174        self.reader.read_to_string(&mut s)?;
175        Ok(json5::from_str(&s)?)
176    }
177
178    fn deserialize_csv(&mut self) -> Result<Value> {
179        let keep_first_line = self.opts.csv_without_headers || self.opts.csv_headers_as_keys;
180
181        let mut csv_reader = csv::ReaderBuilder::new()
182            .trim(csv::Trim::All)
183            .has_headers(!keep_first_line)
184            .delimiter(self.opts.csv_delimiter.unwrap_or(b','))
185            .from_reader(&mut self.reader);
186
187        let mut iter = csv_reader.deserialize();
188
189        let value = if self.opts.csv_headers_as_keys {
190            match iter.next() {
191                Some(headers) => {
192                    let headers: Vec<String> = headers?;
193
194                    Value::Array(
195                        iter.map(|record| {
196                            Ok(headers.iter().cloned().zip(record?.into_iter()).collect())
197                        })
198                        .collect::<Result<_>>()?,
199                    )
200                }
201                None => Value::Array(Vec::new()),
202            }
203        } else {
204            Value::Array(
205                iter.map(|v| Ok(serde_json::to_value(v?)?))
206                    .collect::<Result<_>>()?,
207            )
208        };
209
210        Ok(value)
211    }
212
213    fn deserialize_query_string(&mut self) -> Result<Value> {
214        let mut s = String::new();
215        self.reader.read_to_string(&mut s)?;
216        Ok(Value::Object(serde_qs::from_str(&s)?))
217    }
218
219    fn deserialize_xml(&mut self) -> Result<Value> {
220        Ok(serde_xml_rs::from_reader(&mut self.reader)?)
221    }
222
223    fn deserialize_text(&mut self) -> Result<Value> {
224        let mut s = String::new();
225        self.reader.read_to_string(&mut s)?;
226
227        let pattern = match &self.opts.text_split_pattern {
228            Some(pattern) => pattern.clone(),
229            None => Regex::new("\n").unwrap(),
230        };
231
232        Ok(Value::Array(
233            pattern
234                .split(&s)
235                .map(serde_json::to_value)
236                .collect::<Result<_, serde_json::Error>>()?,
237        ))
238    }
239
240    fn deserialize_gron(&mut self) -> Result<Value> {
241        let mut s = String::new();
242        self.reader.read_to_string(&mut s)?;
243
244        let map = gron::parse(&s)?
245            .iter()
246            .map(|statement| {
247                Ok((
248                    statement.path().to_owned(),
249                    serde_json::from_str(statement.value())?,
250                ))
251            })
252            .collect::<Result<Map<_, _>>>()?;
253
254        Ok(expand_keys(Value::Object(map)))
255    }
256
257    fn deserialize_hcl(&mut self) -> Result<Value> {
258        Ok(hcl::from_reader(&mut self.reader)?)
259    }
260}
261
#[cfg(test)]
mod test {
    use super::*;
    use pretty_assertions::assert_eq;
    use serde_json::json;

    /// Builds a deserializer over `input` from `builder` and asserts that deserializing it
    /// as `encoding` yields `expected`.
    #[track_caller]
    fn assert_builder_deserializes_to(
        builder: &mut DeserializerBuilder,
        encoding: Encoding,
        input: &str,
        expected: Value,
    ) {
        let actual = builder
            .build(input.as_bytes())
            .deserialize(encoding)
            .unwrap();
        assert_eq!(actual, expected);
    }

    /// Same as `assert_builder_deserializes_to`, using a builder with default options.
    #[track_caller]
    fn assert_deserializes_to(encoding: Encoding, input: &str, expected: Value) {
        let mut default_builder = DeserializerBuilder::new();
        assert_builder_deserializes_to(&mut default_builder, encoding, input, expected);
    }

    #[test]
    fn test_deserialize_yaml() {
        // A single document is returned unwrapped.
        assert_deserializes_to(Encoding::Yaml, "---\nfoo: bar", json!({"foo": "bar"}));

        // Multiple documents are collected into an array.
        let multi_document = "---\nfoo: bar\n---\nbaz: qux";
        assert_deserializes_to(
            Encoding::Yaml,
            multi_document,
            json!([{"foo": "bar"}, {"baz": "qux"}]),
        );
    }

    #[test]
    fn test_deserialize_csv() {
        // Default options: the header line is discarded.
        assert_deserializes_to(
            Encoding::Csv,
            "header1,header2\ncol1,col2",
            json!([["col1", "col2"]]),
        );

        // Without headers: every line is a data row.
        assert_builder_deserializes_to(
            DeserializerBuilder::new().csv_without_headers(true),
            Encoding::Csv,
            "row1col1,row1col2\nrow2col1,row2col2",
            json!([["row1col1", "row1col2"], ["row2col1", "row2col2"]]),
        );

        // Headers as keys: every row becomes an object.
        assert_builder_deserializes_to(
            DeserializerBuilder::new().csv_headers_as_keys(true),
            Encoding::Csv,
            "header1,header2\nrow1col1,row1col2\nrow2col1,row2col2",
            json!([{"header1":"row1col1", "header2":"row1col2"}, {"header1":"row2col1", "header2":"row2col2"}]),
        );

        // Custom delimiter.
        assert_builder_deserializes_to(
            DeserializerBuilder::new().csv_delimiter(b'|'),
            Encoding::Csv,
            "header1|header2\ncol1|col2",
            json!([["col1", "col2"]]),
        );
    }

    #[test]
    fn test_deserialize_text() {
        // A trailing newline produces a trailing empty string.
        let lines = "one\ntwo\nthree\n";
        assert_deserializes_to(Encoding::Text, lines, json!(["one", "two", "three", ""]));

        // Empty input still yields one (empty) piece.
        assert_deserializes_to(Encoding::Text, "", json!([""]));
    }
}