dts_core/
encoding.rs

1//! Supported encodings for serialization and deserialization.
2
3use clap::ArgEnum;
4use once_cell::sync::Lazy;
5use regex::Regex;
6use std::fmt;
7use std::path::Path;
8
/// Encodings supported by this crate.
///
/// Not every encoding listed here can be used in both directions. Some, like hjson, can only be
/// deserialized from; data read from them cannot be serialized back into the original
/// representation.
// NOTE(review): there is no hjson variant in this enum, so the example above may be stale —
// confirm which variants are deserialize-only.
//
// `#[non_exhaustive]` forces code in other crates to keep a wildcard arm when matching on this
// enum, so new variants can be added without breaking them.
#[non_exhaustive]
// `ArgEnum` is clap's derive for enums used as argument values. The derive may pick up the variant
// doc comments below as help text, so treat them as user-visible — TODO confirm against the clap
// version in use.
#[derive(ArgEnum, Debug, PartialEq, Clone, Copy)]
pub enum Encoding {
    /// JavaScript Object Notation
    Json,
    /// Yet Another Markup Language
    // Additionally registered with clap under the alias `yml`.
    #[clap(alias = "yml")]
    Yaml,
    /// TOML configuration format
    Toml,
    /// ES5 JSON
    Json5,
    /// Comma separated values
    Csv,
    /// URL query string
    // Additionally registered with clap under the alias `qs`.
    #[clap(alias = "qs")]
    QueryString,
    /// Extensible Markup Language
    Xml,
    /// Plaintext document
    // Additionally registered with clap under the alias `txt`.
    #[clap(alias = "txt")]
    Text,
    /// Gron
    Gron,
    /// HCL
    Hcl,
}
41
42// Patterns to detect a source encoding by looking at the first line of input. The patterns are
43// lazily constructed upon first usage as they are only needed if there is no other encoding hint
44// (e.g. encoding inferred from file extension or explicitly provided on the command line).
45//
46// These patterns are very basic and will only detect some of the more common first lines. Thus
47// they may not match valid pattern for a given encoding on purpose due to ambiguities. For example
48// the first line `["foo"]` may be a JSON array or a TOML table header. Make sure to avoid matching
49// anything that is ambiguous.
50static FIRST_LINES: Lazy<Vec<(Encoding, Regex)>> = Lazy::new(|| {
51    vec![
52        // XML or HTML start.
53        (
54            Encoding::Xml,
55            Regex::new(
56                r#"^(?x:
57                    <\?xml\s
58                    | \s*<(?:[\w-]+):Envelope\s+
59                    | \s*(?i:<!DOCTYPE\s+)
60                )"#,
61            )
62            .unwrap(),
63        ),
64        // HCL block start of the form
65        //
66        //   <identifier> [<identifier>|<quoted-string>]* {
67        //
68        // Expression for matching quoted strings is very basic.
69        (
70            Encoding::Hcl,
71            Regex::new(
72                r#"^(?xi:
73                    [a-z_][a-z0-9_-]*\s+
74                    (?:(?:[a-z_][a-z0-9_-]*|"[^"]*")\s+)*\{
75                )"#,
76            )
77            .unwrap(),
78        ),
79        // YAML document start or document separator.
80        (Encoding::Yaml, Regex::new(r"^(?:%YAML.*|---\s*)$").unwrap()),
81        // TOML array of tables or table.
82        (
83            Encoding::Toml,
84            Regex::new(
85                r#"^(?xi:
86                    # array of tables
87                    \[\[\s*[a-z0-9_-]+(?:\s*\.\s*(?:[a-z0-9_-]+|"[^"]*"))*\s*\]\]\s*
88                    # table
89                    | \[\s*[a-z0-9_-]+(?:\s*\.\s*(?:[a-z0-9_-]+|"[^"]*"))*\s*\]\s*
90                )$"#,
91            )
92            .unwrap(),
93        ),
94        // JSON object start or array start.
95        (
96            Encoding::Json,
97            Regex::new(r#"^(?:\{\s*(?:"|$)|\[\s*$)"#).unwrap(),
98        ),
99    ]
100});
101
102impl Encoding {
103    /// Creates an `Encoding` from a path by looking at the file extension.
104    ///
105    /// Returns `None` if the extension is absent or if the extension does not match any of the
106    /// supported encodings.
107    pub fn from_path<P>(path: P) -> Option<Encoding>
108    where
109        P: AsRef<Path>,
110    {
111        let ext = path.as_ref().extension()?.to_str()?;
112
113        match ext {
114            "json" => Some(Encoding::Json),
115            "yaml" | "yml" => Some(Encoding::Yaml),
116            "toml" => Some(Encoding::Toml),
117            "json5" => Some(Encoding::Json5),
118            "csv" => Some(Encoding::Csv),
119            "xml" => Some(Encoding::Xml),
120            "txt" | "text" => Some(Encoding::Text),
121            "hcl" | "tf" => Some(Encoding::Hcl),
122            _ => None,
123        }
124    }
125
126    /// Tries to detect the `Encoding` by looking at the first line of the input.
127    ///
128    /// Returns `None` if the encoding cannot be detected from the first line.
129    pub fn from_first_line(line: &str) -> Option<Encoding> {
130        if line.is_empty() {
131            // Fast path.
132            return None;
133        }
134
135        for (encoding, regex) in FIRST_LINES.iter() {
136            if regex.is_match(line) {
137                return Some(*encoding);
138            }
139        }
140
141        None
142    }
143
144    /// Returns the name of the `Encoding`.
145    pub fn as_str(&self) -> &'static str {
146        match self {
147            Encoding::Json => "json",
148            Encoding::Yaml => "yaml",
149            Encoding::Toml => "toml",
150            Encoding::Json5 => "json5",
151            Encoding::Csv => "csv",
152            Encoding::QueryString => "query-string",
153            Encoding::Xml => "xml",
154            Encoding::Text => "text",
155            Encoding::Gron => "gron",
156            Encoding::Hcl => "hcl",
157        }
158    }
159}
160
161impl fmt::Display for Encoding {
162    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
163        fmt::Display::fmt(self.as_str(), f)
164    }
165}
166
#[cfg(test)]
mod tests {
    use super::*;
    use pretty_assertions::assert_eq;

    /// Checks extension-based detection, including unknown and missing extensions.
    #[test]
    fn test_encoding_from_path() {
        let cases = [
            ("foo.yaml", Some(Encoding::Yaml)),
            ("foo.yml", Some(Encoding::Yaml)),
            ("foo.json", Some(Encoding::Json)),
            ("foo.json5", Some(Encoding::Json5)),
            ("foo.toml", Some(Encoding::Toml)),
            ("foo.bak", None),
            ("foo", None),
        ];

        for &(path, expected) in &cases {
            assert_eq!(Encoding::from_path(path), expected, "path: {:?}", path);
        }
    }

    /// Checks first-line detection for lines that must not match and lines that must.
    #[test]
    fn test_encoding_from_first_line() {
        let cases = [
            // no match
            ("", None),
            (r#"["foo"]"#, None),
            // match
            (
                r#"resource "aws_s3_bucket" "my-bucket" {"#,
                Some(Encoding::Hcl),
            ),
            ("{ ", Some(Encoding::Json)),
            ("[ ", Some(Encoding::Json)),
            (r#"{"foo": 1 }"#, Some(Encoding::Json)),
            (r#"[foo .bar."baz".qux]"#, Some(Encoding::Toml)),
            (r#"[[foo .bar."baz".qux]] "#, Some(Encoding::Toml)),
            ("%YAML 1.2", Some(Encoding::Yaml)),
            ("<!doctype html>", Some(Encoding::Xml)),
            (r#"<?xml version="1.0" ?>"#, Some(Encoding::Xml)),
            (
                r#"<soap:Envelope xmlns:soap="http://www.w3.org/2003/05/soap-envelope/" soap:encodingStyle="http://www.w3.org/2003/05/soap-encoding">"#,
                Some(Encoding::Xml),
            ),
        ];

        for &(line, expected) in &cases {
            assert_eq!(
                Encoding::from_first_line(line),
                expected,
                "first line: {:?}",
                line
            );
        }
    }
}