corn/
parser.rs

1use indexmap::IndexMap;
2use std::borrow::Cow;
3use std::collections::HashMap;
4use std::env::var;
5use std::fmt::Formatter;
6
7use pest::iterators::Pair;
8use pest::Parser;
9
10use crate::error::{Error, Result};
11use crate::{Inputs, Object, Value};
12
/// Pest parser for Corn source text.
///
/// The implementation is generated by `pest_derive` from the rules declared
/// in `grammar.pest`; the derive also supplies the `Rule` enum that the rest
/// of this file matches on.
#[derive(pest_derive::Parser)]
#[grammar = "grammar.pest"]
pub struct AstParser;
16
17impl std::fmt::Display for Rule {
18    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
19        write!(f, "{self:?}")
20    }
21}
22
/// Walks the pest parse tree and builds the resulting `Value` tree.
///
/// The lifetime `'a` is the lifetime of the source text the pairs borrow from.
struct CornParser<'a> {
    /// The not-yet-evaluated assignment (`let { } in`) block, if the file has one.
    /// It is taken (left as `None`) when `parse` evaluates it.
    input_block: Option<Pair<'a, Rule>>,
    /// Inputs resolved so far, keyed by the input name exactly as written in the source.
    inputs: Inputs<'a>,
}
27
28impl<'a> CornParser<'a> {
29    pub fn new(input_block: Option<Pair<'a, Rule>>) -> Self {
30        let inputs = HashMap::new();
31        Self {
32            input_block,
33            inputs,
34        }
35    }
36
37    pub fn parse(mut self, object_block: Pair<'a, Rule>) -> Result<Value> {
38        if let Some(input_block) = self.input_block.take() {
39            self.parse_assign_block(input_block)?;
40        }
41
42        let value_block = self.parse_object(object_block)?;
43        Ok(Value::Object(value_block))
44    }
45
46    /// Parses a pair of tokens (marked as a `Rule`) into a `Value`.
47    fn parse_value(&self, pair: Pair<'a, Rule>) -> Result<Value<'a>> {
48        match pair.as_rule() {
49            Rule::object => Ok(Value::Object(self.parse_object(pair)?)),
50            Rule::array => Ok(Value::Array(self.parse_array(pair)?)),
51            Rule::string => Ok(Value::String(self.parse_string(pair)?)),
52            Rule::integer => Ok(Value::Integer(Self::parse_integer(pair))),
53            Rule::float => Ok(Value::Float(Self::parse_float(&pair))),
54            Rule::boolean => Ok(Value::Boolean(Self::parse_bool(&pair))),
55            Rule::null => Ok(Value::Null(None)),
56            Rule::input => {
57                let key = pair.as_str();
58                self.get_input(key)
59            }
60            _ => unreachable!(),
61        }
62    }
63
64    fn parse_bool(pair: &Pair<'_, Rule>) -> bool {
65        assert_eq!(pair.as_rule(), Rule::boolean);
66        match pair.as_str() {
67            "true" => true,
68            "false" => false,
69            _ => unreachable!(),
70        }
71    }
72
73    fn parse_integer(pair: Pair<'_, Rule>) -> i64 {
74        assert_eq!(pair.as_rule(), Rule::integer);
75        let sub_pair = pair
76            .into_inner()
77            .next()
78            .expect("integers should contain a sub-rule of their type");
79
80        match sub_pair.as_rule() {
81            Rule::decimal_integer => sub_pair
82                .as_str()
83                .replace('_', "")
84                .parse()
85                .expect("decimal integer rules should match valid rust integers"),
86            Rule::hex_integer => i64::from_str_radix(&sub_pair.as_str()[2..], 16)
87                .expect("hex integer rules contain valid hex values"),
88            _ => unreachable!(),
89        }
90    }
91
92    fn parse_float(pair: &Pair<'_, Rule>) -> f64 {
93        assert_eq!(pair.as_rule(), Rule::float);
94        pair.as_str()
95            .parse()
96            .expect("float rules should match valid rust floats")
97    }
98
99    /// Collects each `char` in a `Rule::string`
100    /// to form a single `String`.
101    fn parse_string(&self, pair: Pair<'a, Rule>) -> Result<Cow<'a, str>> {
102        assert_eq!(pair.as_rule(), Rule::string);
103
104        let mut full_string = String::new();
105
106        let pairs = pair
107            .into_inner()
108            .next()
109            .expect("string rules should contain a valid string value")
110            .into_inner();
111
112        for pair in pairs {
113            match pair.as_rule() {
114                Rule::char => full_string.push(Self::parse_char(&pair)),
115                Rule::input => {
116                    let input_name = pair.as_str();
117                    let value = self.get_input(input_name)?;
118                    match value {
119                        Value::String(value) => full_string.push_str(&value),
120                        _ => return Err(Error::InvalidInterpolationError(input_name.to_string())),
121                    }
122                }
123                _ => unreachable!(),
124            };
125        }
126
127        let full_string = if full_string.contains('\n') {
128            trim_multiline_string(&full_string)
129        } else {
130            full_string
131        };
132
133        Ok(Cow::Owned(full_string))
134    }
135
136    fn parse_char(pair: &Pair<'a, Rule>) -> char {
137        let str = pair.as_str();
138        let mut chars = str.chars();
139
140        let first_char = chars.next().expect("character to exist");
141        if first_char != '\\' {
142            return first_char;
143        }
144
145        let second_char = chars.next().expect("character to exist");
146        if second_char != 'u' {
147            return match second_char {
148                'n' => '\n',
149                'r' => '\r',
150                't' => '\t',
151                '"' => '\"',
152                '$' => '$',
153                '\\' => '\\',
154                _ => unreachable!(),
155            };
156        }
157
158        let num =
159            u32::from_str_radix(&str[3..], 16).expect("valid hex characters to exist after \\u");
160        char::from_u32(num).unwrap_or('\u{FFFD}')
161    }
162
163    /// Parses each rule in a `Rule::array`
164    /// to form a vector of `Value`s.
165    fn parse_array(&self, block: Pair<'a, Rule>) -> Result<Vec<Value<'a>>> {
166        assert_eq!(block.as_rule(), Rule::array);
167
168        let mut arr = vec![];
169
170        for pair in block.into_inner() {
171            match pair.as_rule() {
172                Rule::spread => {
173                    let input = pair
174                        .into_inner()
175                        .next()
176                        .expect("spread operators should contain an input");
177
178                    let input_name = input.as_str();
179                    let value = self.parse_value(input)?;
180
181                    match value {
182                        Value::Array(other) => arr.extend(other),
183                        _ => return Err(Error::InvalidSpreadError(input_name.to_string())),
184                    }
185                }
186                _ => arr.push(self.parse_value(pair)?),
187            };
188        }
189
190        Ok(arr)
191    }
192
193    /// Parses each key/value pair in a `Rule::object`
194    /// to form a `IndexMap` of Values.
195    ///
196    /// An `IndexMap` is used to ensure keys
197    /// always output in the same order.
198    fn parse_object(&self, block: Pair<'a, Rule>) -> Result<Object<'a>> {
199        assert_eq!(block.as_rule(), Rule::object);
200
201        let mut obj = IndexMap::new();
202
203        for pair in block.into_inner() {
204            match pair.as_rule() {
205                Rule::pair => {
206                    let mut path_rules = pair.into_inner();
207
208                    let path = path_rules
209                        .next()
210                        .expect("object pairs should contain a key");
211
212                    let paths = Self::parse_path(path);
213
214                    let value = self.parse_value(
215                        path_rules
216                            .next()
217                            .expect("object pairs should contain a value"),
218                    )?;
219
220                    obj = Self::add_at_path(obj, &paths, value)?;
221                }
222                Rule::spread => {
223                    let input = pair
224                        .into_inner()
225                        .next()
226                        .expect("spread operators should contain an input");
227
228                    let input_name = input.as_str();
229                    let value = self.parse_value(input)?;
230
231                    match value {
232                        Value::Object(other) => obj.extend(other),
233                        _ => return Err(Error::InvalidSpreadError(input_name.to_string())),
234                    }
235                }
236                _ => unreachable!(),
237            }
238        }
239
240        Ok(obj)
241    }
242
243    fn parse_path(path: Pair<Rule>) -> Vec<Cow<str>> {
244        path.into_inner()
245            .map(|pair| match pair.as_rule() {
246                Rule::regular_path_seg => Cow::Borrowed(pair.as_str()),
247                Rule::quoted_path_seg => Cow::Owned(
248                    pair.into_inner()
249                        .next()
250                        .expect("quoted paths should contain an inner value")
251                        .as_str()
252                        .replace('\\', ""),
253                ),
254                _ => unreachable!(),
255            })
256            .collect::<Vec<_>>()
257    }
258
259    /// Adds `Value` at the `path` in `obj`.
260    ///
261    /// `path` is an array where each entry represents another object key,
262    /// for example `foo.bar` is represented as `["foo", "bar"]`.
263    ///
264    /// Objects are created up to the required depth recursively.
265    fn add_at_path(
266        mut obj: Object<'a>,
267        path: &[Cow<'a, str>],
268        value: Value<'a>,
269    ) -> Result<Object<'a>> {
270        let (part, path_rest) = path
271            .split_first()
272            .expect("paths should contain at least 1 segment");
273
274        if path_rest.is_empty() {
275            obj.insert(part.clone(), value);
276            return Ok(obj);
277        }
278
279        let child_obj = obj
280            .shift_remove(part)
281            .unwrap_or_else(|| Value::Object(IndexMap::new()));
282
283        match child_obj {
284            Value::Object(map) => {
285                obj.insert(
286                    part.clone(),
287                    Value::Object(Self::add_at_path(map, path_rest, value)?),
288                );
289
290                Ok(obj)
291            }
292            _ => Err(Error::InvalidPathError(path.join("."))),
293        }
294    }
295
296    /// Parses the `let { } in` block at the start of files.
297    /// Each input is inserted into into `self.inputs`.
298    fn parse_assign_block(&mut self, block: Pair<'a, Rule>) -> Result<()> {
299        assert_eq!(block.as_rule(), Rule::assign_block);
300
301        for pair in block.into_inner() {
302            let mut assign_rules = pair.into_inner();
303            let name = assign_rules
304                .next()
305                .expect("input assignments should have a name")
306                .as_str();
307
308            let value = self.parse_value(
309                assign_rules
310                    .next()
311                    .expect("input assignments should have a value"),
312            )?;
313
314            self.inputs.insert(name, value);
315        }
316
317        Ok(())
318    }
319
320    /// Attempts to get an input value from the `inputs` map.
321    /// If the `key` starts with `$env_` the system environment variables will be consulted first.
322    fn get_input(&self, key: &'a str) -> Result<Value<'a>> {
323        if let Some(env_name) = key.strip_prefix("$env_") {
324            let var = var(env_name);
325
326            if let Ok(var) = var {
327                return Ok(Value::String(Cow::Owned(var)));
328            }
329        }
330
331        if let Some(value) = self.inputs.get(key) {
332            Ok(value.clone())
333        } else {
334            Err(Error::InputResolveError(key.to_string()))
335        }
336    }
337}
338
/// Takes a multiline string and removes the indentation shared by
/// every line after the first, preserving relative formatting.
///
/// The shared indentation is the smallest count of leading ASCII whitespace
/// characters among the non-empty lines after the first.
/// Empty lines are ignored for this calculation, since they carry no
/// indentation and would otherwise force the result to zero.
/// Lines that contain only whitespace still count.
///
/// The first line is never un-indented. If the string starts with a line
/// break, that leading empty line is dropped from the output.
/// Lines are split with `str::lines`, so `\r\n` endings are emitted as `\n`
/// and a single trailing line break is not reproduced.
///
/// Based on code from `indoc` crate:
/// <https://github.com/dtolnay/indoc/blob/60b5fa29ba4f98b479713621a1f4ec96155caaba/src/unindent.rs#L15-L51>
fn trim_multiline_string(string: &str) -> String {
    let ignore_first_line = string.starts_with('\n') || string.starts_with("\r\n");

    // Largest number of leading whitespace characters that can be removed
    // from every non-empty line after the first.
    let spaces = string
        .lines()
        .skip(1)
        .filter(|line| !line.is_empty())
        .map(|line| line.chars().take_while(char::is_ascii_whitespace).count())
        .min()
        .unwrap_or_default();

    let mut result = String::with_capacity(string.len());
    for (i, line) in string.lines().enumerate() {
        if i > 1 || (i == 1 && !ignore_first_line) {
            result.push('\n');
        }
        if i == 0 {
            // Do not un-indent anything on same line as opening quote
            result.push_str(line);
        } else if line.len() > spaces {
            // Empty lines, and lines made up of exactly the shared indentation,
            // have nothing left once the indentation is removed.
            result.push_str(&line[spaces..]);
        }
    }
    result
}
372
373/// Parses the input string into a `Config`
374/// containing the resolved inputs
375/// and a map of values representing the top-level object.
376///
377/// # Examples
378///
379/// ```rust
380/// use corn::parse;
381///
382/// let corn = "{foo = 42}";
383///
384/// let config = parse(corn).unwrap();
385/// let json = serde_json::to_string(&config).unwrap();
386///
387/// assert_eq!(json, "{\"foo\":42}");
388/// ```
389///
390/// # Errors
391///
392/// Will fail if the input contains a syntax error.
393/// Will fail if the input contains invalid Corn for another reason,
394/// including references to undefined inputs or dot-notation for non-object values.
395/// Will fail if the input cannot be deserialized for any reaon.
396///
397/// Any of the above will return a specific error type with details.
398///
399/// # Panics
400///
401/// If the internal AST parser produces a tree in an invalid structure,
402/// the function will panic.
403/// This indicates a severe error in the library and should never occur.
404pub fn parse(file: &str) -> Result<Value> {
405    let rules = AstParser::parse(Rule::config, file);
406
407    match rules {
408        Ok(mut rules) => {
409            let first_block = rules.next().expect("should be at least 1 rule");
410
411            match first_block.as_rule() {
412                Rule::assign_block => {
413                    let parser = CornParser::new(Some(first_block));
414                    let object_block = rules.next().expect("should always be an object block");
415                    parser.parse(object_block)
416                }
417                Rule::object => {
418                    let parser = CornParser::new(None);
419                    parser.parse(first_block)
420                }
421                _ => unreachable!(),
422            }
423        }
424        Err(error) => Err(Error::ParserError(Box::new(error))),
425    }
426}