corn/
parser.rs

1use indexmap::IndexMap;
2use std::borrow::Cow;
3use std::collections::HashMap;
4use std::env::var;
5use std::fmt::Formatter;
6
7use pest::iterators::Pair;
8use pest::Parser;
9
10use crate::error::{Error, Result};
11use crate::{Inputs, Key, Object, Value};
12
/// Low-level parser derived by `pest_derive` from the grammar file `grammar.pest`.
///
/// NOTE(review): the `Rule` type used throughout this file is not declared here;
/// presumably it is generated by this derive — confirm against the pest docs.
#[derive(pest_derive::Parser)]
#[grammar = "grammar.pest"]
pub struct AstParser;
16
17impl std::fmt::Display for Rule {
18    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
19        write!(f, "{self:?}")
20    }
21}
22
/// State used while converting a parsed token tree into a `Value`.
struct CornParser<'a> {
    /// Optional input-assignment block; taken (and left as `None`) by `parse`.
    input_block: Option<Pair<'a, Rule>>,
    /// Input values keyed by name; filled by `parse_assign_block`
    /// and read by `get_input`.
    inputs: Inputs<'a>,
}
27
28impl<'a> CornParser<'a> {
29    pub fn new(input_block: Option<Pair<'a, Rule>>) -> Self {
30        let inputs = HashMap::new();
31        Self {
32            input_block,
33            inputs,
34        }
35    }
36
37    pub fn parse(mut self, object_block: Pair<'a, Rule>) -> Result<Value<'a>> {
38        if let Some(input_block) = self.input_block.take() {
39            self.parse_assign_block(input_block)?;
40        }
41
42        let value_block = self.parse_object(object_block)?;
43        Ok(Value::Object(value_block))
44    }
45
46    /// Parses a pair of tokens (marked as a `Rule`) into a `Value`.
47    fn parse_value(&self, pair: Pair<'a, Rule>) -> Result<Value<'a>> {
48        match pair.as_rule() {
49            Rule::object => Ok(Value::Object(self.parse_object(pair)?)),
50            Rule::array => Ok(Value::Array(self.parse_array(pair)?)),
51            Rule::string => Ok(Value::String(self.parse_string(pair)?)),
52            Rule::integer => Ok(Value::Integer(Self::parse_integer(pair))),
53            Rule::float => Ok(Value::Float(Self::parse_float(&pair))),
54            Rule::boolean => Ok(Value::Boolean(Self::parse_bool(&pair))),
55            Rule::null => Ok(Value::Null(None)),
56            Rule::input => {
57                let key = pair.as_str();
58                self.get_input(key)
59            }
60            _ => unreachable!(),
61        }
62    }
63
64    fn parse_bool(pair: &Pair<'_, Rule>) -> bool {
65        assert_eq!(pair.as_rule(), Rule::boolean);
66        match pair.as_str() {
67            "true" => true,
68            "false" => false,
69            _ => unreachable!(),
70        }
71    }
72
73    fn parse_integer(pair: Pair<'_, Rule>) -> i64 {
74        assert_eq!(pair.as_rule(), Rule::integer);
75        let sub_pair = pair
76            .into_inner()
77            .next()
78            .expect("integers should contain a sub-rule of their type");
79
80        match sub_pair.as_rule() {
81            Rule::decimal_integer => sub_pair
82                .as_str()
83                .replace('_', "")
84                .parse()
85                .expect("decimal integer rules should match valid rust integers"),
86            Rule::hex_integer => i64::from_str_radix(&sub_pair.as_str()[2..], 16)
87                .expect("hex integer rules contain valid hex values"),
88            _ => unreachable!(),
89        }
90    }
91
92    fn parse_float(pair: &Pair<'_, Rule>) -> f64 {
93        assert_eq!(pair.as_rule(), Rule::float);
94        pair.as_str()
95            .parse()
96            .expect("float rules should match valid rust floats")
97    }
98
99    /// Collects each `char` in a `Rule::string`
100    /// to form a single `String`.
101    fn parse_string(&self, pair: Pair<'a, Rule>) -> Result<Cow<'a, str>> {
102        assert_eq!(pair.as_rule(), Rule::string);
103
104        let mut full_string = String::new();
105
106        let pairs = pair
107            .into_inner()
108            .next()
109            .expect("string rules should contain a valid string value")
110            .into_inner();
111
112        for pair in pairs {
113            match pair.as_rule() {
114                Rule::char => full_string.push(Self::parse_char(&pair)),
115                Rule::input => {
116                    let input_name = pair.as_str();
117                    let value = self.get_input(input_name)?;
118                    match value {
119                        Value::String(value) => full_string.push_str(&value),
120                        _ => return Err(Error::InvalidInterpolationError(input_name.to_string())),
121                    }
122                }
123                _ => unreachable!(),
124            }
125        }
126
127        let full_string = if full_string.contains('\n') {
128            trim_multiline_string(&full_string)
129        } else {
130            full_string
131        };
132
133        Ok(Cow::Owned(full_string))
134    }
135
136    fn parse_char(pair: &Pair<'a, Rule>) -> char {
137        let str = pair.as_str();
138        let mut chars = str.chars();
139
140        let first_char = chars.next().expect("character to exist");
141        if first_char != '\\' {
142            return first_char;
143        }
144
145        let second_char = chars.next().expect("character to exist");
146        if second_char != 'u' {
147            return match second_char {
148                'n' => '\n',
149                'r' => '\r',
150                't' => '\t',
151                '"' => '\"',
152                '$' => '$',
153                '\\' => '\\',
154                _ => unreachable!(),
155            };
156        }
157
158        let num =
159            u32::from_str_radix(&str[3..], 16).expect("valid hex characters to exist after \\u");
160        char::from_u32(num).unwrap_or('\u{FFFD}')
161    }
162
163    /// Parses each rule in a `Rule::array`
164    /// to form a vector of `Value`s.
165    fn parse_array(&self, block: Pair<'a, Rule>) -> Result<Vec<Value<'a>>> {
166        assert_eq!(block.as_rule(), Rule::array);
167
168        let mut arr = vec![];
169
170        for pair in block.into_inner() {
171            match pair.as_rule() {
172                Rule::spread => {
173                    let input = pair
174                        .into_inner()
175                        .next()
176                        .expect("spread operators should contain an input");
177
178                    let input_name = input.as_str();
179                    let value = self.parse_value(input)?;
180
181                    match value {
182                        Value::Array(other) => arr.extend(other),
183                        _ => return Err(Error::InvalidSpreadError(input_name.to_string())),
184                    }
185                }
186                _ => arr.push(self.parse_value(pair)?),
187            }
188        }
189
190        Ok(arr)
191    }
192
193    /// Parses each key/value pair in a `Rule::object`
194    /// to form a `IndexMap` of Values.
195    ///
196    /// An `IndexMap` is used to ensure keys
197    /// always output in the same order.
198    fn parse_object(&self, block: Pair<'a, Rule>) -> Result<Object<'a>> {
199        assert_eq!(block.as_rule(), Rule::object);
200
201        let mut obj = IndexMap::new();
202
203        for pair in block.into_inner() {
204            match pair.as_rule() {
205                Rule::pair => {
206                    let mut path_rules = pair.into_inner();
207
208                    let path = path_rules
209                        .next()
210                        .expect("object pairs should contain a key");
211
212                    let paths = Self::parse_path(path);
213
214                    let value = self.parse_value(
215                        path_rules
216                            .next()
217                            .expect("object pairs should contain a value"),
218                    )?;
219
220                    obj = Self::add_at_path(obj, &paths, value)?;
221                }
222                Rule::spread => {
223                    let input = pair
224                        .into_inner()
225                        .next()
226                        .expect("spread operators should contain an input");
227
228                    let input_name = input.as_str();
229                    let value = self.parse_value(input)?;
230
231                    match value {
232                        Value::Object(other) => obj.extend(other),
233                        _ => return Err(Error::InvalidSpreadError(input_name.to_string())),
234                    }
235                }
236                _ => unreachable!(),
237            }
238        }
239
240        Ok(obj)
241    }
242
243    fn parse_path(path: Pair<Rule>) -> Vec<Key> {
244        path.into_inner()
245            .map(|pair| match pair.as_rule() {
246                Rule::regular_path_seg => Key::String(Cow::Borrowed(pair.as_str())),
247                Rule::quoted_path_seg => Key::String(Cow::Owned(
248                    pair.into_inner()
249                        .next()
250                        .expect("quoted paths should contain an inner value")
251                        .as_str()
252                        .replace('\\', ""),
253                )),
254                Rule::integer => Key::Integer(Self::parse_integer(pair)),
255                _ => unreachable!(),
256            })
257            .collect::<Vec<_>>()
258    }
259
260    /// Adds `Value` at the `path` in `obj`.
261    ///
262    /// `path` is an array where each entry represents another object key,
263    /// for example `foo.bar` is represented as `["foo", "bar"]`.
264    ///
265    /// Objects are created up to the required depth recursively.
266    fn add_at_path(mut obj: Object<'a>, path: &[Key<'a>], value: Value<'a>) -> Result<Object<'a>> {
267        let (part, path_rest) = path
268            .split_first()
269            .expect("paths should contain at least 1 segment");
270
271        if path_rest.is_empty() {
272            obj.insert(part.clone(), value);
273            return Ok(obj);
274        }
275
276        let child_obj = obj
277            .shift_remove(part)
278            .unwrap_or_else(|| Value::Object(IndexMap::new()));
279
280        match child_obj {
281            Value::Object(map) => {
282                obj.insert(
283                    part.clone(),
284                    Value::Object(Self::add_at_path(map, path_rest, value)?),
285                );
286
287                Ok(obj)
288            }
289            _ => Err(Error::InvalidPathError(
290                path.iter()
291                    .map(ToString::to_string)
292                    .collect::<Vec<_>>()
293                    .join("."),
294            )),
295        }
296    }
297
298    /// Parses the `let { } in` block at the start of files.
299    /// Each input is inserted into into `self.inputs`.
300    fn parse_assign_block(&mut self, block: Pair<'a, Rule>) -> Result<()> {
301        assert_eq!(block.as_rule(), Rule::assign_block);
302
303        for pair in block.into_inner() {
304            let mut assign_rules = pair.into_inner();
305            let name = assign_rules
306                .next()
307                .expect("input assignments should have a name")
308                .as_str();
309
310            let value = self.parse_value(
311                assign_rules
312                    .next()
313                    .expect("input assignments should have a value"),
314            )?;
315
316            self.inputs.insert(name, value);
317        }
318
319        Ok(())
320    }
321
322    /// Attempts to get an input value from the `inputs` map.
323    /// If the `key` starts with `$env_` the system environment variables will be consulted first.
324    fn get_input(&self, key: &'a str) -> Result<Value<'a>> {
325        if let Some(env_name) = key.strip_prefix("$env_") {
326            let var = var(env_name);
327
328            if let Ok(var) = var {
329                return Ok(Value::String(Cow::Owned(var)));
330            }
331        }
332
333        if let Some(value) = self.inputs.get(key) {
334            Ok(value.clone())
335        } else {
336            Err(Error::InputResolveError(key.to_string()))
337        }
338    }
339}
340
/// Takes a multiline string and trims the maximum amount of
/// whitespace at the start of each line
/// while preserving formatting.
///
/// The amount removed is the smallest indent of any line after the first
/// that has content. Empty and whitespace-only lines do not limit the
/// indent, so a blank line between indented lines no longer disables
/// trimming. Such a line comes out empty if it is no longer than the indent.
///
/// If no line after the first has content, the shortest of those lines
/// sets the indent instead.
///
/// The first line (on the same line as the opening quote) is never
/// un-indented, and a leading line break is dropped.
///
/// Based on code from `indoc` crate:
/// <https://github.com/dtolnay/indoc/blob/60b5fa29ba4f98b479713621a1f4ec96155caaba/src/unindent.rs#L15-L51>
fn trim_multiline_string(string: &str) -> String {
    let ignore_first_line = string.starts_with('\n') || string.starts_with("\r\n");

    // Indent of the least-indented line that has content. `find` yields a
    // byte offset, which equals the number of leading whitespace characters
    // because every character before it is ASCII.
    let content_indent = string
        .lines()
        .skip(1)
        .filter_map(|line| line.find(|c: char| !c.is_ascii_whitespace()))
        .min();

    // With no content lines, every remaining line is pure ASCII whitespace,
    // so its length is its indent.
    let spaces = content_indent.unwrap_or_else(|| {
        string
            .lines()
            .skip(1)
            .map(str::len)
            .min()
            .unwrap_or_default()
    });

    let mut result = String::with_capacity(string.len());
    for (i, line) in string.lines().enumerate() {
        if i > 1 || (i == 1 && !ignore_first_line) {
            result.push('\n');
        }
        if i == 0 {
            // Do not un-indent anything on same line as opening quote
            result.push_str(line);
        } else if line.len() > spaces {
            // Whitespace-only lines may have fewer than the number of spaces
            // being removed
            result.push_str(&line[spaces..]);
        }
    }
    result
}
373
374/// Parses the input string into a `Config`
375/// containing the resolved inputs
376/// and a map of values representing the top-level object.
377///
378/// # Examples
379///
380/// ```rust
381/// use corn::parse;
382///
383/// let corn = "{foo = 42}";
384///
385/// let config = parse(corn).unwrap();
386/// let json = serde_json::to_string(&config).unwrap();
387///
388/// assert_eq!(json, "{\"foo\":42}");
389/// ```
390///
391/// # Errors
392///
393/// Will fail if the input contains a syntax error.
394/// Will fail if the input contains invalid Corn for another reason,
395/// including references to undefined inputs or dot-notation for non-object values.
396/// Will fail if the input cannot be deserialized for any reaon.
397///
398/// Any of the above will return a specific error type with details.
399///
400/// # Panics
401///
402/// If the internal AST parser produces a tree in an invalid structure,
403/// the function will panic.
404/// This indicates a severe error in the library and should never occur.
405pub fn parse(file: &str) -> Result<Value<'_>> {
406    let rules = AstParser::parse(Rule::config, file);
407
408    match rules {
409        Ok(mut rules) => {
410            let first_block = rules.next().expect("should be at least 1 rule");
411
412            match first_block.as_rule() {
413                Rule::assign_block => {
414                    let parser = CornParser::new(Some(first_block));
415                    let object_block = rules.next().expect("should always be an object block");
416                    parser.parse(object_block)
417                }
418                Rule::object => {
419                    let parser = CornParser::new(None);
420                    parser.parse(first_block)
421                }
422                _ => unreachable!(),
423            }
424        }
425        Err(error) => Err(Error::ParserError(Box::new(error))),
426    }
427}