Skip to main content

tanzim_source/
parse.rs

//! Parse and format configuration source strings.
//!
//! Format: `SOURCE [(OPTIONS)] [?] [:RESOURCE]` — see the crate README for rules.
//!
//! Use [`parse`] or [`Source::parse`] to parse; the [`Display`] impl of [`Source`] writes the
//! canonical form.
6
7use crate::{OptionValue, Options, Source};
8use std::fmt::{self, Display, Formatter};
9
/// Error while parsing a configuration source string.
///
/// Format: `SOURCE [(OPTIONS)] [?] [:RESOURCE]` — see the crate README for rules.
///
/// Every variant carries the complete `input` and `at`, the byte offset into `input` at which
/// the problem was detected.
///
/// [`Display`] is one line by default; use `{error:#}` for the input snippet and caret. The
/// reported column is 1-based and counted in characters, so the caret stays aligned when the
/// input contains multi-byte characters.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ParseError {
    /// No source identifier (empty input or invalid start).
    MissingSource { input: String, at: usize },
    /// Input ended before a required token.
    UnexpectedEnd {
        input: String,
        at: usize,
        /// Human-readable description of the token that was required.
        expected: &'static str,
    },
    /// Unexpected character at the current position.
    UnexpectedChar {
        input: String,
        at: usize,
        /// The character that was actually present.
        found: char,
        /// Human-readable description of what was required instead.
        expected: &'static str,
    },
    /// Option or map key is not a valid identifier.
    InvalidIdentifier {
        input: String,
        at: usize,
        /// The offending text.
        found: String,
    },
    /// Option or map key is empty.
    EmptyKey { input: String, at: usize },
    /// Option value is empty; use `""` for an empty string.
    EmptyValue { input: String, at: usize },
    /// Invalid escape sequence inside a quoted string.
    InvalidEscape { input: String, at: usize },
    /// Quoted string has no closing `"`.
    UnclosedString { input: String, at: usize },
    /// List has no closing `]`.
    UnclosedList { input: String, at: usize },
    /// Map or options block has no closing `)`.
    UnclosedMap { input: String, at: usize },
    /// Comma with no following entry.
    TrailingComma { input: String, at: usize },
    /// Token looks like a number but is not valid.
    InvalidNumber {
        input: String,
        at: usize,
        /// The offending token.
        found: String,
    },
    /// Non-empty input after a complete configuration source.
    TrailingInput {
        input: String,
        at: usize,
        /// Everything from `at` to the end of the input.
        rest: String,
    },
    /// Skip marker `?` appears before `(...)` options (`source?(...)` is invalid).
    SkipMarkerBeforeOptions { input: String, at: usize },
}

impl Display for ParseError {
    /// Writes `invalid configuration source at column N: <message>`.
    ///
    /// With the alternate flag (`{:#}`) two more lines follow: the input, and a `^` placed under
    /// the character at which the error was detected.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let (input, at) = match self {
            Self::MissingSource { input, at }
            | Self::UnexpectedEnd { input, at, .. }
            | Self::UnexpectedChar { input, at, .. }
            | Self::InvalidIdentifier { input, at, .. }
            | Self::EmptyKey { input, at }
            | Self::EmptyValue { input, at }
            | Self::InvalidEscape { input, at }
            | Self::UnclosedString { input, at }
            | Self::UnclosedList { input, at }
            | Self::UnclosedMap { input, at }
            | Self::TrailingComma { input, at }
            | Self::InvalidNumber { input, at, .. }
            | Self::TrailingInput { input, at, .. }
            | Self::SkipMarkerBeforeOptions { input, at } => (input.as_str(), *at),
        };

        // `at` is a byte offset. Columns and caret padding must be counted in characters,
        // otherwise every multi-byte character before `at` shifts the caret to the right.
        // Counting via `char_indices` also tolerates an `at` that is past the end or not on a
        // character boundary (the fields are public, so such values can be constructed).
        let column = input
            .char_indices()
            .take_while(|&(offset, _)| offset < at)
            .count();

        write!(f, "invalid configuration source at column {}: ", column + 1)?;
        match self {
            Self::MissingSource { .. } => f.write_str("configuration source is required")?,
            Self::UnexpectedEnd { expected, .. } => write!(
                f,
                "configuration source: expected {expected}, found end of input"
            )?,
            Self::UnexpectedChar {
                found, expected, ..
            } => write!(
                f,
                "configuration source: expected {expected}, found `{found}`"
            )?,
            Self::InvalidIdentifier { found, .. } => {
                write!(f, "configuration source: invalid identifier `{found}`")?
            }
            Self::EmptyKey { .. } => {
                f.write_str("configuration source option key cannot be empty")?
            }
            Self::EmptyValue { .. } => {
                f.write_str("configuration source option value cannot be empty; use \"\"")?
            }
            Self::InvalidEscape { .. } => {
                f.write_str("configuration source: invalid escape sequence in string")?
            }
            Self::UnclosedString { .. } => f.write_str("configuration source: unclosed string")?,
            Self::UnclosedList { .. } => f.write_str("configuration source: unclosed list")?,
            Self::UnclosedMap { .. } => f.write_str("configuration source: unclosed map")?,
            Self::TrailingComma { .. } => f.write_str("configuration source: trailing comma")?,
            Self::InvalidNumber { found, .. } => {
                write!(f, "configuration source: invalid number `{found}`")?
            }
            Self::TrailingInput { rest, .. } => write!(
                f,
                "configuration source: unexpected trailing input `{rest}`"
            )?,
            Self::SkipMarkerBeforeOptions { .. } => f.write_str(
                "configuration source: skip marker `?` must come after options `(...)`; use `source(...)?` not `source?(...)`",
            )?,
        }

        if f.alternate() {
            // Right-aligning "^" in a field of `column + 1` yields `column` spaces, then the caret.
            write!(
                f,
                "\n  {input}\n  {caret:>width$}",
                caret = "^",
                width = column + 1
            )?;
        }
        Ok(())
    }
}

impl std::error::Error for ParseError {}
167
168/// Parse a configuration source string.
169pub fn parse(input: &str) -> Result<Source, ParseError> {
170    Parser::new(input).parse()
171}
172
173impl Display for Source {
174    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
175        write!(f, "{}", self.source())?;
176        if !self.options().is_empty() {
177            write!(f, "(")?;
178            for (index, (key, value)) in self.options().entries().iter().enumerate() {
179                if index > 0 {
180                    write!(f, ",")?;
181                }
182                write!(f, "{key}=")?;
183                write_option_value(f, value)?;
184            }
185            write!(f, ")")?;
186        }
187        if self.skip_errors() {
188            write!(f, "?")?;
189        }
190        if self.resource_colon() || !self.resource().is_empty() {
191            write!(f, ":{}", self.resource())?;
192        }
193        Ok(())
194    }
195}
196
197fn write_option_value(f: &mut Formatter<'_>, value: &OptionValue) -> fmt::Result {
198    match value {
199        OptionValue::Bool(value) => write!(f, "{value}"),
200        OptionValue::Integer(value) => write!(f, "{value}"),
201        OptionValue::Float(value) => {
202            if value.is_finite() && value.fract() == 0.0 {
203                write!(f, "{value:.1}")
204            } else {
205                write!(f, "{value}")
206            }
207        }
208        OptionValue::String(value) => {
209            let needs_quotes = value.is_empty()
210                || !value
211                    .chars()
212                    .all(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.'))
213                || value.eq_ignore_ascii_case("true")
214                || value.eq_ignore_ascii_case("false")
215                || is_int_token(value)
216                || is_float_token(value);
217            if needs_quotes {
218                write!(f, "\"")?;
219                for ch in value.chars() {
220                    match ch {
221                        '"' => write!(f, "\\\"")?,
222                        '\\' => write!(f, "\\\\")?,
223                        '\n' => write!(f, "\\n")?,
224                        '\r' => write!(f, "\\r")?,
225                        '\t' => write!(f, "\\t")?,
226                        ch => write!(f, "{ch}")?,
227                    }
228                }
229                write!(f, "\"")
230            } else {
231                write!(f, "{value}")
232            }
233        }
234        OptionValue::List(values) => {
235            write!(f, "[")?;
236            for (index, item) in values.iter().enumerate() {
237                if index > 0 {
238                    write!(f, ",")?;
239                }
240                write_option_value(f, item)?;
241            }
242            write!(f, "]")
243        }
244        OptionValue::Map(options) => {
245            write!(f, "(")?;
246            for (index, (key, item)) in options.entries().iter().enumerate() {
247                if index > 0 {
248                    write!(f, ",")?;
249                }
250                write!(f, "{key}=")?;
251                write_option_value(f, item)?;
252            }
253            write!(f, ")")
254        }
255    }
256}
257
258struct Parser<'a> {
259    input: &'a str,
260    pos: usize,
261}
262
263impl<'a> Parser<'a> {
264    fn new(input: &'a str) -> Self {
265        Self { input, pos: 0 }
266    }
267
268    fn owned_input(&self) -> String {
269        self.input.to_string()
270    }
271
272    fn parse(mut self) -> Result<Source, ParseError> {
273        let source = self.parse_source()?;
274        if self.peek() == Some('?') && self.input[self.pos..].starts_with("?(") {
275            return Err(ParseError::SkipMarkerBeforeOptions {
276                input: self.owned_input(),
277                at: self.pos,
278            });
279        }
280        let options = if self.peek() == Some('(') {
281            self.parse_options_block()?
282        } else {
283            Options::default()
284        };
285        let skip_errors = if self.peek() == Some('?') {
286            self.bump();
287            true
288        } else {
289            false
290        };
291        let (resource_colon, resource) = if self.peek() == Some(':') {
292            self.bump();
293            let resource = self.input[self.pos..].to_string();
294            self.pos = self.input.len();
295            (true, resource)
296        } else {
297            (false, String::new())
298        };
299        if self.pos < self.input.len() {
300            return Err(ParseError::TrailingInput {
301                input: self.owned_input(),
302                at: self.pos,
303                rest: self.input[self.pos..].to_string(),
304            });
305        }
306        Ok(Source {
307            source,
308            options,
309            resource,
310            skip_errors,
311            resource_colon,
312        })
313    }
314
315    fn parse_source(&mut self) -> Result<String, ParseError> {
316        let start = self.pos;
317        if !self
318            .peek()
319            .is_some_and(|ch| is_ident_char(ch) && !ch.is_ascii_digit())
320        {
321            if self.pos >= self.input.len() {
322                return Err(ParseError::MissingSource {
323                    input: self.owned_input(),
324                    at: self.pos,
325                });
326            }
327            let found = self.peek().unwrap();
328            return Err(ParseError::UnexpectedChar {
329                input: self.owned_input(),
330                at: self.pos,
331                found,
332                expected: "source identifier",
333            });
334        }
335        while self.peek().is_some_and(is_ident_char) {
336            self.bump();
337        }
338        if self.pos == start {
339            return Err(ParseError::MissingSource {
340                input: self.owned_input(),
341                at: self.pos,
342            });
343        }
344        Ok(self.input[start..self.pos].to_string())
345    }
346
347    fn parse_options_block(&mut self) -> Result<Options, ParseError> {
348        self.expect_char('(', "opening `(` for options")?;
349        let mut options = Options::default();
350        if self.peek() == Some(')') {
351            self.bump();
352            return Ok(options);
353        }
354        loop {
355            let key = self.parse_key()?;
356            self.expect_char('=', "option value after `=`")?;
357            let value = self.parse_value()?;
358            options.insert(key, value);
359            match self.peek() {
360                Some(',') => {
361                    self.bump();
362                    if matches!(self.peek(), Some(')' | ']' | ',')) {
363                        return Err(ParseError::TrailingComma {
364                            input: self.owned_input(),
365                            at: self.pos,
366                        });
367                    }
368                }
369                Some(')') => {
370                    self.bump();
371                    break;
372                }
373                None => {
374                    return Err(ParseError::UnclosedMap {
375                        input: self.owned_input(),
376                        at: self.pos,
377                    });
378                }
379                Some(found) => {
380                    return Err(ParseError::UnexpectedChar {
381                        input: self.owned_input(),
382                        at: self.pos,
383                        found,
384                        expected: "`,` or `)`",
385                    });
386                }
387            }
388        }
389        Ok(options)
390    }
391
392    fn parse_map_value(&mut self) -> Result<OptionValue, ParseError> {
393        self.expect_char('(', "opening `(` for map")?;
394        let mut options = Options::default();
395        if self.peek() == Some(')') {
396            self.bump();
397            return Ok(OptionValue::Map(options));
398        }
399        loop {
400            let key = self.parse_key()?;
401            self.expect_char('=', "map value after `=`")?;
402            let value = self.parse_value()?;
403            options.insert(key, value);
404            match self.peek() {
405                Some(',') => {
406                    self.bump();
407                    if matches!(self.peek(), Some(')' | ']' | ',')) {
408                        return Err(ParseError::TrailingComma {
409                            input: self.owned_input(),
410                            at: self.pos,
411                        });
412                    }
413                }
414                Some(')') => {
415                    self.bump();
416                    break;
417                }
418                None => {
419                    return Err(ParseError::UnclosedMap {
420                        input: self.owned_input(),
421                        at: self.pos,
422                    });
423                }
424                Some(found) => {
425                    return Err(ParseError::UnexpectedChar {
426                        input: self.owned_input(),
427                        at: self.pos,
428                        found,
429                        expected: "`,` or `)`",
430                    });
431                }
432            }
433        }
434        Ok(OptionValue::Map(options))
435    }
436
437    fn parse_list_value(&mut self) -> Result<OptionValue, ParseError> {
438        self.expect_char('[', "opening `[` for list")?;
439        let mut values = Vec::new();
440        if self.peek() == Some(']') {
441            self.bump();
442            return Ok(OptionValue::List(values));
443        }
444        loop {
445            values.push(self.parse_value()?);
446            match self.peek() {
447                Some(',') => {
448                    self.bump();
449                    if matches!(self.peek(), Some(']' | ',')) {
450                        return Err(ParseError::TrailingComma {
451                            input: self.owned_input(),
452                            at: self.pos,
453                        });
454                    }
455                }
456                Some(']') => {
457                    self.bump();
458                    break;
459                }
460                None => {
461                    return Err(ParseError::UnclosedList {
462                        input: self.owned_input(),
463                        at: self.pos,
464                    });
465                }
466                Some(found) => {
467                    return Err(ParseError::UnexpectedChar {
468                        input: self.owned_input(),
469                        at: self.pos,
470                        found,
471                        expected: "`,` or `]`",
472                    });
473                }
474            }
475        }
476        Ok(OptionValue::List(values))
477    }
478
479    fn parse_key(&mut self) -> Result<String, ParseError> {
480        let start = self.pos;
481        if !self
482            .peek()
483            .is_some_and(|ch| is_ident_char(ch) && !ch.is_ascii_digit())
484        {
485            if self.peek() == Some('=') {
486                return Err(ParseError::EmptyKey {
487                    input: self.owned_input(),
488                    at: self.pos,
489                });
490            }
491            let found = self
492                .peek()
493                .map(|ch| ch.to_string())
494                .unwrap_or_else(|| "end of input".to_string());
495            return if self.peek().is_some() {
496                Err(ParseError::UnexpectedChar {
497                    input: self.owned_input(),
498                    at: self.pos,
499                    found: self.peek().unwrap(),
500                    expected: "option key",
501                })
502            } else {
503                Err(ParseError::InvalidIdentifier {
504                    input: self.owned_input(),
505                    at: self.pos,
506                    found,
507                })
508            };
509        }
510        while self.peek().is_some_and(is_ident_char) {
511            self.bump();
512        }
513        if self.pos == start {
514            return Err(ParseError::EmptyKey {
515                input: self.owned_input(),
516                at: self.pos,
517            });
518        }
519        Ok(self.input[start..self.pos].to_string())
520    }
521
522    fn parse_value(&mut self) -> Result<OptionValue, ParseError> {
523        match self.peek() {
524            Some('"') => Ok(OptionValue::String(self.parse_quoted_string()?)),
525            Some('[') => self.parse_list_value(),
526            Some('(') => self.parse_map_value(),
527            Some('=') | Some(',') | Some(')') | Some(']') | Some(':') | Some('?') | None => {
528                Err(ParseError::EmptyValue {
529                    input: self.owned_input(),
530                    at: self.pos,
531                })
532            }
533            Some(_) => {
534                let token = self.parse_unquoted_token()?;
535                let at = self.pos - token.len();
536                let owned_input = self.input.to_string();
537                if token.eq_ignore_ascii_case("true") {
538                    Ok(OptionValue::Bool(true))
539                } else if token.eq_ignore_ascii_case("false") {
540                    Ok(OptionValue::Bool(false))
541                } else if token.contains('.') {
542                    if !is_float_token(&token) {
543                        Err(ParseError::InvalidNumber {
544                            input: owned_input,
545                            at,
546                            found: token,
547                        })
548                    } else {
549                        token.parse::<f64>().map(OptionValue::Float).map_err(|_| {
550                            ParseError::InvalidNumber {
551                                input: owned_input,
552                                at,
553                                found: token,
554                            }
555                        })
556                    }
557                } else if is_int_token(&token) {
558                    token.parse::<i64>().map(OptionValue::Integer).map_err(|_| {
559                        ParseError::InvalidNumber {
560                            input: owned_input,
561                            at,
562                            found: token,
563                        }
564                    })
565                } else {
566                    Ok(OptionValue::String(token))
567                }
568            }
569        }
570    }
571
572    fn parse_unquoted_token(&mut self) -> Result<String, ParseError> {
573        let start = self.pos;
574        while self
575            .peek()
576            .is_some_and(|ch| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.'))
577        {
578            self.bump();
579        }
580        if self.pos == start {
581            let found = self.peek().unwrap();
582            return Err(ParseError::UnexpectedChar {
583                input: self.owned_input(),
584                at: self.pos,
585                found,
586                expected: "value",
587            });
588        }
589        Ok(self.input[start..self.pos].to_string())
590    }
591
592    fn parse_quoted_string(&mut self) -> Result<String, ParseError> {
593        self.expect_char('"', "opening `\"` for string")?;
594        let start = self.pos;
595        let mut value = String::new();
596        while let Some(ch) = self.peek() {
597            if ch == '"' {
598                self.bump();
599                return Ok(value);
600            }
601            if ch == '\\' {
602                self.bump();
603                let escaped = self.peek().ok_or(ParseError::UnclosedString {
604                    input: self.owned_input(),
605                    at: start,
606                })?;
607                value.push(match escaped {
608                    '"' => '"',
609                    '\\' => '\\',
610                    'n' => '\n',
611                    'r' => '\r',
612                    't' => '\t',
613                    _ => {
614                        return Err(ParseError::InvalidEscape {
615                            input: self.owned_input(),
616                            at: self.pos - 1,
617                        });
618                    }
619                });
620                self.bump();
621                continue;
622            }
623            self.bump();
624            value.push(ch);
625        }
626        Err(ParseError::UnclosedString {
627            input: self.owned_input(),
628            at: start,
629        })
630    }
631
632    fn expect_char(&mut self, expected: char, message: &'static str) -> Result<(), ParseError> {
633        match self.peek() {
634            Some(found) if found == expected => {
635                self.bump();
636                Ok(())
637            }
638            Some(found) => Err(ParseError::UnexpectedChar {
639                input: self.owned_input(),
640                at: self.pos,
641                found,
642                expected: message,
643            }),
644            None => Err(ParseError::UnexpectedEnd {
645                input: self.owned_input(),
646                at: self.pos,
647                expected: message,
648            }),
649        }
650    }
651
652    fn peek(&self) -> Option<char> {
653        self.input[self.pos..].chars().next()
654    }
655
656    fn bump(&mut self) -> Option<char> {
657        let ch = self.peek()?;
658        self.pos += ch.len_utf8();
659        Some(ch)
660    }
661}
662
/// Returns `true` for characters allowed inside identifiers: ASCII letters and digits, `-`,
/// `_` and `.`.
fn is_ident_char(ch: char) -> bool {
    matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_' | '.')
}
666
/// Returns `true` when `token` is one or more ASCII digits, optionally preceded by a single `-`.
fn is_int_token(token: &str) -> bool {
    let digits = token.strip_prefix('-').unwrap_or(token);
    !digits.is_empty() && digits.bytes().all(|byte| byte.is_ascii_digit())
}
673
/// Returns `true` when `token` has the shape `[-]DIGITS.DIGITS`, with at least one ASCII digit
/// on each side of a single `.`.
fn is_float_token(token: &str) -> bool {
    fn all_digits(part: &str) -> bool {
        !part.is_empty() && part.bytes().all(|byte| byte.is_ascii_digit())
    }

    let unsigned = token.strip_prefix('-').unwrap_or(token);
    match unsigned.split_once('.') {
        // A second `.` ends up in `fraction` and fails the digit check.
        Some((whole, fraction)) => all_digits(whole) && all_digits(fraction),
        None => false,
    }
}
684
#[cfg(test)]
mod tests {
    use super::*;
    use crate::OptionValue;

    /// Full example exercising options, a nested map, the skip marker and a URL resource.
    const HTTP_EXAMPLE: &str =
        r#"http(headers=(Authorization="TOKEN"),timeout=3s)?:https://domain.tld/my/config.yml"#;

    /// Parses `input`, panicking with the rendered error when parsing fails.
    fn parsed(input: &str) -> Source {
        match parse(input) {
            Ok(source) => source,
            Err(error) => panic!("{error}"),
        }
    }

    #[test]
    fn parses_documented_examples() {
        // Bare source: nothing but the identifier.
        let bare = parsed("env");
        assert_eq!(bare.source(), "env");
        assert!(bare.options().is_empty());
        assert_eq!(bare.resource(), "");
        assert!(!bare.skip_errors());
        assert!(!bare.resource_colon());

        // Source with a single string option.
        let with_options = parsed("env(prefix=APP_)");
        let prefix = OptionValue::String("APP_".to_string());
        assert_eq!(with_options.options().get("prefix"), Some(&prefix));

        // Source with a resource.
        let with_resource = parsed("file:/x/y/z");
        assert_eq!(with_resource.resource(), "/x/y/z");
        assert!(!with_resource.skip_errors());

        // Skip marker followed by a resource.
        let skipping = parsed("file?:.env");
        assert!(skipping.skip_errors());
        assert_eq!(skipping.resource(), ".env");

        // Everything at once.
        let http = parsed(HTTP_EXAMPLE);
        assert_eq!(http.source(), "http");
        assert!(http.skip_errors());
        assert_eq!(http.resource(), "https://domain.tld/my/config.yml");
        let timeout = OptionValue::String("3s".to_string());
        assert_eq!(http.options().get("timeout"), Some(&timeout));
    }

    #[test]
    fn round_trips_examples() {
        let canonical = [
            "env",
            "env(prefix=APP_)",
            "file:/x/y/z",
            "file?:.env",
            "file?",
            "env:",
        ];
        for input in canonical {
            let rendered = parsed(input).to_string();
            assert_eq!(rendered, input, "round-trip failed for `{input}`");
        }

        // The HTTP example is compared structurally rather than textually.
        let http = parsed(HTTP_EXAMPLE);
        let reparsed = parsed(&http.to_string());
        assert_eq!(reparsed, http);
    }

    #[test]
    fn parses_bool_case_insensitive() {
        let source = parsed("env(on=TRUE,off=false)");
        let options = source.options();
        assert_eq!(options.get("on"), Some(&OptionValue::Bool(true)));
        assert_eq!(options.get("off"), Some(&OptionValue::Bool(false)));
    }

    #[test]
    fn rejects_question_mark_before_options() {
        let error = parse(r#"env?(kv=salam):oops"#).unwrap_err();
        assert!(matches!(error, ParseError::SkipMarkerBeforeOptions { .. }));
        let rendered = error.to_string();
        assert!(rendered.contains("configuration source:"));
    }

    #[test]
    fn parses_complex_options_before_skip_marker() {
        let source = parsed(r#"env(kv=salam,h=(o=b,z=[1,2,3.14,""]))?:oops"#);
        assert!(source.skip_errors());
        assert_eq!(source.resource(), "oops");
        let expected = OptionValue::String("salam".to_string());
        assert_eq!(source.options().get("kv"), Some(&expected));
    }

    #[test]
    fn rejects_invalid_forms() {
        let empty = parse("");
        assert!(matches!(empty, Err(ParseError::MissingSource { .. })));

        let empty_value = parse("env(a=)");
        assert!(matches!(empty_value, Err(ParseError::EmptyValue { .. })));

        let trailing_comma = parse("env(a=1,)");
        assert!(matches!(
            trailing_comma,
            Err(ParseError::TrailingComma { .. })
        ));

        let bad_float = parse("env(a=.5)");
        assert!(matches!(bad_float, Err(ParseError::InvalidNumber { .. })));

        let plus_sign = parse("env(a=+5)");
        assert!(matches!(plus_sign, Err(ParseError::UnexpectedChar { .. })));

        let leftover = parse("env()oops");
        assert!(matches!(leftover, Err(ParseError::TrailingInput { .. })));
    }

    #[test]
    fn parse_error_alternate_includes_caret() {
        let error = parse("env(prefix=)").unwrap_err();
        let rendered = format!("{error:#}");
        assert!(rendered.contains("column"));
        assert!(rendered.contains('^'));
        assert!(rendered.contains('\n'));
    }

    #[test]
    fn parse_error_default_is_single_line() {
        let error = parse("env(prefix=)").unwrap_err();
        let rendered = error.to_string();
        assert!(!rendered.contains('^'));
        assert!(!rendered.contains('\n'));
    }
}