keyvalues_parser/text/parse/
mod.rs

1use std::borrow::Cow;
2
3use crate::{error::Result, Obj, PartialVdf, Value, Vdf};
4
5use pest::{iterators::Pair as PestPair, Atomicity, RuleType};
6
7// TODO: rename `PartialVdf` to `TopLevelVdf` and have it hold a `Vdf` instead of flattening it out
8
9mod escaped;
10mod raw;
11
12#[expect(deprecated)]
13pub use escaped::parse as escaped_parse;
14pub use escaped::PestError as EscapedPestError;
15#[expect(deprecated)]
16pub use raw::parse as raw_parse;
17pub use raw::PestError as RawPestError;
18
// The hand-written rules below all receive and return the pest parser state boxed, so the boxed
// form gets its own alias
type BoxedState<'a, R> = Box<pest::ParserState<'a, R>>;
// Shorthand for pest's `ParseResult` specialized to the boxed parser state
type ParseResult<'a, R> = pest::ParseResult<BoxedState<'a, R>>;
21
22#[inline]
23fn whitespace<R: RuleType>(s: BoxedState<'_, R>) -> ParseResult<'_, R> {
24    s.atomic(Atomicity::Atomic, |s| {
25        s.match_string(" ")
26            .or_else(|s| s.match_string("\t"))
27            .or_else(|s| s.match_string("\r"))
28            .or_else(|s| s.match_string("\n"))
29    })
30}
31
32#[inline]
33fn any<R: RuleType>(s: BoxedState<'_, R>) -> ParseResult<'_, R> {
34    s.skip(1)
35}
36
37fn soi<R: RuleType>(s: BoxedState<'_, R>) -> ParseResult<'_, R> {
38    s.start_of_input()
39}
40
41#[inline]
42fn comment<R: RuleType>(s: BoxedState<'_, R>) -> ParseResult<'_, R> {
43    s.atomic(Atomicity::Atomic, |s| {
44        s.sequence(|s| {
45            s.match_string("//").and_then(|s| {
46                s.repeat(|s| {
47                    s.sequence(|s| s.lookahead(false, |s| s.match_string("\n")).and_then(any))
48                })
49            })
50        })
51    })
52}
53
54#[inline]
55fn skip<R: RuleType>(s: BoxedState<'_, R>) -> ParseResult<'_, R> {
56    if s.atomicity() == Atomicity::NonAtomic {
57        s.sequence(|s| {
58            s.repeat(whitespace).and_then(|s| {
59                s.repeat(|s| s.sequence(|s| comment(s).and_then(|s| s.repeat(whitespace))))
60            })
61        })
62    } else {
63        Ok(s)
64    }
65}
66
// Unfortunate hack: the escaped and raw grammars each generate their own pest `Rule` type, so the
// code that walks the resulting pairs is stamped out once per grammar through this macro :/
//
// - `$parse_fn`: function called with the input `&str`; its `Ok` value is iterated for the
//   top-level pairs
// - `$rule`: the grammar's rule type
// - `$parse_escaped`: boolean expression, whether quoted strings get their escape sequences
//   resolved
macro_rules! common_parsing {
    ($parse_fn:ident, $rule:ty, $parse_escaped:expr) => {
        fn parse_(s: &str) -> Result<PartialVdf<'_>> {
            let mut top_level_pairs = $parse_fn(s)?;

            // Any number of base macros can show up ahead of the one top-level pair. Collect them
            // until that pair is reached
            let mut bases = Vec::new();
            loop {
                let pair = top_level_pairs.next().unwrap();
                match pair.as_rule() {
                    <$rule>::base_macro => {
                        let path_string = pair.into_inner().next().unwrap();
                        // Quoted paths are wrapped one level deeper than unquoted ones
                        let path = match path_string.as_rule() {
                            <$rule>::quoted_raw_string => path_string.into_inner().next().unwrap(),
                            <$rule>::unquoted_string => path_string,
                            _ => unreachable!("Prevented by grammar"),
                        };
                        bases.push(Cow::Borrowed(path.as_str()));
                    }
                    _ => {
                        let (key, value) = parse_pair(pair);
                        return Ok(PartialVdf { key, value, bases });
                    }
                }
            }
        }

        fn parse_pair(grammar_pair: PestPair<'_, $rule>) -> (Cow<'_, str>, Value<'_>) {
            // Layout: pair
            //         \ key   <- first child
            //         \ value <- second child
            match grammar_pair.as_rule() {
                <$rule>::pair => {
                    let mut children = grammar_pair.into_inner();
                    let key = parse_string(children.next().unwrap());
                    let value = Value::from(children.next().unwrap());

                    (key, value)
                }
                _ => unreachable!("Prevented by grammar"),
            }
        }

        fn parse_string(grammar_string: PestPair<'_, $rule>) -> Cow<'_, str> {
            match grammar_string.as_rule() {
                // Layout: unquoted_string <- used as-is
                <$rule>::unquoted_string => Cow::Borrowed(grammar_string.as_str()),
                // Layout: quoted_string
                //         \ "
                //         \ quoted_inner <- the part we want
                //         \ "
                <$rule>::quoted_string => {
                    let contents = grammar_string.into_inner().next().unwrap();
                    if $parse_escaped {
                        parse_escaped_string(contents)
                    } else {
                        Cow::Borrowed(contents.as_str())
                    }
                }
                _ => unreachable!("Prevented by grammar"),
            }
        }

        // Note: a slight performance win (~4%) is possible by having the grammar not capture
        // quoted_inner and slicing the surrounding '"' off here instead, but that seems like too
        // much of a hack for the gain
        fn parse_escaped_string(inner: PestPair<'_, $rule>) -> Cow<'_, str> {
            let raw = inner.as_str();

            // Nothing to resolve without a backslash, so the input can be borrowed directly
            if !raw.contains('\\') {
                return Cow::Borrowed(raw);
            }

            // The resolved text ends up a bit shorter than the input, but it should be close
            let mut resolved = String::with_capacity(raw.len());
            let mut chars = raw.chars();

            while let Some(ch) = chars.next() {
                if ch != '\\' {
                    resolved.push(ch);
                    continue;
                }

                // A backslash always introduces a two character sequence, so the character that
                // follows decides what gets emitted
                let unescaped = match chars.next() {
                    Some('n') => '\n',
                    Some('r') => '\r',
                    Some('t') => '\t',
                    Some('\\') => '\\',
                    Some('\"') => '\"',
                    _ => unreachable!("Prevented by grammar"),
                };
                resolved.push(unescaped);
            }

            Cow::Owned(resolved)
        }

        impl<'a> From<PestPair<'a, $rule>> for Value<'a> {
            fn from(grammar_value: PestPair<'a, $rule>) -> Self {
                // A value is one of ( obj | quoted_string | unquoted_string )
                match grammar_value.as_rule() {
                    // Layout: obj
                    //         \ pair* <- each one becomes an entry
                    <$rule>::obj => {
                        let mut obj = Obj::new();
                        for child in grammar_value.into_inner() {
                            let (key, value) = parse_pair(child);
                            // Values sharing a key are accumulated under that key
                            obj.entry(key).or_default().push(value);
                        }

                        Self::Obj(obj)
                    }
                    // Both string flavors are handled by `parse_string`
                    <$rule>::quoted_string | <$rule>::unquoted_string => {
                        Self::Str(parse_string(grammar_value))
                    }
                    _ => unreachable!("Prevented by grammar"),
                }
            }
        }
    };
}
196
197// expose ^^ macro to the rest of the crate
198pub(crate) use common_parsing;
199
200impl<'a> Vdf<'a> {
201    /// Attempts to parse VDF text to a [`Vdf`]
202    pub fn parse(s: &'a str) -> Result<Self> {
203        Ok(Self::from(PartialVdf::parse(s)?))
204    }
205
206    pub fn parse_raw(s: &'a str) -> Result<Self> {
207        Ok(Self::from(PartialVdf::parse_raw(s)?))
208    }
209}
210
impl<'a> PartialVdf<'a> {
    /// Attempts to parse VDF text to a [`PartialVdf`]
    ///
    /// Delegates to the parse function re-exported from the `escaped` module.
    pub fn parse(s: &'a str) -> Result<Self> {
        // The free function is only reachable through a re-export marked `#[expect(deprecated)]`,
        // so calling it needs the same expectation
        #[expect(deprecated)]
        escaped_parse(s)
    }

    /// Attempts to parse VDF text to a [`PartialVdf`]
    ///
    /// Delegates to the parse function re-exported from the `raw` module.
    pub fn parse_raw(s: &'a str) -> Result<Self> {
        // Same story as in `parse`: the re-exported free function carries a deprecation
        #[expect(deprecated)]
        raw_parse(s)
    }
}