keyvalues_parser/text/parse/
mod.rs

1use std::borrow::Cow;
2
3use crate::{error::Result, Obj, PartialVdf, Value, Vdf};
4
5use pest::{iterators::Pair as PestPair, Atomicity, RuleType};
6
7// TODO: rename `PartialVdf` to `TopLevelVdf` and have it hold a `Vdf` instead of flattening it out
8
9mod escaped;
10mod raw;
11
12pub use escaped::{parse as escaped_parse, PestError as EscapedPestError};
13pub use raw::{parse as raw_parse, PestError as RawPestError};
14
15type BoxedState<'a, R> = Box<pest::ParserState<'a, R>>;
16type ParseResult<'a, R> = pest::ParseResult<BoxedState<'a, R>>;
17
18#[inline]
19fn whitespace<R: RuleType>(s: BoxedState<'_, R>) -> ParseResult<'_, R> {
20    s.atomic(Atomicity::Atomic, |s| {
21        s.match_string(" ")
22            .or_else(|s| s.match_string("\t"))
23            .or_else(|s| s.match_string("\r"))
24            .or_else(|s| s.match_string("\n"))
25    })
26}
27
28#[inline]
29fn any<R: RuleType>(s: BoxedState<'_, R>) -> ParseResult<'_, R> {
30    s.skip(1)
31}
32
33fn soi<R: RuleType>(s: BoxedState<'_, R>) -> ParseResult<'_, R> {
34    s.start_of_input()
35}
36
37#[inline]
38fn comment<R: RuleType>(s: BoxedState<'_, R>) -> ParseResult<'_, R> {
39    s.atomic(Atomicity::Atomic, |s| {
40        s.sequence(|s| {
41            s.match_string("//").and_then(|s| {
42                s.repeat(|s| {
43                    s.sequence(|s| s.lookahead(false, |s| s.match_string("\n")).and_then(any))
44                })
45            })
46        })
47    })
48}
49
50#[inline]
51fn skip<R: RuleType>(s: BoxedState<'_, R>) -> ParseResult<'_, R> {
52    if s.atomicity() == Atomicity::NonAtomic {
53        s.sequence(|s| {
54            s.repeat(whitespace).and_then(|s| {
55                s.repeat(|s| s.sequence(|s| comment(s).and_then(|s| s.repeat(whitespace))))
56            })
57        })
58    } else {
59        Ok(s)
60    }
61}
62
// The escaped and raw grammars each produce their own pest `Rule` type, so the code that walks
// the resulting pairs can't be shared through plain functions. This macro stamps out one copy
// of that code per grammar instead.
//
// Arguments:
// - `$parse_fn`: function that runs the pest parser over the input text
// - `$rule`: the grammar's `Rule` type
// - `$parse_escaped`: whether the contents of quoted strings have escape sequences resolved
macro_rules! common_parsing {
    ($parse_fn:ident, $rule:ty, $parse_escaped:expr) => {
        /// Attempts to parse VDF text to a [`PartialVdf`]
        pub fn parse(s: &str) -> Result<PartialVdf<'_>> {
            let mut top_level = $parse_fn(s)?;

            // Any number of base macros may come before the one top-level pair
            let mut bases = Vec::new();
            loop {
                let pair = top_level.next().unwrap();
                match pair.as_rule() {
                    <$rule>::base_macro => {
                        // Structure: base_macro
                        //            \ ( quoted_raw_string | unquoted_string )
                        let path_token = pair.into_inner().next().unwrap();
                        let path_text = match path_token.as_rule() {
                            // Quoted paths hold the actual text one level further down
                            <$rule>::quoted_raw_string => {
                                path_token.into_inner().next().unwrap()
                            }
                            <$rule>::unquoted_string => path_token,
                            _ => unreachable!("Prevented by grammar"),
                        };
                        bases.push(Cow::from(path_text.as_str()));
                    }
                    _ => {
                        // First non-macro entry is the top-level key-value pair
                        let (key, value) = parse_pair(pair);
                        return Ok(PartialVdf { key, value, bases });
                    }
                }
            }
        }

        // Splits a `pair` into its key and its value
        //
        // Structure: pair
        //            \ key
        //            \ value
        fn parse_pair(grammar_pair: PestPair<'_, $rule>) -> (Cow<'_, str>, Value<'_>) {
            match grammar_pair.as_rule() {
                <$rule>::pair => {
                    let mut children = grammar_pair.into_inner();
                    let key = parse_string(children.next().unwrap());
                    let value = Value::from(children.next().unwrap());

                    (key, value)
                }
                _ => unreachable!("Prevented by grammar"),
            }
        }

        // Extracts the text of either flavor of string
        fn parse_string(grammar_string: PestPair<'_, $rule>) -> Cow<'_, str> {
            match grammar_string.as_rule() {
                // Structure: quoted_string
                //            \ "
                //            \ quoted_inner <- the text we're after
                //            \ "
                <$rule>::quoted_string => {
                    let quoted_inner = grammar_string.into_inner().next().unwrap();
                    if $parse_escaped {
                        parse_escaped_string(quoted_inner)
                    } else {
                        Cow::Borrowed(quoted_inner.as_str())
                    }
                }
                // Unquoted strings are used verbatim
                <$rule>::unquoted_string => Cow::Borrowed(grammar_string.as_str()),
                _ => unreachable!("Prevented by grammar"),
            }
        }

        // Resolves the escape sequences `\n`, `\r`, `\t`, `\\` and `\"`. Text without any
        // backslash is borrowed as-is; otherwise a new `String` is built.
        //
        // Note: the grammar could skip capturing quoted_inner and this could slice off the
        // surrounding '"'s instead for a ~4% win, but that was deemed too hacky for the gain
        fn parse_escaped_string(inner: PestPair<'_, $rule>) -> Cow<'_, str> {
            let raw = inner.as_str();

            // Common case: nothing to unescape, so no allocation needed
            if !raw.contains('\\') {
                return Cow::Borrowed(raw);
            }

            // The result ends up a bit shorter than the input, but it's a close upper bound
            let mut unescaped = String::with_capacity(raw.len());
            let mut chars = raw.chars();

            while let Some(ch) = chars.next() {
                let resolved = match ch {
                    // A backslash and the character after it together form one character
                    '\\' => match chars.next() {
                        Some('n') => '\n',
                        Some('r') => '\r',
                        Some('t') => '\t',
                        Some('\\') => '\\',
                        Some('\"') => '\"',
                        _ => unreachable!("Prevented by grammar"),
                    },
                    plain => plain,
                };
                unescaped.push(resolved);
            }

            Cow::Owned(unescaped)
        }

        impl<'a> From<PestPair<'a, $rule>> for Value<'a> {
            fn from(grammar_value: PestPair<'a, $rule>) -> Self {
                // Structure: value is ( obj | quoted_string | unquoted_string )
                match grammar_value.as_rule() {
                    <$rule>::quoted_string | <$rule>::unquoted_string => {
                        Value::Str(parse_string(grammar_value))
                    }
                    // Structure: obj
                    //            \ pair*
                    <$rule>::obj => {
                        let mut obj = Obj::new();
                        for child in grammar_value.into_inner() {
                            let (key, value) = parse_pair(child);
                            // Values sharing a key are collected under that key in order
                            obj.entry(key).or_default().push(value);
                        }

                        Value::Obj(obj)
                    }
                    _ => unreachable!("Prevented by grammar"),
                }
            }
        }
    };
}
193
194// expose ^^ macro to the rest of the crate
195pub(crate) use common_parsing;
196
197impl<'a> Vdf<'a> {
198    /// Attempts to parse VDF text to a [`Vdf`]
199    pub fn parse(s: &'a str) -> Result<Self> {
200        Ok(Self::from(PartialVdf::parse(s)?))
201    }
202
203    pub fn parse_raw(s: &'a str) -> Result<Self> {
204        Ok(Self::from(PartialVdf::parse_raw(s)?))
205    }
206}
207
208impl<'a> PartialVdf<'a> {
209    /// Attempts to parse VDF text to a [`Vdf`]
210    pub fn parse(s: &'a str) -> Result<Self> {
211        escaped_parse(s)
212    }
213
214    pub fn parse_raw(s: &'a str) -> Result<Self> {
215        raw_parse(s)
216    }
217}