pomsky_syntax/parse/
parser.rs

1use std::str::FromStr;
2
3use crate::{
4    Span,
5    diagnose::{
6        LexErrorMsg, NumberError, ParseDiagnostic, ParseError, ParseErrorKind as PEK, ParseWarning,
7    },
8    exprs::*,
9    lexer::{Token, tokenize},
10};
11
12/// Parses a source string as a pomsky expression.
13///
14/// The `recursion` argument determines how much nesting is allowed in the
15/// expression. Note that **pomsky will overflow the stack** when parsing an
16/// expression with too much nesting, so the `recursion` argument should be low
17/// enough to prevent that. The recommended default is 256.
18pub fn parse(source: &str, recursion: u32) -> (Option<Rule>, Vec<ParseDiagnostic>) {
19    if source.len() > u32::MAX as usize {
20        let error = PEK::LexErrorWithMessage(LexErrorMsg::FileTooBig);
21        return (None, vec![error.at(Span::empty()).into()]);
22    }
23
24    let tokens = tokenize(source);
25
26    let mut errors = Vec::new();
27    for &(t, span) in &tokens {
28        match t {
29            Token::Error => errors.push((span, None)),
30            Token::ErrorMsg(m) => errors.push((span, Some(m))),
31            _ => {}
32        }
33    }
34
35    if !errors.is_empty() {
36        let errors = errors
37            .into_iter()
38            .map(|(span, msg)| {
39                msg.map_or(PEK::UnknownToken, PEK::LexErrorWithMessage).at(span).into()
40            })
41            .collect::<Vec<_>>();
42
43        return (None, errors);
44    }
45
46    let mut parser = Parser {
47        source,
48        tokens: tokens.into_boxed_slice(),
49        offset: 0,
50        warnings: Vec::new(),
51        recursion,
52        is_lazy: false,
53        is_unicode_aware: true,
54    };
55
56    let rule = match parser.parse_modified() {
57        Ok(rule) => rule,
58        Err(err) => {
59            let mut diagnostics = vec![err.into()];
60            diagnostics.extend(parser.warnings);
61            return (None, diagnostics);
62        }
63    };
64    if parser.is_empty() {
65        (Some(rule), parser.warnings)
66    } else {
67        let mut diagnostics = vec![PEK::LeftoverTokens.at(parser.span()).into()];
68        diagnostics.extend(parser.warnings);
69        (None, diagnostics)
70    }
71}
72
/// Result type returned by the parser's fallible methods; the error type is
/// always [`ParseError`].
type PResult<T> = Result<T, ParseError>;
74
/// Parser state: a cursor over the token list of a single source string.
pub(super) struct Parser<'i> {
    /// The complete source text; token spans are resolved against it.
    source: &'i str,
    /// All tokens of the source, each paired with its span.
    tokens: Box<[(Token, Span)]>,
    /// Index into `tokens` of the next token to be consumed.
    offset: usize,
    /// Warnings collected so far (see `add_warning`).
    warnings: Vec<ParseDiagnostic>,
    /// Remaining nesting budget; decremented by `recursion_start` and
    /// incremented again by `recursion_end`.
    recursion: u32,
    /// Initialized to `false` by `parse`.
    /// NOTE(review): presumably tracks whether lazy repetition is currently
    /// enabled — confirm against the code that reads this flag.
    pub(super) is_lazy: bool,
    /// Initialized to `true` by `parse`.
    /// NOTE(review): presumably tracks whether Unicode-aware matching is
    /// currently enabled — confirm against the code that reads this flag.
    pub(super) is_unicode_aware: bool,
}
84
85// Utilities
86impl<'i> Parser<'i> {
87    pub(super) fn is_empty(&self) -> bool {
88        self.tokens.len() == self.offset
89    }
90
91    pub(super) fn source_at(&self, span: Span) -> &'i str {
92        &self.source[span.range_unchecked()]
93    }
94
95    pub(super) fn peek(&self) -> Option<(Token, &'i str)> {
96        self.tokens.get(self.offset).map(|&(t, span)| (t, self.source_at(span)))
97    }
98
99    pub(super) fn peek_pair(&self) -> Option<(Token, Span)> {
100        self.tokens.get(self.offset).copied()
101    }
102
103    /// Returns the span of the next token
104    pub(super) fn span(&self) -> Span {
105        self.tokens
106            .get(self.offset)
107            .map_or_else(|| Span::new(self.source.len(), self.source.len()), |&(_, s)| s)
108    }
109
110    /// Returns the span of the previously consumed token
111    pub(super) fn last_span(&self) -> Span {
112        self.tokens[self.offset - 1].1
113    }
114
115    pub(super) fn advance(&mut self) {
116        self.offset += 1;
117    }
118
119    pub(super) fn recursion_start(&mut self) -> PResult<()> {
120        self.recursion =
121            self.recursion.checked_sub(1).ok_or_else(|| PEK::RecursionLimit.at(self.span()))?;
122        Ok(())
123    }
124
125    pub(super) fn recursion_end(&mut self) {
126        self.recursion += 1;
127    }
128
129    pub(super) fn add_warning(&mut self, warning: ParseWarning) {
130        self.warnings.push(warning.into());
131    }
132
133    pub(super) fn is(&mut self, token: Token) -> bool {
134        matches!(self.peek_pair(), Some((t, _)) if t == token)
135    }
136
137    pub(super) fn consume(&mut self, token: Token) -> bool {
138        match self.peek_pair() {
139            Some((t, _)) if t == token => {
140                self.offset += 1;
141                true
142            }
143            _ => false,
144        }
145    }
146
147    pub(super) fn consume_as(&mut self, token: Token) -> Option<&'i str> {
148        match self.peek_pair() {
149            Some((t, span)) if t == token => {
150                self.offset += 1;
151                Some(self.source_at(span))
152            }
153            _ => None,
154        }
155    }
156
157    pub(super) fn consume_reserved(&mut self, reserved: &str) -> bool {
158        match self.peek_pair() {
159            Some((Token::ReservedName, s)) if self.source_at(s) == reserved => {
160                self.offset += 1;
161                true
162            }
163            _ => false,
164        }
165    }
166
167    pub(super) fn consume_contextual_keyword(&mut self, keyword: &str) -> bool {
168        match self.peek_pair() {
169            Some((Token::Identifier, s)) if self.source_at(s) == keyword => {
170                self.offset += 1;
171                true
172            }
173            _ => false,
174        }
175    }
176
177    pub(super) fn consume_number<T: FromStr + PartialOrd>(&mut self, max: T) -> PResult<Option<T>> {
178        match self.peek_pair() {
179            Some((Token::Number, span)) => {
180                let n = str::parse(self.source_at(span))
181                    .ok()
182                    .and_then(|n| if n > max { None } else { Some(n) })
183                    .ok_or_else(|| PEK::Number(NumberError::TooLarge).at(span))?;
184                self.offset += 1;
185                Ok(Some(n))
186            }
187            _ => Ok(None),
188        }
189    }
190
191    pub(super) fn expect(&mut self, token: Token) -> PResult<()> {
192        match self.peek_pair() {
193            Some((t, _)) if t == token => {
194                self.offset += 1;
195                Ok(())
196            }
197            _ => Err(PEK::ExpectedToken(token).at(self.span())),
198        }
199    }
200
201    pub(super) fn expect_as(&mut self, token: Token) -> PResult<&'i str> {
202        match self.peek_pair() {
203            Some((t, span)) if t == token => {
204                self.offset += 1;
205                Ok(self.source_at(span))
206            }
207            _ => Err(PEK::ExpectedToken(token).at(self.span())),
208        }
209    }
210
211    pub(super) fn expect_number<T: FromStr>(&mut self) -> PResult<T> {
212        match self.peek_pair() {
213            Some((Token::Number, span)) => {
214                let n = str::parse(self.source_at(span))
215                    .map_err(|_| PEK::Number(NumberError::TooLarge).at(span))?;
216                self.offset += 1;
217                Ok(n)
218            }
219            _ => Err(PEK::ExpectedToken(Token::Number).at(self.span())),
220        }
221    }
222}