datex_core/parser/
mod.rs

1use crate::ast::expressions::DatexExpression;
2use crate::ast::expressions::DatexExpressionData;
3use crate::ast::spanned::Spanned;
4use crate::ast::type_expressions::{TypeExpression, TypeExpressionData};
5use crate::compiler::error::{
6    ErrorCollector, MaybeAction, collect_or_pass_error,
7};
8use crate::parser::errors::{ParserError, SpannedParserError};
9use crate::parser::lexer::{SpannedToken, Token};
10use crate::parser::parser_result::{
11    InvalidDatexParseResult, ValidDatexParseResult,
12};
13use core::ops::Range;
14use itertools::Itertools;
15use parser_result::ParserResult;
16// TODO #658: move to different module
17
18pub mod errors;
19pub mod lexer;
20pub mod parser_result;
21mod parsers;
22pub mod utils;
23
/// Options that influence how the parser builds the AST.
#[derive(Debug, Clone, Default)]
pub struct ParserOptions {
    /// When `true`, grouped statements are not collapsed, even if the group
    /// contains only a single statement.
    pub(crate) preserve_scoping: bool,
}
29
30pub struct Parser {
31    tokens: Vec<SpannedToken>,
32    pos: usize,
33    // when Some, collect all errors instead of returning on first error
34    collected_errors: Option<Vec<SpannedParserError>>,
35    options: ParserOptions,
36}
37
38impl Parser {
39    /// Parses the given source code.
40    /// Collects all lexing and parsing errors encountered.
41    pub fn parse_collecting(src: &str, options: ParserOptions) -> ParserResult {
42        let (tokens, errors) = lexer::get_spanned_tokens_from_source(src);
43        let mut parser = Self::new_from_tokens(tokens, Some(errors), options);
44        match parser.parse_root() {
45            // this should never happen when collecting errors
46            Err(e) => {
47                unreachable!(
48                    "An error was not correctly handled during parsing: {:#?}",
49                    e
50                );
51            }
52            Ok(ast) => {
53                // has errors, return invalid result
54                if let Some(errors) = parser.collected_errors
55                    && !errors.is_empty()
56                {
57                    ParserResult::Invalid(InvalidDatexParseResult {
58                        ast,
59                        errors,
60                    })
61                }
62                // has no errors, return valid result
63                else {
64                    ParserResult::Valid(ValidDatexParseResult { ast })
65                }
66            }
67        }
68    }
69
70    /// Parses the given source code.
71    /// Aborts on the first lexing or parsing error encountered.
72    pub fn parse(
73        src: &str,
74        options: ParserOptions,
75    ) -> Result<DatexExpression, SpannedParserError> {
76        let (tokens, errors) = lexer::get_spanned_tokens_from_source(src);
77        // already has lexer errors - aborts early when parsing starts
78        if let Some(first_error) = errors.into_iter().next() {
79            Err(first_error)
80        }
81        // no lexer errors - can proceed with parsing (using early abort mode)
82        else {
83            let mut parser = Self::new_from_tokens(tokens, None, options);
84            parser.parse_root()
85        }
86    }
87
88    /// Parses the given source code.
89    /// Aborts on the first lexing or parsing error encountered.
90    /// Uses default parser options.
91    pub fn parse_with_default_options(
92        src: &str,
93    ) -> Result<DatexExpression, SpannedParserError> {
94        Self::parse(src, ParserOptions::default())
95    }
96
97    /// Parses the given source code.
98    /// Collects all lexing and parsing errors encountered.
99    /// Uses default parser options.
100    pub fn parse_collecting_with_default_options(src: &str) -> ParserResult {
101        Self::parse_collecting(src, ParserOptions::default())
102    }
103
104    fn new_from_tokens(
105        tokens: Vec<SpannedToken>,
106        collected_errors: Option<Vec<SpannedParserError>>,
107        options: ParserOptions,
108    ) -> Self {
109        Self {
110            tokens,
111            pos: 0,
112            collected_errors,
113            options,
114        }
115    }
116
117    /// Entrypoint for parsing a full source file.
118    fn parse_root(&mut self) -> Result<DatexExpression, SpannedParserError> {
119        self.parse_top_level_statements()
120    }
121
122    /// Collects an error if detailed error collection is enabled,
123    /// or returns the error as Err()
124    fn collect_error(
125        &mut self,
126        error: SpannedParserError,
127    ) -> Result<(), SpannedParserError> {
128        match &mut self.collected_errors {
129            Some(collected_errors) => {
130                collected_errors.record_error(error);
131                Ok(())
132            }
133            None => Err(error),
134        }
135    }
136
137    /// Collects an error and returns a Recover expression to continue parsing if
138    /// detailed error collection is enabled,
139    /// or returns the error as Err()
140    fn collect_error_and_continue(
141        &mut self,
142        error: SpannedParserError,
143    ) -> Result<DatexExpression, SpannedParserError> {
144        let span = error.span.clone();
145        self.collect_error(error)
146            .map(|_| DatexExpressionData::Recover.with_span(span))
147    }
148
149    /// Collects an error and returns a Recover type expression to continue parsing if
150    /// detailed error collection is enabled,
151    /// or returns the error as Err()
152    fn collect_error_and_continue_with_type_expression(
153        &mut self,
154        error: SpannedParserError,
155    ) -> Result<TypeExpression, SpannedParserError> {
156        let span = error.span.clone();
157        self.collect_error(error)
158            .map(|_| TypeExpressionData::Recover.with_span(span))
159    }
160
161    /// Collects the Err variant of the Result if detailed error collection is enabled,
162    /// or returns the Result mapped to a MaybeAction.
163    fn collect_result<T>(
164        &mut self,
165        result: Result<T, SpannedParserError>,
166    ) -> Result<MaybeAction<T>, SpannedParserError> {
167        collect_or_pass_error(&mut self.collected_errors, result)
168    }
169
170    fn peek(&self) -> Result<&SpannedToken, SpannedParserError> {
171        if self.pos >= self.tokens.len() {
172            Err(SpannedParserError {
173                error: ParserError::ExpectedMoreTokens,
174                span: if let Some(last) = self.tokens.last() {
175                    last.span.end..last.span.end
176                } else {
177                    0..0
178                },
179            })
180        } else {
181            Ok(&self.tokens[self.pos])
182        }
183    }
184
185    fn has_more_tokens(&self) -> bool {
186        self.pos < self.tokens.len()
187    }
188
189    fn advance(&mut self) -> Result<SpannedToken, SpannedParserError> {
190        if self.pos >= self.tokens.len() {
191            return Err(SpannedParserError {
192                error: ParserError::ExpectedMoreTokens,
193                span: if let Some(last) = self.tokens.last() {
194                    last.span.end..last.span.end
195                } else {
196                    0..0
197                },
198            });
199        }
200        let tok = self.tokens[self.pos].clone(); // TODO #659: take, don't clone?
201        self.pos += 1;
202        Ok(tok)
203    }
204
205    fn expect(
206        &mut self,
207        token: Token,
208    ) -> Result<SpannedToken, SpannedParserError> {
209        let next_token = self.advance()?;
210        if next_token.token != token {
211            self.collect_error(SpannedParserError {
212                error: ParserError::UnexpectedToken {
213                    expected: vec![token],
214                    found: next_token.token.clone(),
215                },
216                span: self.peek()?.span.clone(),
217            })?;
218        }
219        Ok(next_token)
220    }
221
222    fn expect_identifier(
223        &mut self,
224    ) -> Result<(String, Range<usize>), SpannedParserError> {
225        match self.advance()? {
226            SpannedToken {
227                token: Token::Identifier(identifier),
228                span,
229            } => Ok((identifier, span)),
230            token => Err(SpannedParserError {
231                error: ParserError::UnexpectedToken {
232                    expected: vec![Token::Identifier("identifier".to_string())],
233                    found: token.token.clone(),
234                },
235                span: token.span.clone(),
236            }),
237        }
238    }
239
240    fn get_current_source_position(&self) -> usize {
241        if let Some(token) = self.tokens.get(self.pos) {
242            token.span.start
243        } else if let Some(last_token) = self.tokens.last() {
244            last_token.span.end
245        } else {
246            0
247        }
248    }
249
250    /// Attempt to recover from a parsing error by skipping tokens until one of the recovery tokens is found.
251    /// If recovery is successful after an error result was provided, returns an Ok result containing a Recover expression.
252    /// If the result was Ok, returns the contained expression directly.
253    /// If error collection is not enabled in the parser, the error is returned directly in the result and can be bubbled up.
254    /// TODO #660: set correct span on recovered expression
255    fn recover_on_error(
256        &mut self,
257        result: Result<DatexExpression, SpannedParserError>,
258        recovery_tokens: &[Token],
259    ) -> Result<DatexExpression, SpannedParserError> {
260        match result {
261            Ok(statement) => Ok(statement),
262            Err(err) => {
263                self.collect_error(err)?;
264                // attempt to recover by skipping to next semicolon or right paren
265                while self.has_more_tokens() {
266                    let token = &self.peek()?.token;
267                    if recovery_tokens.contains(token) {
268                        break;
269                    }
270                    self.advance()?;
271                }
272                Ok(DatexExpressionData::Recover.with_default_span())
273            }
274        }
275    }
276}
277
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` with default options, returning the first error
    /// encountered.
    pub fn try_parse_and_return_on_first_error(
        src: &str,
    ) -> Result<DatexExpression, SpannedParserError> {
        Parser::parse(src, ParserOptions::default())
    }

    /// Parses `src` with default options, collecting all errors.
    pub fn try_parse_and_collect_errors(src: &str) -> ParserResult {
        Parser::parse_collecting(src, ParserOptions::default())
    }

    /// Parses `src` with default options.
    ///
    /// # Panics
    ///
    /// Panics if lexing or parsing reports an error.
    pub fn parse(src: &str) -> DatexExpression {
        try_parse_and_return_on_first_error(src).unwrap()
    }
}
295}