Skip to main content

mago_syntax/parser/
mod.rs

1use bumpalo::Bump;
2use bumpalo::collections::Vec;
3
4use mago_database::file::File;
5use mago_database::file::FileId;
6use mago_database::file::HasFileId;
7use mago_syntax_core::input::Input;
8
9use crate::ast::Program;
10use crate::ast::sequence::Sequence;
11use crate::error::ParseError;
12use crate::lexer::Lexer;
13use crate::parser::stream::TokenStream;
14use crate::settings::ParserSettings;
15
16mod internal;
17
18pub mod stream;
19
/// Maximum recursion depth for expression parsing.
/// This prevents stack overflow on deeply nested expressions.
///
/// The type matches [`State::recursion_depth`] (`u16`).
// NOTE(review): nothing in this module's visible code reads this constant;
// presumably it is checked by the `internal` submodule — confirm.
const MAX_RECURSION_DEPTH: u16 = 512;
23
/// Mutable bookkeeping carried by a [`Parser`] while it walks the token stream.
///
/// A parser starts from `State::default()`, i.e. both flags `false` and a
/// recursion depth of `0`.
#[derive(Debug, Default)]
pub struct State {
    /// Flag for "currently inside an indirect variable".
    // NOTE(review): set/cleared outside this file; exact semantics to be confirmed there.
    pub within_indirect_variable: bool,
    /// Flag for "currently inside a string interpolation".
    // NOTE(review): set/cleared outside this file; exact semantics to be confirmed there.
    pub within_string_interpolation: bool,
    /// Current nesting depth counter.
    // NOTE(review): presumably compared against `MAX_RECURSION_DEPTH` by the
    // expression parser — confirm against the `internal` module.
    pub recursion_depth: u16,
}
30
/// The main parser for PHP source code.
///
/// The parser holds an arena reference, the token stream, and parsing state.
///
/// `'input` is the lifetime of the source text being tokenized and `'arena`
/// is the lifetime of the bump arena that owns everything the parser allocates.
#[derive(Debug)]
pub struct Parser<'input, 'arena> {
    /// Bump arena used for the statement list, the error list, and the final `Program`.
    pub(crate) arena: &'arena Bump,
    /// Mutable parsing flags and counters; starts out as `State::default()`.
    pub(crate) state: State,
    /// Token stream built on top of the lexer for the input being parsed.
    pub(crate) stream: TokenStream<'input, 'arena>,
    /// Errors collected while parsing; moved into the resulting `Program`.
    pub(crate) errors: Vec<'arena, ParseError>,
}
41
42impl<'input, 'arena> Parser<'input, 'arena> {
43    /// Creates a new parser for the given content.
44    ///
45    /// # Parameters
46    ///
47    /// - `arena`: The memory arena for allocations.
48    /// - `file_id`: The ID of the file being parsed.
49    /// - `content`: The content to parse.
50    /// - `settings`: The parser settings.
51    ///
52    /// # Returns
53    ///
54    /// A new `Parser` instance.
55    #[inline]
56    pub fn new(arena: &'arena Bump, file_id: FileId, content: &'input str, settings: ParserSettings) -> Self {
57        let input = Input::new(file_id, content.as_bytes());
58        let lexer = Lexer::new(input, settings.lexer);
59        let stream = TokenStream::new(arena, lexer);
60
61        Self { arena, state: State::default(), stream, errors: Vec::new_in(arena) }
62    }
63
64    /// Creates a new parser for the given file.
65    ///
66    /// # Parameters
67    ///
68    /// - `arena`: The memory arena for allocations.
69    /// - `file`: The file to parse.
70    /// - `settings`: The parser settings.
71    ///
72    /// # Returns
73    ///
74    /// A new `Parser` instance.
75    pub fn for_file(arena: &'arena Bump, file: &'input File, settings: ParserSettings) -> Self {
76        Self::new(arena, file.file_id(), file.contents.as_ref(), settings)
77    }
78
79    /// Parses and returns the program AST.
80    fn parse(mut self, source_text: &'arena str, file_id: FileId) -> &'arena Program<'arena> {
81        let mut statements = Vec::new_in(self.arena);
82
83        loop {
84            let reached_eof = match self.stream.has_reached_eof() {
85                Ok(eof) => eof,
86                Err(err) => {
87                    self.errors.push(ParseError::from(err));
88                    break;
89                }
90            };
91
92            if reached_eof {
93                break;
94            }
95
96            // Record position before parsing to detect infinite loops
97            let position_before = self.stream.current_position();
98
99            match self.parse_statement() {
100                Ok(statement) => statements.push(statement),
101                Err(err) => self.errors.push(err),
102            }
103
104            // Safety check: if we didn't advance at all, skip a token to prevent infinite loop.
105            // This can happen with orphan keywords like `finally`, `catch`, `else`, etc.
106            // that are preserved by the expression parser but not handled by the statement parser.
107            let position_after = self.stream.current_position();
108            if position_after == position_before
109                && let Ok(Some(token)) = self.stream.lookahead(0)
110            {
111                self.errors.push(self.stream.unexpected(Some(token), &[]));
112                let _ = self.stream.consume();
113            }
114        }
115
116        self.arena.alloc(Program {
117            file_id,
118            source_text,
119            statements: Sequence::new(statements),
120            trivia: self.stream.get_trivia(),
121            errors: self.errors,
122        })
123    }
124}
125
126/// Parses the given file and returns the program AST.
127///
128/// # Parameters
129///
130/// - `arena`: The memory arena for allocations.
131/// - `file`: The file to parse.
132///
133/// # Returns
134///
135/// The parsed `Program` AST.
136#[inline]
137pub fn parse_file<'arena>(arena: &'arena Bump, file: &File) -> &'arena Program<'arena> {
138    parse_file_content(arena, file.file_id(), file.contents.as_ref())
139}
140
141/// Parses the given file with custom settings and returns the program AST.
142///
143/// # Parameters
144///
145/// - `arena`: The memory arena for allocations.
146/// - `file`: The file to parse.
147/// - `settings`: The parser settings.
148///
149/// # Returns
150///
151/// The parsed `Program` AST.
152#[inline]
153pub fn parse_file_with_settings<'arena>(
154    arena: &'arena Bump,
155    file: &File,
156    settings: ParserSettings,
157) -> &'arena Program<'arena> {
158    parse_file_content_with_settings(arena, file.file_id(), file.contents.as_ref(), settings)
159}
160
161/// Parses the given file content and returns the program AST.
162///
163/// # Parameters
164///
165/// - `arena`: The memory arena for allocations.
166/// - `file_id`: The ID of the file being parsed.
167/// - `content`: The content to parse.
168///
169/// # Returns
170///
171/// The parsed `Program` AST.
172pub fn parse_file_content<'arena>(arena: &'arena Bump, file_id: FileId, content: &str) -> &'arena Program<'arena> {
173    let source_text = arena.alloc_str(content);
174    Parser::new(arena, file_id, source_text, ParserSettings::default()).parse(source_text, file_id)
175}
176
177/// Parses the given file content with custom settings and returns the program AST.
178///
179/// # Parameters
180///
181/// - `arena`: The memory arena for allocations.
182/// - `file_id`: The ID of the file being parsed.
183/// - `content`: The content to parse.
184/// - `settings`: The parser settings.
185///
186/// # Returns
187///
188/// The parsed `Program` AST.
189pub fn parse_file_content_with_settings<'arena>(
190    arena: &'arena Bump,
191    file_id: FileId,
192    content: &str,
193    settings: ParserSettings,
194) -> &'arena Program<'arena> {
195    let source_text = arena.alloc_str(content);
196    Parser::new(arena, file_id, source_text, settings).parse(source_text, file_id)
197}