// mago_syntax/parser/mod.rs

1use bumpalo::Bump;
2use bumpalo::collections::Vec;
3
4use mago_database::file::File;
5use mago_database::file::FileId;
6use mago_database::file::HasFileId;
7use mago_syntax_core::input::Input;
8
9use crate::ast::Program;
10use crate::ast::sequence::Sequence;
11use crate::error::ParseError;
12use crate::lexer::Lexer;
13use crate::parser::stream::TokenStream;
14use crate::settings::ParserSettings;
15
16mod internal;
17
18pub mod stream;
19
/// Upper bound on parser recursion depth.
///
/// Capping the depth guards against stack overflow when the input contains
/// deeply nested expressions or statements.
const MAX_RECURSION_DEPTH: u16 = 512;
23
/// Mutable bookkeeping carried by the parser while it consumes tokens.
///
/// The [`Default`] value has both flags cleared and a recursion depth of zero.
/// `Clone`, `PartialEq` and `Eq` are derived so a state can be snapshotted and
/// compared; `Copy` is intentionally not derived so that duplicating mutable
/// state is always an explicit `.clone()`.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct State {
    /// Context flag for indirect variables.
    ///
    /// NOTE(review): presumably set while an indirect variable is being parsed;
    /// the code that toggles it lives outside this file — confirm.
    pub within_indirect_variable: bool,
    /// Context flag for string interpolation.
    ///
    /// NOTE(review): presumably set while the contents of an interpolated string
    /// are being parsed; the code that toggles it lives outside this file — confirm.
    pub within_string_interpolation: bool,
    /// Current nesting depth of the parser.
    ///
    /// NOTE(review): presumably incremented by recursive parse routines and
    /// checked against `MAX_RECURSION_DEPTH` — confirm against the callers.
    pub recursion_depth: u16,
}
30
31/// The main parser for PHP source code.
32///
33/// The parser holds an arena reference, the token stream, and parsing state.
34#[derive(Debug)]
35#[allow(clippy::field_scoped_visibility_modifiers)]
36pub struct Parser<'input, 'arena> {
37    pub(crate) arena: &'arena Bump,
38    pub(crate) state: State,
39    pub(crate) stream: TokenStream<'input, 'arena>,
40    pub(crate) errors: Vec<'arena, ParseError>,
41}
42
43impl<'input, 'arena> Parser<'input, 'arena> {
44    /// Creates a new parser for the given content.
45    ///
46    /// # Parameters
47    ///
48    /// - `arena`: The memory arena for allocations.
49    /// - `file_id`: The ID of the file being parsed.
50    /// - `content`: The content to parse.
51    /// - `settings`: The parser settings.
52    ///
53    /// # Returns
54    ///
55    /// A new `Parser` instance.
56    #[inline]
57    pub fn new(arena: &'arena Bump, file_id: FileId, content: &'input str, settings: ParserSettings) -> Self {
58        let input = Input::new(file_id, content.as_bytes());
59        let lexer = Lexer::new(input, settings.lexer);
60        let stream = TokenStream::new(arena, lexer);
61
62        Self { arena, state: State::default(), stream, errors: Vec::new_in(arena) }
63    }
64
65    /// Creates a new parser for the given file.
66    ///
67    /// # Parameters
68    ///
69    /// - `arena`: The memory arena for allocations.
70    /// - `file`: The file to parse.
71    /// - `settings`: The parser settings.
72    ///
73    /// # Returns
74    ///
75    /// A new `Parser` instance.
76    pub fn for_file(arena: &'arena Bump, file: &'input File, settings: ParserSettings) -> Self {
77        Self::new(arena, file.file_id(), file.contents.as_ref(), settings)
78    }
79
80    /// Parses and returns the program AST.
81    fn parse(mut self, source_text: &'arena str, file_id: FileId) -> &'arena Program<'arena> {
82        let mut statements = Vec::new_in(self.arena);
83
84        loop {
85            let reached_eof = match self.stream.has_reached_eof() {
86                Ok(eof) => eof,
87                Err(err) => {
88                    self.errors.push(ParseError::from(err));
89                    break;
90                }
91            };
92
93            if reached_eof {
94                break;
95            }
96
97            // Record position before parsing to detect infinite loops
98            let position_before = self.stream.current_position();
99
100            match self.parse_statement() {
101                Ok(statement) => statements.push(statement),
102                Err(err) => self.errors.push(err),
103            }
104
105            // Safety check: if we didn't advance at all, skip a token to prevent infinite loop.
106            // This can happen with orphan keywords like `finally`, `catch`, `else`, etc.
107            // that are preserved by the expression parser but not handled by the statement parser.
108            let position_after = self.stream.current_position();
109            if position_after == position_before
110                && let Ok(Some(token)) = self.stream.lookahead(0)
111            {
112                self.errors.push(self.stream.unexpected(Some(token), &[]));
113                let _ = self.stream.consume();
114            }
115        }
116
117        self.arena.alloc(Program {
118            file_id,
119            source_text,
120            statements: Sequence::new(statements),
121            trivia: self.stream.get_trivia(),
122            errors: self.errors,
123        })
124    }
125}
126
127/// Parses the given file and returns the program AST.
128///
129/// # Parameters
130///
131/// - `arena`: The memory arena for allocations.
132/// - `file`: The file to parse.
133///
134/// # Returns
135///
136/// The parsed `Program` AST.
137#[inline]
138pub fn parse_file<'arena>(arena: &'arena Bump, file: &File) -> &'arena Program<'arena> {
139    parse_file_content(arena, file.file_id(), file.contents.as_ref())
140}
141
142/// Parses the given file with custom settings and returns the program AST.
143///
144/// # Parameters
145///
146/// - `arena`: The memory arena for allocations.
147/// - `file`: The file to parse.
148/// - `settings`: The parser settings.
149///
150/// # Returns
151///
152/// The parsed `Program` AST.
153#[inline]
154pub fn parse_file_with_settings<'arena>(
155    arena: &'arena Bump,
156    file: &File,
157    settings: ParserSettings,
158) -> &'arena Program<'arena> {
159    parse_file_content_with_settings(arena, file.file_id(), file.contents.as_ref(), settings)
160}
161
162/// Parses the given file content and returns the program AST.
163///
164/// # Parameters
165///
166/// - `arena`: The memory arena for allocations.
167/// - `file_id`: The ID of the file being parsed.
168/// - `content`: The content to parse.
169///
170/// # Returns
171///
172/// The parsed `Program` AST.
173pub fn parse_file_content<'arena>(arena: &'arena Bump, file_id: FileId, content: &str) -> &'arena Program<'arena> {
174    let source_text = arena.alloc_str(content);
175    Parser::new(arena, file_id, source_text, ParserSettings::default()).parse(source_text, file_id)
176}
177
178/// Parses the given file content with custom settings and returns the program AST.
179///
180/// # Parameters
181///
182/// - `arena`: The memory arena for allocations.
183/// - `file_id`: The ID of the file being parsed.
184/// - `content`: The content to parse.
185/// - `settings`: The parser settings.
186///
187/// # Returns
188///
189/// The parsed `Program` AST.
190pub fn parse_file_content_with_settings<'arena>(
191    arena: &'arena Bump,
192    file_id: FileId,
193    content: &str,
194    settings: ParserSettings,
195) -> &'arena Program<'arena> {
196    let source_text = arena.alloc_str(content);
197    Parser::new(arena, file_id, source_text, settings).parse(source_text, file_id)
198}