php_parser/parser/
mod.rs

1use crate::ast::{Name, ParseError, Program};
2use crate::lexer::{
3    Lexer, LexerMode,
4    token::{Token, TokenKind},
5};
6use bumpalo::Bump;
7
8use crate::span::Span;
9
10mod attributes;
11mod control_flow;
12mod definitions;
13mod expr;
14mod stmt;
15mod types;
16
17#[allow(dead_code)]
18pub trait TokenSource<'src> {
19    fn current(&self) -> &Token;
20    fn lookahead(&self, n: usize) -> &Token;
21    fn bump(&mut self);
22    fn set_mode(&mut self, mode: LexerMode);
23}
24
25pub struct Parser<'src, 'ast> {
26    pub(super) lexer: Lexer<'src>, // In real impl, this would be wrapped in a TokenSource
27    pub(super) arena: &'ast Bump,
28    pub(super) current_token: Token,
29    pub(super) next_token: Token,
30    pub(super) errors: std::vec::Vec<ParseError>,
31    pub(super) current_doc_comment: Option<Span>,
32    pub(super) next_doc_comment: Option<Span>,
33}
34
35impl<'src, 'ast> Parser<'src, 'ast> {
36    pub fn new(lexer: Lexer<'src>, arena: &'ast Bump) -> Self {
37        let mut parser = Self {
38            lexer,
39            arena,
40            current_token: Token {
41                kind: TokenKind::Eof,
42                span: Span::default(),
43            },
44            next_token: Token {
45                kind: TokenKind::Eof,
46                span: Span::default(),
47            },
48            errors: std::vec::Vec::new(),
49            current_doc_comment: None,
50            next_doc_comment: None,
51        };
52        parser.bump();
53        parser.bump();
54        parser
55    }
56
57    fn bump(&mut self) {
58        self.current_token = self.next_token;
59        self.current_doc_comment = self.next_doc_comment;
60        self.next_doc_comment = None;
61        loop {
62            let token = self.lexer.next().unwrap_or(Token {
63                kind: TokenKind::Eof,
64                span: Span::default(),
65            });
66            if token.kind == TokenKind::DocComment {
67                self.next_doc_comment = Some(token.span);
68            } else if token.kind != TokenKind::Comment {
69                self.next_token = token;
70                break;
71            }
72        }
73    }
74
75    fn expect_semicolon(&mut self) {
76        if self.current_token.kind == TokenKind::SemiColon {
77            self.bump();
78        } else if self.current_token.kind == TokenKind::CloseTag {
79            // Implicit semicolon at close tag
80        } else if self.current_token.kind == TokenKind::Eof {
81            // Implicit semicolon at EOF
82        } else {
83            // Error: Missing semicolon
84            self.errors.push(ParseError {
85                span: self.current_token.span,
86                message: "Missing semicolon",
87            });
88            // Recovery: Assume it was there and continue.
89            // We do NOT bump the current token because it belongs to the next statement.
90            self.sync_to_statement_end();
91        }
92    }
93
94    pub(super) fn parse_name(&mut self) -> Name<'ast> {
95        let start = self.current_token.span.start;
96        let mut parts = std::vec::Vec::new();
97
98        if self.current_token.kind == TokenKind::NsSeparator {
99            parts.push(self.current_token);
100            self.bump();
101        } else if self.current_token.kind == TokenKind::Namespace {
102            parts.push(self.current_token);
103            self.bump();
104            if self.current_token.kind == TokenKind::NsSeparator {
105                parts.push(self.current_token);
106                self.bump();
107            }
108        }
109
110        loop {
111            if self.current_token.kind == TokenKind::Identifier
112                || self.current_token.kind.is_semi_reserved()
113            {
114                parts.push(self.current_token);
115                self.bump();
116            } else {
117                break;
118            }
119
120            if self.current_token.kind == TokenKind::NsSeparator {
121                parts.push(self.current_token);
122                self.bump();
123            } else {
124                break;
125            }
126        }
127
128        let end = if parts.is_empty() {
129            start
130        } else {
131            parts.last().unwrap().span.end
132        };
133
134        Name {
135            parts: self.arena.alloc_slice_copy(&parts),
136            span: Span::new(start, end),
137        }
138    }
139
140    pub fn parse_program(&mut self) -> Program<'ast> {
141        let mut statements = std::vec::Vec::new(); // Temporary vec, will be moved to arena
142
143        while self.current_token.kind != TokenKind::Eof {
144            statements.push(self.parse_top_stmt());
145        }
146
147        let span = if let (Some(first), Some(last)) = (statements.first(), statements.last()) {
148            Span::new(first.span().start, last.span().end)
149        } else {
150            Span::default()
151        };
152
153        Program {
154            statements: self.arena.alloc_slice_copy(&statements),
155            errors: self.arena.alloc_slice_copy(&self.errors),
156            span,
157        }
158    }
159
160    fn sync_to_statement_end(&mut self) {
161        while !matches!(
162            self.current_token.kind,
163            TokenKind::SemiColon | TokenKind::CloseBrace | TokenKind::CloseTag | TokenKind::Eof
164        ) {
165            self.bump();
166        }
167        if self.current_token.kind == TokenKind::SemiColon {
168            self.bump();
169        }
170    }
171}