php_parser/parser/
mod.rs

1use crate::ast::{Name, ParseError, Program};
2use crate::lexer::{
3    Lexer, LexerMode,
4    token::{Token, TokenKind},
5};
6use bumpalo::Bump;
7
8use crate::span::Span;
9
10mod attributes;
11mod control_flow;
12mod definitions;
13mod expr;
14mod stmt;
15mod types;
16
/// Abstraction over a stream of [`Token`]s with lookahead and a switchable
/// lexer mode.
///
/// No implementor is visible in this file, so the per-method notes below are
/// assumptions drawn from the signatures — confirm against the implementation.
// Nothing in this file implements or calls the trait (`Parser` owns a `Lexer`
// directly), which is why the dead-code lint is silenced here.
#[allow(dead_code)]
pub trait TokenSource<'src> {
    /// Borrows a token; presumably the one at the current position.
    fn current(&self) -> &Token;
    /// Borrows a token selected by `n`; presumably the one `n` positions ahead.
    /// NOTE(review): whether `n == 0` means the current token is not specified here.
    fn lookahead(&self, n: usize) -> &Token;
    /// Mutates the source; presumably advances it by one token.
    fn bump(&mut self);
    /// Hands a [`LexerMode`] to the source; presumably switches the mode used
    /// for tokens produced afterwards — TODO confirm.
    fn set_mode(&mut self, mode: LexerMode);
}
24
/// Parser state: the token producer, the arena that receives the output
/// slices, a two-token window over the input, and the errors collected so far.
///
/// `'src` is the lifetime parameter of the [`Lexer`]; `'ast` is the lifetime of
/// the arena borrow and therefore of everything allocated in it.
pub struct Parser<'src, 'ast> {
    /// Token producer that `bump` pulls from.
    pub(super) lexer: Lexer<'src>, // In real impl, this would be wrapped in a TokenSource
    /// Arena into which name parts, statements and errors are copied.
    pub(super) arena: &'ast Bump,
    /// Token currently being examined by the parsing methods.
    pub(super) current_token: Token,
    /// The non-comment token that follows `current_token` (one token of lookahead).
    pub(super) next_token: Token,
    /// Errors recorded while parsing; `parse_program` copies them into the
    /// returned `Program`.
    pub(super) errors: std::vec::Vec<ParseError>,
}
32
33impl<'src, 'ast> Parser<'src, 'ast> {
34    pub fn new(lexer: Lexer<'src>, arena: &'ast Bump) -> Self {
35        let mut parser = Self {
36            lexer,
37            arena,
38            current_token: Token {
39                kind: TokenKind::Eof,
40                span: Span::default(),
41            },
42            next_token: Token {
43                kind: TokenKind::Eof,
44                span: Span::default(),
45            },
46            errors: std::vec::Vec::new(),
47        };
48        parser.bump();
49        parser.bump();
50        parser
51    }
52
53    fn bump(&mut self) {
54        self.current_token = self.next_token;
55        loop {
56            let token = self.lexer.next().unwrap_or(Token {
57                kind: TokenKind::Eof,
58                span: Span::default(),
59            });
60            if token.kind != TokenKind::Comment && token.kind != TokenKind::DocComment {
61                self.next_token = token;
62                break;
63            }
64        }
65    }
66
67    fn expect_semicolon(&mut self) {
68        if self.current_token.kind == TokenKind::SemiColon {
69            self.bump();
70        } else if self.current_token.kind == TokenKind::CloseTag {
71            // Implicit semicolon at close tag
72        } else if self.current_token.kind == TokenKind::Eof {
73            // Implicit semicolon at EOF
74        } else {
75            // Error: Missing semicolon
76            self.errors.push(ParseError {
77                span: self.current_token.span,
78                message: "Missing semicolon",
79            });
80            // Recovery: Assume it was there and continue.
81            // We do NOT bump the current token because it belongs to the next statement.
82            self.sync_to_statement_end();
83        }
84    }
85
86    pub(super) fn parse_name(&mut self) -> Name<'ast> {
87        let start = self.current_token.span.start;
88        let mut parts = std::vec::Vec::new();
89
90        if self.current_token.kind == TokenKind::NsSeparator {
91            parts.push(self.current_token);
92            self.bump();
93        } else if self.current_token.kind == TokenKind::Namespace {
94            parts.push(self.current_token);
95            self.bump();
96            if self.current_token.kind == TokenKind::NsSeparator {
97                parts.push(self.current_token);
98                self.bump();
99            }
100        }
101
102        loop {
103            if self.current_token.kind == TokenKind::Identifier
104                || self.current_token.kind.is_semi_reserved()
105            {
106                parts.push(self.current_token);
107                self.bump();
108            } else {
109                break;
110            }
111
112            if self.current_token.kind == TokenKind::NsSeparator {
113                parts.push(self.current_token);
114                self.bump();
115            } else {
116                break;
117            }
118        }
119
120        let end = if parts.is_empty() {
121            start
122        } else {
123            parts.last().unwrap().span.end
124        };
125
126        Name {
127            parts: self.arena.alloc_slice_copy(&parts),
128            span: Span::new(start, end),
129        }
130    }
131
132    pub fn parse_program(&mut self) -> Program<'ast> {
133        let mut statements = std::vec::Vec::new(); // Temporary vec, will be moved to arena
134
135        while self.current_token.kind != TokenKind::Eof {
136            if matches!(
137                self.current_token.kind,
138                TokenKind::OpenTag | TokenKind::CloseTag
139            ) {
140                self.bump();
141                continue;
142            }
143            statements.push(self.parse_top_stmt());
144        }
145
146        let span = if let (Some(first), Some(last)) = (statements.first(), statements.last()) {
147            Span::new(first.span().start, last.span().end)
148        } else {
149            Span::default()
150        };
151
152        Program {
153            statements: self.arena.alloc_slice_copy(&statements),
154            errors: self.arena.alloc_slice_copy(&self.errors),
155            span,
156        }
157    }
158
159    fn sync_to_statement_end(&mut self) {
160        while !matches!(
161            self.current_token.kind,
162            TokenKind::SemiColon | TokenKind::CloseBrace | TokenKind::CloseTag | TokenKind::Eof
163        ) {
164            self.bump();
165        }
166        if self.current_token.kind == TokenKind::SemiColon {
167            self.bump();
168        }
169    }
170}