Skip to main content

php_rs_parser/
lib.rs

//! Fast, fault-tolerant PHP parser that produces a fully typed AST.
//!
//! This crate parses PHP source code (PHP 7.4–8.5) into a [`php_ast::Program`]
//! tree, recovering from syntax errors so that downstream tools always receive
//! a complete AST.
//!
//! # Semantic-rejection responsibility
//!
//! The parser is fault-tolerant: it always produces an AST and reports every
//! error it can identify before recovering. Its semantic-rejection
//! responsibility is defined externally:
//!
//! > **For any input, the parser emits at least one diagnostic if and only if
//! > `php -l` would reject that input at the configured target PHP version.**
//!
//! Flow-sensitive checks — cross-file resolution, unused variables, dead code,
//! type-mismatched returns — are out of scope and belong in a later semantic
//! layer. Checks decidable from one declaration, one parameter list, one
//! modifier set, or one declaration loop are in scope and use
//! [`diagnostics::ParseError::Forbidden`].
//!
//! The `===php_error===` section in `tests/fixtures/**/*.phpt` records `php -l`
//! output; the fixture runner enforces the rule above by failing CI when PHP
//! rejects an input that the parser silently accepts.
//!
//! # Quick start
//!
//! ```
//! let result = php_rs_parser::parse("<?php echo 'hello';");
//! assert!(result.errors.is_empty());
//! ```
//!
//! # Version-aware parsing
//!
//! Use [`parse_versioned`] to target a specific PHP version. Syntax that
//! requires a higher version is still parsed into the AST, but a
//! [`diagnostics::ParseError::VersionTooLow`] diagnostic is emitted.
//!
//! ```
//! let result = php_rs_parser::parse_versioned(
//!     "<?php enum Status { case Active; }",
//!     php_rs_parser::PhpVersion::Php80,
//! );
//! assert!(!result.errors.is_empty()); // enums require PHP 8.1
//! ```
//!
//! # Multi-file cache
//!
//! [`parse`] returns a [`ParseResult`] with no lifetime parameters — fully
//! owned, storable in a `HashMap`, sendable across threads.
//!
//! ```
//! use std::collections::HashMap;
//! use std::path::PathBuf;
//!
//! let mut cache: HashMap<PathBuf, php_rs_parser::ParseResult> = HashMap::new();
//! cache.insert(PathBuf::from("a.php"), php_rs_parser::parse("<?php echo 1;"));
//! ```
//!
//! # Arena API (LSP / hot-path usage)
//!
//! Use [`parse_arena`] / [`ParserContext`] when you need maximum throughput
//! and can manage the arena lifetime yourself. The returned
//! [`ArenaParseResult`] borrows from both the arena and the source string,
//! so nothing is copied into owned allocations.
//!
//! ```
//! let mut ctx = php_rs_parser::ParserContext::new();
//!
//! let result = ctx.reparse("<?php echo 1;");
//! assert!(result.errors.is_empty());
//! drop(result); // must be dropped before the next reparse
//!
//! let result = ctx.reparse("<?php echo 2;");
//! assert!(result.errors.is_empty());
//! ```
//!
//! Use [`parse_arena_raw`] instead of [`parse_arena`] when you never need
//! line/column positions — it skips building the [`source_map::SourceMap`]
//! and is slightly faster.
81
82pub mod diagnostics;
83pub(crate) mod expr;
84pub mod instrument;
85pub(crate) mod parser;
86pub use phpdoc_parser as phpdoc;
87pub(crate) mod precedence;
88pub mod source_map;
89pub(crate) mod stmt;
90pub mod version;
91
92use diagnostics::ParseError;
93use php_ast::owned::Comment as OwnedComment;
94use php_ast::{owned::to_owned_program, Comment, Program};
95use source_map::SourceMap;
96pub use version::PhpVersion;
97
98/// Lifetime-free result of parsing a PHP source string.
99///
100/// This is the primary return type of [`parse`] and [`parse_versioned`]. The
101/// AST is fully owned (`Box<str>`, `Box<[T]>`) so it can be stored in a
102/// `HashMap`, sent across threads, or cached alongside other data without
103/// fighting the borrow checker.
104///
105/// Use [`parse_arena`] or [`ParserContext`] when you need the arena-allocated
106/// form for maximum throughput in tight loops or LSP re-parse scenarios.
107pub struct ParseResult {
108    /// The original source text, owned.
109    pub source: String,
110    /// The parsed AST, fully owned with no lifetime parameters.
111    pub program: php_ast::owned::Program,
112    /// All comments found in the source, in source order. Doc-block comments
113    /// attached to a declaration are stored in the declaration node's
114    /// `doc_comment` field, not here.
115    pub comments: Vec<php_ast::owned::Comment>,
116    /// Parse errors and diagnostics. Empty on a successful parse.
117    pub errors: Vec<ParseError>,
118    /// `true` when the error list was capped and further errors were dropped.
119    pub errors_truncated: bool,
120    /// Pre-computed line index for span-to-line/column resolution.
121    pub source_map: SourceMap,
122}
123
124impl ParseResult {
125    fn from_arena_result(result: ArenaParseResult<'_, '_>) -> Self {
126        let program = to_owned_program(&result.program);
127        let comments = result
128            .comments
129            .iter()
130            .map(|c| OwnedComment {
131                kind: c.kind,
132                text: c.text.into(),
133                span: c.span,
134            })
135            .collect();
136        Self {
137            source: result.source.to_owned(),
138            program,
139            comments,
140            errors: result.errors,
141            errors_truncated: result.errors_truncated,
142            source_map: result.source_map,
143        }
144    }
145}
146
147/// Arena-allocated result of parsing a PHP source string.
148///
149/// Returned by [`parse_arena`], [`parse_arena_versioned`], and
150/// [`ParserContext::reparse`]. Both the AST and the source text are borrowed,
151/// so this type has two lifetime parameters. Use [`ParseResult`] (from
152/// [`parse`]) when you need an owned, lifetime-free result.
153pub struct ArenaParseResult<'arena, 'src> {
154    /// The original source text. Useful for extracting text from spans
155    /// via `&result.source[span.start as usize..span.end as usize]`.
156    pub source: &'src str,
157    /// The parsed AST. Always produced, even when errors are present.
158    pub program: Program<'arena, 'src>,
159    /// All comments found in the source, in source order, **except** `/** */`
160    /// doc-block comments that are immediately attached to a declaration.
161    ///
162    /// When the parser encounters a `/** */` comment directly before a
163    /// function, class, method, property, constant, or enum case, it removes
164    /// that comment from this list and stores it in the declaration node's
165    /// `doc_comment` field instead. The two collections are therefore
166    /// **disjoint**: iterating both without deduplication will double-count
167    /// nothing, but iterating only one will miss the other's entries.
168    ///
169    /// To process every comment in the file, iterate `result.comments` (for
170    /// line, hash, block, and unattached doc comments) and also visit each
171    /// declaration node's `doc_comment` field. Or use
172    /// [`php_ast::visitor::walk_comments`] with a [`Visitor`] that also
173    /// overrides the declaration visit methods.
174    pub comments: Vec<Comment<'src>>,
175    /// Parse errors and diagnostics. Empty on a successful parse.
176    pub errors: Vec<ParseError>,
177    /// `true` when the error list was capped at the internal limit and further
178    /// errors were silently dropped. Callers that need a complete error list
179    /// (e.g. linters) should treat this as an incomplete result.
180    pub errors_truncated: bool,
181    /// Pre-computed line index for resolving byte offsets in [`Span`](php_ast::Span)
182    /// to line/column positions. Use [`SourceMap::offset_to_line_col`] or
183    /// [`SourceMap::span_to_line_col`] to convert.
184    pub source_map: SourceMap,
185}
186
187/// Parse PHP `source` using the latest supported PHP version (currently 8.5).
188///
189/// Returns a fully-owned [`ParseResult`] with no lifetime parameters. The
190/// internal arena is created, used, and converted within this call.
191///
192/// Use [`parse_arena`] when you need the raw arena-allocated AST for maximum
193/// throughput (no allocation copying).
194pub fn parse(source: &str) -> ParseResult {
195    let arena = bumpalo::Bump::new();
196    ParseResult::from_arena_result(parse_arena(&arena, source))
197}
198
199/// Parse `source` targeting the given PHP `version`.
200///
201/// Syntax that requires a higher version than `version` is still parsed and
202/// included in the AST, but a [`diagnostics::ParseError::VersionTooLow`] error
203/// is also emitted so callers can report it to the user.
204///
205/// Returns a fully-owned [`ParseResult`]. Use [`parse_arena_versioned`] for the
206/// arena form.
207pub fn parse_versioned(source: &str, version: PhpVersion) -> ParseResult {
208    let arena = bumpalo::Bump::new();
209    ParseResult::from_arena_result(parse_arena_versioned(&arena, source, version))
210}
211
212/// Parse PHP `source` using the latest supported PHP version, returning an
213/// arena-allocated [`ArenaParseResult`].
214///
215/// The `arena` is used for all AST allocations, giving callers control over
216/// memory lifetime. The returned result borrows from both the arena and the
217/// source string.
218///
219/// Prefer [`parse`] unless you are managing the arena yourself for performance
220/// reasons (e.g. LSP re-parsing with [`ParserContext`]). Use [`parse_arena_raw`]
221/// when you never need line/column positions from `result.source_map`.
222pub fn parse_arena<'arena, 'src>(
223    arena: &'arena bumpalo::Bump,
224    source: &'src str,
225) -> ArenaParseResult<'arena, 'src> {
226    let mut parser = parser::Parser::new(arena, source);
227    let program = parser.parse_program();
228    let errors_truncated = parser.errors_truncated();
229    ArenaParseResult {
230        source,
231        program,
232        comments: parser.take_comments(),
233        errors: parser.into_errors(),
234        errors_truncated,
235        source_map: SourceMap::new(source),
236    }
237}
238
239/// Like [`parse_arena`], but skips building the [`source_map::SourceMap`].
240///
241/// `result.source_map` is a no-op empty map. Use this when you only need the
242/// AST and errors — formatters, linters, and batch processors that never call
243/// `source_map.offset_to_line_col` or `source_map.span_to_line_col`.
244pub fn parse_arena_raw<'arena, 'src>(
245    arena: &'arena bumpalo::Bump,
246    source: &'src str,
247) -> ArenaParseResult<'arena, 'src> {
248    let mut parser = parser::Parser::new(arena, source);
249    let program = parser.parse_program();
250    let errors_truncated = parser.errors_truncated();
251    ArenaParseResult {
252        source,
253        program,
254        comments: parser.take_comments(),
255        errors: parser.into_errors(),
256        errors_truncated,
257        source_map: SourceMap::empty(),
258    }
259}
260
261/// Parse `source` targeting the given PHP `version`, returning an
262/// arena-allocated [`ArenaParseResult`].
263///
264/// See [`parse_arena`] for arena lifetime semantics and [`parse_versioned`] for
265/// version-gating behaviour.
266pub fn parse_arena_versioned<'arena, 'src>(
267    arena: &'arena bumpalo::Bump,
268    source: &'src str,
269    version: PhpVersion,
270) -> ArenaParseResult<'arena, 'src> {
271    let mut parser = parser::Parser::with_version(arena, source, version);
272    let program = parser.parse_program();
273    let errors_truncated = parser.errors_truncated();
274    ArenaParseResult {
275        source,
276        program,
277        comments: parser.take_comments(),
278        errors: parser.into_errors(),
279        errors_truncated,
280        source_map: SourceMap::new(source),
281    }
282}
283
284/// A reusable parse context that keeps a `bumpalo::Bump` arena alive between
285/// re-parses, resetting it (O(1)) instead of dropping and reallocating.
286///
287/// This is the preferred entry point for LSP servers or any tool that parses
288/// the same document repeatedly. Once the arena has grown to accommodate the
289/// largest document seen, subsequent parses reuse the backing memory without
290/// any new allocations.
291///
292/// The Rust lifetime system enforces safety: the returned [`ArenaParseResult`]
293/// borrows from `self`, so the borrow checker prevents calling [`reparse`] or
294/// [`reparse_versioned`] again while the previous result is still alive.
295///
296/// [`reparse`]: ParserContext::reparse
297/// [`reparse_versioned`]: ParserContext::reparse_versioned
298///
299/// # Example
300///
301/// ```
302/// let mut ctx = php_rs_parser::ParserContext::new();
303///
304/// let result = ctx.reparse("<?php echo 1;");
305/// assert!(result.errors.is_empty());
306/// drop(result); // must be dropped before the next reparse
307///
308/// let result = ctx.reparse("<?php echo 2;");
309/// assert!(result.errors.is_empty());
310/// ```
311pub struct ParserContext {
312    arena: bumpalo::Bump,
313}
314
315impl ParserContext {
316    /// Create a new context with an empty arena.
317    pub fn new() -> Self {
318        Self {
319            arena: bumpalo::Bump::new(),
320        }
321    }
322
323    /// Reset the arena and parse `source` using PHP 8.5 (the latest version).
324    ///
325    /// The previous [`ArenaParseResult`] **must be dropped** before calling
326    /// this method. The borrow checker enforces this: the returned result
327    /// borrows `self` for the duration of its lifetime, so a second call while
328    /// the first result is still live is a compile-time error.
329    pub fn reparse<'a, 'src>(&'a mut self, source: &'src str) -> ArenaParseResult<'a, 'src> {
330        self.arena.reset();
331        parse_arena(&self.arena, source)
332    }
333
334    /// Reset the arena and parse `source` targeting the given PHP `version`.
335    ///
336    /// See [`reparse`](ParserContext::reparse) for lifetime safety notes.
337    pub fn reparse_versioned<'a, 'src>(
338        &'a mut self,
339        source: &'src str,
340        version: PhpVersion,
341    ) -> ArenaParseResult<'a, 'src> {
342        self.arena.reset();
343        parse_arena_versioned(&self.arena, source, version)
344    }
345
346    /// Reset the arena and parse `source`, returning a fully-owned [`ParseResult`].
347    ///
348    /// Unlike [`reparse`](ParserContext::reparse), the returned result has no
349    /// lifetime parameters and can be stored anywhere. The arena is reused for
350    /// the parse but the output is immediately converted to owned types, so
351    /// there is no borrow on `self` after this call returns.
352    pub fn reparse_owned(&mut self, source: &str) -> ParseResult {
353        self.arena.reset();
354        ParseResult::from_arena_result(parse_arena(&self.arena, source))
355    }
356
357    /// Reset the arena and parse `source` targeting the given PHP `version`,
358    /// returning a fully-owned [`ParseResult`].
359    ///
360    /// See [`reparse_owned`](ParserContext::reparse_owned) for ownership notes
361    /// and [`reparse_versioned`](ParserContext::reparse_versioned) for version
362    /// semantics.
363    pub fn reparse_owned_versioned(&mut self, source: &str, version: PhpVersion) -> ParseResult {
364        self.arena.reset();
365        ParseResult::from_arena_result(parse_arena_versioned(&self.arena, source, version))
366    }
367}
368
369impl Default for ParserContext {
370    fn default() -> Self {
371        Self::new()
372    }
373}