Skip to main content

php_rs_parser/
lib.rs

1//! Fast, fault-tolerant PHP parser that produces a fully typed AST.
2//!
3//! This crate parses PHP source code (PHP 7.4–8.5) into a [`php_ast::Program`]
4//! tree, recovering from syntax errors so that downstream tools always receive
5//! a complete AST.
6//!
7//! # Semantic-rejection responsibility
8//!
9//! The parser is fault-tolerant: it always produces an AST and reports every
10//! error it can identify before recovering. Its semantic-rejection
11//! responsibility is defined externally:
12//!
13//! > **For any input, the parser emits at least one diagnostic iff `php -l`
14//! > would reject that input at the configured target PHP version.**
15//!
//! Checks that need analysis beyond a single declaration — cross-file
//! resolution, unused variables, dead code, type-mismatched returns — are out
//! of scope and belong in a later semantic layer. Checks decidable from one
//! declaration, one parameter list, one modifier set, or one declaration loop
//! are in scope and are reported as [`diagnostics::ParseError::Forbidden`].
21//!
22//! The `===php_error===` section in `tests/fixtures/**/*.phpt` records `php -l`
23//! output; the fixture runner enforces the rule above by failing CI when PHP
24//! rejects an input that the parser silently accepts.
25//!
26//! # Quick start
27//!
28//! ```
29//! let arena = bumpalo::Bump::new();
30//! let result = php_rs_parser::parse(&arena, "<?php echo 'hello';");
31//! assert!(result.errors.is_empty());
32//! ```
33//!
34//! # Version-aware parsing
35//!
36//! Use [`parse_versioned`] to target a specific PHP version. Syntax that
37//! requires a higher version is still parsed into the AST, but a
38//! [`diagnostics::ParseError::VersionTooLow`] diagnostic is emitted.
39//!
40//! ```
41//! let arena = bumpalo::Bump::new();
42//! let result = php_rs_parser::parse_versioned(
43//!     &arena,
44//!     "<?php enum Status { case Active; }",
45//!     php_rs_parser::PhpVersion::Php80,
46//! );
47//! assert!(!result.errors.is_empty()); // enums require PHP 8.1
48//! ```
49//!
50//! # Reusing arenas across re-parses (LSP usage)
51//!
52//! Use [`ParserContext`] to avoid allocator churn when the same document is
53//! re-parsed on every edit. The context owns a `bumpalo::Bump` arena and resets
54//! it in O(1) before each parse, reusing the backing memory once it has grown
55//! to a stable size.
56//!
57//! ```
58//! let mut ctx = php_rs_parser::ParserContext::new();
59//!
60//! let result = ctx.reparse("<?php echo 1;");
61//! assert!(result.errors.is_empty());
62//! drop(result); // must be dropped before the next reparse
63//!
64//! let result = ctx.reparse("<?php echo 2;");
65//! assert!(result.errors.is_empty());
66//! ```
67
68pub mod diagnostics;
69pub(crate) mod expr;
70pub mod instrument;
71pub(crate) mod parser;
72pub use phpdoc_parser as phpdoc;
73pub(crate) mod precedence;
74pub mod source_map;
75pub(crate) mod stmt;
76pub mod version;
77
78use diagnostics::ParseError;
79use php_ast::{Comment, Program};
80use source_map::SourceMap;
81pub use version::PhpVersion;
82
83/// The result of parsing a PHP source string.
84pub struct ParseResult<'arena, 'src> {
85    /// The original source text. Useful for extracting text from spans
86    /// via `&result.source[span.start as usize..span.end as usize]`.
87    pub source: &'src str,
88    /// The parsed AST. Always produced, even when errors are present.
89    pub program: Program<'arena, 'src>,
90    /// All comments found in the source, in source order, **except** `/** */`
91    /// doc-block comments that are immediately attached to a declaration.
92    ///
93    /// When the parser encounters a `/** */` comment directly before a
94    /// function, class, method, property, constant, or enum case, it removes
95    /// that comment from this list and stores it in the declaration node's
96    /// `doc_comment` field instead. The two collections are therefore
97    /// **disjoint**: iterating both without deduplication will double-count
98    /// nothing, but iterating only one will miss the other's entries.
99    ///
100    /// To process every comment in the file, iterate `result.comments` (for
101    /// line, hash, block, and unattached doc comments) and also visit each
102    /// declaration node's `doc_comment` field. Or use
103    /// [`php_ast::visitor::walk_comments`] with a [`Visitor`] that also
104    /// overrides the declaration visit methods.
105    pub comments: Vec<Comment<'src>>,
106    /// Parse errors and diagnostics. Empty on a successful parse.
107    pub errors: Vec<ParseError>,
108    /// `true` when the error list was capped at the internal limit and further
109    /// errors were silently dropped. Callers that need a complete error list
110    /// (e.g. linters) should treat this as an incomplete result.
111    pub errors_truncated: bool,
112    /// Pre-computed line index for resolving byte offsets in [`Span`](php_ast::Span)
113    /// to line/column positions. Use [`SourceMap::offset_to_line_col`] or
114    /// [`SourceMap::span_to_line_col`] to convert.
115    pub source_map: SourceMap,
116}
117
118/// Parse PHP `source` using the latest supported PHP version (currently 8.5).
119///
120/// The `arena` is used for all AST allocations, giving callers control over
121/// memory lifetime. The returned [`ParseResult`] borrows from both the arena
122/// and the source string.
123pub fn parse<'arena, 'src>(
124    arena: &'arena bumpalo::Bump,
125    source: &'src str,
126) -> ParseResult<'arena, 'src> {
127    let mut parser = parser::Parser::new(arena, source);
128    let program = parser.parse_program();
129    let errors_truncated = parser.errors_truncated();
130    ParseResult {
131        source,
132        program,
133        comments: parser.take_comments(),
134        errors: parser.into_errors(),
135        errors_truncated,
136        source_map: SourceMap::new(source),
137    }
138}
139
140/// Parse `source` targeting the given PHP `version`.
141///
142/// Syntax that requires a higher version than `version` is still parsed and
143/// included in the AST, but a [`diagnostics::ParseError::VersionTooLow`] error
144/// is also emitted so callers can report it to the user.
145pub fn parse_versioned<'arena, 'src>(
146    arena: &'arena bumpalo::Bump,
147    source: &'src str,
148    version: PhpVersion,
149) -> ParseResult<'arena, 'src> {
150    let mut parser = parser::Parser::with_version(arena, source, version);
151    let program = parser.parse_program();
152    let errors_truncated = parser.errors_truncated();
153    ParseResult {
154        source,
155        program,
156        comments: parser.take_comments(),
157        errors: parser.into_errors(),
158        errors_truncated,
159        source_map: SourceMap::new(source),
160    }
161}
162
163/// A reusable parse context that keeps a `bumpalo::Bump` arena alive between
164/// re-parses, resetting it (O(1)) instead of dropping and reallocating.
165///
166/// This is the preferred entry point for LSP servers or any tool that parses
167/// the same document repeatedly. Once the arena has grown to accommodate the
168/// largest document seen, subsequent parses reuse the backing memory without
169/// any new allocations.
170///
171/// The Rust lifetime system enforces safety: the returned [`ParseResult`]
172/// borrows from `self`, so the borrow checker prevents calling [`reparse`] or
173/// [`reparse_versioned`] again while the previous result is still alive.
174///
175/// [`reparse`]: ParserContext::reparse
176/// [`reparse_versioned`]: ParserContext::reparse_versioned
177///
178/// # Example
179///
180/// ```
181/// let mut ctx = php_rs_parser::ParserContext::new();
182///
183/// let result = ctx.reparse("<?php echo 1;");
184/// assert!(result.errors.is_empty());
185/// drop(result); // must be dropped before the next reparse
186///
187/// let result = ctx.reparse("<?php echo 2;");
188/// assert!(result.errors.is_empty());
189/// ```
190pub struct ParserContext {
191    arena: bumpalo::Bump,
192}
193
194impl ParserContext {
195    /// Create a new context with an empty arena.
196    pub fn new() -> Self {
197        Self {
198            arena: bumpalo::Bump::new(),
199        }
200    }
201
202    /// Reset the arena and parse `source` using PHP 8.5 (the latest version).
203    ///
204    /// The previous [`ParseResult`] **must be dropped** before calling this
205    /// method. The borrow checker enforces this: the returned result borrows
206    /// `self` for the duration of its lifetime, so a second call while the
207    /// first result is still live is a compile-time error.
208    pub fn reparse<'a, 'src>(&'a mut self, source: &'src str) -> ParseResult<'a, 'src> {
209        self.arena.reset();
210        parse(&self.arena, source)
211    }
212
213    /// Reset the arena and parse `source` targeting the given PHP `version`.
214    ///
215    /// See [`reparse`](ParserContext::reparse) for lifetime safety notes.
216    pub fn reparse_versioned<'a, 'src>(
217        &'a mut self,
218        source: &'src str,
219        version: PhpVersion,
220    ) -> ParseResult<'a, 'src> {
221        self.arena.reset();
222        parse_versioned(&self.arena, source, version)
223    }
224}
225
226impl Default for ParserContext {
227    fn default() -> Self {
228        Self::new()
229    }
230}