mir_analyzer/file_analyzer.rs
1//! Per-file analysis entry point for incremental analysis.
2//!
3//! [`FileAnalyzer`] runs single-pass Pass 2 against an [`AnalysisSession`] and
4//! returns issues + resolved symbols for one file. Unlike
5//! [`crate::ProjectAnalyzer::re_analyze_file`], it does **not** run the
6//! inference-only Pass 2 sweep — that's a batch concern. For cross-file
7//! inferred return types, schedule a project-wide inference sweep on idle.
8//!
9//! Caller is responsible for parsing the file (so they keep ownership of the
10//! arena and AST). The session must already have Pass 1 state for any files
11//! whose definitions this analysis depends on; call
12//! [`AnalysisSession::ingest_file`] first.
13//!
14//! For batch multi-file analysis, use [`BatchFileAnalyzer::analyze_batch`]
15//! which parallelizes analysis across multiple pre-parsed files.
16
17use std::sync::Arc;
18
19use mir_issues::Issue;
20use php_ast::ast::Program;
21use php_rs_parser::source_map::SourceMap;
22use rayon::prelude::*;
23
24use crate::db::MirDatabase;
25use crate::pass2::Pass2Driver;
26use crate::session::AnalysisSession;
27use crate::symbol::ResolvedSymbol;
28
29/// Result of a single-file analysis.
30pub struct FileAnalysis {
31 pub issues: Vec<Issue>,
32 pub symbols: Vec<ResolvedSymbol>,
33}
34
35impl FileAnalysis {
36 /// Return the innermost resolved symbol whose span contains `byte_offset`,
37 /// or `None` if no symbol was recorded at that position.
38 ///
39 /// Entry point for hover / go-to-definition flows: callers map
40 /// (line, column) → byte offset → resolved symbol, then look up the
41 /// symbol's definition via [`crate::AnalysisSession::definition_of`] or
42 /// type info via [`ResolvedSymbol::resolved_type`].
43 pub fn symbol_at(&self, byte_offset: u32) -> Option<&ResolvedSymbol> {
44 self.symbols
45 .iter()
46 .filter(|s| s.span.start <= byte_offset && byte_offset < s.span.end)
47 .min_by_key(|s| s.span.end - s.span.start)
48 }
49}
50
51/// Per-file Pass 2 analyzer bound to an [`AnalysisSession`]. Cheap to
52/// construct — typically held transiently per analysis call.
53pub struct FileAnalyzer<'a> {
54 session: &'a AnalysisSession,
55}
56
57impl<'a> FileAnalyzer<'a> {
58 pub fn new(session: &'a AnalysisSession) -> Self {
59 Self { session }
60 }
61
62 /// Single-pass Pass 2. Returns issues and per-expression resolved symbols.
63 ///
64 /// Pass 2 runs against a cloned db snapshot — the lock is not held during
65 /// analysis, so concurrent edits and reads on the session proceed without
66 /// blocking on this call.
67 ///
68 /// Stub loading: ensures the session's essentials are loaded, then auto-
69 /// discovers any extension stubs (`imagecreate` → gd, `ReflectionClass` →
70 /// Reflection, …) referenced by `source` and lazy-ingests them. This
71 /// keeps essentials-only sessions correct without callers having to
72 /// enumerate stubs by hand. Call `ensure_all_stubs_loaded` once if the
73 /// consumer prefers eager loading instead.
74 pub fn analyze(
75 &self,
76 file: Arc<str>,
77 source: &str,
78 program: &Program<'_, '_>,
79 source_map: &SourceMap,
80 ) -> FileAnalysis {
81 self.session.ensure_essential_stubs_loaded();
82 self.session.ensure_stubs_for_ast(program);
83 let db = self.session.snapshot_db();
84 let driver = Pass2Driver::new(&db, self.session.php_version());
85 let (issues, symbols) = driver.analyze_bodies(program, file, source, source_map);
86 self.session
87 .commit_ref_locs_batch(db.take_pending_ref_locs());
88 FileAnalysis { issues, symbols }
89 }
90}
91
92/// Batch file analyzer for parallel multi-file analysis.
93///
94/// `BatchFileAnalyzer` processes pre-parsed files in parallel using rayon,
95/// making it efficient for analyzing many files at once (e.g., cold-start analysis).
96pub struct BatchFileAnalyzer<'a> {
97 session: &'a AnalysisSession,
98}
99
100/// A pre-parsed file ready for batch analysis.
101///
102/// Use [`ParsedFile::new`] (unsafe) to construct. Fields are intentionally
103/// private — the raw pointer fields must satisfy a non-trivial safety contract
104/// enforced only by the constructor.
105pub struct ParsedFile {
106 pub(crate) file: Arc<str>,
107 pub(crate) source: Arc<str>,
108 pub(crate) program: *const Program<'static, 'static>,
109 pub(crate) source_map: *const SourceMap,
110}
111
112impl ParsedFile {
113 /// File path this `ParsedFile` represents.
114 pub fn file(&self) -> &Arc<str> {
115 &self.file
116 }
117
118 /// Source text for this file.
119 pub fn source(&self) -> &Arc<str> {
120 &self.source
121 }
122}
123
124// SAFETY: ParsedFile contains pointers to owned AST and source_map that are kept
125// alive by the parser and owned by the caller. Analysis only reads these, never mutates.
126unsafe impl Send for ParsedFile {}
127unsafe impl Sync for ParsedFile {}
128
129impl ParsedFile {
130 /// Create a ParsedFile from a pre-parsed AST and source map.
131 ///
132 /// # Safety
133 ///
134 /// The caller must ensure that:
135 /// - `program` points to a valid `Program` that remains alive during the entire
136 /// `BatchFileAnalyzer::analyze_batch` call
137 /// - `source_map` points to a valid `SourceMap` that remains alive during the entire
138 /// `BatchFileAnalyzer::analyze_batch` call
139 /// - Both pointers came from the same `php_rs_parser::parse()` call and use the same
140 /// bump allocator
141 ///
142 /// The typical usage pattern is to call `php_rs_parser::parse(&arena, source)` and
143 /// immediately pass the resulting `program` and `source_map` pointers (obtained via
144 /// `&parsed.program` and `&parsed.source_map`) to this function. The arena must be
145 /// kept alive until analysis completes.
146 pub unsafe fn new(
147 file: Arc<str>,
148 source: Arc<str>,
149 program: *const Program<'static, 'static>,
150 source_map: *const SourceMap,
151 ) -> Self {
152 Self {
153 file,
154 source,
155 program,
156 source_map,
157 }
158 }
159}
160
161impl<'a> BatchFileAnalyzer<'a> {
162 pub fn new(session: &'a AnalysisSession) -> Self {
163 Self { session }
164 }
165
166 /// Analyze multiple pre-parsed files in parallel.
167 ///
168 /// Each file must already have its AST and source_map computed and kept alive
169 /// by the caller. This function processes all files in parallel using rayon.
170 ///
171 /// Each rayon worker gets its own cloned database snapshot, so concurrent
172 /// analysis proceeds without lock contention on the session.
173 ///
174 /// # Safety
175 ///
176 /// The caller is responsible for ensuring that the Program and SourceMap pointers
177 /// remain valid for the duration of this call.
178 pub fn analyze_batch(&self, files: Vec<ParsedFile>) -> Vec<(Arc<str>, FileAnalysis)> {
179 self.session.ensure_essential_stubs_loaded();
180
181 // First pass: collect all ASTs and auto-discover stubs.
182 files.iter().for_each(|file| {
183 // SAFETY: Caller guarantees pointer validity.
184 let program = unsafe { &*file.program };
185 self.session.ensure_stubs_for_ast(program);
186 });
187
188 // Second pass: analyze files in parallel.
189 // Each rayon worker gets its own database clone (Salsa is Send but !Sync).
190 let db = self.session.snapshot_db();
191 let results: Vec<(Arc<str>, FileAnalysis, Vec<crate::db::RefLoc>)> = files
192 .into_par_iter()
193 .map_with(db, |db, file| {
194 // SAFETY: Caller guarantees pointer validity.
195 let program = unsafe { &*file.program };
196 let source_map = unsafe { &*file.source_map };
197 let driver = Pass2Driver::new(db as &dyn MirDatabase, self.session.php_version());
198 let (issues, symbols) =
199 driver.analyze_bodies(program, file.file.clone(), &file.source, source_map);
200 let pending = db.take_pending_ref_locs();
201 let analysis = FileAnalysis { issues, symbols };
202 (file.file, analysis, pending)
203 })
204 .collect();
205 let mut all_ref_locs = Vec::new();
206 let mut out = Vec::with_capacity(results.len());
207 for (file, analysis, ref_locs) in results {
208 all_ref_locs.extend(ref_locs);
209 out.push((file, analysis));
210 }
211 self.session.commit_ref_locs_batch(all_ref_locs);
212 out
213 }
214}