mir_analyzer/session/mod.rs
1//! Session-based analysis API for incremental, per-file analysis.
2//!
3//! [`AnalysisSession`] owns the salsa database and per-session caches for a
4//! long-running analysis context shared across many per-file analyses. Reads
5//! clone the database under a brief lock, then run lock-free; writes hold the
6//! lock briefly to mutate canonical state. `MirDbStorage::clone()` is cheap
7//! (Arc-wrapped registries), so this pattern gives parallel readers without
8//! blocking on concurrent writes for longer than the clone itself.
9//!
10//! See [`crate::file_analyzer::FileAnalyzer`] for the per-file analysis
11//! entry point that operates against a session.
12
13use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
14use std::path::PathBuf;
15use std::sync::Arc;
16
17use parking_lot::RwLock;
18
19use crate::analyzer_db::AnalyzerDb;
20use crate::cache::AnalysisCache;
21use crate::composer::Psr4Map;
22use crate::db::{MirDatabase, MirDbStorage, RefLoc};
23use crate::php_version::PhpVersion;
24
/// Long-lived analysis context. Owns the salsa database and tracks which
/// stubs have been loaded.
///
/// Cheap to clone the inner db for parallel reads; writes funnel through
/// [`Self::ingest_file`], [`Self::invalidate_file`], and the crate-internal
/// [`Self::with_db_mut`].
///
/// Cloning a session (`#[derive(Clone)]`) clones the `Arc` handles, so the
/// clones share the same database, symbol-tracking maps, negative cache and
/// pending eager-file queue. Plain fields such as `php_version` and the
/// user-stub path lists are copied.
#[derive(Clone)]
pub struct AnalysisSession {
    /// Shared database management (salsa, file registry, stub tracking).
    pub(crate) db: Arc<AnalyzerDb>,
    /// Optional body-analysis issue cache. `None` until one is attached via
    /// [`Self::with_cache`] or [`Self::with_cache_dir`].
    pub(crate) cache: Option<Arc<AnalysisCache>>,
    /// PSR-4 / Composer autoload map. Retained alongside `resolver` so the
    /// `psr4()` accessor can still return a typed `Psr4Map` for callers that
    /// need Composer-specific data (project_files / vendor_files / etc.).
    pub(crate) psr4: Option<Arc<Psr4Map>>,
    /// Generic class resolver used for on-demand lazy loading. When `psr4`
    /// is set via [`Self::with_psr4`], this is populated with the same map
    /// re-typed as `dyn ClassResolver`. Consumers can also supply their own
    /// resolver via [`Self::with_class_resolver`] without going through
    /// Composer.
    resolver: Option<Arc<dyn crate::ClassResolver>>,
    /// PHP language version this session targets. Also pushed into the salsa
    /// database (as its string form) whenever the database is created.
    pub(crate) php_version: PhpVersion,
    /// User-supplied stub files, set by [`Self::with_user_stubs`]. Folded
    /// into the cache fingerprint by [`Self::with_cache_dir`].
    pub(crate) user_stub_files: Vec<PathBuf>,
    /// User-supplied stub directories, set by [`Self::with_user_stubs`].
    /// Folded into the cache fingerprint by [`Self::with_cache_dir`].
    pub(crate) user_stub_dirs: Vec<PathBuf>,
    /// Tracks symbols that were previously defined in a file but have since
    /// been removed (deleted or renamed). When `ingest_file` detects that
    /// a symbol disappears, it records it here so `dependency_graph()` can
    /// still produce edges to files that reference the now-gone symbol.
    ///
    /// Keyed by the file that used to define the symbols. Symbols are removed
    /// from the set when re-added to the same file on a subsequent ingest.
    /// The set may contain symbols with no current referencers; those are
    /// harmless — the `symbol_referencers_of` lookup returns empty.
    stale_defined_symbols: Arc<RwLock<HashMap<String, HashSet<Arc<str>>>>>,
    /// Symbols defined by each file as of its last `ingest_file`. The
    /// authoritative "old" set for the rename/deletion diff, independent of
    /// whether the salsa `SourceFile` input was already updated to the new text
    /// by a host driving the db directly (the LSP convergence path). Without
    /// this, re-deriving "old" symbols from the (possibly pre-updated) input
    /// would miss deletions and break cross-file dependency invalidation.
    last_ingested_symbols: Arc<RwLock<HashMap<String, HashSet<Arc<str>>>>>,
    /// Negative cache: FQCNs that `load_class` already failed on.
    /// The value is the resolver-mapped path (when known) so eviction on
    /// `set_file_text` / `ingest_file` is a path equality check rather than
    /// re-running the resolver per entry. `None` means the resolver itself
    /// couldn't map the FQCN; those entries survive file edits (no source
    /// change makes a never-resolvable name resolvable).
    /// Bounded to `UNRESOLVABLE_CACHE_CAP`; clears on overflow.
    unresolvable_fqcns: UnresolvableCache,
    /// Pluggable source-text provider for lazy-load. Defaults to filesystem
    /// reads ([`crate::FsSourceProvider`]); LSPs swap in a VFS-backed
    /// implementation so unsaved buffers override on-disk content.
    source_provider: Arc<dyn crate::SourceProvider>,
    /// Vendor `autoload.files` entries not yet indexed. `Some(paths)` means
    /// pending; `None` means the load has already run (idempotent). Populated
    /// by [`Self::with_psr4`]; drained by [`Self::ensure_vendor_eager_functions`],
    /// which is called automatically from [`Self::prepare_ast_for_analysis`].
    ///
    /// The mutex is held for the full duration of the load so concurrent callers
    /// block until indexing is complete rather than proceeding with a stale
    /// workspace snapshot.
    pub(crate) pending_eager_function_files: Arc<parking_lot::Mutex<Option<Vec<PathBuf>>>>,
}
88
/// FQCN → optional resolver-mapped path. See the field doc on
/// `AnalysisSession::unresolvable_fqcns`.
///
/// The map sits behind an `Arc<RwLock<…>>`, so every clone of a session
/// observes the same negative cache.
type UnresolvableCache = Arc<RwLock<HashMap<Arc<str>, Option<Arc<str>>>>>;
92
/// Cap on the negative-resolution cache. Sized to accommodate a large
/// workspace's worth of genuinely-missing references without unbounded
/// growth. On overflow the cache is cleared; the cost is a few extra
/// resolver calls until it re-fills.
///
/// NOTE(review): the overflow check itself is not in this part of the file —
/// presumably it lives in one of the session submodules; confirm there.
const UNRESOLVABLE_CACHE_CAP: usize = 10_000;
98
99impl AnalysisSession {
100 /// Create a session targeting the given PHP language version.
101 pub fn new(php_version: PhpVersion) -> Self {
102 let db = Arc::new(AnalyzerDb::new());
103 db.salsa
104 .write()
105 .set_php_version(Arc::from(php_version.to_string().as_str()));
106 Self {
107 db,
108 cache: None,
109 psr4: None,
110 resolver: None,
111 php_version,
112 user_stub_files: Vec::new(),
113 user_stub_dirs: Vec::new(),
114 stale_defined_symbols: Arc::new(RwLock::new(HashMap::default())),
115 last_ingested_symbols: Arc::new(RwLock::new(HashMap::default())),
116 unresolvable_fqcns: Arc::new(RwLock::new(HashMap::default())),
117 source_provider: Arc::new(crate::FsSourceProvider),
118 pending_eager_function_files: Arc::new(parking_lot::Mutex::new(Some(Vec::new()))),
119 }
120 }
121
122 /// Swap in a custom [`crate::SourceProvider`]. LSPs install a VFS-backed
123 /// provider here so the analyzer reads from unsaved editor buffers
124 /// instead of disk.
125 pub fn with_source_provider(mut self, provider: Arc<dyn crate::SourceProvider>) -> Self {
126 self.source_provider = provider;
127 self
128 }
129
130 /// Attach a pre-built [`AnalysisCache`] (the body-analysis issue cache) and
131 /// open a sibling definition [`StubSlice`] cache under the same root, so
132 /// callers using this builder get the same speedup as `with_cache_dir`.
133 ///
134 /// Rebuilds the shared database to attach the definition cache — call
135 /// **before** any file is ingested. A debug assertion catches misuse.
136 ///
137 /// [`StubSlice`]: mir_codebase::storage::StubSlice
138 pub fn with_cache(mut self, cache: Arc<AnalysisCache>) -> Self {
139 debug_assert_eq!(
140 self.db.source_file_count(),
141 0,
142 "AnalysisSession::with_cache must be called before any file is ingested"
143 );
144 let dir = cache.cache_dir().to_path_buf();
145 self.db = Arc::new(AnalyzerDb::new().with_cache_dir(&dir));
146 self.db
147 .salsa
148 .write()
149 .set_php_version(Arc::from(self.php_version.to_string().as_str()));
150 self.cache = Some(cache);
151 self
152 }
153
154 /// Convenience: open a disk-backed cache at `cache_dir` and attach it.
155 ///
156 /// Attaches both the body-analysis issue cache ([`AnalysisCache`]) and the
157 /// definition [`StubSlice`] cache to the shared database. Builds a fresh
158 /// [`AnalyzerDb`] internally — call **before** any file is ingested. A
159 /// debug assertion catches misuse.
160 ///
161 /// [`StubSlice`]: mir_codebase::storage::StubSlice
162 pub fn with_cache_dir(mut self, cache_dir: &std::path::Path) -> Self {
163 debug_assert_eq!(
164 self.db.source_file_count(),
165 0,
166 "AnalysisSession::with_cache_dir must be called before any file is ingested"
167 );
168 self.db = Arc::new(AnalyzerDb::new().with_cache_dir(cache_dir));
169 self.db
170 .salsa
171 .write()
172 .set_php_version(Arc::from(self.php_version.to_string().as_str()));
173 // Fold the user-stub fingerprint into the cache epoch. `with_user_stubs`
174 // must run before this for it to be picked up (it does in `build_session`);
175 // sessions without user stubs get 0, which is correct.
176 let user_stub_fp =
177 crate::stubs::user_stub_fingerprint(&self.user_stub_files, &self.user_stub_dirs);
178 self.cache = Some(Arc::new(AnalysisCache::open(
179 cache_dir,
180 self.php_version.cache_byte(),
181 user_stub_fp,
182 )));
183 self
184 }
185
186 /// Attach a Composer autoload map (PSR-4, PSR-0, classmap, files).
187 /// Sets the same map as the active [`crate::ClassResolver`] so
188 /// [`Self::load_class`] works out of the box.
189 pub fn with_psr4(mut self, map: Arc<Psr4Map>) -> Self {
190 let user_resolver: Arc<dyn crate::ClassResolver> = map.clone();
191 // Wrap with stub awareness so `find_class_like` / `resolve_fqcn_to_path`
192 // can map built-in PHP class FQCNs (`ArrayObject`, `Exception`, …)
193 // to their stub virtual paths.
194 let resolver: Arc<dyn crate::ClassResolver> = Arc::new(crate::ChainedClassResolver::new(
195 user_resolver,
196 Arc::new(crate::StubClassResolver),
197 ));
198 self.psr4 = Some(map.clone());
199 self.resolver = Some(resolver.clone());
200 // Mirror into MirDbStorage so salsa-tracked resolver queries
201 // (`db::resolve_fqcn_to_path`) see the same resolver and are
202 // invalidated on swap.
203 self.db.salsa.write().set_resolver(Some(resolver));
204 // Register vendor autoload.files for lazy loading. They define global
205 // functions and constants that the class resolver cannot discover.
206 // `ensure_vendor_eager_functions` will index them on first analysis call.
207 *self.pending_eager_function_files.lock() = Some(map.vendor_eager_files());
208 self
209 }
210
211 /// Attach a generic class resolver for projects that don't use Composer
212 /// (WordPress, Drupal, custom autoloaders, workspace-walk indexes).
213 /// Replaces any previously-set Composer-backed resolver. Automatically
214 /// wrapped with stub awareness so PHP built-ins remain resolvable.
215 pub fn with_class_resolver(mut self, resolver: Arc<dyn crate::ClassResolver>) -> Self {
216 let wrapped: Arc<dyn crate::ClassResolver> = Arc::new(crate::ChainedClassResolver::new(
217 resolver,
218 Arc::new(crate::StubClassResolver),
219 ));
220 self.db.salsa.write().set_resolver(Some(wrapped.clone()));
221 self.resolver = Some(wrapped);
222 self
223 }
224
225 pub fn with_user_stubs(mut self, files: Vec<PathBuf>, dirs: Vec<PathBuf>) -> Self {
226 self.user_stub_files = files;
227 self.user_stub_dirs = dirs;
228 self
229 }
230
231 pub fn php_version(&self) -> PhpVersion {
232 self.php_version
233 }
234
235 pub fn cache(&self) -> Option<&AnalysisCache> {
236 self.cache.as_deref()
237 }
238
239 pub fn psr4(&self) -> Option<&Psr4Map> {
240 self.psr4.as_deref()
241 }
242}
243
244mod incremental;
245mod ingest;
246mod loading;
247mod queries;
248mod stubs;
249
250/// Compute the full set of files `file` depends on: structural edges from
251/// the memoized [`crate::db::file_structural_deps`] tracked query, plus
252/// bare-FQN references recorded during body analysis (which live in the
253/// reference index and are not visible to salsa). Self-edges are excluded.
254/// Used to persist the disk cache's reverse-dep graph.
255fn file_outgoing_dependencies(db: &dyn MirDatabase, file: &str) -> HashSet<String> {
256 let mut targets: HashSet<String> = HashSet::default();
257
258 if let Some(sf) = db.lookup_source_file(file) {
259 for target in crate::db::file_structural_deps(db, sf).iter() {
260 targets.insert(target.as_ref().to_string());
261 }
262 }
263
264 // Bare-FQN references recorded during body analysis (new \Foo(),
265 // \Foo::method(), \foo()) that do not appear in use-import statements.
266 for symbol_key in db.file_referenced_symbols(file) {
267 let lookup: &str = match symbol_key.split_once("::") {
268 Some((class, _)) => class,
269 None => &symbol_key,
270 };
271 if let Some(defining_file) = db.symbol_defining_file(lookup) {
272 if defining_file.as_ref() != file {
273 targets.insert(defining_file.as_ref().to_string());
274 }
275 }
276 }
277
278 targets
279}
280
281/// AST visitor that collects class FQCN references for PSR-4 preloading.
282/// Captures identifiers from `new X`, static calls / property / constant
283/// access, type hints, `instanceof`, and `@param`/`@return`/`@var`/`@extends`/
284/// `@implements` docblock annotations. Does *not* normalize via PSR-4 /
285/// imports — callers run the raw string through `resolve_name`.
286fn collect_class_refs_from_ast(program: &php_ast::owned::Program) -> Vec<String> {
287 use php_ast::ast::BinaryOp;
288 use php_ast::owned::visitor::{
289 walk_owned_class_member, walk_owned_expr, walk_owned_program, walk_owned_stmt, OwnedVisitor,
290 };
291 use php_ast::owned::{ClassMemberKind, ExprKind};
292 use std::ops::ControlFlow;
293
294 fn owned_name_str(name: &php_ast::owned::Name) -> String {
295 let joined: String = name
296 .parts
297 .iter()
298 .map(|p| p.as_ref())
299 .collect::<Vec<&str>>()
300 .join("\\");
301 if name.kind == php_ast::ast::NameKind::FullyQualified {
302 format!("\\{joined}")
303 } else {
304 joined
305 }
306 }
307
308 /// Recursively collect all `TNamedObject` FQCNs from a mir type, including
309 /// those nested inside generic type parameters (e.g. `Collection<Item>`).
310 fn collect_from_type(ty: &mir_types::Type, out: &mut std::collections::HashSet<String>) {
311 for atomic in ty.types.iter() {
312 if let mir_types::Atomic::TNamedObject { fqcn, type_params } = atomic {
313 out.insert(fqcn.as_ref().to_string());
314 for tp in type_params.iter() {
315 collect_from_type(tp, out);
316 }
317 }
318 }
319 }
320
321 /// Parse a docblock and collect class names from `@param`, `@return`,
322 /// `@var`, `@extends`, and `@implements` annotations.
323 fn collect_from_docblock(text: &str, out: &mut std::collections::HashSet<String>) {
324 let parsed = crate::parser::DocblockParser::parse(text);
325 for (_, ty) in &parsed.params {
326 collect_from_type(ty, out);
327 }
328 if let Some(ret) = &parsed.return_type {
329 collect_from_type(ret, out);
330 }
331 if let Some(var) = &parsed.var_type {
332 collect_from_type(var, out);
333 }
334 if let Some(ext) = &parsed.extends {
335 collect_from_type(ext, out);
336 }
337 for impl_ty in &parsed.implements {
338 collect_from_type(impl_ty, out);
339 }
340 }
341
342 struct V {
343 names: std::collections::HashSet<String>,
344 }
345 impl OwnedVisitor for V {
346 fn visit_stmt(&mut self, stmt: &php_ast::owned::Stmt) -> ControlFlow<()> {
347 if let Some(doc) = stmt.leading_doc_comment() {
348 collect_from_docblock(&doc.text, &mut self.names);
349 }
350 walk_owned_stmt(self, stmt)
351 }
352
353 fn visit_class_member(&mut self, member: &php_ast::owned::ClassMember) -> ControlFlow<()> {
354 match &member.kind {
355 ClassMemberKind::Method(m) => {
356 if let Some(doc) = &m.doc_comment {
357 collect_from_docblock(&doc.text, &mut self.names);
358 }
359 }
360 ClassMemberKind::Property(p) => {
361 if let Some(doc) = &p.doc_comment {
362 collect_from_docblock(&doc.text, &mut self.names);
363 }
364 }
365 _ => {}
366 }
367 walk_owned_class_member(self, member)
368 }
369
370 fn visit_expr(&mut self, expr: &php_ast::owned::Expr) -> ControlFlow<()> {
371 match &expr.kind {
372 ExprKind::New(n) => {
373 if let ExprKind::Identifier(name) = &n.class.kind {
374 self.names.insert(name.as_ref().to_string());
375 }
376 }
377 ExprKind::StaticMethodCall(c) => {
378 if let ExprKind::Identifier(name) = &c.class.kind {
379 self.names.insert(name.as_ref().to_string());
380 }
381 }
382 ExprKind::StaticPropertyAccess(a) => {
383 if let ExprKind::Identifier(name) = &a.class.kind {
384 self.names.insert(name.as_ref().to_string());
385 }
386 }
387 ExprKind::ClassConstAccess(a) => {
388 if let ExprKind::Identifier(name) = &a.class.kind {
389 self.names.insert(name.as_ref().to_string());
390 }
391 }
392 ExprKind::Binary(b) if b.op == BinaryOp::Instanceof => {
393 if let ExprKind::Identifier(name) = &b.right.kind {
394 self.names.insert(name.as_ref().to_string());
395 }
396 }
397 _ => {}
398 }
399 walk_owned_expr(self, expr)
400 }
401
402 // Walker routes every class/type-position Name here: type hints, catch types, extends/implements, trait use, attributes.
403 fn visit_name(&mut self, name: &php_ast::owned::Name) -> ControlFlow<()> {
404 let s = owned_name_str(name);
405 if !s.is_empty() {
406 self.names.insert(s);
407 }
408 ControlFlow::Continue(())
409 }
410 }
411 let mut v = V {
412 names: std::collections::HashSet::default(),
413 };
414 let _ = walk_owned_program(&mut v, program);
415 v.names.into_iter().collect()
416}