mir_analyzer/session.rs
//! Session-based analysis API for incremental, per-file analysis.
//!
//! [`AnalysisSession`] owns the salsa database and per-session caches for a
//! long-running analysis context shared across many per-file analyses. Reads
//! clone the database under a brief lock, then run lock-free; writes hold the
//! lock briefly to mutate canonical state. `MirDb::clone()` is cheap
//! (Arc-wrapped registries), so this pattern gives parallel readers without
//! blocking on concurrent writes for longer than the clone itself.
//!
//! See [`crate::file_analyzer::FileAnalyzer`] for the per-file Pass 2 entry
//! point that operates against a session.
12
13use std::collections::{HashMap, HashSet};
14use std::path::PathBuf;
15use std::sync::{Arc, Mutex};
16
17use rayon::prelude::*;
18use salsa::Setter as _;
19
20use crate::cache::AnalysisCache;
21use crate::composer::Psr4Map;
22use crate::db::{collect_file_definitions, FileDefinitions, MirDatabase, MirDb, SourceFile};
23use crate::php_version::PhpVersion;
24
25/// Long-lived analysis context. Owns the salsa database and tracks which
26/// stubs have been loaded.
27///
28/// Cheap to clone the inner db for parallel reads; writes funnel through
29/// [`Self::ingest_file`], [`Self::invalidate_file`], and the crate-internal
30/// [`Self::with_db_mut`].
31pub struct AnalysisSession {
32 salsa: Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)>,
33 cache: Option<Arc<AnalysisCache>>,
34 psr4: Option<Arc<Psr4Map>>,
35 /// Set of stub virtual paths that have already been ingested. Replaces an
36 /// older `AtomicBool stubs_loaded` flag — tracking individual paths lets
37 /// us lazy-load extension stubs on demand without re-ingesting essentials.
38 loaded_stubs: Mutex<HashSet<&'static str>>,
39 /// True once user stubs (configured via [`Self::with_user_stubs`]) have
40 /// been ingested. They are loaded together with the essential set on the
41 /// first call to a stubs-loading method.
42 user_stubs_loaded: Mutex<bool>,
43 php_version: PhpVersion,
44 user_stub_files: Vec<PathBuf>,
45 user_stub_dirs: Vec<PathBuf>,
46}
47
48impl AnalysisSession {
49 /// Create a session targeting the given PHP language version.
50 pub fn new(php_version: PhpVersion) -> Self {
51 Self {
52 salsa: Mutex::new((MirDb::default(), HashMap::new())),
53 cache: None,
54 psr4: None,
55 loaded_stubs: Mutex::new(HashSet::new()),
56 user_stubs_loaded: Mutex::new(false),
57 php_version,
58 user_stub_files: Vec::new(),
59 user_stub_dirs: Vec::new(),
60 }
61 }
62
63 pub fn with_cache(mut self, cache: Arc<AnalysisCache>) -> Self {
64 self.cache = Some(cache);
65 self
66 }
67
68 pub fn with_psr4(mut self, map: Arc<Psr4Map>) -> Self {
69 self.psr4 = Some(map);
70 self
71 }
72
73 pub fn with_user_stubs(mut self, files: Vec<PathBuf>, dirs: Vec<PathBuf>) -> Self {
74 self.user_stub_files = files;
75 self.user_stub_dirs = dirs;
76 self
77 }
78
79 pub fn php_version(&self) -> PhpVersion {
80 self.php_version
81 }
82
83 pub fn cache(&self) -> Option<&AnalysisCache> {
84 self.cache.as_deref()
85 }
86
87 pub fn psr4(&self) -> Option<&Psr4Map> {
88 self.psr4.as_deref()
89 }
90
91 /// Load every PHP built-in stub plus any configured user stubs.
92 /// Idempotent. Equivalent to the legacy "load everything" behavior; use
93 /// [`Self::ensure_essential_stubs_loaded`] in incremental scenarios where
94 /// cold-start latency matters more than comprehensive stub coverage.
95 pub fn ensure_stubs_loaded(&self) {
96 self.ensure_all_stubs_loaded();
97 }
98
99 /// Load only the curated set of essential stubs (Core, standard, SPL,
100 /// date) plus any configured user stubs. About 25 of 120 stub files;
101 /// covers types and functions used by virtually all PHP code.
102 ///
103 /// Other extension stubs (Reflection, gd, openssl, …) can be brought in
104 /// on demand via [`Self::ensure_stubs_for_symbol`] when user code
105 /// references them. Idempotent — already-loaded stubs are skipped.
106 pub fn ensure_essential_stubs_loaded(&self) {
107 self.ingest_stub_paths(crate::stubs::ESSENTIAL_STUB_PATHS);
108 self.ensure_user_stubs_loaded();
109 }
110
111 /// Load every embedded PHP stub plus any configured user stubs.
112 /// Use for batch tools (CLI, full project analysis) where comprehensive
113 /// symbol coverage matters more than cold-start latency.
114 pub fn ensure_all_stubs_loaded(&self) {
115 let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
116 self.ingest_stub_paths(&paths);
117 self.ensure_user_stubs_loaded();
118 }
119
120 /// Ensure the embedded stub that defines `name` (a function) is ingested.
121 /// Returns `true` when a matching stub exists (whether or not it was
122 /// already loaded), `false` when `name` isn't a known PHP built-in.
123 pub fn ensure_stub_for_function(&self, name: &str) -> bool {
124 match crate::stubs::stub_path_for_function(name) {
125 Some(path) => {
126 self.ingest_stub_paths(&[path]);
127 true
128 }
129 None => false,
130 }
131 }
132
133 /// Ensure the embedded stub that defines `fqcn` (a class / interface /
134 /// trait / enum) is ingested. Case-insensitive lookup with optional
135 /// leading backslash.
136 pub fn ensure_stub_for_class(&self, fqcn: &str) -> bool {
137 match crate::stubs::stub_path_for_class(fqcn) {
138 Some(path) => {
139 self.ingest_stub_paths(&[path]);
140 true
141 }
142 None => false,
143 }
144 }
145
146 /// Ensure the embedded stub that defines `name` (a constant) is ingested.
147 pub fn ensure_stub_for_constant(&self, name: &str) -> bool {
148 match crate::stubs::stub_path_for_constant(name) {
149 Some(path) => {
150 self.ingest_stub_paths(&[path]);
151 true
152 }
153 None => false,
154 }
155 }
156
157 /// Number of distinct embedded stubs currently ingested into the session.
158 /// Useful for diagnostics and bench reporting.
159 pub fn loaded_stub_count(&self) -> usize {
160 self.loaded_stubs.lock().expect("loaded_stubs lock").len()
161 }
162
163 /// Auto-discover and ingest the embedded stubs needed to cover every
164 /// built-in PHP function / class / constant referenced by `source`.
165 ///
166 /// Used by [`crate::FileAnalyzer::analyze`] to keep essentials-only mode
167 /// correct without forcing callers to enumerate which stubs they need.
168 /// Idempotent — already-loaded stubs are skipped via [`Self::loaded_stubs`].
169 ///
170 /// The discovery scan is a coarse identifier sweep (see
171 /// [`crate::stubs::collect_referenced_builtin_paths`]) — it may pull in
172 /// a slightly larger set than the file strictly needs, but never misses
173 /// a referenced built-in. Cost is sub-millisecond per file.
174 ///
175 /// Fast path: if every embedded stub is already loaded (e.g. after a
176 /// batch tool called [`Self::ensure_all_stubs_loaded`]), the source scan
177 /// is skipped entirely.
178 pub fn ensure_stubs_for_source(&self, source: &str) {
179 // Cheap check first: skip the scan entirely when we already know we
180 // have everything. Avoids a ~50-500µs source walk on every analyze
181 // call in batch / warm-session scenarios.
182 {
183 let loaded = self.loaded_stubs.lock().expect("loaded_stubs lock");
184 if loaded.len() >= crate::stubs::stub_files().len() {
185 return;
186 }
187 }
188 let paths = crate::stubs::collect_referenced_builtin_paths(source);
189 if paths.is_empty() {
190 return;
191 }
192 self.ingest_stub_paths(&paths);
193 }
194
195 /// Internal: parse + ingest each path in `paths` that hasn't already been
196 /// ingested. Holds the salsa write lock per file (brief), and the
197 /// `loaded_stubs` set lock briefly to record paths.
198 fn ingest_stub_paths(&self, paths: &[&'static str]) {
199 // Pick out the not-yet-loaded paths first to avoid redundant parsing.
200 let needed: Vec<&'static str> = {
201 let loaded = self.loaded_stubs.lock().expect("loaded_stubs lock");
202 paths
203 .iter()
204 .copied()
205 .filter(|p| !loaded.contains(p))
206 .collect()
207 };
208 if needed.is_empty() {
209 return;
210 }
211
212 let php_version = self.php_version;
213 // Parse in parallel; ingest serially under the salsa write lock.
214 let slices: Vec<(&'static str, mir_codebase::storage::StubSlice)> = needed
215 .par_iter()
216 .filter_map(|&path| {
217 crate::stubs::stub_content_for_path(path).map(|content| {
218 let slice =
219 crate::stubs::stub_slice_from_source(path, content, Some(php_version));
220 (path, slice)
221 })
222 })
223 .collect();
224
225 let mut guard = self.salsa.lock().expect("salsa lock poisoned");
226 let mut loaded = self.loaded_stubs.lock().expect("loaded_stubs lock");
227 for (path, slice) in slices {
228 if loaded.insert(path) {
229 guard.0.ingest_stub_slice(&slice);
230 }
231 }
232 }
233
234 fn ensure_user_stubs_loaded(&self) {
235 if self.user_stub_files.is_empty() && self.user_stub_dirs.is_empty() {
236 return;
237 }
238 let mut guard = self.user_stubs_loaded.lock().expect("user_stubs lock");
239 if *guard {
240 return;
241 }
242 let slices = crate::stubs::user_stub_slices(&self.user_stub_files, &self.user_stub_dirs);
243 let mut salsa = self.salsa.lock().expect("salsa lock poisoned");
244 for slice in slices {
245 salsa.0.ingest_stub_slice(&slice);
246 }
247 *guard = true;
248 }
249
250 /// Cheap clone of the salsa db for a read-only query. The lock is held
251 /// only for the duration of the clone, so concurrent readers never
252 /// serialize on each other or on writes for longer than the clone itself.
253 pub fn snapshot_db(&self) -> MirDb {
254 let guard = self.salsa.lock().expect("salsa lock poisoned");
255 guard.0.clone()
256 }
257
258 /// Run a closure with read access to a database snapshot. The snapshot is
259 /// taken under a brief lock, then the closure runs without holding it.
260 pub fn read<R>(&self, f: impl FnOnce(&dyn MirDatabase) -> R) -> R {
261 let db = self.snapshot_db();
262 f(&db)
263 }
264
265 /// Pass 1 ingestion. Updates the file's source text in the salsa db,
266 /// runs definition collection, and ingests the resulting stub slice.
267 /// Triggers stub loading on first call. Also updates the cache's reverse-
268 /// dependency graph for `file` so cross-file invalidation stays correct
269 /// across incremental edits — without rebuilding the graph from scratch.
270 ///
271 /// If `file` was previously ingested, its old definitions and reference
272 /// locations are removed first so renames / deletions don't leave stale
273 /// state in the codebase. (Without this, long-running sessions would
274 /// accumulate dead reference-location entries indefinitely.)
275 pub fn ingest_file(&self, file: Arc<str>, source: Arc<str>) -> FileDefinitions {
276 self.ensure_stubs_loaded();
277 let file_defs = {
278 let mut guard = self.salsa.lock().expect("salsa lock poisoned");
279 let (ref mut db, ref mut files) = *guard;
280 let salsa_file = match files.get(&file) {
281 Some(&sf) => {
282 // Re-ingestion: drop old definitions + reference locations
283 // before collecting fresh ones. Mirrors what
284 // ProjectAnalyzer::re_analyze_file does.
285 db.remove_file_definitions(file.as_ref());
286 if sf.text(db).as_ref() != source.as_ref() {
287 sf.set_text(db).to(source.clone());
288 }
289 sf
290 }
291 None => {
292 let sf = SourceFile::new(db, file.clone(), source.clone());
293 files.insert(file.clone(), sf);
294 sf
295 }
296 };
297 collect_file_definitions(db, salsa_file)
298 };
299 {
300 let mut guard = self.salsa.lock().expect("salsa lock poisoned");
301 guard.0.ingest_stub_slice(&file_defs.slice);
302 }
303 self.update_reverse_deps_for(&file);
304 file_defs
305 }
306
307 /// Drop a file's contribution to the session: codebase definitions,
308 /// reference locations, salsa input handle, cache entry, and outgoing
309 /// reverse-dependency edges. Cache entries of *dependent* files are
310 /// also evicted (cross-file invalidation).
311 ///
312 /// Use this when a file is closed by the consumer, or before a re-ingest
313 /// of substantially changed content. (Plain re-ingest via
314 /// [`Self::ingest_file`] also drops old definitions, but does not
315 /// remove the salsa input handle — call this for full cleanup.)
316 pub fn invalidate_file(&self, file: &str) {
317 {
318 let mut guard = self.salsa.lock().expect("salsa lock poisoned");
319 let (ref mut db, ref mut files) = *guard;
320 db.remove_file_definitions(file);
321 files.remove(file);
322 }
323 if let Some(cache) = &self.cache {
324 cache.update_reverse_deps_for_file(file, &HashSet::new());
325 cache.evict_with_dependents(&[file.to_string()]);
326 }
327 }
328
329 /// Number of files currently tracked in this session's salsa input set.
330 /// Stable across reads; useful for diagnostics and memory bounds checks.
331 pub fn tracked_file_count(&self) -> usize {
332 let guard = self.salsa.lock().expect("salsa lock poisoned");
333 guard.1.len()
334 }
335
// -----------------------------------------------------------------------
// Read-only codebase queries
//
// All take a brief lock to clone the db, then run the lookup against the
// owned snapshot — concurrent edits proceed without blocking.
// -----------------------------------------------------------------------
342
343 /// Resolve `symbol` (a class FQCN or function FQN) to its declaration
344 /// location. Powers go-to-definition for top-level symbols. Returns
345 /// `None` if the symbol isn't known to the codebase or has no recorded
346 /// source span (e.g. some stub-only declarations).
347 pub fn definition_of(&self, symbol: &str) -> Option<mir_codebase::storage::Location> {
348 let db = self.snapshot_db();
349 db.lookup_class_node(symbol)
350 .filter(|n| n.active(&db))
351 .and_then(|n| n.location(&db))
352 .or_else(|| {
353 db.lookup_function_node(symbol)
354 .filter(|n| n.active(&db))
355 .and_then(|n| n.location(&db))
356 })
357 }
358
359 /// Resolve a class member (method / property / class constant / enum case)
360 /// to its declaration location, walking the inheritance chain.
361 pub fn member_definition(
362 &self,
363 fqcn: &str,
364 member_name: &str,
365 ) -> Option<mir_codebase::storage::Location> {
366 let db = self.snapshot_db();
367 crate::db::member_location_via_db(&db, fqcn, member_name)
368 }
369
370 /// Every recorded reference to `symbol` (as `(file, line, col_start,
371 /// col_end)`). Use [`crate::symbol::ResolvedSymbol::codebase_key`] to
372 /// build the lookup key from a `ResolvedSymbol` returned by
373 /// [`crate::FileAnalysis::symbol_at`].
374 pub fn references_to(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
375 let db = self.snapshot_db();
376 db.reference_locations(symbol)
377 }
378
379 /// All declarations defined in `file` (classes, interfaces, traits, enums,
380 /// functions, constants). Powers outline / document-symbols views and any
381 /// other consumer that needs the file's top-level symbol set. Returns an
382 /// empty Vec if `file` hasn't been ingested.
383 pub fn document_symbols(&self, file: &str) -> Vec<crate::symbol::DocumentSymbol> {
384 use crate::symbol::{DocumentSymbol, DocumentSymbolKind};
385
386 let db = self.snapshot_db();
387 let mut out = Vec::new();
388 for symbol in db.symbols_defined_in_file(file) {
389 // Try class side first — covers Class / Interface / Trait / Enum.
390 if let Some(class_node) = db.lookup_class_node(symbol.as_ref()) {
391 if !class_node.active(&db) {
392 continue;
393 }
394 let kind = crate::db::class_kind_via_db(&db, symbol.as_ref())
395 .map(|k| {
396 if k.is_interface {
397 DocumentSymbolKind::Interface
398 } else if k.is_trait {
399 DocumentSymbolKind::Trait
400 } else if k.is_enum {
401 DocumentSymbolKind::Enum
402 } else {
403 DocumentSymbolKind::Class
404 }
405 })
406 .unwrap_or(DocumentSymbolKind::Class);
407 out.push(DocumentSymbol {
408 name: symbol.clone(),
409 kind,
410 location: class_node.location(&db),
411 });
412 continue;
413 }
414 if let Some(fn_node) = db.lookup_function_node(symbol.as_ref()) {
415 if !fn_node.active(&db) {
416 continue;
417 }
418 out.push(DocumentSymbol {
419 name: symbol.clone(),
420 kind: DocumentSymbolKind::Function,
421 location: fn_node.location(&db),
422 });
423 continue;
424 }
425 // Constants and other top-level declarations: emit with no
426 // location info; consumers can still surface them in an outline.
427 out.push(DocumentSymbol {
428 name: symbol,
429 kind: DocumentSymbolKind::Constant,
430 location: None,
431 });
432 }
433 out
434 }
435
436 /// Compute `file`'s outgoing dependency edges and update the cache's
437 /// reverse-dep graph in place. No-op if no cache is configured.
438 fn update_reverse_deps_for(&self, file: &str) {
439 let Some(cache) = self.cache.as_deref() else {
440 return;
441 };
442 let db = self.snapshot_db();
443 let targets = file_outgoing_dependencies(&db, file);
444 cache.update_reverse_deps_for_file(file, &targets);
445 }
446
447 /// Cross-file inference sweep. For each `(file, source)` pair, runs the
448 /// Pass 2 inference-only mode on a cloned db (parallel via rayon), then
449 /// commits the collected inferred return types to the canonical db.
450 ///
451 /// Call this on idle / save / explicit user request, **not** on every
452 /// keystroke — [`crate::FileAnalyzer::analyze`] deliberately skips
453 /// inference sweep on the hot path. Files whose source contains parse
454 /// errors are silently skipped.
455 pub fn run_inference_sweep(&self, files: &[(Arc<str>, Arc<str>)]) {
456 self.ensure_stubs_loaded();
457
458 // The priming db lives only inside `gather_inferred_types`. After it
459 // returns, all rayon-clone references to the salsa storage are dropped
460 // — required so that the subsequent `commit_inferred_return_types`
461 // call (which calls salsa's `cancel_others`) doesn't deadlock waiting
462 // for outstanding db references.
463 let (functions, methods) =
464 gather_inferred_types(self.snapshot_db(), files, self.php_version);
465
466 let mut guard = self.salsa.lock().expect("salsa lock poisoned");
467 guard.0.commit_inferred_return_types(functions, methods);
468 }
469}
470
471/// Drive Pass 2 inference-only mode in parallel across `files`, accumulating
472/// inferred function and method return types. The `db_priming` MirDb is
473/// consumed (cloned per spawned task and dropped on return), so the caller's
474/// canonical db can subsequently take exclusive access without deadlock.
475///
476/// Crate-internal so [`crate::project::ProjectAnalyzer`] can use the same
477/// deadlock-safe helper for its lazy-load reanalysis sweep.
478#[allow(clippy::type_complexity)]
479pub(crate) fn gather_inferred_types(
480 db_priming: MirDb,
481 files: &[(Arc<str>, Arc<str>)],
482 php_version: PhpVersion,
483) -> (
484 Vec<(Arc<str>, mir_types::Union)>,
485 Vec<(Arc<str>, Arc<str>, mir_types::Union)>,
486) {
487 use crate::pass2::Pass2Driver;
488 use mir_types::Union;
489 use std::sync::Mutex as StdMutex;
490
491 type Functions = Vec<(Arc<str>, Union)>;
492 type Methods = Vec<(Arc<str>, Arc<str>, Union)>;
493 let functions: Arc<StdMutex<Functions>> = Arc::new(StdMutex::new(Vec::new()));
494 let methods: Arc<StdMutex<Methods>> = Arc::new(StdMutex::new(Vec::new()));
495
496 rayon::in_place_scope(|s| {
497 for (file, source) in files {
498 let db = db_priming.clone();
499 let functions = Arc::clone(&functions);
500 let methods = Arc::clone(&methods);
501 let file = file.clone();
502 let source = source.clone();
503
504 s.spawn(move |_| {
505 let arena = bumpalo::Bump::new();
506 let parsed = php_rs_parser::parse(&arena, source.as_ref());
507 if !parsed.errors.is_empty() {
508 return;
509 }
510 let driver = Pass2Driver::new_inference_only(&db as &dyn MirDatabase, php_version);
511 driver.analyze_bodies(&parsed.program, file, source.as_ref(), &parsed.source_map);
512 let inferred = driver.take_inferred_types();
513 if let Ok(mut f) = functions.lock() {
514 f.extend(inferred.functions);
515 }
516 if let Ok(mut m) = methods.lock() {
517 m.extend(inferred.methods);
518 }
519 });
520 }
521 });
522
523 let functions = Arc::try_unwrap(functions)
524 .map(|m| m.into_inner().unwrap_or_default())
525 .unwrap_or_else(|arc| arc.lock().unwrap().clone());
526 let methods = Arc::try_unwrap(methods)
527 .map(|m| m.into_inner().unwrap_or_default())
528 .unwrap_or_else(|arc| arc.lock().unwrap().clone());
529
530 (functions, methods)
531}
532
533/// Compute the set of files `file` depends on: defining files of its imports,
534/// plus parent / interfaces / traits' defining files for any classes declared
535/// in `file`. Self-edges are excluded.
536fn file_outgoing_dependencies(db: &dyn MirDatabase, file: &str) -> HashSet<String> {
537 let mut targets: HashSet<String> = HashSet::new();
538
539 let mut add_target = |symbol: &str| {
540 if let Some(defining_file) = db.symbol_defining_file(symbol) {
541 let def = defining_file.as_ref().to_string();
542 if def != file {
543 targets.insert(def);
544 }
545 }
546 };
547
548 let imports = db.file_imports(file);
549 for fqcn in imports.values() {
550 add_target(fqcn);
551 }
552
553 for fqcn in db.symbols_defined_in_file(file) {
554 let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
555 continue;
556 };
557 if let Some(parent) = node.parent(db) {
558 add_target(parent.as_ref());
559 }
560 for iface in node.interfaces(db).iter() {
561 add_target(iface.as_ref());
562 }
563 for tr in node.traits(db).iter() {
564 add_target(tr.as_ref());
565 }
566 }
567
568 targets
569}