mir_analyzer/session.rs
1//! Session-based analysis API for incremental, per-file analysis.
2//!
3//! [`AnalysisSession`] owns the salsa database and per-session caches for a
4//! long-running analysis context shared across many per-file analyses. Reads
5//! clone the database under a brief lock, then run lock-free; writes hold the
6//! lock briefly to mutate canonical state. `MirDb::clone()` is cheap
7//! (Arc-wrapped registries), so this pattern gives parallel readers without
8//! blocking on concurrent writes for longer than the clone itself.
9//!
10//! See [`crate::file_analyzer::FileAnalyzer`] for the per-file Pass 2 entry
11//! point that operates against a session.
12
13use std::collections::{HashMap, HashSet};
14use std::path::PathBuf;
15use std::sync::atomic::{AtomicBool, Ordering};
16use std::sync::Arc;
17
18use parking_lot::Mutex;
19
20use rayon::prelude::*;
21use salsa::Setter as _;
22
23use crate::cache::AnalysisCache;
24use crate::composer::Psr4Map;
25use crate::db::{collect_file_definitions, FileDefinitions, MirDatabase, MirDb, SourceFile};
26use crate::php_version::PhpVersion;
27
/// Long-lived analysis context. Owns the salsa database and tracks which
/// stubs have been loaded.
///
/// The database and the per-path [`SourceFile`] input handles live together
/// behind one mutex. Readers clone the database under that lock (see
/// [`Self::snapshot_db`]); writes funnel through [`Self::ingest_file`],
/// [`Self::invalidate_file`], and the crate-internal [`Self::with_db_mut`].
pub struct AnalysisSession {
    /// Canonical database paired with the map from file path to that file's
    /// salsa input handle. Both are guarded by the same lock.
    salsa: Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)>,
    /// Optional shared analysis cache, configured via [`Self::with_cache`].
    cache: Option<Arc<AnalysisCache>>,
    /// Optional PSR-4 map, configured via [`Self::with_psr4`].
    psr4: Option<Arc<Psr4Map>>,
    /// Set of stub virtual paths that have already been ingested. Replaces an
    /// older `AtomicBool stubs_loaded` flag — tracking individual paths lets
    /// us lazy-load extension stubs on demand without re-ingesting essentials.
    loaded_stubs: Mutex<HashSet<&'static str>>,
    /// True once user stubs (configured via [`Self::with_user_stubs`]) have
    /// been ingested. They are loaded together with the essential set on the
    /// first call to a stubs-loading method.
    user_stubs_loaded: AtomicBool,
    /// PHP language version this session targets.
    php_version: PhpVersion,
    /// Individual user stub files, configured via [`Self::with_user_stubs`].
    user_stub_files: Vec<PathBuf>,
    /// User stub directories, configured via [`Self::with_user_stubs`].
    user_stub_dirs: Vec<PathBuf>,
}
50
51impl AnalysisSession {
52 /// Create a session targeting the given PHP language version.
53 pub fn new(php_version: PhpVersion) -> Self {
54 Self {
55 salsa: Mutex::new((MirDb::default(), HashMap::new())),
56 cache: None,
57 psr4: None,
58 loaded_stubs: Mutex::new(HashSet::new()),
59 user_stubs_loaded: AtomicBool::new(false),
60 php_version,
61 user_stub_files: Vec::new(),
62 user_stub_dirs: Vec::new(),
63 }
64 }
65
66 pub fn with_cache(mut self, cache: Arc<AnalysisCache>) -> Self {
67 self.cache = Some(cache);
68 self
69 }
70
71 pub fn with_psr4(mut self, map: Arc<Psr4Map>) -> Self {
72 self.psr4 = Some(map);
73 self
74 }
75
76 pub fn with_user_stubs(mut self, files: Vec<PathBuf>, dirs: Vec<PathBuf>) -> Self {
77 self.user_stub_files = files;
78 self.user_stub_dirs = dirs;
79 self
80 }
81
82 pub fn php_version(&self) -> PhpVersion {
83 self.php_version
84 }
85
86 pub fn cache(&self) -> Option<&AnalysisCache> {
87 self.cache.as_deref()
88 }
89
90 pub fn psr4(&self) -> Option<&Psr4Map> {
91 self.psr4.as_deref()
92 }
93
94 /// Load every PHP built-in stub plus any configured user stubs.
95 /// Idempotent. Equivalent to the legacy "load everything" behavior; use
96 /// [`Self::ensure_essential_stubs_loaded`] in incremental scenarios where
97 /// cold-start latency matters more than comprehensive stub coverage.
98 pub fn ensure_stubs_loaded(&self) {
99 self.ensure_all_stubs_loaded();
100 }
101
102 /// Load only the curated set of essential stubs (Core, standard, SPL,
103 /// date) plus any configured user stubs. About 25 of 120 stub files;
104 /// covers types and functions used by virtually all PHP code.
105 ///
106 /// Other extension stubs (Reflection, gd, openssl, …) can be brought in
107 /// on demand via [`Self::ensure_stubs_for_symbol`] when user code
108 /// references them. Idempotent — already-loaded stubs are skipped.
109 pub fn ensure_essential_stubs_loaded(&self) {
110 self.ingest_stub_paths(crate::stubs::ESSENTIAL_STUB_PATHS);
111 self.ensure_user_stubs_loaded();
112 }
113
114 /// Load every embedded PHP stub plus any configured user stubs.
115 /// Use for batch tools (CLI, full project analysis) where comprehensive
116 /// symbol coverage matters more than cold-start latency.
117 pub fn ensure_all_stubs_loaded(&self) {
118 let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
119 self.ingest_stub_paths(&paths);
120 self.ensure_user_stubs_loaded();
121 }
122
123 /// Ensure the embedded stub that defines `name` (a function) is ingested.
124 /// Returns `true` when a matching stub exists (whether or not it was
125 /// already loaded), `false` when `name` isn't a known PHP built-in.
126 pub fn ensure_stub_for_function(&self, name: &str) -> bool {
127 match crate::stubs::stub_path_for_function(name) {
128 Some(path) => {
129 self.ingest_stub_paths(&[path]);
130 true
131 }
132 None => false,
133 }
134 }
135
136 /// Ensure the embedded stub that defines `fqcn` (a class / interface /
137 /// trait / enum) is ingested. Case-insensitive lookup with optional
138 /// leading backslash.
139 pub fn ensure_stub_for_class(&self, fqcn: &str) -> bool {
140 match crate::stubs::stub_path_for_class(fqcn) {
141 Some(path) => {
142 self.ingest_stub_paths(&[path]);
143 true
144 }
145 None => false,
146 }
147 }
148
149 /// Ensure the embedded stub that defines `name` (a constant) is ingested.
150 pub fn ensure_stub_for_constant(&self, name: &str) -> bool {
151 match crate::stubs::stub_path_for_constant(name) {
152 Some(path) => {
153 self.ingest_stub_paths(&[path]);
154 true
155 }
156 None => false,
157 }
158 }
159
160 /// Number of distinct embedded stubs currently ingested into the session.
161 /// Useful for diagnostics and bench reporting.
162 pub fn loaded_stub_count(&self) -> usize {
163 self.loaded_stubs.lock().len()
164 }
165
166 /// Auto-discover and ingest the embedded stubs needed to cover every
167 /// built-in PHP function / class / constant referenced by `source`.
168 ///
169 /// Used by [`crate::FileAnalyzer::analyze`] to keep essentials-only mode
170 /// correct without forcing callers to enumerate which stubs they need.
171 /// Idempotent — already-loaded stubs are skipped via [`Self::loaded_stubs`].
172 ///
173 /// The discovery scan is a coarse identifier sweep (see
174 /// [`crate::stubs::collect_referenced_builtin_paths`]) — it may pull in
175 /// a slightly larger set than the file strictly needs, but never misses
176 /// a referenced built-in. Cost is sub-millisecond per file.
177 ///
178 /// Fast path: if every embedded stub is already loaded (e.g. after a
179 /// batch tool called [`Self::ensure_all_stubs_loaded`]), the source scan
180 /// is skipped entirely.
181 pub fn ensure_stubs_for_source(&self, source: &str) {
182 // Cheap check first: skip the scan entirely when we already know we
183 // have everything. Avoids a ~50-500µs source walk on every analyze
184 // call in batch / warm-session scenarios.
185 {
186 let loaded = self.loaded_stubs.lock();
187 if loaded.len() >= crate::stubs::stub_files().len() {
188 return;
189 }
190 }
191 let paths = crate::stubs::collect_referenced_builtin_paths(source);
192 if paths.is_empty() {
193 return;
194 }
195 self.ingest_stub_paths(&paths);
196 }
197
198 /// Discover and ingest stubs by walking the parsed AST of a PHP file.
199 ///
200 /// Similar to [`Self::ensure_stubs_for_source`], but takes an already-parsed
201 /// AST instead of raw source text. Produces zero false positives since it
202 /// only extracts identifiers from actual AST nodes (not from strings or
203 /// comments). Preferred over `ensure_stubs_for_source` when the AST is
204 /// already available (e.g., in [`crate::FileAnalyzer`]).
205 ///
206 /// Idempotent and skips the scan if all stubs are already loaded.
207 pub fn ensure_stubs_for_ast(&self, program: &php_ast::ast::Program<'_, '_>) {
208 {
209 let loaded = self.loaded_stubs.lock();
210 if loaded.len() >= crate::stubs::stub_files().len() {
211 return;
212 }
213 }
214 let paths = crate::stubs::collect_referenced_builtin_paths_from_ast(program);
215 if paths.is_empty() {
216 return;
217 }
218 self.ingest_stub_paths(&paths);
219 }
220
221 /// Internal: parse + ingest each path in `paths` that hasn't already been
222 /// ingested. Holds the salsa write lock per file (brief), and the
223 /// `loaded_stubs` set lock briefly to record paths.
224 fn ingest_stub_paths(&self, paths: &[&'static str]) {
225 // Pick out the not-yet-loaded paths first to avoid redundant parsing.
226 let needed: Vec<&'static str> = {
227 let loaded = self.loaded_stubs.lock();
228 paths
229 .iter()
230 .copied()
231 .filter(|p| !loaded.contains(p))
232 .collect()
233 };
234 if needed.is_empty() {
235 return;
236 }
237
238 let php_version = self.php_version;
239 // Parse in parallel; ingest serially under the salsa write lock.
240 let slices: Vec<(&'static str, mir_codebase::storage::StubSlice)> = needed
241 .par_iter()
242 .filter_map(|&path| {
243 crate::stubs::stub_content_for_path(path).map(|content| {
244 let slice =
245 crate::stubs::stub_slice_from_source(path, content, Some(php_version));
246 (path, slice)
247 })
248 })
249 .collect();
250
251 let mut guard = self.salsa.lock();
252 let mut loaded = self.loaded_stubs.lock();
253 for (path, slice) in slices {
254 if loaded.insert(path) {
255 guard.0.ingest_stub_slice(&slice);
256 }
257 }
258 }
259
260 fn ensure_user_stubs_loaded(&self) {
261 if self.user_stub_files.is_empty() && self.user_stub_dirs.is_empty() {
262 return;
263 }
264 let was_loaded = self.user_stubs_loaded.load(Ordering::Relaxed);
265 if was_loaded {
266 return;
267 }
268 let slices = crate::stubs::user_stub_slices(&self.user_stub_files, &self.user_stub_dirs);
269 let mut salsa = self.salsa.lock();
270 for slice in slices {
271 salsa.0.ingest_stub_slice(&slice);
272 }
273 self.user_stubs_loaded.store(true, Ordering::Relaxed);
274 }
275
276 /// Cheap clone of the salsa db for a read-only query. The lock is held
277 /// only for the duration of the clone, so concurrent readers never
278 /// serialize on each other or on writes for longer than the clone itself.
279 pub fn snapshot_db(&self) -> MirDb {
280 let guard = self.salsa.lock();
281 guard.0.clone()
282 }
283
284 /// Run a closure with read access to a database snapshot. The snapshot is
285 /// taken under a brief lock, then the closure runs without holding it.
286 pub fn read<R>(&self, f: impl FnOnce(&dyn MirDatabase) -> R) -> R {
287 let db = self.snapshot_db();
288 f(&db)
289 }
290
291 /// Pass 1 ingestion. Updates the file's source text in the salsa db,
292 /// runs definition collection, and ingests the resulting stub slice.
293 /// Triggers stub loading on first call. Also updates the cache's reverse-
294 /// dependency graph for `file` so cross-file invalidation stays correct
295 /// across incremental edits — without rebuilding the graph from scratch.
296 ///
297 /// If `file` was previously ingested, its old definitions and reference
298 /// locations are removed first so renames / deletions don't leave stale
299 /// state in the codebase. (Without this, long-running sessions would
300 /// accumulate dead reference-location entries indefinitely.)
301 pub fn ingest_file(&self, file: Arc<str>, source: Arc<str>) -> FileDefinitions {
302 self.ensure_stubs_loaded();
303 let file_defs = {
304 let mut guard = self.salsa.lock();
305 let (ref mut db, ref mut files) = *guard;
306 let salsa_file = match files.get(&file) {
307 Some(&sf) => {
308 // Re-ingestion: drop old definitions + reference locations
309 // before collecting fresh ones. Mirrors what
310 // ProjectAnalyzer::re_analyze_file does.
311 db.remove_file_definitions(file.as_ref());
312 if sf.text(db).as_ref() != source.as_ref() {
313 sf.set_text(db).to(source.clone());
314 }
315 sf
316 }
317 None => {
318 let file_cloned = file.clone();
319 let sf = SourceFile::new(db, file_cloned.clone(), source.clone());
320 files.insert(file_cloned, sf);
321 sf
322 }
323 };
324 collect_file_definitions(db, salsa_file)
325 };
326 {
327 let mut guard = self.salsa.lock();
328 guard.0.ingest_stub_slice(&file_defs.slice);
329 }
330 self.update_reverse_deps_for(&file);
331 file_defs
332 }
333
334 /// Drop a file's contribution to the session: codebase definitions,
335 /// reference locations, salsa input handle, cache entry, and outgoing
336 /// reverse-dependency edges. Cache entries of *dependent* files are
337 /// also evicted (cross-file invalidation).
338 ///
339 /// Use this when a file is closed by the consumer, or before a re-ingest
340 /// of substantially changed content. (Plain re-ingest via
341 /// [`Self::ingest_file`] also drops old definitions, but does not
342 /// remove the salsa input handle — call this for full cleanup.)
343 pub fn invalidate_file(&self, file: &str) {
344 {
345 let mut guard = self.salsa.lock();
346 let (ref mut db, ref mut files) = *guard;
347 db.remove_file_definitions(file);
348 files.remove(file);
349 }
350 if let Some(cache) = &self.cache {
351 cache.update_reverse_deps_for_file(file, &HashSet::new());
352 cache.evict_with_dependents(&[file.to_string()]);
353 }
354 }
355
356 /// Number of files currently tracked in this session's salsa input set.
357 /// Stable across reads; useful for diagnostics and memory bounds checks.
358 pub fn tracked_file_count(&self) -> usize {
359 let guard = self.salsa.lock();
360 guard.1.len()
361 }
362
363 // -----------------------------------------------------------------------
364 // Read-only codebase queries
365 //
366 // All take a brief lock to clone the db, then run the lookup against the
367 // owned snapshot — concurrent edits proceed without blocking.
368 // -----------------------------------------------------------------------
369
370 /// Resolve `symbol` (a class FQCN or function FQN) to its declaration
371 /// location. Powers go-to-definition for top-level symbols. Returns
372 /// `None` if the symbol isn't known to the codebase or has no recorded
373 /// source span (e.g. some stub-only declarations).
374 pub fn definition_of(&self, symbol: &str) -> Option<mir_codebase::storage::Location> {
375 let db = self.snapshot_db();
376 db.lookup_class_node(symbol)
377 .filter(|n| n.active(&db))
378 .and_then(|n| n.location(&db))
379 .or_else(|| {
380 db.lookup_function_node(symbol)
381 .filter(|n| n.active(&db))
382 .and_then(|n| n.location(&db))
383 })
384 }
385
386 /// Resolve a class member (method / property / class constant / enum case)
387 /// to its declaration location, walking the inheritance chain.
388 pub fn member_definition(
389 &self,
390 fqcn: &str,
391 member_name: &str,
392 ) -> Option<mir_codebase::storage::Location> {
393 let db = self.snapshot_db();
394 crate::db::member_location_via_db(&db, fqcn, member_name)
395 }
396
397 /// Every recorded reference to `symbol` (as `(file, line, col_start,
398 /// col_end)`). Use [`crate::symbol::ResolvedSymbol::codebase_key`] to
399 /// build the lookup key from a `ResolvedSymbol` returned by
400 /// [`crate::FileAnalysis::symbol_at`].
401 pub fn references_to(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
402 let db = self.snapshot_db();
403 db.reference_locations(symbol)
404 }
405
406 /// All declarations defined in `file` (classes, interfaces, traits, enums,
407 /// functions, constants). Powers outline / document-symbols views and any
408 /// other consumer that needs the file's top-level symbol set. Returns an
409 /// empty Vec if `file` hasn't been ingested.
410 pub fn document_symbols(&self, file: &str) -> Vec<crate::symbol::DocumentSymbol> {
411 use crate::symbol::{DocumentSymbol, DocumentSymbolKind};
412
413 let db = self.snapshot_db();
414 let mut out = Vec::new();
415 for symbol in db.symbols_defined_in_file(file) {
416 // Try class side first — covers Class / Interface / Trait / Enum.
417 if let Some(class_node) = db.lookup_class_node(symbol.as_ref()) {
418 if !class_node.active(&db) {
419 continue;
420 }
421 let kind = crate::db::class_kind_via_db(&db, symbol.as_ref())
422 .map(|k| {
423 if k.is_interface {
424 DocumentSymbolKind::Interface
425 } else if k.is_trait {
426 DocumentSymbolKind::Trait
427 } else if k.is_enum {
428 DocumentSymbolKind::Enum
429 } else {
430 DocumentSymbolKind::Class
431 }
432 })
433 .unwrap_or(DocumentSymbolKind::Class);
434 out.push(DocumentSymbol {
435 name: symbol.clone(),
436 kind,
437 location: class_node.location(&db),
438 });
439 continue;
440 }
441 if let Some(fn_node) = db.lookup_function_node(symbol.as_ref()) {
442 if !fn_node.active(&db) {
443 continue;
444 }
445 out.push(DocumentSymbol {
446 name: symbol.clone(),
447 kind: DocumentSymbolKind::Function,
448 location: fn_node.location(&db),
449 });
450 continue;
451 }
452 // Constants and other top-level declarations: emit with no
453 // location info; consumers can still surface them in an outline.
454 out.push(DocumentSymbol {
455 name: symbol,
456 kind: DocumentSymbolKind::Constant,
457 location: None,
458 });
459 }
460 out
461 }
462
463 /// Compute `file`'s outgoing dependency edges and update the cache's
464 /// reverse-dep graph in place. No-op if no cache is configured.
465 fn update_reverse_deps_for(&self, file: &str) {
466 let Some(cache) = self.cache.as_deref() else {
467 return;
468 };
469 let db = self.snapshot_db();
470 let targets = file_outgoing_dependencies(&db, file);
471 cache.update_reverse_deps_for_file(file, &targets);
472 }
473
474 /// Cross-file inference sweep. For each `(file, source)` pair, runs the
475 /// Pass 2 inference-only mode on a cloned db (parallel via rayon), then
476 /// commits the collected inferred return types to the canonical db.
477 ///
478 /// Call this on idle / save / explicit user request, **not** on every
479 /// keystroke — [`crate::FileAnalyzer::analyze`] deliberately skips
480 /// inference sweep on the hot path. Files whose source contains parse
481 /// errors are silently skipped.
482 pub fn run_inference_sweep(&self, files: &[(Arc<str>, Arc<str>)]) {
483 self.ensure_stubs_loaded();
484
485 // The priming db lives only inside `gather_inferred_types`. After it
486 // returns, all rayon-clone references to the salsa storage are dropped
487 // — required so that the subsequent `commit_inferred_return_types`
488 // call (which calls salsa's `cancel_others`) doesn't deadlock waiting
489 // for outstanding db references.
490 let (functions, methods) =
491 gather_inferred_types(self.snapshot_db(), files, self.php_version);
492
493 let mut guard = self.salsa.lock();
494 guard.0.commit_inferred_return_types(functions, methods);
495 }
496}
497
498/// Drive Pass 2 inference-only mode in parallel across `files`, accumulating
499/// inferred function and method return types. The `db_priming` MirDb is
500/// consumed (cloned per spawned task and dropped on return), so the caller's
501/// canonical db can subsequently take exclusive access without deadlock.
502///
503/// Crate-internal so [`crate::project::ProjectAnalyzer`] can use the same
504/// deadlock-safe helper for its lazy-load reanalysis sweep.
505#[allow(clippy::type_complexity)]
506pub(crate) fn gather_inferred_types(
507 db_priming: MirDb,
508 files: &[(Arc<str>, Arc<str>)],
509 php_version: PhpVersion,
510) -> (
511 Vec<(Arc<str>, mir_types::Union)>,
512 Vec<(Arc<str>, Arc<str>, mir_types::Union)>,
513) {
514 use crate::pass2::Pass2Driver;
515 use mir_types::Union;
516
517 type Functions = Vec<(Arc<str>, Union)>;
518 type Methods = Vec<(Arc<str>, Arc<str>, Union)>;
519 let functions: Arc<Mutex<Functions>> = Arc::new(Mutex::new(Vec::new()));
520 let methods: Arc<Mutex<Methods>> = Arc::new(Mutex::new(Vec::new()));
521
522 rayon::in_place_scope(|s| {
523 for (file, source) in files {
524 let db = db_priming.clone();
525 let functions = Arc::clone(&functions);
526 let methods = Arc::clone(&methods);
527 let file = file.clone();
528 let source = source.clone();
529
530 s.spawn(move |_| {
531 let arena = crate::arena::create_parse_arena(source.len());
532 let parsed = php_rs_parser::parse(&arena, source.as_ref());
533 if !parsed.errors.is_empty() {
534 return;
535 }
536 let driver = Pass2Driver::new_inference_only(&db as &dyn MirDatabase, php_version);
537 driver.analyze_bodies(&parsed.program, file, source.as_ref(), &parsed.source_map);
538 let inferred = driver.take_inferred_types();
539 {
540 let mut f = functions.lock();
541 f.extend(inferred.functions);
542 }
543 {
544 let mut m = methods.lock();
545 m.extend(inferred.methods);
546 }
547 });
548 }
549 });
550
551 let functions = Arc::try_unwrap(functions)
552 .map(|m| m.into_inner())
553 .unwrap_or_else(|arc| arc.lock().clone());
554 let methods = Arc::try_unwrap(methods)
555 .map(|m| m.into_inner())
556 .unwrap_or_else(|arc| arc.lock().clone());
557
558 (functions, methods)
559}
560
561/// Compute the set of files `file` depends on: defining files of its imports,
562/// plus parent / interfaces / traits' defining files for any classes declared
563/// in `file`. Self-edges are excluded.
564fn file_outgoing_dependencies(db: &dyn MirDatabase, file: &str) -> HashSet<String> {
565 let mut targets: HashSet<String> = HashSet::new();
566
567 let mut add_target = |symbol: &str| {
568 if let Some(defining_file) = db.symbol_defining_file(symbol) {
569 let def = defining_file.as_ref().to_string();
570 if def != file {
571 targets.insert(def);
572 }
573 }
574 };
575
576 let imports = db.file_imports(file);
577 for fqcn in imports.values() {
578 add_target(fqcn);
579 }
580
581 for fqcn in db.symbols_defined_in_file(file) {
582 let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
583 continue;
584 };
585 if let Some(parent) = node.parent(db) {
586 add_target(parent.as_ref());
587 }
588 for iface in node.interfaces(db).iter() {
589 add_target(iface.as_ref());
590 }
591 for tr in node.traits(db).iter() {
592 add_target(tr.as_ref());
593 }
594 }
595
596 targets
597}