mir_analyzer/session.rs
1//! Session-based analysis API for incremental, per-file analysis.
2//!
3//! [`AnalysisSession`] owns the salsa database and per-session caches for a
4//! long-running analysis context shared across many per-file analyses. Reads
5//! clone the database under a brief lock, then run lock-free; writes hold the
6//! lock briefly to mutate canonical state. `MirDb::clone()` is cheap
7//! (Arc-wrapped registries), so this pattern gives parallel readers without
8//! blocking on concurrent writes for longer than the clone itself.
9//!
10//! See [`crate::file_analyzer::FileAnalyzer`] for the per-file Pass 2 entry
11//! point that operates against a session.
12
13use std::collections::HashSet;
14use std::path::PathBuf;
15use std::sync::Arc;
16
17use parking_lot::Mutex;
18
19use crate::cache::AnalysisCache;
20use crate::composer::Psr4Map;
21use crate::db::{MirDatabase, MirDb};
22use crate::php_version::PhpVersion;
23use crate::shared_db::SharedDb;
24
25/// Long-lived analysis context. Owns the salsa database and tracks which
26/// stubs have been loaded.
27///
28/// Cheap to clone the inner db for parallel reads; writes funnel through
29/// [`Self::ingest_file`], [`Self::invalidate_file`], and the crate-internal
30/// [`Self::with_db_mut`].
31pub struct AnalysisSession {
32 /// Shared database management (salsa, file registry, stub tracking).
33 /// Extracted to allow code sharing with ProjectAnalyzer.
34 shared_db: Arc<SharedDb>,
35 cache: Option<Arc<AnalysisCache>>,
36 /// PSR-4 / Composer autoload map. Retained alongside `resolver` so the
37 /// `psr4()` accessor can still return a typed `Psr4Map` for callers that
38 /// need Composer-specific data (project_files / vendor_files / etc.).
39 psr4: Option<Arc<Psr4Map>>,
40 /// Generic class resolver used for on-demand lazy loading. When `psr4`
41 /// is set via [`Self::with_psr4`], this is populated with the same map
42 /// re-typed as `dyn ClassResolver`. Consumers can also supply their own
43 /// resolver via [`Self::with_class_resolver`] without going through
44 /// Composer.
45 resolver: Option<Arc<dyn crate::ClassResolver>>,
46 php_version: PhpVersion,
47 user_stub_files: Vec<PathBuf>,
48 user_stub_dirs: Vec<PathBuf>,
49}
50
51impl AnalysisSession {
52 /// Create a session targeting the given PHP language version.
53 pub fn new(php_version: PhpVersion) -> Self {
54 Self {
55 shared_db: Arc::new(SharedDb::new()),
56 cache: None,
57 psr4: None,
58 resolver: None,
59 php_version,
60 user_stub_files: Vec::new(),
61 user_stub_dirs: Vec::new(),
62 }
63 }
64
65 pub fn with_cache(mut self, cache: Arc<AnalysisCache>) -> Self {
66 self.cache = Some(cache);
67 self
68 }
69
70 /// Convenience: open a disk-backed cache at `cache_dir` and attach it.
71 /// Avoids forcing callers to wrap [`AnalysisCache`] in `Arc` themselves.
72 pub fn with_cache_dir(self, cache_dir: &std::path::Path) -> Self {
73 self.with_cache(Arc::new(AnalysisCache::open(cache_dir)))
74 }
75
76 /// Attach a Composer autoload map (PSR-4, PSR-0, classmap, files).
77 /// Sets the same map as the active [`crate::ClassResolver`] so
78 /// [`Self::lazy_load_class`] works out of the box.
79 pub fn with_psr4(mut self, map: Arc<Psr4Map>) -> Self {
80 let resolver: Arc<dyn crate::ClassResolver> = map.clone();
81 self.psr4 = Some(map);
82 self.resolver = Some(resolver);
83 self
84 }
85
86 /// Attach a generic class resolver for projects that don't use Composer
87 /// (WordPress, Drupal, custom autoloaders, workspace-walk indexes).
88 /// Replaces any previously-set Composer-backed resolver.
89 pub fn with_class_resolver(mut self, resolver: Arc<dyn crate::ClassResolver>) -> Self {
90 self.resolver = Some(resolver);
91 self
92 }
93
94 pub fn with_user_stubs(mut self, files: Vec<PathBuf>, dirs: Vec<PathBuf>) -> Self {
95 self.user_stub_files = files;
96 self.user_stub_dirs = dirs;
97 self
98 }
99
100 pub fn php_version(&self) -> PhpVersion {
101 self.php_version
102 }
103
104 pub fn cache(&self) -> Option<&AnalysisCache> {
105 self.cache.as_deref()
106 }
107
108 pub fn psr4(&self) -> Option<&Psr4Map> {
109 self.psr4.as_deref()
110 }
111
112 /// Load every PHP built-in stub plus any configured user stubs.
113 ///
114 /// **Deprecated**: prefer [`Self::ensure_all_stubs_loaded`] (explicit
115 /// "comprehensive") or [`Self::ensure_essential_stubs_loaded`] (fast
116 /// cold-start with auto-discovery on demand).
117 #[doc(hidden)]
118 pub fn ensure_stubs_loaded(&self) {
119 self.ensure_all_stubs_loaded();
120 }
121
122 /// Load only the curated set of essential stubs (Core, standard, SPL,
123 /// date) plus any configured user stubs. About 25 of 120 stub files;
124 /// covers types and functions used by virtually all PHP code.
125 ///
126 /// Other extension stubs (Reflection, gd, openssl, …) can be brought in
127 /// on demand via [`Self::ensure_stubs_for_symbol`] when user code
128 /// references them. Idempotent — already-loaded stubs are skipped.
129 pub fn ensure_essential_stubs_loaded(&self) {
130 self.shared_db
131 .ingest_stub_paths(crate::stubs::ESSENTIAL_STUB_PATHS, self.php_version);
132 self.ensure_user_stubs_loaded();
133 }
134
135 /// Load every embedded PHP stub plus any configured user stubs.
136 /// Use for batch tools (CLI, full project analysis) where comprehensive
137 /// symbol coverage matters more than cold-start latency.
138 pub fn ensure_all_stubs_loaded(&self) {
139 let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
140 self.shared_db.ingest_stub_paths(&paths, self.php_version);
141 self.ensure_user_stubs_loaded();
142 }
143
144 /// Ensure the embedded stub that defines `name` (a function) is ingested.
145 /// Returns `true` when a matching stub exists (whether or not it was
146 /// already loaded), `false` when `name` isn't a known PHP built-in.
147 ///
148 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead —
149 /// it auto-discovers needed stubs from a parsed file.
150 #[doc(hidden)]
151 pub fn ensure_stub_for_function(&self, name: &str) -> bool {
152 match crate::stubs::stub_path_for_function(name) {
153 Some(path) => {
154 self.shared_db.ingest_stub_paths(&[path], self.php_version);
155 true
156 }
157 None => false,
158 }
159 }
160
161 /// Ensure the embedded stub that defines `fqcn` (a class / interface /
162 /// trait / enum) is ingested. Case-insensitive lookup with optional
163 /// leading backslash.
164 ///
165 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead.
166 #[doc(hidden)]
167 pub fn ensure_stub_for_class(&self, fqcn: &str) -> bool {
168 match crate::stubs::stub_path_for_class(fqcn) {
169 Some(path) => {
170 self.shared_db.ingest_stub_paths(&[path], self.php_version);
171 true
172 }
173 None => false,
174 }
175 }
176
177 /// Ensure the embedded stub that defines `name` (a constant) is ingested.
178 ///
179 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead.
180 #[doc(hidden)]
181 pub fn ensure_stub_for_constant(&self, name: &str) -> bool {
182 match crate::stubs::stub_path_for_constant(name) {
183 Some(path) => {
184 self.shared_db.ingest_stub_paths(&[path], self.php_version);
185 true
186 }
187 None => false,
188 }
189 }
190
191 /// Number of distinct embedded stubs currently ingested into the session.
192 /// Useful for diagnostics and bench reporting.
193 pub fn loaded_stub_count(&self) -> usize {
194 self.shared_db.loaded_stubs.lock().len()
195 }
196
197 /// Auto-discover and ingest the embedded stubs needed to cover every
198 /// built-in PHP function / class / constant referenced by `source`.
199 ///
200 /// Used by [`crate::FileAnalyzer::analyze`] to keep essentials-only mode
201 /// correct without forcing callers to enumerate which stubs they need.
202 /// Idempotent — already-loaded stubs are skipped via [`Self::loaded_stubs`].
203 ///
204 /// The discovery scan is a coarse identifier sweep (see
205 /// [`crate::stubs::collect_referenced_builtin_paths`]) — it may pull in
206 /// a slightly larger set than the file strictly needs, but never misses
207 /// a referenced built-in. Cost is sub-millisecond per file.
208 ///
209 /// Fast path: if every embedded stub is already loaded (e.g. after a
210 /// batch tool called [`Self::ensure_all_stubs_loaded`]), the source scan
211 /// is skipped entirely.
212 pub fn ensure_stubs_for_source(&self, source: &str) {
213 // Cheap check first: skip the scan entirely when we already know we
214 // have everything. Avoids a ~50-500µs source walk on every analyze
215 // call in batch / warm-session scenarios.
216 {
217 let loaded = self.shared_db.loaded_stubs.lock();
218 if loaded.len() >= crate::stubs::stub_files().len() {
219 return;
220 }
221 }
222 let paths = crate::stubs::collect_referenced_builtin_paths(source);
223 if paths.is_empty() {
224 return;
225 }
226 self.shared_db.ingest_stub_paths(&paths, self.php_version);
227 }
228
229 /// Discover and ingest stubs by walking the parsed AST of a PHP file.
230 ///
231 /// Similar to [`Self::ensure_stubs_for_source`], but takes an already-parsed
232 /// AST instead of raw source text. Produces zero false positives since it
233 /// only extracts identifiers from actual AST nodes (not from strings or
234 /// comments). Preferred over `ensure_stubs_for_source` when the AST is
235 /// already available (e.g., in [`crate::FileAnalyzer`]).
236 ///
237 /// Idempotent and skips the scan if all stubs are already loaded.
238 pub fn ensure_stubs_for_ast(&self, program: &php_ast::ast::Program<'_, '_>) {
239 {
240 let loaded = self.shared_db.loaded_stubs.lock();
241 if loaded.len() >= crate::stubs::stub_files().len() {
242 return;
243 }
244 }
245 let paths = crate::stubs::collect_referenced_builtin_paths_from_ast(program);
246 if paths.is_empty() {
247 return;
248 }
249 self.shared_db.ingest_stub_paths(&paths, self.php_version);
250 }
251
252 fn ensure_user_stubs_loaded(&self) {
253 self.shared_db
254 .ingest_user_stubs(&self.user_stub_files, &self.user_stub_dirs);
255 }
256
257 /// Cheap clone of the salsa db for a read-only query. The lock is held
258 /// only for the duration of the clone, so concurrent readers never
259 /// serialize on each other or on writes for longer than the clone itself.
260 ///
261 /// **Internal API — exposes Salsa types.** Subject to change without
262 /// notice. Public consumers should use the typed query methods
263 /// ([`Self::definition_of`], [`Self::hover`], etc.) instead.
264 #[doc(hidden)]
265 pub fn snapshot_db(&self) -> MirDb {
266 self.shared_db.snapshot_db()
267 }
268
269 /// Run a closure with read access to a database snapshot.
270 ///
271 /// **Internal API — exposes Salsa types.** Subject to change without
272 /// notice.
273 #[doc(hidden)]
274 pub fn read<R>(&self, f: impl FnOnce(&dyn MirDatabase) -> R) -> R {
275 let db = self.snapshot_db();
276 f(&db)
277 }
278
279 /// Pass 1 ingestion. Updates the file's source text in the salsa db,
280 /// runs definition collection, and ingests the resulting stub slice.
281 /// Triggers stub loading on first call. Also updates the cache's reverse-
282 /// dependency graph for `file` so cross-file invalidation stays correct
283 /// across incremental edits — without rebuilding the graph from scratch.
284 ///
285 /// If `file` was previously ingested, its old definitions and reference
286 /// locations are removed first so renames / deletions don't leave stale
287 /// state in the codebase. (Without this, long-running sessions would
288 /// accumulate dead reference-location entries indefinitely.)
289 pub fn ingest_file(&self, file: Arc<str>, source: Arc<str>) {
290 self.ensure_stubs_loaded();
291 {
292 let mut guard = self.shared_db.salsa.lock();
293 let (ref mut db, _) = *guard;
294 db.remove_file_definitions(file.as_ref());
295 }
296 let _file_defs = self
297 .shared_db
298 .collect_and_ingest_file(file.clone(), source.as_ref());
299 self.update_reverse_deps_for(&file);
300 }
301
302 /// Drop a file's contribution to the session: codebase definitions,
303 /// reference locations, salsa input handle, cache entry, and outgoing
304 /// reverse-dependency edges. Cache entries of *dependent* files are
305 /// also evicted (cross-file invalidation).
306 ///
307 /// Use this when a file is closed by the consumer, or before a re-ingest
308 /// of substantially changed content. (Plain re-ingest via
309 /// [`Self::ingest_file`] also drops old definitions, but does not
310 /// remove the salsa input handle — call this for full cleanup.)
311 pub fn invalidate_file(&self, file: &str) {
312 {
313 let mut guard = self.shared_db.salsa.lock();
314 let (ref mut db, ref mut files) = *guard;
315 db.remove_file_definitions(file);
316 files.remove(file);
317 }
318 if let Some(cache) = &self.cache {
319 cache.update_reverse_deps_for_file(file, &HashSet::new());
320 cache.evict_with_dependents(&[file.to_string()]);
321 }
322 }
323
324 /// Number of files currently tracked in this session's salsa input set.
325 /// Stable across reads; useful for diagnostics and memory bounds checks.
326 pub fn tracked_file_count(&self) -> usize {
327 let guard = self.shared_db.salsa.lock();
328 guard.1.len()
329 }
330
331 // -----------------------------------------------------------------------
332 // Read-only codebase queries
333 //
334 // All take a brief lock to clone the db, then run the lookup against the
335 // owned snapshot — concurrent edits proceed without blocking.
336 // -----------------------------------------------------------------------
337
338 /// Resolve a top-level symbol (class or function) to its declaration
339 /// location. Powers go-to-definition.
340 ///
341 /// Returns:
342 /// - `Ok(Location)` — symbol found with a source location
343 /// - `Err(NotFound)` — no such symbol in the codebase
344 /// - `Err(NoSourceLocation)` — symbol exists but has no recorded span
345 /// (e.g. some stub-only declarations)
346 pub fn definition_of(
347 &self,
348 symbol: &crate::Symbol,
349 ) -> Result<mir_codebase::storage::Location, crate::SymbolLookupError> {
350 let db = self.snapshot_db();
351 match symbol {
352 crate::Symbol::Class(fqcn) => {
353 let node = db
354 .lookup_class_node(fqcn.as_ref())
355 .filter(|n| n.active(&db))
356 .ok_or(crate::SymbolLookupError::NotFound)?;
357 node.location(&db)
358 .ok_or(crate::SymbolLookupError::NoSourceLocation)
359 }
360 crate::Symbol::Function(fqn) => {
361 let node = db
362 .lookup_function_node(fqn.as_ref())
363 .filter(|n| n.active(&db))
364 .ok_or(crate::SymbolLookupError::NotFound)?;
365 node.location(&db)
366 .ok_or(crate::SymbolLookupError::NoSourceLocation)
367 }
368 crate::Symbol::Method { class, name }
369 | crate::Symbol::Property { class, name }
370 | crate::Symbol::ClassConstant { class, name } => {
371 crate::db::member_location_via_db(&db, class, name)
372 .ok_or(crate::SymbolLookupError::NotFound)
373 }
374 crate::Symbol::GlobalConstant(_) => {
375 // Global constants don't currently store location info
376 Err(crate::SymbolLookupError::NoSourceLocation)
377 }
378 }
379 }
380
381 /// Hover information for a symbol: type, docstring, and definition location.
382 ///
383 /// Use [`crate::FileAnalysis::symbol_at`] to find the symbol at a cursor
384 /// position, then build a [`crate::Symbol`] from its `kind`. This method
385 /// assembles the displayable hover data.
386 ///
387 /// Returns `Err(NotFound)` if the symbol doesn't exist. May still return
388 /// `Ok` with `docstring: None` or `definition: None` if those specific
389 /// pieces aren't available.
390 pub fn hover(
391 &self,
392 symbol: &crate::Symbol,
393 ) -> Result<crate::HoverInfo, crate::SymbolLookupError> {
394 use mir_types::{Atomic, Union};
395 let db = self.snapshot_db();
396 match symbol {
397 crate::Symbol::Function(fqn) => {
398 let node = db
399 .lookup_function_node(fqn.as_ref())
400 .filter(|n| n.active(&db))
401 .ok_or(crate::SymbolLookupError::NotFound)?;
402 let ty = node
403 .return_type(&db)
404 .map(|t| (*t).clone())
405 .unwrap_or_else(Union::mixed);
406 let docstring = node.docstring(&db).map(|s| s.to_string());
407 let definition = node.location(&db);
408 Ok(crate::HoverInfo {
409 ty,
410 docstring,
411 definition,
412 })
413 }
414 crate::Symbol::Method { class, name } => {
415 let node = db
416 .lookup_method_node(class.as_ref(), name.as_ref())
417 .filter(|n| n.active(&db))
418 .ok_or(crate::SymbolLookupError::NotFound)?;
419 let ty = node
420 .return_type(&db)
421 .map(|t| (*t).clone())
422 .unwrap_or_else(Union::mixed);
423 let docstring = node.docstring(&db).map(|s| s.to_string());
424 let definition = node.location(&db);
425 Ok(crate::HoverInfo {
426 ty,
427 docstring,
428 definition,
429 })
430 }
431 crate::Symbol::Class(fqcn) => {
432 let node = db
433 .lookup_class_node(fqcn.as_ref())
434 .filter(|n| n.active(&db))
435 .ok_or(crate::SymbolLookupError::NotFound)?;
436 let ty = Union::single(Atomic::TNamedObject {
437 fqcn: fqcn.clone(),
438 type_params: Vec::new(),
439 });
440 let definition = node.location(&db);
441 Ok(crate::HoverInfo {
442 ty,
443 docstring: None,
444 definition,
445 })
446 }
447 crate::Symbol::Property { class, name } => {
448 let node = db
449 .lookup_property_node(class.as_ref(), name.as_ref())
450 .filter(|n| n.active(&db))
451 .ok_or(crate::SymbolLookupError::NotFound)?;
452 let ty = node.ty(&db).unwrap_or_else(Union::mixed);
453 let definition = node.location(&db);
454 Ok(crate::HoverInfo {
455 ty,
456 docstring: None,
457 definition,
458 })
459 }
460 crate::Symbol::ClassConstant { class, name } => {
461 let node = db
462 .lookup_class_constant_node(class.as_ref(), name.as_ref())
463 .filter(|n| n.active(&db))
464 .ok_or(crate::SymbolLookupError::NotFound)?;
465 let ty = node.ty(&db);
466 let definition = node.location(&db);
467 Ok(crate::HoverInfo {
468 ty,
469 docstring: None,
470 definition,
471 })
472 }
473 crate::Symbol::GlobalConstant(fqn) => {
474 let node = db
475 .lookup_global_constant_node(fqn.as_ref())
476 .filter(|n| n.active(&db))
477 .ok_or(crate::SymbolLookupError::NotFound)?;
478 let ty = node.ty(&db);
479 Ok(crate::HoverInfo {
480 ty,
481 docstring: None,
482 definition: None,
483 })
484 }
485 }
486 }
487
488 /// Every recorded reference to `symbol` with its source location as a Range.
489 /// Use [`crate::FileAnalysis::symbol_at`] to find the symbol at a cursor,
490 /// build a [`crate::Symbol`] from it, and pass it here.
491 pub fn references_to(&self, symbol: &crate::Symbol) -> Vec<(Arc<str>, crate::Range)> {
492 let db = self.snapshot_db();
493 let key = symbol.codebase_key();
494 db.reference_locations(&key)
495 .into_iter()
496 .map(|(file, line, col_start, col_end)| {
497 let range = crate::Range {
498 start: crate::Position {
499 line,
500 column: col_start as u32,
501 },
502 end: crate::Position {
503 line,
504 column: col_end as u32,
505 },
506 };
507 (file, range)
508 })
509 .collect()
510 }
511
512 /// Class-level issues (inheritance violations, abstract-method gaps, override
513 /// incompatibilities) for the given set of files.
514 ///
515 /// These checks are cross-file by nature and are not emitted by
516 /// [`crate::FileAnalyzer::analyze`]. Call this after ingesting or
517 /// re-analyzing a file and its dependents to get the full diagnostic picture.
518 ///
519 /// Circular-inheritance checks always run against the full workspace graph
520 /// regardless of the `files` filter — a cycle is a workspace-wide problem.
521 pub fn class_issues_for(&self, files: &[Arc<str>]) -> Vec<crate::Issue> {
522 let db = self.snapshot_db();
523 let file_set: HashSet<Arc<str>> = files.iter().cloned().collect();
524 let file_data: Vec<(Arc<str>, Arc<str>)> = files
525 .iter()
526 .filter_map(|f| Some((f.clone(), self.source_of(f)?)))
527 .collect();
528 crate::class::ClassAnalyzer::with_files(&db, file_set, &file_data).analyze_all()
529 }
530
531 /// All declarations defined in `file` as a **hierarchical tree**.
532 ///
533 /// Classes/interfaces/traits/enums are returned with their methods,
534 /// properties, and constants nested in `children`. Top-level functions
535 /// and constants are returned with empty `children`.
536 pub fn document_symbols(&self, file: &str) -> Vec<crate::symbol::DocumentSymbol> {
537 use crate::symbol::{DocumentSymbol, DocumentSymbolKind};
538
539 let db = self.snapshot_db();
540 let mut out = Vec::new();
541 for symbol in db.symbols_defined_in_file(file) {
542 // Try class side first — covers Class / Interface / Trait / Enum.
543 if let Some(class_node) = db.lookup_class_node(symbol.as_ref()) {
544 if !class_node.active(&db) {
545 continue;
546 }
547 let (kind, is_enum) = crate::db::class_kind_via_db(&db, symbol.as_ref())
548 .map(|k| {
549 let kind = if k.is_interface {
550 DocumentSymbolKind::Interface
551 } else if k.is_trait {
552 DocumentSymbolKind::Trait
553 } else if k.is_enum {
554 DocumentSymbolKind::Enum
555 } else {
556 DocumentSymbolKind::Class
557 };
558 (kind, k.is_enum)
559 })
560 .unwrap_or((DocumentSymbolKind::Class, false));
561
562 // Build children: methods, properties, and class constants.
563 let mut children: Vec<DocumentSymbol> = Vec::new();
564 for m in db.class_own_methods(symbol.as_ref()) {
565 if !m.active(&db) {
566 continue;
567 }
568 children.push(DocumentSymbol {
569 name: m.name(&db),
570 kind: DocumentSymbolKind::Method,
571 location: m.location(&db),
572 children: Vec::new(),
573 });
574 }
575 for p in db.class_own_properties(symbol.as_ref()) {
576 if !p.active(&db) {
577 continue;
578 }
579 children.push(DocumentSymbol {
580 name: p.name(&db),
581 kind: DocumentSymbolKind::Property,
582 location: p.location(&db),
583 children: Vec::new(),
584 });
585 }
586 for c in db.class_own_constants(symbol.as_ref()) {
587 if !c.active(&db) {
588 continue;
589 }
590 let const_kind = if is_enum {
591 DocumentSymbolKind::EnumCase
592 } else {
593 DocumentSymbolKind::Constant
594 };
595 children.push(DocumentSymbol {
596 name: c.name(&db),
597 kind: const_kind,
598 location: c.location(&db),
599 children: Vec::new(),
600 });
601 }
602
603 out.push(DocumentSymbol {
604 name: symbol.clone(),
605 kind,
606 location: class_node.location(&db),
607 children,
608 });
609 continue;
610 }
611 if let Some(fn_node) = db.lookup_function_node(symbol.as_ref()) {
612 if !fn_node.active(&db) {
613 continue;
614 }
615 out.push(DocumentSymbol {
616 name: symbol.clone(),
617 kind: DocumentSymbolKind::Function,
618 location: fn_node.location(&db),
619 children: Vec::new(),
620 });
621 continue;
622 }
623 // Constants and other top-level declarations: emit with no
624 // location info; consumers can still surface them in an outline.
625 out.push(DocumentSymbol {
626 name: symbol,
627 kind: DocumentSymbolKind::Constant,
628 location: None,
629 children: Vec::new(),
630 });
631 }
632 out
633 }
634
635 /// Returns `true` if a function with `fqn` is registered and active in
636 /// the codebase. Case-insensitive lookup with optional leading backslash.
637 pub fn contains_function(&self, fqn: &str) -> bool {
638 let db = self.snapshot_db();
639 db.lookup_function_node(fqn).is_some_and(|n| n.active(&db))
640 }
641
642 /// Returns `true` if a class / interface / trait / enum with `fqcn` is
643 /// registered and active in the codebase.
644 pub fn contains_class(&self, fqcn: &str) -> bool {
645 let db = self.snapshot_db();
646 db.lookup_class_node(fqcn).is_some_and(|n| n.active(&db))
647 }
648
649 /// Returns `true` if `class` has a method named `name` registered. Method
650 /// names are matched case-insensitively (PHP method dispatch semantics).
651 pub fn contains_method(&self, class: &str, name: &str) -> bool {
652 let db = self.snapshot_db();
653 let name_lower = name.to_ascii_lowercase();
654 db.lookup_method_node(class, &name_lower)
655 .is_some_and(|n| n.active(&db))
656 }
657
658 /// Try to resolve `fqcn` via PSR-4 and ingest the mapped file, returning
659 /// a detailed outcome distinguishing "already there" from "freshly loaded".
660 pub fn lazy_load_class_with_outcome(&self, fqcn: &str) -> crate::LazyLoadOutcome {
661 if self.contains_class(fqcn) {
662 return crate::LazyLoadOutcome::AlreadyLoaded;
663 }
664 if self.lazy_load_class(fqcn) {
665 crate::LazyLoadOutcome::Loaded
666 } else {
667 crate::LazyLoadOutcome::NotResolvable
668 }
669 }
670
671 /// Try to resolve `fqcn` via the configured [`crate::ClassResolver`] and
672 /// ingest the mapped file.
673 ///
674 /// This is the LSP-friendly lazy-load entry point: the analyzer never
675 /// touches `vendor/` on its own, but consumers can ask it to resolve
676 /// individual symbols on demand. Designed to be called when a diagnostic
677 /// would otherwise report `UndefinedClass`.
678 ///
679 /// Returns `true` if either the class is already known or a matching
680 /// file was found and successfully ingested. Returns `false` if:
681 /// - No resolver is configured (neither `with_psr4` nor `with_class_resolver` called),
682 /// - The resolver can't map `fqcn` to a file,
683 /// - The file can't be read, or
684 /// - The file parsed but did not define `fqcn`.
685 pub fn lazy_load_class(&self, fqcn: &str) -> bool {
686 if self.contains_class(fqcn) {
687 return true;
688 }
689 let Some(resolver) = &self.resolver else {
690 return false;
691 };
692 let Some(path) = resolver.resolve(fqcn) else {
693 return false;
694 };
695 let Ok(src) = std::fs::read_to_string(&path) else {
696 return false;
697 };
698 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
699 self.ingest_file(file, Arc::from(src));
700 self.contains_class(fqcn)
701 }
702
703 /// Lazy-load every class transitively reachable from `fqcn` via parent /
704 /// interface / trait edges. Useful when the consumer needs not just the
705 /// requested class but enough of its inheritance chain to type-check
706 /// member access.
707 ///
708 /// Walks at most `max_depth` levels (default in batch analysis is 10).
709 /// Returns the number of classes successfully loaded (not counting
710 /// `fqcn` itself if it was already present).
711 pub fn lazy_load_class_transitive(&self, fqcn: &str, max_depth: usize) -> usize {
712 if self.resolver.is_none() {
713 return 0;
714 }
715 let mut loaded = 0;
716 let mut frontier: Vec<String> = vec![fqcn.to_string()];
717 let mut visited: std::collections::HashSet<String> = std::collections::HashSet::new();
718
719 for _ in 0..max_depth {
720 if frontier.is_empty() {
721 break;
722 }
723 let mut next: Vec<String> = Vec::new();
724 for name in frontier.drain(..) {
725 if !visited.insert(name.clone()) {
726 continue;
727 }
728 let was_present = self.contains_class(&name);
729 let resolved = self.lazy_load_class(&name);
730 if resolved && !was_present {
731 loaded += 1;
732 // Walk the new class's parent / interfaces / traits.
733 let db = self.snapshot_db();
734 if let Some(node) = db.lookup_class_node(&name) {
735 if let Some(parent) = node.parent(&db) {
736 next.push(parent.to_string());
737 }
738 for iface in node.interfaces(&db).iter() {
739 next.push(iface.to_string());
740 }
741 for tr in node.traits(&db).iter() {
742 next.push(tr.to_string());
743 }
744 for ext in node.extends(&db).iter() {
745 next.push(ext.to_string());
746 }
747 }
748 }
749 }
750 frontier = next;
751 }
752 loaded
753 }
754
755 /// Retrieve the source text the session has registered for `file`, if
756 /// any. Returns `None` when the file has never been ingested. Used by
757 /// the parallel re-analysis path to re-feed dependents to Pass 2 without
758 /// the caller having to track sources independently.
759 pub fn source_of(&self, file: &str) -> Option<Arc<str>> {
760 let guard = self.shared_db.salsa.lock();
761 let (ref db, ref files) = *guard;
762 let sf = files.get(file)?;
763 Some(sf.text(db))
764 }
765
766 /// Re-analyze every transitive dependent of `file` in parallel.
767 ///
768 /// When the user saves a file that other files depend on (e.g. editing
769 /// a base class, an interface, or a trait), those dependents may have
770 /// new diagnostics. This method computes them in parallel using rayon
771 /// and returns the per-file analysis results so the LSP server can
772 /// publish updated diagnostics in one batch.
773 ///
774 /// Source text for dependents is retrieved from the session's salsa
775 /// inputs (set by previous `ingest_file` calls) — the caller doesn't
776 /// need to track or re-read files. Files for which the session has no
777 /// source are silently skipped (returns the analyzable subset).
778 ///
779 /// Does not run inference sweeps. For full-fidelity cross-file inferred
780 /// return types, follow up with [`Self::run_inference_sweep`] over the
781 /// affected file set.
782 pub fn analyze_dependents_of(&self, file: &str) -> Vec<(Arc<str>, crate::FileAnalysis)> {
783 use rayon::prelude::*;
784
785 // Phase 1: compute dependents + gather their sources outside the
786 // analysis loop so each worker has everything it needs.
787 let dependents = self.dependency_graph().transitive_dependents(file);
788 if dependents.is_empty() {
789 return Vec::new();
790 }
791 let with_source: Vec<(Arc<str>, Arc<str>)> = dependents
792 .into_iter()
793 .filter_map(|path| {
794 let arc_path: Arc<str> = Arc::from(path.as_str());
795 let src = self.source_of(&path)?;
796 Some((arc_path, src))
797 })
798 .collect();
799 if with_source.is_empty() {
800 return Vec::new();
801 }
802
803 // Phase 2: parallel parse + analyze. Each rayon worker gets its own
804 // database snapshot via FileAnalyzer; writes are isolated to the
805 // session's canonical db (none happen here since we only run Pass 2).
806 with_source
807 .into_par_iter()
808 .map(|(file, source)| {
809 let arena = crate::arena::create_parse_arena(source.len());
810 let parsed = php_rs_parser::parse(&arena, source.as_ref());
811 let analyzer = crate::FileAnalyzer::new(self);
812 let analysis = analyzer.analyze(
813 file.clone(),
814 source.as_ref(),
815 &parsed.program,
816 &parsed.source_map,
817 );
818 (file, analysis)
819 })
820 .collect()
821 }
822
823 /// FQCNs that `file` imports via `use` statements but that aren't yet
824 /// loaded in the session.
825 ///
826 /// Designed as the input to background prefetching: after the LSP server
827 /// ingests an open buffer, it can call this and lazy-load the returned
828 /// FQCNs on a worker thread so the user's first Cmd+Click into vendor
829 /// code doesn't pay the file-read+parse cost.
830 ///
831 /// Returns an empty Vec if the file hasn't been ingested or has no
832 /// unresolved imports.
833 pub fn pending_lazy_loads(&self, file: &str) -> Vec<Arc<str>> {
834 let db = self.snapshot_db();
835 let imports = db.file_imports(file);
836 if imports.is_empty() {
837 return Vec::new();
838 }
839 let mut out = Vec::new();
840 for fqcn in imports.values() {
841 // Cheap check: skip imports already in the codebase.
842 if db.lookup_class_node(fqcn).is_some_and(|n| n.active(&db)) {
843 continue;
844 }
845 // Only worth queueing if the resolver could in principle find it.
846 if let Some(resolver) = &self.resolver {
847 if resolver.resolve(fqcn).is_some() {
848 out.push(Arc::from(fqcn.as_str()));
849 }
850 }
851 }
852 out
853 }
854
855 /// Convenience: synchronously lazy-load every import of `file` that
856 /// isn't already in the codebase. Returns the number successfully loaded.
857 ///
858 /// For non-blocking prefetch, call this from a worker thread:
859 ///
860 /// ```ignore
861 /// let s = session.clone(); // AnalysisSession is wrapped in Arc by callers
862 /// std::thread::spawn(move || {
863 /// s.prefetch_imports(&file_path);
864 /// });
865 /// ```
866 ///
867 /// Internally walks the inheritance chain of each loaded class to a
868 /// shallow depth so member access on imported types type-checks without
869 /// the user paying the cost on their first navigation.
870 pub fn prefetch_imports(&self, file: &str) -> usize {
871 let pending = self.pending_lazy_loads(file);
872 let mut loaded = 0;
873 for fqcn in pending {
874 // Use the transitive walker with a small depth so we pick up
875 // parent classes / interfaces needed for member resolution, but
876 // don't recursively pull in the entire vendor tree.
877 loaded += self.lazy_load_class_transitive(&fqcn, 2);
878 }
879 loaded
880 }
881
882 /// All class / interface / trait / enum FQCNs currently known to the
883 /// session, each paired with the file that defines them when available.
884 ///
885 /// Use this to build workspace-wide views (outline, fuzzy search, etc.).
886 /// Consumers implement their own search/match logic on top — the analyzer
887 /// only exposes the iterator.
888 pub fn all_classes(&self) -> Vec<(Arc<str>, Option<mir_codebase::storage::Location>)> {
889 let db = self.snapshot_db();
890 db.active_class_node_fqcns()
891 .into_iter()
892 .filter_map(|fqcn| {
893 let node = db.lookup_class_node(fqcn.as_ref())?;
894 if !node.active(&db) {
895 return None;
896 }
897 Some((fqcn, node.location(&db)))
898 })
899 .collect()
900 }
901
902 /// All global function FQNs currently known to the session, each paired
903 /// with their declaration location when available.
904 pub fn all_functions(&self) -> Vec<(Arc<str>, Option<mir_codebase::storage::Location>)> {
905 let db = self.snapshot_db();
906 db.active_function_node_fqns()
907 .into_iter()
908 .filter_map(|fqn| {
909 let node = db.lookup_function_node(fqn.as_ref())?;
910 if !node.active(&db) {
911 return None;
912 }
913 Some((fqn, node.location(&db)))
914 })
915 .collect()
916 }
917
918 /// Compute `file`'s outgoing dependency edges and update the cache's
919 /// reverse-dep graph in place. No-op if no cache is configured.
920 fn update_reverse_deps_for(&self, file: &str) {
921 let Some(cache) = self.cache.as_deref() else {
922 return;
923 };
924 let db = self.snapshot_db();
925 let targets = file_outgoing_dependencies(&db, file);
926 cache.update_reverse_deps_for_file(file, &targets);
927 }
928
929 /// Cross-file inference sweep. For each `(file, source)` pair, runs the
930 /// Pass 2 inference-only mode on a cloned db (parallel via rayon), then
931 /// commits the collected inferred return types to the canonical db.
932 ///
933 /// Call this on idle / save / explicit user request, **not** on every
934 /// keystroke — [`crate::FileAnalyzer::analyze`] deliberately skips
935 /// inference sweep on the hot path. Files whose source contains parse
936 /// errors are silently skipped.
937 pub fn run_inference_sweep(&self, files: &[(Arc<str>, Arc<str>)]) {
938 self.ensure_stubs_loaded();
939
940 // The priming db lives only inside `gather_inferred_types`. After it
941 // returns, all rayon-clone references to the salsa storage are dropped
942 // — required so that the subsequent `commit_inferred_return_types`
943 // call (which calls salsa's `cancel_others`) doesn't deadlock waiting
944 // for outstanding db references.
945 let (functions, methods) =
946 gather_inferred_types(self.snapshot_db(), files, self.php_version);
947
948 let mut guard = self.shared_db.salsa.lock();
949 guard.0.commit_inferred_return_types(functions, methods);
950 }
951
952 /// File dependency graph: which files depend on which other files.
953 /// Used for incremental invalidation in LSP servers and build systems.
954 ///
955 /// Dependencies are computed from:
956 /// - Direct imports (use statements)
957 /// - Class inheritance (parent classes, interfaces, traits)
958 pub fn dependency_graph(&self) -> crate::DependencyGraph {
959 let db = self.snapshot_db();
960
961 // Get all files from the session's salsa database
962 let guard = self.shared_db.salsa.lock();
963 let all_files: Vec<String> = guard.1.keys().map(|f| f.as_ref().to_string()).collect();
964 drop(guard);
965
966 // Build forward dependency graph: file → [files it depends on]
967 let mut dependencies: std::collections::HashMap<String, Vec<String>> =
968 std::collections::HashMap::new();
969 for file in &all_files {
970 let deps = file_outgoing_dependencies(&db, file);
971 dependencies.insert(file.clone(), deps.into_iter().collect());
972 }
973
974 // Build reverse dependency graph: file → [files that depend on it]
975 let mut dependents: std::collections::HashMap<String, Vec<String>> =
976 std::collections::HashMap::new();
977 for (file, deps) in &dependencies {
978 for dep in deps {
979 dependents
980 .entry(dep.clone())
981 .or_default()
982 .push(file.clone());
983 }
984 }
985
986 // Sort for determinism
987 for deps in dependents.values_mut() {
988 deps.sort();
989 }
990
991 crate::DependencyGraph {
992 dependencies,
993 dependents,
994 }
995 }
996}
997
998/// Drive Pass 2 inference-only mode in parallel across `files`, accumulating
999/// inferred function and method return types. The `db_priming` MirDb is
1000/// consumed (cloned per spawned task and dropped on return), so the caller's
1001/// canonical db can subsequently take exclusive access without deadlock.
1002///
1003/// Crate-internal so [`crate::project::ProjectAnalyzer`] can use the same
1004/// deadlock-safe helper for its lazy-load reanalysis sweep.
1005#[allow(clippy::type_complexity)]
1006pub(crate) fn gather_inferred_types(
1007 db_priming: MirDb,
1008 files: &[(Arc<str>, Arc<str>)],
1009 php_version: PhpVersion,
1010) -> (
1011 Vec<(Arc<str>, mir_types::Union)>,
1012 Vec<(Arc<str>, Arc<str>, mir_types::Union)>,
1013) {
1014 use crate::pass2::Pass2Driver;
1015 use mir_types::Union;
1016
1017 type Functions = Vec<(Arc<str>, Union)>;
1018 type Methods = Vec<(Arc<str>, Arc<str>, Union)>;
1019 let functions: Arc<Mutex<Functions>> = Arc::new(Mutex::new(Vec::new()));
1020 let methods: Arc<Mutex<Methods>> = Arc::new(Mutex::new(Vec::new()));
1021
1022 rayon::in_place_scope(|s| {
1023 for (file, source) in files {
1024 let db = db_priming.clone();
1025 let functions = Arc::clone(&functions);
1026 let methods = Arc::clone(&methods);
1027 let file = file.clone();
1028 let source = source.clone();
1029
1030 s.spawn(move |_| {
1031 let arena = crate::arena::create_parse_arena(source.len());
1032 let parsed = php_rs_parser::parse(&arena, source.as_ref());
1033 if !parsed.errors.is_empty() {
1034 return;
1035 }
1036 let driver = Pass2Driver::new_inference_only(&db as &dyn MirDatabase, php_version);
1037 driver.analyze_bodies(&parsed.program, file, source.as_ref(), &parsed.source_map);
1038 let inferred = driver.take_inferred_types();
1039 {
1040 let mut f = functions.lock();
1041 f.extend(inferred.functions);
1042 }
1043 {
1044 let mut m = methods.lock();
1045 m.extend(inferred.methods);
1046 }
1047 });
1048 }
1049 });
1050
1051 let functions = Arc::try_unwrap(functions)
1052 .map(|m| m.into_inner())
1053 .unwrap_or_else(|arc| arc.lock().clone());
1054 let methods = Arc::try_unwrap(methods)
1055 .map(|m| m.into_inner())
1056 .unwrap_or_else(|arc| arc.lock().clone());
1057
1058 (functions, methods)
1059}
1060
1061/// Compute the set of files `file` depends on: defining files of its imports,
1062/// plus parent / interfaces / traits' defining files for any classes declared
1063/// in `file`. Self-edges are excluded.
1064fn file_outgoing_dependencies(db: &dyn MirDatabase, file: &str) -> HashSet<String> {
1065 let mut targets: HashSet<String> = HashSet::new();
1066
1067 let mut add_target = |symbol: &str| {
1068 if let Some(defining_file) = db.symbol_defining_file(symbol) {
1069 let def = defining_file.as_ref().to_string();
1070 if def != file {
1071 targets.insert(def);
1072 }
1073 }
1074 };
1075
1076 let imports = db.file_imports(file);
1077 for fqcn in imports.values() {
1078 add_target(fqcn);
1079 }
1080
1081 for fqcn in db.symbols_defined_in_file(file) {
1082 let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
1083 continue;
1084 };
1085 if let Some(parent) = node.parent(db) {
1086 add_target(parent.as_ref());
1087 }
1088 for iface in node.interfaces(db).iter() {
1089 add_target(iface.as_ref());
1090 }
1091 for tr in node.traits(db).iter() {
1092 add_target(tr.as_ref());
1093 }
1094 }
1095
1096 targets
1097}