mir_analyzer/session.rs
1//! Session-based analysis API for incremental, per-file analysis.
2//!
3//! [`AnalysisSession`] owns the salsa database and per-session caches for a
4//! long-running analysis context shared across many per-file analyses. Reads
5//! clone the database under a brief lock, then run lock-free; writes hold the
6//! lock briefly to mutate canonical state. `MirDb::clone()` is cheap
7//! (Arc-wrapped registries), so this pattern gives parallel readers without
8//! blocking on concurrent writes for longer than the clone itself.
9//!
10//! See [`crate::file_analyzer::FileAnalyzer`] for the per-file Pass 2 entry
11//! point that operates against a session.
12
13use std::collections::HashSet;
14use std::path::PathBuf;
15use std::sync::Arc;
16
17use crate::cache::AnalysisCache;
18use crate::composer::Psr4Map;
19use crate::db::{MirDatabase, MirDb};
20use crate::php_version::PhpVersion;
21use crate::shared_db::SharedDb;
22
/// Long-lived analysis context. Owns the salsa database and tracks which
/// stubs have been loaded.
///
/// Cheap to clone the inner db for parallel reads; writes funnel through
/// [`Self::ingest_file`], [`Self::invalidate_file`], and the crate-internal
/// [`Self::with_db_mut`].
pub struct AnalysisSession {
    /// Shared database management (salsa, file registry, stub tracking).
    /// Extracted to allow code sharing with ProjectAnalyzer.
    shared_db: Arc<SharedDb>,
    /// Optional analysis cache. `None` until [`Self::with_cache`] or
    /// [`Self::with_cache_dir`] attaches one; when absent, the reverse-
    /// dependency bookkeeping in this type is skipped.
    cache: Option<Arc<AnalysisCache>>,
    /// PSR-4 / Composer autoload map. Retained alongside `resolver` so the
    /// `psr4()` accessor can still return a typed `Psr4Map` for callers that
    /// need Composer-specific data (project_files / vendor_files / etc.).
    psr4: Option<Arc<Psr4Map>>,
    /// Generic class resolver used for on-demand lazy loading. When `psr4`
    /// is set via [`Self::with_psr4`], this is populated with the same map
    /// re-typed as `dyn ClassResolver`. Consumers can also supply their own
    /// resolver via [`Self::with_class_resolver`] without going through
    /// Composer.
    resolver: Option<Arc<dyn crate::ClassResolver>>,
    /// Target PHP language version; forwarded to every stub-ingestion call.
    php_version: PhpVersion,
    /// Individual user stub files, set by [`Self::with_user_stubs`].
    user_stub_files: Vec<PathBuf>,
    /// Directories of user stubs, set by [`Self::with_user_stubs`].
    user_stub_dirs: Vec<PathBuf>,
}
48
49impl AnalysisSession {
50 /// Create a session targeting the given PHP language version.
51 pub fn new(php_version: PhpVersion) -> Self {
52 Self {
53 shared_db: Arc::new(SharedDb::new()),
54 cache: None,
55 psr4: None,
56 resolver: None,
57 php_version,
58 user_stub_files: Vec::new(),
59 user_stub_dirs: Vec::new(),
60 }
61 }
62
63 pub fn with_cache(mut self, cache: Arc<AnalysisCache>) -> Self {
64 self.cache = Some(cache);
65 self
66 }
67
68 /// Convenience: open a disk-backed cache at `cache_dir` and attach it.
69 /// Avoids forcing callers to wrap [`AnalysisCache`] in `Arc` themselves.
70 pub fn with_cache_dir(self, cache_dir: &std::path::Path) -> Self {
71 self.with_cache(Arc::new(AnalysisCache::open(cache_dir)))
72 }
73
74 /// Attach a Composer autoload map (PSR-4, PSR-0, classmap, files).
75 /// Sets the same map as the active [`crate::ClassResolver`] so
76 /// [`Self::lazy_load_class`] works out of the box.
77 pub fn with_psr4(mut self, map: Arc<Psr4Map>) -> Self {
78 let resolver: Arc<dyn crate::ClassResolver> = map.clone();
79 self.psr4 = Some(map);
80 self.resolver = Some(resolver);
81 self
82 }
83
84 /// Attach a generic class resolver for projects that don't use Composer
85 /// (WordPress, Drupal, custom autoloaders, workspace-walk indexes).
86 /// Replaces any previously-set Composer-backed resolver.
87 pub fn with_class_resolver(mut self, resolver: Arc<dyn crate::ClassResolver>) -> Self {
88 self.resolver = Some(resolver);
89 self
90 }
91
92 pub fn with_user_stubs(mut self, files: Vec<PathBuf>, dirs: Vec<PathBuf>) -> Self {
93 self.user_stub_files = files;
94 self.user_stub_dirs = dirs;
95 self
96 }
97
98 pub fn php_version(&self) -> PhpVersion {
99 self.php_version
100 }
101
102 pub fn cache(&self) -> Option<&AnalysisCache> {
103 self.cache.as_deref()
104 }
105
106 pub fn psr4(&self) -> Option<&Psr4Map> {
107 self.psr4.as_deref()
108 }
109
110 /// Load every PHP built-in stub plus any configured user stubs.
111 ///
112 /// **Deprecated**: prefer [`Self::ensure_all_stubs_loaded`] (explicit
113 /// "comprehensive") or [`Self::ensure_essential_stubs_loaded`] (fast
114 /// cold-start with auto-discovery on demand).
115 #[doc(hidden)]
116 pub fn ensure_stubs_loaded(&self) {
117 self.ensure_all_stubs_loaded();
118 }
119
120 /// Load only the curated set of essential stubs (Core, standard, SPL,
121 /// date) plus any configured user stubs. About 25 of 120 stub files;
122 /// covers types and functions used by virtually all PHP code.
123 ///
124 /// Other extension stubs (Reflection, gd, openssl, …) can be brought in
125 /// on demand via [`Self::ensure_stubs_for_symbol`] when user code
126 /// references them. Idempotent — already-loaded stubs are skipped.
127 pub fn ensure_essential_stubs_loaded(&self) {
128 self.shared_db
129 .ingest_stub_paths(crate::stubs::ESSENTIAL_STUB_PATHS, self.php_version);
130 self.ensure_user_stubs_loaded();
131 }
132
133 /// Load every embedded PHP stub plus any configured user stubs.
134 /// Use for batch tools (CLI, full project analysis) where comprehensive
135 /// symbol coverage matters more than cold-start latency.
136 pub fn ensure_all_stubs_loaded(&self) {
137 let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
138 self.shared_db.ingest_stub_paths(&paths, self.php_version);
139 self.ensure_user_stubs_loaded();
140 }
141
142 /// Ensure the embedded stub that defines `name` (a function) is ingested.
143 /// Returns `true` when a matching stub exists (whether or not it was
144 /// already loaded), `false` when `name` isn't a known PHP built-in.
145 ///
146 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead —
147 /// it auto-discovers needed stubs from a parsed file.
148 #[doc(hidden)]
149 pub fn ensure_stub_for_function(&self, name: &str) -> bool {
150 match crate::stubs::stub_path_for_function(name) {
151 Some(path) => {
152 self.shared_db.ingest_stub_paths(&[path], self.php_version);
153 true
154 }
155 None => false,
156 }
157 }
158
159 /// Ensure the embedded stub that defines `fqcn` (a class / interface /
160 /// trait / enum) is ingested. Case-insensitive lookup with optional
161 /// leading backslash.
162 ///
163 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead.
164 #[doc(hidden)]
165 pub fn ensure_stub_for_class(&self, fqcn: &str) -> bool {
166 match crate::stubs::stub_path_for_class(fqcn) {
167 Some(path) => {
168 self.shared_db.ingest_stub_paths(&[path], self.php_version);
169 true
170 }
171 None => false,
172 }
173 }
174
175 /// Ensure the embedded stub that defines `name` (a constant) is ingested.
176 ///
177 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead.
178 #[doc(hidden)]
179 pub fn ensure_stub_for_constant(&self, name: &str) -> bool {
180 match crate::stubs::stub_path_for_constant(name) {
181 Some(path) => {
182 self.shared_db.ingest_stub_paths(&[path], self.php_version);
183 true
184 }
185 None => false,
186 }
187 }
188
189 /// Number of distinct embedded stubs currently ingested into the session.
190 /// Useful for diagnostics and bench reporting.
191 pub fn loaded_stub_count(&self) -> usize {
192 self.shared_db.loaded_stubs.lock().len()
193 }
194
195 /// Auto-discover and ingest the embedded stubs needed to cover every
196 /// built-in PHP function / class / constant referenced by `source`.
197 ///
198 /// Used by [`crate::FileAnalyzer::analyze`] to keep essentials-only mode
199 /// correct without forcing callers to enumerate which stubs they need.
200 /// Idempotent — already-loaded stubs are skipped via [`Self::loaded_stubs`].
201 ///
202 /// The discovery scan is a coarse identifier sweep (see
203 /// [`crate::stubs::collect_referenced_builtin_paths`]) — it may pull in
204 /// a slightly larger set than the file strictly needs, but never misses
205 /// a referenced built-in. Cost is sub-millisecond per file.
206 ///
207 /// Fast path: if every embedded stub is already loaded (e.g. after a
208 /// batch tool called [`Self::ensure_all_stubs_loaded`]), the source scan
209 /// is skipped entirely.
210 pub fn ensure_stubs_for_source(&self, source: &str) {
211 // Cheap check first: skip the scan entirely when we already know we
212 // have everything. Avoids a ~50-500µs source walk on every analyze
213 // call in batch / warm-session scenarios.
214 {
215 let loaded = self.shared_db.loaded_stubs.lock();
216 if loaded.len() >= crate::stubs::stub_files().len() {
217 return;
218 }
219 }
220 let paths = crate::stubs::collect_referenced_builtin_paths(source);
221 if paths.is_empty() {
222 return;
223 }
224 self.shared_db.ingest_stub_paths(&paths, self.php_version);
225 }
226
227 /// Discover and ingest stubs by walking the parsed AST of a PHP file.
228 ///
229 /// Similar to [`Self::ensure_stubs_for_source`], but takes an already-parsed
230 /// AST instead of raw source text. Produces zero false positives since it
231 /// only extracts identifiers from actual AST nodes (not from strings or
232 /// comments). Preferred over `ensure_stubs_for_source` when the AST is
233 /// already available (e.g., in [`crate::FileAnalyzer`]).
234 ///
235 /// Idempotent and skips the scan if all stubs are already loaded.
236 pub fn ensure_stubs_for_ast(&self, program: &php_ast::ast::Program<'_, '_>) {
237 {
238 let loaded = self.shared_db.loaded_stubs.lock();
239 if loaded.len() >= crate::stubs::stub_files().len() {
240 return;
241 }
242 }
243 let paths = crate::stubs::collect_referenced_builtin_paths_from_ast(program);
244 if paths.is_empty() {
245 return;
246 }
247 self.shared_db.ingest_stub_paths(&paths, self.php_version);
248 }
249
250 fn ensure_user_stubs_loaded(&self) {
251 self.shared_db
252 .ingest_user_stubs(&self.user_stub_files, &self.user_stub_dirs);
253 }
254
255 /// Cheap clone of the salsa db for a read-only query. The lock is held
256 /// only for the duration of the clone, so concurrent readers never
257 /// serialize on each other or on writes for longer than the clone itself.
258 ///
259 /// **Internal API — exposes Salsa types.** Subject to change without
260 /// notice. Public consumers should use the typed query methods
261 /// ([`Self::definition_of`], [`Self::hover`], etc.) instead.
262 #[doc(hidden)]
263 pub fn snapshot_db(&self) -> MirDb {
264 self.shared_db.snapshot_db()
265 }
266
267 /// Run a closure with read access to a database snapshot.
268 ///
269 /// **Internal API — exposes Salsa types.** Subject to change without
270 /// notice.
271 #[doc(hidden)]
272 pub fn read<R>(&self, f: impl FnOnce(&dyn MirDatabase) -> R) -> R {
273 let db = self.snapshot_db();
274 f(&db)
275 }
276
277 /// Pass 1 ingestion. Updates the file's source text in the salsa db,
278 /// runs definition collection, and ingests the resulting stub slice.
279 /// Triggers stub loading on first call. Also updates the cache's reverse-
280 /// dependency graph for `file` so cross-file invalidation stays correct
281 /// across incremental edits — without rebuilding the graph from scratch.
282 ///
283 /// If `file` was previously ingested, its old definitions and reference
284 /// locations are removed first so renames / deletions don't leave stale
285 /// state in the codebase. (Without this, long-running sessions would
286 /// accumulate dead reference-location entries indefinitely.)
287 pub fn ingest_file(&self, file: Arc<str>, source: Arc<str>) {
288 self.ensure_stubs_loaded();
289 {
290 let mut guard = self.shared_db.salsa.lock();
291 guard.remove_file_definitions(file.as_ref());
292 }
293 let _file_defs = self
294 .shared_db
295 .collect_and_ingest_file(file.clone(), source.as_ref());
296 self.update_reverse_deps_for(&file);
297 }
298
299 /// Drop a file's contribution to the session: codebase definitions,
300 /// reference locations, salsa input handle, cache entry, and outgoing
301 /// reverse-dependency edges. Cache entries of *dependent* files are
302 /// also evicted (cross-file invalidation).
303 ///
304 /// Use this when a file is closed by the consumer, or before a re-ingest
305 /// of substantially changed content. (Plain re-ingest via
306 /// [`Self::ingest_file`] also drops old definitions, but does not
307 /// remove the salsa input handle — call this for full cleanup.)
308 pub fn invalidate_file(&self, file: &str) {
309 {
310 let mut guard = self.shared_db.salsa.lock();
311 guard.remove_file_definitions(file);
312 guard.remove_source_file(file);
313 }
314 if let Some(cache) = &self.cache {
315 cache.update_reverse_deps_for_file(file, &HashSet::new());
316 cache.evict_with_dependents(&[file.to_string()]);
317 }
318 }
319
320 /// Number of files currently tracked in this session's salsa input set.
321 /// Stable across reads; useful for diagnostics and memory bounds checks.
322 pub fn tracked_file_count(&self) -> usize {
323 let guard = self.shared_db.salsa.lock();
324 guard.source_file_count()
325 }
326
327 // -----------------------------------------------------------------------
328 // Read-only codebase queries
329 //
330 // All take a brief lock to clone the db, then run the lookup against the
331 // owned snapshot — concurrent edits proceed without blocking.
332 // -----------------------------------------------------------------------
333
334 /// Resolve a top-level symbol (class or function) to its declaration
335 /// location. Powers go-to-definition.
336 ///
337 /// Returns:
338 /// - `Ok(Location)` — symbol found with a source location
339 /// - `Err(NotFound)` — no such symbol in the codebase
340 /// - `Err(NoSourceLocation)` — symbol exists but has no recorded span
341 /// (e.g. some stub-only declarations)
342 pub fn definition_of(
343 &self,
344 symbol: &crate::Symbol,
345 ) -> Result<mir_codebase::storage::Location, crate::SymbolLookupError> {
346 let db = self.snapshot_db();
347 match symbol {
348 crate::Symbol::Class(fqcn) => {
349 let node = db
350 .lookup_class_node(fqcn.as_ref())
351 .filter(|n| n.active(&db))
352 .ok_or(crate::SymbolLookupError::NotFound)?;
353 node.location(&db)
354 .ok_or(crate::SymbolLookupError::NoSourceLocation)
355 }
356 crate::Symbol::Function(fqn) => {
357 let node = db
358 .lookup_function_node(fqn.as_ref())
359 .filter(|n| n.active(&db))
360 .ok_or(crate::SymbolLookupError::NotFound)?;
361 node.location(&db)
362 .ok_or(crate::SymbolLookupError::NoSourceLocation)
363 }
364 crate::Symbol::Method { class, name }
365 | crate::Symbol::Property { class, name }
366 | crate::Symbol::ClassConstant { class, name } => {
367 crate::db::member_location_via_db(&db, class, name)
368 .ok_or(crate::SymbolLookupError::NotFound)
369 }
370 crate::Symbol::GlobalConstant(_) => {
371 // Global constants don't currently store location info
372 Err(crate::SymbolLookupError::NoSourceLocation)
373 }
374 }
375 }
376
377 /// Hover information for a symbol: type, docstring, and definition location.
378 ///
379 /// Use [`crate::FileAnalysis::symbol_at`] to find the symbol at a cursor
380 /// position, then build a [`crate::Symbol`] from its `kind`. This method
381 /// assembles the displayable hover data.
382 ///
383 /// Returns `Err(NotFound)` if the symbol doesn't exist. May still return
384 /// `Ok` with `docstring: None` or `definition: None` if those specific
385 /// pieces aren't available.
386 pub fn hover(
387 &self,
388 symbol: &crate::Symbol,
389 ) -> Result<crate::HoverInfo, crate::SymbolLookupError> {
390 use mir_types::{Atomic, Union};
391 let db = self.snapshot_db();
392 match symbol {
393 crate::Symbol::Function(fqn) => {
394 let node = db
395 .lookup_function_node(fqn.as_ref())
396 .filter(|n| n.active(&db))
397 .ok_or(crate::SymbolLookupError::NotFound)?;
398 let ty = node
399 .return_type(&db)
400 .map(|t| (*t).clone())
401 .unwrap_or_else(Union::mixed);
402 let docstring = node.docstring(&db).map(|s| s.to_string());
403 let definition = node.location(&db);
404 Ok(crate::HoverInfo {
405 ty,
406 docstring,
407 definition,
408 })
409 }
410 crate::Symbol::Method { class, name } => {
411 let node = db
412 .lookup_method_node(class.as_ref(), name.as_ref())
413 .filter(|n| n.active(&db))
414 .ok_or(crate::SymbolLookupError::NotFound)?;
415 let ty = node
416 .return_type(&db)
417 .map(|t| (*t).clone())
418 .unwrap_or_else(Union::mixed);
419 let docstring = node.docstring(&db).map(|s| s.to_string());
420 let definition = node.location(&db);
421 Ok(crate::HoverInfo {
422 ty,
423 docstring,
424 definition,
425 })
426 }
427 crate::Symbol::Class(fqcn) => {
428 let node = db
429 .lookup_class_node(fqcn.as_ref())
430 .filter(|n| n.active(&db))
431 .ok_or(crate::SymbolLookupError::NotFound)?;
432 let ty = Union::single(Atomic::TNamedObject {
433 fqcn: fqcn.clone(),
434 type_params: Vec::new(),
435 });
436 let definition = node.location(&db);
437 Ok(crate::HoverInfo {
438 ty,
439 docstring: None,
440 definition,
441 })
442 }
443 crate::Symbol::Property { class, name } => {
444 let node = db
445 .lookup_property_node(class.as_ref(), name.as_ref())
446 .filter(|n| n.active(&db))
447 .ok_or(crate::SymbolLookupError::NotFound)?;
448 let ty = node.ty(&db).unwrap_or_else(Union::mixed);
449 let definition = node.location(&db);
450 Ok(crate::HoverInfo {
451 ty,
452 docstring: None,
453 definition,
454 })
455 }
456 crate::Symbol::ClassConstant { class, name } => {
457 let node = db
458 .lookup_class_constant_node(class.as_ref(), name.as_ref())
459 .filter(|n| n.active(&db))
460 .ok_or(crate::SymbolLookupError::NotFound)?;
461 let ty = node.ty(&db);
462 let definition = node.location(&db);
463 Ok(crate::HoverInfo {
464 ty,
465 docstring: None,
466 definition,
467 })
468 }
469 crate::Symbol::GlobalConstant(fqn) => {
470 let node = db
471 .lookup_global_constant_node(fqn.as_ref())
472 .filter(|n| n.active(&db))
473 .ok_or(crate::SymbolLookupError::NotFound)?;
474 let ty = node.ty(&db);
475 Ok(crate::HoverInfo {
476 ty,
477 docstring: None,
478 definition: None,
479 })
480 }
481 }
482 }
483
484 /// Every recorded reference to `symbol` with its source location as a Range.
485 /// Use [`crate::FileAnalysis::symbol_at`] to find the symbol at a cursor,
486 /// build a [`crate::Symbol`] from it, and pass it here.
487 pub fn references_to(&self, symbol: &crate::Symbol) -> Vec<(Arc<str>, crate::Range)> {
488 let db = self.snapshot_db();
489 let key = symbol.codebase_key();
490 db.reference_locations(&key)
491 .into_iter()
492 .map(|(file, line, col_start, col_end)| {
493 let range = crate::Range {
494 start: crate::Position {
495 line,
496 column: col_start as u32,
497 },
498 end: crate::Position {
499 line,
500 column: col_end as u32,
501 },
502 };
503 (file, range)
504 })
505 .collect()
506 }
507
508 /// Class-level issues (inheritance violations, abstract-method gaps, override
509 /// incompatibilities) for the given set of files.
510 ///
511 /// These checks are cross-file by nature and are not emitted by
512 /// [`crate::FileAnalyzer::analyze`]. Call this after ingesting or
513 /// re-analyzing a file and its dependents to get the full diagnostic picture.
514 ///
515 /// Circular-inheritance checks always run against the full workspace graph
516 /// regardless of the `files` filter — a cycle is a workspace-wide problem.
517 pub fn class_issues_for(&self, files: &[Arc<str>]) -> Vec<crate::Issue> {
518 let db = self.snapshot_db();
519 let file_set: HashSet<Arc<str>> = files.iter().cloned().collect();
520 let file_data: Vec<(Arc<str>, Arc<str>)> = files
521 .iter()
522 .filter_map(|f| Some((f.clone(), self.source_of(f)?)))
523 .collect();
524 crate::class::ClassAnalyzer::with_files(&db, file_set, &file_data).analyze_all()
525 }
526
527 /// All declarations defined in `file` as a **hierarchical tree**.
528 ///
529 /// Classes/interfaces/traits/enums are returned with their methods,
530 /// properties, and constants nested in `children`. Top-level functions
531 /// and constants are returned with empty `children`.
532 pub fn document_symbols(&self, file: &str) -> Vec<crate::symbol::DocumentSymbol> {
533 use crate::symbol::{DocumentSymbol, DocumentSymbolKind};
534
535 let db = self.snapshot_db();
536 let mut out = Vec::new();
537 for symbol in db.symbols_defined_in_file(file) {
538 // Try class side first — covers Class / Interface / Trait / Enum.
539 if let Some(class_node) = db.lookup_class_node(symbol.as_ref()) {
540 if !class_node.active(&db) {
541 continue;
542 }
543 let (kind, is_enum) = crate::db::class_kind_via_db(&db, symbol.as_ref())
544 .map(|k| {
545 let kind = if k.is_interface {
546 DocumentSymbolKind::Interface
547 } else if k.is_trait {
548 DocumentSymbolKind::Trait
549 } else if k.is_enum {
550 DocumentSymbolKind::Enum
551 } else {
552 DocumentSymbolKind::Class
553 };
554 (kind, k.is_enum)
555 })
556 .unwrap_or((DocumentSymbolKind::Class, false));
557
558 // Build children: methods, properties, and class constants.
559 let mut children: Vec<DocumentSymbol> = Vec::new();
560 for m in db.class_own_methods(symbol.as_ref()) {
561 if !m.active(&db) {
562 continue;
563 }
564 children.push(DocumentSymbol {
565 name: m.name(&db),
566 kind: DocumentSymbolKind::Method,
567 location: m.location(&db),
568 children: Vec::new(),
569 });
570 }
571 for p in db.class_own_properties(symbol.as_ref()) {
572 if !p.active(&db) {
573 continue;
574 }
575 children.push(DocumentSymbol {
576 name: p.name(&db),
577 kind: DocumentSymbolKind::Property,
578 location: p.location(&db),
579 children: Vec::new(),
580 });
581 }
582 for c in db.class_own_constants(symbol.as_ref()) {
583 if !c.active(&db) {
584 continue;
585 }
586 let const_kind = if is_enum {
587 DocumentSymbolKind::EnumCase
588 } else {
589 DocumentSymbolKind::Constant
590 };
591 children.push(DocumentSymbol {
592 name: c.name(&db),
593 kind: const_kind,
594 location: c.location(&db),
595 children: Vec::new(),
596 });
597 }
598
599 out.push(DocumentSymbol {
600 name: symbol.clone(),
601 kind,
602 location: class_node.location(&db),
603 children,
604 });
605 continue;
606 }
607 if let Some(fn_node) = db.lookup_function_node(symbol.as_ref()) {
608 if !fn_node.active(&db) {
609 continue;
610 }
611 out.push(DocumentSymbol {
612 name: symbol.clone(),
613 kind: DocumentSymbolKind::Function,
614 location: fn_node.location(&db),
615 children: Vec::new(),
616 });
617 continue;
618 }
619 // Constants and other top-level declarations: emit with no
620 // location info; consumers can still surface them in an outline.
621 out.push(DocumentSymbol {
622 name: symbol,
623 kind: DocumentSymbolKind::Constant,
624 location: None,
625 children: Vec::new(),
626 });
627 }
628 out
629 }
630
631 /// Returns `true` if a function with `fqn` is registered and active in
632 /// the codebase. Case-insensitive lookup with optional leading backslash.
633 pub fn contains_function(&self, fqn: &str) -> bool {
634 let db = self.snapshot_db();
635 db.lookup_function_node(fqn).is_some_and(|n| n.active(&db))
636 }
637
638 /// Returns `true` if a class / interface / trait / enum with `fqcn` is
639 /// registered and active in the codebase.
640 pub fn contains_class(&self, fqcn: &str) -> bool {
641 let db = self.snapshot_db();
642 db.lookup_class_node(fqcn).is_some_and(|n| n.active(&db))
643 }
644
645 /// Returns `true` if `class` has a method named `name` registered. Method
646 /// names are matched case-insensitively (PHP method dispatch semantics).
647 pub fn contains_method(&self, class: &str, name: &str) -> bool {
648 let db = self.snapshot_db();
649 let name_lower = name.to_ascii_lowercase();
650 db.lookup_method_node(class, &name_lower)
651 .is_some_and(|n| n.active(&db))
652 }
653
654 /// Try to resolve `fqcn` via PSR-4 and ingest the mapped file, returning
655 /// a detailed outcome distinguishing "already there" from "freshly loaded".
656 pub fn lazy_load_class_with_outcome(&self, fqcn: &str) -> crate::LazyLoadOutcome {
657 if self.contains_class(fqcn) {
658 return crate::LazyLoadOutcome::AlreadyLoaded;
659 }
660 if self.lazy_load_class(fqcn) {
661 crate::LazyLoadOutcome::Loaded
662 } else {
663 crate::LazyLoadOutcome::NotResolvable
664 }
665 }
666
667 /// Try to resolve `fqcn` via the configured [`crate::ClassResolver`] and
668 /// ingest the mapped file.
669 ///
670 /// This is the LSP-friendly lazy-load entry point: the analyzer never
671 /// touches `vendor/` on its own, but consumers can ask it to resolve
672 /// individual symbols on demand. Designed to be called when a diagnostic
673 /// would otherwise report `UndefinedClass`.
674 ///
675 /// Returns `true` if either the class is already known or a matching
676 /// file was found and successfully ingested. Returns `false` if:
677 /// - No resolver is configured (neither `with_psr4` nor `with_class_resolver` called),
678 /// - The resolver can't map `fqcn` to a file,
679 /// - The file can't be read, or
680 /// - The file parsed but did not define `fqcn`.
681 pub fn lazy_load_class(&self, fqcn: &str) -> bool {
682 if self.contains_class(fqcn) {
683 return true;
684 }
685 let Some(resolver) = &self.resolver else {
686 return false;
687 };
688 let Some(path) = resolver.resolve(fqcn) else {
689 return false;
690 };
691 let Ok(src) = std::fs::read_to_string(&path) else {
692 return false;
693 };
694 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
695 self.ingest_file(file, Arc::from(src));
696 self.contains_class(fqcn)
697 }
698
699 /// Lazy-load every class transitively reachable from `fqcn` via parent /
700 /// interface / trait edges. Useful when the consumer needs not just the
701 /// requested class but enough of its inheritance chain to type-check
702 /// member access.
703 ///
704 /// Walks at most `max_depth` levels (default in batch analysis is 10).
705 /// Returns the number of classes successfully loaded (not counting
706 /// `fqcn` itself if it was already present).
707 pub fn lazy_load_class_transitive(&self, fqcn: &str, max_depth: usize) -> usize {
708 if self.resolver.is_none() {
709 return 0;
710 }
711 let mut loaded = 0;
712 let mut frontier: Vec<String> = vec![fqcn.to_string()];
713 let mut visited: std::collections::HashSet<String> = std::collections::HashSet::new();
714
715 for _ in 0..max_depth {
716 if frontier.is_empty() {
717 break;
718 }
719 let mut next: Vec<String> = Vec::new();
720 for name in frontier.drain(..) {
721 if !visited.insert(name.clone()) {
722 continue;
723 }
724 let was_present = self.contains_class(&name);
725 let resolved = self.lazy_load_class(&name);
726 if resolved && !was_present {
727 loaded += 1;
728 // Walk the new class's parent / interfaces / traits.
729 let db = self.snapshot_db();
730 if let Some(node) = db.lookup_class_node(&name) {
731 if let Some(parent) = node.parent(&db) {
732 next.push(parent.to_string());
733 }
734 for iface in node.interfaces(&db).iter() {
735 next.push(iface.to_string());
736 }
737 for tr in node.traits(&db).iter() {
738 next.push(tr.to_string());
739 }
740 for ext in node.extends(&db).iter() {
741 next.push(ext.to_string());
742 }
743 }
744 }
745 }
746 frontier = next;
747 }
748 loaded
749 }
750
751 /// Retrieve the source text the session has registered for `file`, if
752 /// any. Returns `None` when the file has never been ingested. Used by
753 /// the parallel re-analysis path to re-feed dependents to Pass 2 without
754 /// the caller having to track sources independently.
755 pub fn source_of(&self, file: &str) -> Option<Arc<str>> {
756 let guard = self.shared_db.salsa.lock();
757 let sf = guard.lookup_source_file(file)?;
758 Some(sf.text(&*guard))
759 }
760
761 /// Re-analyze every transitive dependent of `file` in parallel.
762 ///
763 /// When the user saves a file that other files depend on (e.g. editing
764 /// a base class, an interface, or a trait), those dependents may have
765 /// new diagnostics. This method computes them in parallel using rayon
766 /// and returns the per-file analysis results so the LSP server can
767 /// publish updated diagnostics in one batch.
768 ///
769 /// Source text for dependents is retrieved from the session's salsa
770 /// inputs (set by previous `ingest_file` calls) — the caller doesn't
771 /// need to track or re-read files. Files for which the session has no
772 /// source are silently skipped (returns the analyzable subset).
773 ///
774 /// Does not run inference sweeps. For full-fidelity cross-file inferred
775 /// return types, follow up with [`Self::run_inference_sweep`] over the
776 /// affected file set.
777 pub fn analyze_dependents_of(&self, file: &str) -> Vec<(Arc<str>, crate::FileAnalysis)> {
778 use rayon::prelude::*;
779
780 // Phase 1: compute dependents + gather their sources outside the
781 // analysis loop so each worker has everything it needs.
782 let dependents = self.dependency_graph().transitive_dependents(file);
783 if dependents.is_empty() {
784 return Vec::new();
785 }
786 let with_source: Vec<(Arc<str>, Arc<str>)> = dependents
787 .into_iter()
788 .filter_map(|path| {
789 let arc_path: Arc<str> = Arc::from(path.as_str());
790 let src = self.source_of(&path)?;
791 Some((arc_path, src))
792 })
793 .collect();
794 if with_source.is_empty() {
795 return Vec::new();
796 }
797
798 // Phase 2: parallel parse + analyze. Each rayon worker gets its own
799 // database snapshot via FileAnalyzer; writes are isolated to the
800 // session's canonical db (none happen here since we only run Pass 2).
801 with_source
802 .into_par_iter()
803 .map(|(file, source)| {
804 let arena = crate::arena::create_parse_arena(source.len());
805 let parsed = php_rs_parser::parse(&arena, source.as_ref());
806 let analyzer = crate::FileAnalyzer::new(self);
807 let analysis = analyzer.analyze(
808 file.clone(),
809 source.as_ref(),
810 &parsed.program,
811 &parsed.source_map,
812 );
813 (file, analysis)
814 })
815 .collect()
816 }
817
818 /// FQCNs that `file` imports via `use` statements but that aren't yet
819 /// loaded in the session.
820 ///
821 /// Designed as the input to background prefetching: after the LSP server
822 /// ingests an open buffer, it can call this and lazy-load the returned
823 /// FQCNs on a worker thread so the user's first Cmd+Click into vendor
824 /// code doesn't pay the file-read+parse cost.
825 ///
826 /// Returns an empty Vec if the file hasn't been ingested or has no
827 /// unresolved imports.
828 pub fn pending_lazy_loads(&self, file: &str) -> Vec<Arc<str>> {
829 let db = self.snapshot_db();
830 let imports = db.file_imports(file);
831 if imports.is_empty() {
832 return Vec::new();
833 }
834 let mut out = Vec::new();
835 for fqcn in imports.values() {
836 // Cheap check: skip imports already in the codebase.
837 if db.lookup_class_node(fqcn).is_some_and(|n| n.active(&db)) {
838 continue;
839 }
840 // Only worth queueing if the resolver could in principle find it.
841 if let Some(resolver) = &self.resolver {
842 if resolver.resolve(fqcn).is_some() {
843 out.push(Arc::from(fqcn.as_str()));
844 }
845 }
846 }
847 out
848 }
849
850 /// Convenience: synchronously lazy-load every import of `file` that
851 /// isn't already in the codebase. Returns the number successfully loaded.
852 ///
853 /// For non-blocking prefetch, call this from a worker thread:
854 ///
855 /// ```ignore
856 /// let s = session.clone(); // AnalysisSession is wrapped in Arc by callers
857 /// std::thread::spawn(move || {
858 /// s.prefetch_imports(&file_path);
859 /// });
860 /// ```
861 ///
862 /// Internally walks the inheritance chain of each loaded class to a
863 /// shallow depth so member access on imported types type-checks without
864 /// the user paying the cost on their first navigation.
865 pub fn prefetch_imports(&self, file: &str) -> usize {
866 let pending = self.pending_lazy_loads(file);
867 let mut loaded = 0;
868 for fqcn in pending {
869 // Use the transitive walker with a small depth so we pick up
870 // parent classes / interfaces needed for member resolution, but
871 // don't recursively pull in the entire vendor tree.
872 loaded += self.lazy_load_class_transitive(&fqcn, 2);
873 }
874 loaded
875 }
876
877 /// All class / interface / trait / enum FQCNs currently known to the
878 /// session, each paired with the file that defines them when available.
879 ///
880 /// Use this to build workspace-wide views (outline, fuzzy search, etc.).
881 /// Consumers implement their own search/match logic on top — the analyzer
882 /// only exposes the iterator.
883 pub fn all_classes(&self) -> Vec<(Arc<str>, Option<mir_codebase::storage::Location>)> {
884 let db = self.snapshot_db();
885 db.active_class_node_fqcns()
886 .into_iter()
887 .filter_map(|fqcn| {
888 let node = db.lookup_class_node(fqcn.as_ref())?;
889 if !node.active(&db) {
890 return None;
891 }
892 Some((fqcn, node.location(&db)))
893 })
894 .collect()
895 }
896
897 /// All global function FQNs currently known to the session, each paired
898 /// with their declaration location when available.
899 pub fn all_functions(&self) -> Vec<(Arc<str>, Option<mir_codebase::storage::Location>)> {
900 let db = self.snapshot_db();
901 db.active_function_node_fqns()
902 .into_iter()
903 .filter_map(|fqn| {
904 let node = db.lookup_function_node(fqn.as_ref())?;
905 if !node.active(&db) {
906 return None;
907 }
908 Some((fqn, node.location(&db)))
909 })
910 .collect()
911 }
912
913 /// Compute `file`'s outgoing dependency edges and update the cache's
914 /// reverse-dep graph in place. No-op if no cache is configured.
915 fn update_reverse_deps_for(&self, file: &str) {
916 let Some(cache) = self.cache.as_deref() else {
917 return;
918 };
919 let db = self.snapshot_db();
920 let targets = file_outgoing_dependencies(&db, file);
921 cache.update_reverse_deps_for_file(file, &targets);
922 }
923
924 /// Cross-file inference sweep. For each `(file, source)` pair, calls the
925 /// Salsa-tracked `infer_file_return_types` query in parallel, then commits
926 /// the collected inferred return types to INPUT fields.
927 ///
928 /// Files must already be ingested via [`Self::ingest_file`] before calling
929 /// this method. Subsequent [`FileAnalyzer::analyze`] calls read the committed
930 /// INPUT fields via O(1) lookups with no lock contention.
931 pub fn run_inference_sweep(&self, files: &[(Arc<str>, Arc<str>)]) {
932 use rayon::prelude::*;
933 let db_priming = self.snapshot_db();
934 let inferred_results: Vec<crate::db::InferredFileTypes> = files
935 .par_iter()
936 .map_with(db_priming, |db, (path, _src)| {
937 if let Some(sf) = db.lookup_source_file(path) {
938 crate::db::infer_file_return_types(db, sf)
939 } else {
940 crate::db::InferredFileTypes::empty()
941 }
942 })
943 .collect();
944 let mut functions = Vec::new();
945 let mut methods = Vec::new();
946 for result in inferred_results {
947 for (fqn, ty) in result.functions.iter() {
948 functions.push((fqn.clone(), (**ty).clone()));
949 }
950 for ((fqcn, name), ty) in result.methods.iter() {
951 methods.push((fqcn.clone(), name.clone(), (**ty).clone()));
952 }
953 }
954 let mut guard = self.shared_db.salsa.lock();
955 guard.commit_inferred_return_types(functions, methods);
956 }
957
958 /// File dependency graph: which files depend on which other files.
959 /// Used for incremental invalidation in LSP servers and build systems.
960 ///
961 /// Dependencies are computed from:
962 /// - Direct imports (use statements)
963 /// - Class inheritance (parent classes, interfaces, traits)
964 pub fn dependency_graph(&self) -> crate::DependencyGraph {
965 let db = self.snapshot_db();
966
967 // Get all files from the session's salsa database
968 let guard = self.shared_db.salsa.lock();
969 let all_files: Vec<String> = guard
970 .source_file_paths()
971 .iter()
972 .map(|f| f.as_ref().to_string())
973 .collect();
974 drop(guard);
975
976 // Build forward dependency graph: file → [files it depends on]
977 let mut dependencies: std::collections::HashMap<String, Vec<String>> =
978 std::collections::HashMap::new();
979 for file in &all_files {
980 let deps = file_outgoing_dependencies(&db, file);
981 dependencies.insert(file.clone(), deps.into_iter().collect());
982 }
983
984 // Build reverse dependency graph: file → [files that depend on it]
985 let mut dependents: std::collections::HashMap<String, Vec<String>> =
986 std::collections::HashMap::new();
987 for (file, deps) in &dependencies {
988 for dep in deps {
989 dependents
990 .entry(dep.clone())
991 .or_default()
992 .push(file.clone());
993 }
994 }
995
996 // Sort for determinism
997 for deps in dependents.values_mut() {
998 deps.sort();
999 }
1000
1001 crate::DependencyGraph {
1002 dependencies,
1003 dependents,
1004 }
1005 }
1006}
1007
1008/// Compute the set of files `file` depends on: defining files of its imports,
1009/// plus parent / interfaces / traits' defining files for any classes declared
1010/// in `file`. Self-edges are excluded.
1011fn file_outgoing_dependencies(db: &dyn MirDatabase, file: &str) -> HashSet<String> {
1012 let mut targets: HashSet<String> = HashSet::new();
1013
1014 let mut add_target = |symbol: &str| {
1015 if let Some(defining_file) = db.symbol_defining_file(symbol) {
1016 let def = defining_file.as_ref().to_string();
1017 if def != file {
1018 targets.insert(def);
1019 }
1020 }
1021 };
1022
1023 let imports = db.file_imports(file);
1024 for fqcn in imports.values() {
1025 add_target(fqcn);
1026 }
1027
1028 for fqcn in db.symbols_defined_in_file(file) {
1029 let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
1030 continue;
1031 };
1032 if let Some(parent) = node.parent(db) {
1033 add_target(parent.as_ref());
1034 }
1035 for iface in node.interfaces(db).iter() {
1036 add_target(iface.as_ref());
1037 }
1038 for tr in node.traits(db).iter() {
1039 add_target(tr.as_ref());
1040 }
1041 }
1042
1043 // Also track bare-FQN references recorded during Pass 2 (new \Foo(), \Foo::method(),
1044 // \foo()) that do not appear in use-import statements.
1045 for (symbol_key, _, _, _) in db.extract_file_reference_locations(file) {
1046 let lookup: &str = match symbol_key.split_once("::") {
1047 Some((class, _)) => class,
1048 None => &symbol_key,
1049 };
1050 add_target(lookup);
1051 }
1052
1053 targets
1054}