mir_analyzer/session.rs
1//! Session-based analysis API for incremental, per-file analysis.
2//!
3//! [`AnalysisSession`] owns the salsa database and per-session caches for a
4//! long-running analysis context shared across many per-file analyses. Reads
5//! clone the database under a brief lock, then run lock-free; writes hold the
6//! lock briefly to mutate canonical state. `MirDb::clone()` is cheap
7//! (Arc-wrapped registries), so this pattern gives parallel readers without
8//! blocking on concurrent writes for longer than the clone itself.
9//!
10//! See [`crate::file_analyzer::FileAnalyzer`] for the per-file Pass 2 entry
11//! point that operates against a session.
12
13use std::collections::{HashMap, HashSet};
14use std::path::PathBuf;
15use std::sync::Arc;
16
17use parking_lot::RwLock;
18
19use crate::cache::AnalysisCache;
20use crate::composer::Psr4Map;
21use crate::db::{MirDatabase, MirDb};
22use crate::php_version::PhpVersion;
23use crate::shared_db::SharedDb;
24
/// Long-lived analysis context. Owns the salsa database and tracks which
/// stubs have been loaded.
///
/// Cheap to clone the inner db for parallel reads; writes funnel through
/// [`Self::ingest_file`], [`Self::invalidate_file`], and the crate-internal
/// [`Self::with_db_mut`].
///
/// Construct with [`Self::new`] and configure through the consuming
/// `with_*` builder methods.
pub struct AnalysisSession {
    /// Shared database management (salsa, file registry, stub tracking).
    /// Extracted to allow code sharing with ProjectAnalyzer.
    shared_db: Arc<SharedDb>,
    /// Optional analysis cache. `None` until [`Self::with_cache`] or
    /// [`Self::with_cache_dir`] attaches one.
    cache: Option<Arc<AnalysisCache>>,
    /// PSR-4 / Composer autoload map. Retained alongside `resolver` so the
    /// `psr4()` accessor can still return a typed `Psr4Map` for callers that
    /// need Composer-specific data (project_files / vendor_files / etc.).
    psr4: Option<Arc<Psr4Map>>,
    /// Generic class resolver used for on-demand lazy loading. When `psr4`
    /// is set via [`Self::with_psr4`], this is populated with the same map
    /// re-typed as `dyn ClassResolver`. Consumers can also supply their own
    /// resolver via [`Self::with_class_resolver`] without going through
    /// Composer.
    resolver: Option<Arc<dyn crate::ClassResolver>>,
    /// PHP language version this session targets; passed along whenever
    /// embedded stubs are ingested.
    php_version: PhpVersion,
    /// Individual user stub files, set by [`Self::with_user_stubs`].
    user_stub_files: Vec<PathBuf>,
    /// Directories of user stubs, set by [`Self::with_user_stubs`].
    user_stub_dirs: Vec<PathBuf>,
    /// In-memory reverse dependency map: target_file → set of files that
    /// depend on it. Always maintained (not gated on disk cache presence),
    /// enabling `analyze_dependents_of` and `dependency_graph()` without a
    /// disk cache. Updated in `ingest_file` and `invalidate_file`.
    reverse_dep_map: Arc<RwLock<HashMap<String, HashSet<String>>>>,
}
55
56impl AnalysisSession {
57 /// Create a session targeting the given PHP language version.
58 pub fn new(php_version: PhpVersion) -> Self {
59 Self {
60 shared_db: Arc::new(SharedDb::new()),
61 cache: None,
62 psr4: None,
63 resolver: None,
64 php_version,
65 user_stub_files: Vec::new(),
66 user_stub_dirs: Vec::new(),
67 reverse_dep_map: Arc::new(RwLock::new(HashMap::new())),
68 }
69 }
70
71 pub fn with_cache(mut self, cache: Arc<AnalysisCache>) -> Self {
72 self.cache = Some(cache);
73 self
74 }
75
76 /// Convenience: open a disk-backed cache at `cache_dir` and attach it.
77 /// Avoids forcing callers to wrap [`AnalysisCache`] in `Arc` themselves.
78 pub fn with_cache_dir(self, cache_dir: &std::path::Path) -> Self {
79 self.with_cache(Arc::new(AnalysisCache::open(cache_dir)))
80 }
81
82 /// Attach a Composer autoload map (PSR-4, PSR-0, classmap, files).
83 /// Sets the same map as the active [`crate::ClassResolver`] so
84 /// [`Self::lazy_load_class`] works out of the box.
85 pub fn with_psr4(mut self, map: Arc<Psr4Map>) -> Self {
86 let resolver: Arc<dyn crate::ClassResolver> = map.clone();
87 self.psr4 = Some(map);
88 self.resolver = Some(resolver);
89 self
90 }
91
92 /// Attach a generic class resolver for projects that don't use Composer
93 /// (WordPress, Drupal, custom autoloaders, workspace-walk indexes).
94 /// Replaces any previously-set Composer-backed resolver.
95 pub fn with_class_resolver(mut self, resolver: Arc<dyn crate::ClassResolver>) -> Self {
96 self.resolver = Some(resolver);
97 self
98 }
99
100 pub fn with_user_stubs(mut self, files: Vec<PathBuf>, dirs: Vec<PathBuf>) -> Self {
101 self.user_stub_files = files;
102 self.user_stub_dirs = dirs;
103 self
104 }
105
106 pub fn php_version(&self) -> PhpVersion {
107 self.php_version
108 }
109
110 pub fn cache(&self) -> Option<&AnalysisCache> {
111 self.cache.as_deref()
112 }
113
114 pub fn psr4(&self) -> Option<&Psr4Map> {
115 self.psr4.as_deref()
116 }
117
118 /// Load every PHP built-in stub plus any configured user stubs.
119 ///
120 /// **Deprecated**: prefer [`Self::ensure_all_stubs_loaded`] (explicit
121 /// "comprehensive") or [`Self::ensure_essential_stubs_loaded`] (fast
122 /// cold-start with auto-discovery on demand).
123 #[doc(hidden)]
124 pub fn ensure_stubs_loaded(&self) {
125 self.ensure_all_stubs_loaded();
126 }
127
128 /// Load only the curated set of essential stubs (Core, standard, SPL,
129 /// date) plus any configured user stubs. About 25 of 120 stub files;
130 /// covers types and functions used by virtually all PHP code.
131 ///
132 /// Other extension stubs (Reflection, gd, openssl, …) can be brought in
133 /// on demand via [`Self::ensure_stubs_for_symbol`] when user code
134 /// references them. Idempotent — already-loaded stubs are skipped.
135 pub fn ensure_essential_stubs_loaded(&self) {
136 self.shared_db
137 .ingest_stub_paths(crate::stubs::ESSENTIAL_STUB_PATHS, self.php_version);
138 self.ensure_user_stubs_loaded();
139 }
140
141 /// Load every embedded PHP stub plus any configured user stubs.
142 /// Use for batch tools (CLI, full project analysis) where comprehensive
143 /// symbol coverage matters more than cold-start latency.
144 pub fn ensure_all_stubs_loaded(&self) {
145 let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
146 self.shared_db.ingest_stub_paths(&paths, self.php_version);
147 self.ensure_user_stubs_loaded();
148 }
149
150 /// Ensure the embedded stub that defines `name` (a function) is ingested.
151 /// Returns `true` when a matching stub exists (whether or not it was
152 /// already loaded), `false` when `name` isn't a known PHP built-in.
153 ///
154 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead —
155 /// it auto-discovers needed stubs from a parsed file.
156 #[doc(hidden)]
157 pub fn ensure_stub_for_function(&self, name: &str) -> bool {
158 match crate::stubs::stub_path_for_function(name) {
159 Some(path) => {
160 self.shared_db.ingest_stub_paths(&[path], self.php_version);
161 true
162 }
163 None => false,
164 }
165 }
166
167 /// Ensure the embedded stub that defines `fqcn` (a class / interface /
168 /// trait / enum) is ingested. Case-insensitive lookup with optional
169 /// leading backslash.
170 ///
171 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead.
172 #[doc(hidden)]
173 pub fn ensure_stub_for_class(&self, fqcn: &str) -> bool {
174 match crate::stubs::stub_path_for_class(fqcn) {
175 Some(path) => {
176 self.shared_db.ingest_stub_paths(&[path], self.php_version);
177 true
178 }
179 None => false,
180 }
181 }
182
183 /// Ensure the embedded stub that defines `name` (a constant) is ingested.
184 ///
185 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead.
186 #[doc(hidden)]
187 pub fn ensure_stub_for_constant(&self, name: &str) -> bool {
188 match crate::stubs::stub_path_for_constant(name) {
189 Some(path) => {
190 self.shared_db.ingest_stub_paths(&[path], self.php_version);
191 true
192 }
193 None => false,
194 }
195 }
196
197 /// Number of distinct embedded stubs currently ingested into the session.
198 /// Useful for diagnostics and bench reporting.
199 pub fn loaded_stub_count(&self) -> usize {
200 self.shared_db.loaded_stubs.lock().len()
201 }
202
203 /// Auto-discover and ingest the embedded stubs needed to cover every
204 /// built-in PHP function / class / constant referenced by `source`.
205 ///
206 /// Used by [`crate::FileAnalyzer::analyze`] to keep essentials-only mode
207 /// correct without forcing callers to enumerate which stubs they need.
208 /// Idempotent — already-loaded stubs are skipped via [`Self::loaded_stubs`].
209 ///
210 /// The discovery scan is a coarse identifier sweep (see
211 /// [`crate::stubs::collect_referenced_builtin_paths`]) — it may pull in
212 /// a slightly larger set than the file strictly needs, but never misses
213 /// a referenced built-in. Cost is sub-millisecond per file.
214 ///
215 /// Fast path: if every embedded stub is already loaded (e.g. after a
216 /// batch tool called [`Self::ensure_all_stubs_loaded`]), the source scan
217 /// is skipped entirely.
218 pub fn ensure_stubs_for_source(&self, source: &str) {
219 // Cheap check first: skip the scan entirely when we already know we
220 // have everything. Avoids a ~50-500µs source walk on every analyze
221 // call in batch / warm-session scenarios.
222 {
223 let loaded = self.shared_db.loaded_stubs.lock();
224 if loaded.len() >= crate::stubs::stub_files().len() {
225 return;
226 }
227 }
228 let paths = crate::stubs::collect_referenced_builtin_paths(source);
229 if paths.is_empty() {
230 return;
231 }
232 self.shared_db.ingest_stub_paths(&paths, self.php_version);
233 }
234
235 /// Discover and ingest stubs by walking the parsed AST of a PHP file.
236 ///
237 /// Similar to [`Self::ensure_stubs_for_source`], but takes an already-parsed
238 /// AST instead of raw source text. Produces zero false positives since it
239 /// only extracts identifiers from actual AST nodes (not from strings or
240 /// comments). Preferred over `ensure_stubs_for_source` when the AST is
241 /// already available (e.g., in [`crate::FileAnalyzer`]).
242 ///
243 /// Idempotent and skips the scan if all stubs are already loaded.
244 pub fn ensure_stubs_for_ast(&self, program: &php_ast::ast::Program<'_, '_>) {
245 {
246 let loaded = self.shared_db.loaded_stubs.lock();
247 if loaded.len() >= crate::stubs::stub_files().len() {
248 return;
249 }
250 }
251 let paths = crate::stubs::collect_referenced_builtin_paths_from_ast(program);
252 if paths.is_empty() {
253 return;
254 }
255 self.shared_db.ingest_stub_paths(&paths, self.php_version);
256 }
257
258 fn ensure_user_stubs_loaded(&self) {
259 self.shared_db
260 .ingest_user_stubs(&self.user_stub_files, &self.user_stub_dirs);
261 }
262
263 /// Cheap clone of the salsa db for a read-only query. The lock is held
264 /// only for the duration of the clone, so concurrent readers never
265 /// serialize on each other or on writes for longer than the clone itself.
266 ///
267 /// **Internal API — exposes Salsa types.** Subject to change without
268 /// notice. Public consumers should use the typed query methods
269 /// ([`Self::definition_of`], [`Self::hover`], etc.) instead.
270 #[doc(hidden)]
271 pub fn snapshot_db(&self) -> MirDb {
272 self.shared_db.snapshot_db()
273 }
274
275 /// Run a closure with read access to a database snapshot.
276 ///
277 /// **Internal API — exposes Salsa types.** Subject to change without
278 /// notice.
279 #[doc(hidden)]
280 pub fn read<R>(&self, f: impl FnOnce(&dyn MirDatabase) -> R) -> R {
281 let db = self.snapshot_db();
282 f(&db)
283 }
284
285 /// Pass 1 ingestion. Updates the file's source text in the salsa db,
286 /// runs definition collection, and ingests the resulting stub slice.
287 /// Triggers stub loading on first call. Also updates the cache's reverse-
288 /// dependency graph for `file` so cross-file invalidation stays correct
289 /// across incremental edits — without rebuilding the graph from scratch.
290 ///
291 /// If `file` was previously ingested, its old definitions and reference
292 /// locations are removed first so renames / deletions don't leave stale
293 /// state in the codebase. (Without this, long-running sessions would
294 /// accumulate dead reference-location entries indefinitely.)
295 pub fn ingest_file(&self, file: Arc<str>, source: Arc<str>) {
296 self.ensure_stubs_loaded();
297 {
298 let mut guard = self.shared_db.salsa.write();
299 guard.remove_file_definitions(file.as_ref());
300 }
301 let _file_defs = self
302 .shared_db
303 .collect_and_ingest_file(file.clone(), source.as_ref());
304 self.update_reverse_deps_for(&file);
305 }
306
307 /// Drop a file's contribution to the session: codebase definitions,
308 /// reference locations, salsa input handle, cache entry, and outgoing
309 /// reverse-dependency edges. Cache entries of *dependent* files are
310 /// also evicted (cross-file invalidation).
311 ///
312 /// Use this when a file is closed by the consumer, or before a re-ingest
313 /// of substantially changed content. (Plain re-ingest via
314 /// [`Self::ingest_file`] also drops old definitions, but does not
315 /// remove the salsa input handle — call this for full cleanup.)
316 pub fn invalidate_file(&self, file: &str) {
317 {
318 let mut guard = self.shared_db.salsa.write();
319 guard.remove_file_definitions(file);
320 guard.remove_source_file(file);
321 }
322 // Remove this file's outgoing deps from the in-memory reverse dep map.
323 self.update_in_memory_reverse_deps(file, &HashSet::new());
324 if let Some(cache) = &self.cache {
325 cache.update_reverse_deps_for_file(file, &HashSet::new());
326 cache.evict_with_dependents(&[file.to_string()]);
327 }
328 }
329
330 /// Number of files currently tracked in this session's salsa input set.
331 /// Stable across reads; useful for diagnostics and memory bounds checks.
332 pub fn tracked_file_count(&self) -> usize {
333 let guard = self.shared_db.salsa.read();
334 guard.source_file_count()
335 }
336
337 // -----------------------------------------------------------------------
338 // Read-only codebase queries
339 //
340 // All take a brief lock to clone the db, then run the lookup against the
341 // owned snapshot — concurrent edits proceed without blocking.
342 // -----------------------------------------------------------------------
343
344 /// Resolve a top-level symbol (class or function) to its declaration
345 /// location. Powers go-to-definition.
346 ///
347 /// Returns:
348 /// - `Ok(Location)` — symbol found with a source location
349 /// - `Err(NotFound)` — no such symbol in the codebase
350 /// - `Err(NoSourceLocation)` — symbol exists but has no recorded span
351 /// (e.g. some stub-only declarations)
352 pub fn definition_of(
353 &self,
354 symbol: &crate::Symbol,
355 ) -> Result<mir_codebase::storage::Location, crate::SymbolLookupError> {
356 let db = self.snapshot_db();
357 match symbol {
358 crate::Symbol::Class(fqcn) => {
359 let node = db
360 .lookup_class_node(fqcn.as_ref())
361 .filter(|n| n.active(&db))
362 .ok_or(crate::SymbolLookupError::NotFound)?;
363 node.location(&db)
364 .ok_or(crate::SymbolLookupError::NoSourceLocation)
365 }
366 crate::Symbol::Function(fqn) => {
367 let node = db
368 .lookup_function_node(fqn.as_ref())
369 .filter(|n| n.active(&db))
370 .ok_or(crate::SymbolLookupError::NotFound)?;
371 node.location(&db)
372 .ok_or(crate::SymbolLookupError::NoSourceLocation)
373 }
374 crate::Symbol::Method { class, name }
375 | crate::Symbol::Property { class, name }
376 | crate::Symbol::ClassConstant { class, name } => {
377 crate::db::member_location_via_db(&db, class, name)
378 .ok_or(crate::SymbolLookupError::NotFound)
379 }
380 crate::Symbol::GlobalConstant(_) => {
381 // Global constants don't currently store location info
382 Err(crate::SymbolLookupError::NoSourceLocation)
383 }
384 }
385 }
386
387 /// Hover information for a symbol: type, docstring, and definition location.
388 ///
389 /// Use [`crate::FileAnalysis::symbol_at`] to find the symbol at a cursor
390 /// position, then build a [`crate::Symbol`] from its `kind`. This method
391 /// assembles the displayable hover data.
392 ///
393 /// Returns `Err(NotFound)` if the symbol doesn't exist. May still return
394 /// `Ok` with `docstring: None` or `definition: None` if those specific
395 /// pieces aren't available.
396 pub fn hover(
397 &self,
398 symbol: &crate::Symbol,
399 ) -> Result<crate::HoverInfo, crate::SymbolLookupError> {
400 use mir_types::{Atomic, Union};
401 let db = self.snapshot_db();
402 match symbol {
403 crate::Symbol::Function(fqn) => {
404 let node = db
405 .lookup_function_node(fqn.as_ref())
406 .filter(|n| n.active(&db))
407 .ok_or(crate::SymbolLookupError::NotFound)?;
408 let ty = node
409 .return_type(&db)
410 .map(|t| (*t).clone())
411 .unwrap_or_else(Union::mixed);
412 let docstring = node.docstring(&db).map(|s| s.to_string());
413 let definition = node.location(&db);
414 Ok(crate::HoverInfo {
415 ty,
416 docstring,
417 definition,
418 })
419 }
420 crate::Symbol::Method { class, name } => {
421 let node = db
422 .lookup_method_node(class.as_ref(), name.as_ref())
423 .filter(|n| n.active(&db))
424 .ok_or(crate::SymbolLookupError::NotFound)?;
425 let ty = node
426 .return_type(&db)
427 .map(|t| (*t).clone())
428 .unwrap_or_else(Union::mixed);
429 let docstring = node.docstring(&db).map(|s| s.to_string());
430 let definition = node.location(&db);
431 Ok(crate::HoverInfo {
432 ty,
433 docstring,
434 definition,
435 })
436 }
437 crate::Symbol::Class(fqcn) => {
438 let node = db
439 .lookup_class_node(fqcn.as_ref())
440 .filter(|n| n.active(&db))
441 .ok_or(crate::SymbolLookupError::NotFound)?;
442 let ty = Union::single(Atomic::TNamedObject {
443 fqcn: fqcn.clone(),
444 type_params: Vec::new(),
445 });
446 let definition = node.location(&db);
447 Ok(crate::HoverInfo {
448 ty,
449 docstring: None,
450 definition,
451 })
452 }
453 crate::Symbol::Property { class, name } => {
454 let node = db
455 .lookup_property_node(class.as_ref(), name.as_ref())
456 .filter(|n| n.active(&db))
457 .ok_or(crate::SymbolLookupError::NotFound)?;
458 let ty = node.ty(&db).unwrap_or_else(Union::mixed);
459 let definition = node.location(&db);
460 Ok(crate::HoverInfo {
461 ty,
462 docstring: None,
463 definition,
464 })
465 }
466 crate::Symbol::ClassConstant { class, name } => {
467 let node = db
468 .lookup_class_constant_node(class.as_ref(), name.as_ref())
469 .filter(|n| n.active(&db))
470 .ok_or(crate::SymbolLookupError::NotFound)?;
471 let ty = node.ty(&db);
472 let definition = node.location(&db);
473 Ok(crate::HoverInfo {
474 ty,
475 docstring: None,
476 definition,
477 })
478 }
479 crate::Symbol::GlobalConstant(fqn) => {
480 let node = db
481 .lookup_global_constant_node(fqn.as_ref())
482 .filter(|n| n.active(&db))
483 .ok_or(crate::SymbolLookupError::NotFound)?;
484 let ty = node.ty(&db);
485 Ok(crate::HoverInfo {
486 ty,
487 docstring: None,
488 definition: None,
489 })
490 }
491 }
492 }
493
494 /// Every recorded reference to `symbol` with its source location as a Range.
495 /// Use [`crate::FileAnalysis::symbol_at`] to find the symbol at a cursor,
496 /// build a [`crate::Symbol`] from it, and pass it here.
497 pub fn references_to(&self, symbol: &crate::Symbol) -> Vec<(Arc<str>, crate::Range)> {
498 let db = self.snapshot_db();
499 let key = symbol.codebase_key();
500 db.reference_locations(&key)
501 .into_iter()
502 .map(|(file, line, col_start, col_end)| {
503 let range = crate::Range {
504 start: crate::Position {
505 line,
506 column: col_start as u32,
507 },
508 end: crate::Position {
509 line,
510 column: col_end as u32,
511 },
512 };
513 (file, range)
514 })
515 .collect()
516 }
517
518 /// Class-level issues (inheritance violations, abstract-method gaps, override
519 /// incompatibilities) for the given set of files.
520 ///
521 /// These checks are cross-file by nature and are not emitted by
522 /// [`crate::FileAnalyzer::analyze`]. Call this after ingesting or
523 /// re-analyzing a file and its dependents to get the full diagnostic picture.
524 ///
525 /// Circular-inheritance checks always run against the full workspace graph
526 /// regardless of the `files` filter — a cycle is a workspace-wide problem.
527 pub fn class_issues_for(&self, files: &[Arc<str>]) -> Vec<crate::Issue> {
528 let db = self.snapshot_db();
529 let file_set: HashSet<Arc<str>> = files.iter().cloned().collect();
530 let file_data: Vec<(Arc<str>, Arc<str>)> = files
531 .iter()
532 .filter_map(|f| Some((f.clone(), self.source_of(f)?)))
533 .collect();
534 crate::class::ClassAnalyzer::with_files(&db, file_set, &file_data).analyze_all()
535 }
536
537 /// All declarations defined in `file` as a **hierarchical tree**.
538 ///
539 /// Classes/interfaces/traits/enums are returned with their methods,
540 /// properties, and constants nested in `children`. Top-level functions
541 /// and constants are returned with empty `children`.
542 pub fn document_symbols(&self, file: &str) -> Vec<crate::symbol::DocumentSymbol> {
543 use crate::symbol::{DocumentSymbol, DocumentSymbolKind};
544
545 let db = self.snapshot_db();
546 let mut out = Vec::new();
547 for symbol in db.symbols_defined_in_file(file) {
548 // Try class side first — covers Class / Interface / Trait / Enum.
549 if let Some(class_node) = db.lookup_class_node(symbol.as_ref()) {
550 if !class_node.active(&db) {
551 continue;
552 }
553 let (kind, is_enum) = crate::db::class_kind_via_db(&db, symbol.as_ref())
554 .map(|k| {
555 let kind = if k.is_interface {
556 DocumentSymbolKind::Interface
557 } else if k.is_trait {
558 DocumentSymbolKind::Trait
559 } else if k.is_enum {
560 DocumentSymbolKind::Enum
561 } else {
562 DocumentSymbolKind::Class
563 };
564 (kind, k.is_enum)
565 })
566 .unwrap_or((DocumentSymbolKind::Class, false));
567
568 // Build children: methods, properties, and class constants.
569 let mut children: Vec<DocumentSymbol> = Vec::new();
570 for m in db.class_own_methods(symbol.as_ref()) {
571 if !m.active(&db) {
572 continue;
573 }
574 children.push(DocumentSymbol {
575 name: m.name(&db),
576 kind: DocumentSymbolKind::Method,
577 location: m.location(&db),
578 children: Vec::new(),
579 });
580 }
581 for p in db.class_own_properties(symbol.as_ref()) {
582 if !p.active(&db) {
583 continue;
584 }
585 children.push(DocumentSymbol {
586 name: p.name(&db),
587 kind: DocumentSymbolKind::Property,
588 location: p.location(&db),
589 children: Vec::new(),
590 });
591 }
592 for c in db.class_own_constants(symbol.as_ref()) {
593 if !c.active(&db) {
594 continue;
595 }
596 let const_kind = if is_enum {
597 DocumentSymbolKind::EnumCase
598 } else {
599 DocumentSymbolKind::Constant
600 };
601 children.push(DocumentSymbol {
602 name: c.name(&db),
603 kind: const_kind,
604 location: c.location(&db),
605 children: Vec::new(),
606 });
607 }
608
609 out.push(DocumentSymbol {
610 name: symbol.clone(),
611 kind,
612 location: class_node.location(&db),
613 children,
614 });
615 continue;
616 }
617 if let Some(fn_node) = db.lookup_function_node(symbol.as_ref()) {
618 if !fn_node.active(&db) {
619 continue;
620 }
621 out.push(DocumentSymbol {
622 name: symbol.clone(),
623 kind: DocumentSymbolKind::Function,
624 location: fn_node.location(&db),
625 children: Vec::new(),
626 });
627 continue;
628 }
629 // Constants and other top-level declarations: emit with no
630 // location info; consumers can still surface them in an outline.
631 out.push(DocumentSymbol {
632 name: symbol,
633 kind: DocumentSymbolKind::Constant,
634 location: None,
635 children: Vec::new(),
636 });
637 }
638 out
639 }
640
641 /// Returns `true` if a function with `fqn` is registered and active in
642 /// the codebase. Case-insensitive lookup with optional leading backslash.
643 pub fn contains_function(&self, fqn: &str) -> bool {
644 let db = self.snapshot_db();
645 db.lookup_function_node(fqn).is_some_and(|n| n.active(&db))
646 }
647
648 /// Returns `true` if a class / interface / trait / enum with `fqcn` is
649 /// registered and active in the codebase.
650 pub fn contains_class(&self, fqcn: &str) -> bool {
651 let db = self.snapshot_db();
652 db.lookup_class_node(fqcn).is_some_and(|n| n.active(&db))
653 }
654
655 /// Returns `true` if `class` has a method named `name` registered. Method
656 /// names are matched case-insensitively (PHP method dispatch semantics).
657 pub fn contains_method(&self, class: &str, name: &str) -> bool {
658 let db = self.snapshot_db();
659 let name_lower = name.to_ascii_lowercase();
660 db.lookup_method_node(class, &name_lower)
661 .is_some_and(|n| n.active(&db))
662 }
663
664 /// Try to resolve `fqcn` via PSR-4 and ingest the mapped file, returning
665 /// a detailed outcome distinguishing "already there" from "freshly loaded".
666 pub fn lazy_load_class_with_outcome(&self, fqcn: &str) -> crate::LazyLoadOutcome {
667 if self.contains_class(fqcn) {
668 return crate::LazyLoadOutcome::AlreadyLoaded;
669 }
670 if self.lazy_load_class(fqcn) {
671 crate::LazyLoadOutcome::Loaded
672 } else {
673 crate::LazyLoadOutcome::NotResolvable
674 }
675 }
676
677 /// Try to resolve `fqcn` via the configured [`crate::ClassResolver`] and
678 /// ingest the mapped file.
679 ///
680 /// This is the LSP-friendly lazy-load entry point: the analyzer never
681 /// touches `vendor/` on its own, but consumers can ask it to resolve
682 /// individual symbols on demand. Designed to be called when a diagnostic
683 /// would otherwise report `UndefinedClass`.
684 ///
685 /// Returns `true` if either the class is already known or a matching
686 /// file was found and successfully ingested. Returns `false` if:
687 /// - No resolver is configured (neither `with_psr4` nor `with_class_resolver` called),
688 /// - The resolver can't map `fqcn` to a file,
689 /// - The file can't be read, or
690 /// - The file parsed but did not define `fqcn`.
691 pub fn lazy_load_class(&self, fqcn: &str) -> bool {
692 if self.contains_class(fqcn) {
693 return true;
694 }
695 let Some(resolver) = &self.resolver else {
696 return false;
697 };
698 let Some(path) = resolver.resolve(fqcn) else {
699 return false;
700 };
701 let Ok(src) = std::fs::read_to_string(&path) else {
702 return false;
703 };
704 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
705 self.ingest_file(file, Arc::from(src));
706 self.contains_class(fqcn)
707 }
708
709 /// Lazy-load every class transitively reachable from `fqcn` via parent /
710 /// interface / trait edges. Useful when the consumer needs not just the
711 /// requested class but enough of its inheritance chain to type-check
712 /// member access.
713 ///
714 /// Walks at most `max_depth` levels (default in batch analysis is 10).
715 /// Returns the number of classes successfully loaded (not counting
716 /// `fqcn` itself if it was already present).
717 pub fn lazy_load_class_transitive(&self, fqcn: &str, max_depth: usize) -> usize {
718 if self.resolver.is_none() {
719 return 0;
720 }
721 let mut loaded = 0;
722 let mut frontier: Vec<String> = vec![fqcn.to_string()];
723 let mut visited: std::collections::HashSet<String> = std::collections::HashSet::new();
724
725 for _ in 0..max_depth {
726 if frontier.is_empty() {
727 break;
728 }
729 let mut next: Vec<String> = Vec::new();
730 for name in frontier.drain(..) {
731 if !visited.insert(name.clone()) {
732 continue;
733 }
734 let was_present = self.contains_class(&name);
735 let resolved = self.lazy_load_class(&name);
736 if resolved && !was_present {
737 loaded += 1;
738 // Walk the new class's parent / interfaces / traits.
739 let db = self.snapshot_db();
740 if let Some(node) = db.lookup_class_node(&name) {
741 if let Some(parent) = node.parent(&db) {
742 next.push(parent.to_string());
743 }
744 for iface in node.interfaces(&db).iter() {
745 next.push(iface.to_string());
746 }
747 for tr in node.traits(&db).iter() {
748 next.push(tr.to_string());
749 }
750 for ext in node.extends(&db).iter() {
751 next.push(ext.to_string());
752 }
753 }
754 }
755 }
756 frontier = next;
757 }
758 loaded
759 }
760
761 /// Retrieve the source text the session has registered for `file`, if
762 /// any. Returns `None` when the file has never been ingested. Used by
763 /// the parallel re-analysis path to re-feed dependents to Pass 2 without
764 /// the caller having to track sources independently.
765 pub fn source_of(&self, file: &str) -> Option<Arc<str>> {
766 let db = self.snapshot_db();
767 let sf = db.lookup_source_file(file)?;
768 Some(sf.text(&db))
769 }
770
771 /// Re-analyze every transitive dependent of `file` in parallel.
772 ///
773 /// When the user saves a file that other files depend on (e.g. editing
774 /// a base class, an interface, or a trait), those dependents may have
775 /// new diagnostics. This method computes them in parallel using rayon
776 /// and returns the per-file analysis results so the LSP server can
777 /// publish updated diagnostics in one batch.
778 ///
779 /// Source text for dependents is retrieved from the session's salsa
780 /// inputs (set by previous `ingest_file` calls) — the caller doesn't
781 /// need to track or re-read files. Files for which the session has no
782 /// source are silently skipped (returns the analyzable subset).
783 ///
784 /// Does not run inference sweeps. For full-fidelity cross-file inferred
785 /// return types, follow up with [`Self::run_inference_sweep`] over the
786 /// affected file set.
787 pub fn analyze_dependents_of(&self, file: &str) -> Vec<(Arc<str>, crate::FileAnalysis)> {
788 use rayon::prelude::*;
789
790 // Phase 1: compute dependents + gather their sources outside the
791 // analysis loop so each worker has everything it needs.
792 let dependents = self.dependency_graph().transitive_dependents(file);
793 if dependents.is_empty() {
794 return Vec::new();
795 }
796 let with_source: Vec<(Arc<str>, Arc<str>)> = dependents
797 .into_iter()
798 .filter_map(|path| {
799 let arc_path: Arc<str> = Arc::from(path.as_str());
800 let src = self.source_of(&path)?;
801 Some((arc_path, src))
802 })
803 .collect();
804 if with_source.is_empty() {
805 return Vec::new();
806 }
807
808 // Phase 2: parallel parse + analyze. Each rayon worker gets its own
809 // database snapshot via FileAnalyzer; writes are isolated to the
810 // session's canonical db (none happen here since we only run Pass 2).
811 with_source
812 .into_par_iter()
813 .map(|(file, source)| {
814 let arena = crate::arena::create_parse_arena(source.len());
815 let parsed = php_rs_parser::parse(&arena, source.as_ref());
816 let analyzer = crate::FileAnalyzer::new(self);
817 let analysis = analyzer.analyze(
818 file.clone(),
819 source.as_ref(),
820 &parsed.program,
821 &parsed.source_map,
822 );
823 (file, analysis)
824 })
825 .collect()
826 }
827
828 /// FQCNs that `file` imports via `use` statements but that aren't yet
829 /// loaded in the session.
830 ///
831 /// Designed as the input to background prefetching: after the LSP server
832 /// ingests an open buffer, it can call this and lazy-load the returned
833 /// FQCNs on a worker thread so the user's first Cmd+Click into vendor
834 /// code doesn't pay the file-read+parse cost.
835 ///
836 /// Returns an empty Vec if the file hasn't been ingested or has no
837 /// unresolved imports.
838 pub fn pending_lazy_loads(&self, file: &str) -> Vec<Arc<str>> {
839 let db = self.snapshot_db();
840 let imports = db.file_imports(file);
841 if imports.is_empty() {
842 return Vec::new();
843 }
844 let mut out = Vec::new();
845 for fqcn in imports.values() {
846 // Cheap check: skip imports already in the codebase.
847 if db.lookup_class_node(fqcn).is_some_and(|n| n.active(&db)) {
848 continue;
849 }
850 // Only worth queueing if the resolver could in principle find it.
851 if let Some(resolver) = &self.resolver {
852 if resolver.resolve(fqcn).is_some() {
853 out.push(Arc::from(fqcn.as_str()));
854 }
855 }
856 }
857 out
858 }
859
860 /// Convenience: synchronously lazy-load every import of `file` that
861 /// isn't already in the codebase. Returns the number successfully loaded.
862 ///
863 /// For non-blocking prefetch, call this from a worker thread:
864 ///
865 /// ```ignore
866 /// let s = session.clone(); // AnalysisSession is wrapped in Arc by callers
867 /// std::thread::spawn(move || {
868 /// s.prefetch_imports(&file_path);
869 /// });
870 /// ```
871 ///
872 /// Internally walks the inheritance chain of each loaded class to a
873 /// shallow depth so member access on imported types type-checks without
874 /// the user paying the cost on their first navigation.
875 pub fn prefetch_imports(&self, file: &str) -> usize {
876 let pending = self.pending_lazy_loads(file);
877 let mut loaded = 0;
878 for fqcn in pending {
879 // Use the transitive walker with a small depth so we pick up
880 // parent classes / interfaces needed for member resolution, but
881 // don't recursively pull in the entire vendor tree.
882 loaded += self.lazy_load_class_transitive(&fqcn, 2);
883 }
884 loaded
885 }
886
887 /// All class / interface / trait / enum FQCNs currently known to the
888 /// session, each paired with the file that defines them when available.
889 ///
890 /// Use this to build workspace-wide views (outline, fuzzy search, etc.).
891 /// Consumers implement their own search/match logic on top — the analyzer
892 /// only exposes the iterator.
893 pub fn all_classes(&self) -> Vec<(Arc<str>, Option<mir_codebase::storage::Location>)> {
894 let db = self.snapshot_db();
895 db.active_class_node_fqcns()
896 .into_iter()
897 .filter_map(|fqcn| {
898 let node = db.lookup_class_node(fqcn.as_ref())?;
899 if !node.active(&db) {
900 return None;
901 }
902 Some((fqcn, node.location(&db)))
903 })
904 .collect()
905 }
906
907 /// All global function FQNs currently known to the session, each paired
908 /// with their declaration location when available.
909 pub fn all_functions(&self) -> Vec<(Arc<str>, Option<mir_codebase::storage::Location>)> {
910 let db = self.snapshot_db();
911 db.active_function_node_fqns()
912 .into_iter()
913 .filter_map(|fqn| {
914 let node = db.lookup_function_node(fqn.as_ref())?;
915 if !node.active(&db) {
916 return None;
917 }
918 Some((fqn, node.location(&db)))
919 })
920 .collect()
921 }
922
923 /// Compute `file`'s outgoing dependency edges and update both the in-memory
924 /// reverse-dep map (always) and the disk cache's reverse-dep graph (if configured).
925 fn update_reverse_deps_for(&self, file: &str) {
926 let db = self.snapshot_db();
927 let targets = file_outgoing_dependencies(&db, file);
928
929 // Always update the in-memory map.
930 self.update_in_memory_reverse_deps(file, &targets);
931
932 // Also persist to disk cache if configured.
933 if let Some(cache) = self.cache.as_deref() {
934 cache.update_reverse_deps_for_file(file, &targets);
935 }
936 }
937
938 /// Update the in-memory reverse dependency map for `file` with `new_targets`.
939 /// Removes `file` from all existing entries, then adds it as a dependent of
940 /// each target in `new_targets` (excluding self-edges).
941 fn update_in_memory_reverse_deps(&self, file: &str, new_targets: &HashSet<String>) {
942 let mut map = self.reverse_dep_map.write();
943 for dependents in map.values_mut() {
944 dependents.remove(file);
945 }
946 map.retain(|_, dependents| !dependents.is_empty());
947 for target in new_targets {
948 if target != file {
949 map.entry(target.clone())
950 .or_default()
951 .insert(file.to_string());
952 }
953 }
954 }
955
956 /// BFS transitive dependents of `file` using the in-memory reverse dep map.
957 ///
958 /// O(D) where D is the number of transitive dependents — faster than
959 /// [`Self::dependency_graph().transitive_dependents()`] which rebuilds the
960 /// full graph on every call. Only covers Pass 1 structural dependencies
961 /// (imports, class hierarchy, type hints); does not include bare FQN body
962 /// references recorded during Pass 2. For full fidelity, use
963 /// `dependency_graph().transitive_dependents()` after Pass 2 is complete.
964 pub fn structural_dependents_of(&self, file: &str) -> Vec<String> {
965 let map = self.reverse_dep_map.read();
966 let mut visited: HashSet<String> = HashSet::new();
967 let mut queue = vec![file.to_string()];
968 let mut result = Vec::new();
969 while let Some(current) = queue.pop() {
970 if !visited.insert(current.clone()) {
971 continue;
972 }
973 if let Some(deps) = map.get(¤t) {
974 for dep in deps {
975 if !visited.contains(dep) {
976 queue.push(dep.clone());
977 result.push(dep.clone());
978 }
979 }
980 }
981 }
982 result
983 }
984
985 /// Cross-file inference sweep. For each `(file, source)` pair, calls the
986 /// Salsa-tracked `infer_file_return_types` query in parallel, then commits
987 /// the collected inferred return types to INPUT fields.
988 ///
989 /// Files must already be ingested via [`Self::ingest_file`] before calling
990 /// this method. Subsequent [`FileAnalyzer::analyze`] calls read the committed
991 /// INPUT fields via O(1) lookups with no lock contention.
992 pub fn run_inference_sweep(&self, files: &[(Arc<str>, Arc<str>)]) {
993 use rayon::prelude::*;
994 let db_priming = self.snapshot_db();
995 let inferred_results: Vec<crate::db::InferredFileTypes> = files
996 .par_iter()
997 .map_with(db_priming, |db, (path, _src)| {
998 if let Some(sf) = db.lookup_source_file(path) {
999 crate::db::infer_file_return_types(db, sf)
1000 } else {
1001 crate::db::InferredFileTypes::empty()
1002 }
1003 })
1004 .collect();
1005 let mut functions = Vec::new();
1006 let mut methods = Vec::new();
1007 for result in inferred_results {
1008 for (fqn, ty) in result.functions.iter() {
1009 functions.push((fqn.clone(), (**ty).clone()));
1010 }
1011 for ((fqcn, name), ty) in result.methods.iter() {
1012 methods.push((fqcn.clone(), name.clone(), (**ty).clone()));
1013 }
1014 }
1015 let mut guard = self.shared_db.salsa.write();
1016 guard.commit_inferred_return_types(functions, methods);
1017 }
1018
1019 /// File dependency graph: which files depend on which other files.
1020 /// Used for incremental invalidation in LSP servers and build systems.
1021 ///
1022 /// File dependency graph: which files depend on which other files.
1023 /// Used for incremental invalidation in LSP servers and build systems.
1024 ///
1025 /// O(edges) — iterates the `file_references` forward index (file → symbol
1026 /// keys it references) which is always current, then resolves each symbol
1027 /// to its defining file via O(1) lookup. Total cost is O(E) where E is the
1028 /// number of (file, symbol) reference edges, vs. the old O(F × S × R) scan.
1029 pub fn dependency_graph(&self) -> crate::DependencyGraph {
1030 let db = self.snapshot_db();
1031
1032 let all_files: Vec<String> = db
1033 .source_file_paths()
1034 .iter()
1035 .map(|f| f.as_ref().to_string())
1036 .collect();
1037
1038 let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
1039 let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
1040
1041 for file in &all_files {
1042 // O(degree(file)) — forward index lookup, no full-table scan.
1043 let symbol_keys = db.file_referenced_symbols(file);
1044 let mut file_deps: HashSet<String> = HashSet::new();
1045 for symbol_key in &symbol_keys {
1046 let lookup: &str = match symbol_key.split_once("::") {
1047 Some((class, _)) => class,
1048 None => symbol_key.as_ref(),
1049 };
1050 if let Some(def_file) = db.symbol_defining_file(lookup) {
1051 let def = def_file.as_ref().to_string();
1052 if &def != file {
1053 file_deps.insert(def);
1054 }
1055 }
1056 }
1057 for dep in &file_deps {
1058 dependents
1059 .entry(dep.clone())
1060 .or_default()
1061 .push(file.clone());
1062 dependencies
1063 .entry(file.clone())
1064 .or_default()
1065 .push(dep.clone());
1066 }
1067 }
1068
1069 for deps in dependents.values_mut() {
1070 deps.sort();
1071 }
1072
1073 crate::DependencyGraph {
1074 dependencies,
1075 dependents,
1076 }
1077 }
1078}
1079
1080/// Compute the set of files `file` depends on: defining files of its imports,
1081/// plus parent / interfaces / traits' defining files for any classes declared
1082/// in `file`. Self-edges are excluded.
1083fn file_outgoing_dependencies(db: &dyn MirDatabase, file: &str) -> HashSet<String> {
1084 let mut targets: HashSet<String> = HashSet::new();
1085
1086 let mut add_target = |symbol: &str| {
1087 if let Some(defining_file) = db.symbol_defining_file(symbol) {
1088 let def = defining_file.as_ref().to_string();
1089 if def != file {
1090 targets.insert(def);
1091 }
1092 }
1093 };
1094
1095 let extract_named_objects = |union: &mir_types::Union| {
1096 union
1097 .types
1098 .iter()
1099 .filter_map(|atomic| match atomic {
1100 mir_types::atomic::Atomic::TNamedObject { fqcn, .. } => Some(fqcn.clone()),
1101 _ => None,
1102 })
1103 .collect::<Vec<_>>()
1104 };
1105
1106 let imports = db.file_imports(file);
1107 for fqcn in imports.values() {
1108 add_target(fqcn);
1109 }
1110
1111 for fqcn in db.symbols_defined_in_file(file) {
1112 let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
1113 continue;
1114 };
1115 if let Some(parent) = node.parent(db) {
1116 add_target(parent.as_ref());
1117 }
1118 for iface in node.interfaces(db).iter() {
1119 add_target(iface.as_ref());
1120 }
1121 for tr in node.traits(db).iter() {
1122 add_target(tr.as_ref());
1123 }
1124
1125 // Add types from properties
1126 for prop in db.class_own_properties(fqcn.as_ref()).iter() {
1127 if let Some(ty) = prop.ty(db) {
1128 for named in extract_named_objects(&ty) {
1129 add_target(named.as_ref());
1130 }
1131 }
1132 }
1133
1134 // Add types from methods
1135 for method in db.class_own_methods(fqcn.as_ref()).iter() {
1136 // Parameter types
1137 for param in method.params(db).iter() {
1138 if let Some(ty) = ¶m.ty {
1139 for named in extract_named_objects(ty.as_ref()) {
1140 add_target(named.as_ref());
1141 }
1142 }
1143 }
1144 // Return type
1145 if let Some(rt) = method.return_type(db) {
1146 for named in extract_named_objects(rt.as_ref()) {
1147 add_target(named.as_ref());
1148 }
1149 }
1150 }
1151 }
1152
1153 // Add types from global functions
1154 for fqn in db.active_function_node_fqns() {
1155 let Some(node) = db.lookup_function_node(fqn.as_ref()) else {
1156 continue;
1157 };
1158 if let Some(file_of_fn) = db.symbol_defining_file(fqn.as_ref()) {
1159 if file_of_fn.as_ref() != file {
1160 continue;
1161 }
1162 } else {
1163 continue;
1164 }
1165
1166 // Parameter types
1167 for param in node.params(db).iter() {
1168 if let Some(ty) = ¶m.ty {
1169 for named in extract_named_objects(ty.as_ref()) {
1170 add_target(named.as_ref());
1171 }
1172 }
1173 }
1174 // Return type
1175 if let Some(rt) = node.return_type(db) {
1176 for named in extract_named_objects(rt.as_ref()) {
1177 add_target(named.as_ref());
1178 }
1179 }
1180 }
1181
1182 // Also track bare-FQN references recorded during Pass 2 (new \Foo(), \Foo::method(),
1183 // \foo()) that do not appear in use-import statements.
1184 for symbol_key in db.file_referenced_symbols(file) {
1185 let lookup: &str = match symbol_key.split_once("::") {
1186 Some((class, _)) => class,
1187 None => &symbol_key,
1188 };
1189 add_target(lookup);
1190 }
1191
1192 targets
1193}