mir_analyzer/session.rs
1//! Session-based analysis API for incremental, per-file analysis.
2//!
3//! [`AnalysisSession`] owns the salsa database and per-session caches for a
4//! long-running analysis context shared across many per-file analyses. Reads
5//! clone the database under a brief lock, then run lock-free; writes hold the
6//! lock briefly to mutate canonical state. `MirDbStorage::clone()` is cheap
7//! (Arc-wrapped registries), so this pattern gives parallel readers without
8//! blocking on concurrent writes for longer than the clone itself.
9//!
10//! See [`crate::file_analyzer::FileAnalyzer`] for the per-file analysis
11//! entry point that operates against a session.
12
13use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
14use std::path::PathBuf;
15use std::sync::Arc;
16
17use parking_lot::RwLock;
18
19use mir_codebase::{FileId, FileIdMap};
20
21use crate::analyzer_db::AnalyzerDb;
22use crate::cache::AnalysisCache;
23use crate::composer::Psr4Map;
24use crate::db::{MirDatabase, MirDbStorage, RefLoc};
25use crate::php_version::PhpVersion;
26
/// Long-lived analysis context. Owns the salsa database and tracks which
/// stubs have been loaded.
///
/// Cheap to clone the inner db for parallel reads; writes funnel through
/// [`Self::ingest_file`], [`Self::invalidate_file`], and the crate-internal
/// [`Self::with_db_mut`].
///
/// The database and all caches are `Arc`-wrapped, so the derived `Clone`
/// produces another handle onto the same state; only the small configuration
/// fields (`php_version`, the user stub lists) are copied.
#[derive(Clone)]
pub struct AnalysisSession {
    /// Shared database management (salsa, file registry, stub tracking).
    pub(crate) db: Arc<AnalyzerDb>,
    /// Optional analysis cache. `None` until [`Self::with_cache`] or
    /// [`Self::with_cache_dir`] attaches one.
    pub(crate) cache: Option<Arc<AnalysisCache>>,
    /// PSR-4 / Composer autoload map. Retained alongside `resolver` so the
    /// `psr4()` accessor can still return a typed `Psr4Map` for callers that
    /// need Composer-specific data (project_files / vendor_files / etc.).
    pub(crate) psr4: Option<Arc<Psr4Map>>,
    /// Generic class resolver used for on-demand lazy loading. When `psr4`
    /// is set via [`Self::with_psr4`], this is populated with the same map
    /// re-typed as `dyn ClassResolver`. Consumers can also supply their own
    /// resolver via [`Self::with_class_resolver`] without going through
    /// Composer.
    resolver: Option<Arc<dyn crate::ClassResolver>>,
    /// PHP language version this session targets; forwarded to stub and
    /// file ingestion.
    pub(crate) php_version: PhpVersion,
    /// Individual user stub files, set by [`Self::with_user_stubs`].
    pub(crate) user_stub_files: Vec<PathBuf>,
    /// Directories of user stubs, set by [`Self::with_user_stubs`].
    pub(crate) user_stub_dirs: Vec<PathBuf>,
    /// Path ↔ FileId mapping shared with `reverse_dep_map`.
    file_id_map: Arc<RwLock<FileIdMap>>,
    /// In-memory reverse dependency map: target_file → set of files that
    /// depend on it. Always maintained (not gated on disk cache presence),
    /// enabling `reanalyze_dependents` and `dependency_graph()` without a
    /// disk cache. Updated in `ingest_file` and `invalidate_file`.
    reverse_dep_map: Arc<RwLock<HashMap<FileId, HashSet<FileId>>>>,
    /// Tracks symbols that were previously defined in a file but have since
    /// been removed (deleted or renamed). When `ingest_file` detects that
    /// a symbol disappears, it records it here so `dependency_graph()` can
    /// still produce edges to files that reference the now-gone symbol.
    ///
    /// Keyed by the file that used to define the symbols. Symbols are removed
    /// from the set when re-added to the same file on a subsequent ingest.
    /// The set may contain symbols with no current referencers; those are
    /// harmless — the `symbol_referencers_of` lookup returns empty.
    stale_defined_symbols: Arc<RwLock<HashMap<String, HashSet<Arc<str>>>>>,
    /// Negative cache: FQCNs that `load_class` already failed on.
    /// The value is the resolver-mapped path (when known) so eviction on
    /// `set_file_text` / `ingest_file` is a path equality check rather than
    /// re-running the resolver per entry. `None` means the resolver itself
    /// couldn't map the FQCN; those entries survive file edits (no source
    /// change makes a never-resolvable name resolvable).
    /// Bounded to `UNRESOLVABLE_CACHE_CAP`; clears on overflow.
    unresolvable_fqcns: UnresolvableCache,
    /// Pluggable source-text provider for lazy-load. Defaults to filesystem
    /// reads ([`crate::FsSourceProvider`]); LSPs swap in a VFS-backed
    /// implementation so unsaved buffers override on-disk content.
    source_provider: Arc<dyn crate::SourceProvider>,
}
81
/// Shared negative-resolution cache: FQCN → optional resolver-mapped path.
/// A `None` value records that no path was known for the FQCN. See the field
/// doc on `AnalysisSession::unresolvable_fqcns`.
type UnresolvableCache = Arc<RwLock<HashMap<Arc<str>, Option<Arc<str>>>>>;
85
/// Cap on the negative-resolution cache (number of FQCN entries). Sized to
/// accommodate a large workspace's worth of genuinely-missing references
/// without unbounded growth. On overflow the cache is cleared; the cost is a
/// few extra resolver calls until it re-fills.
///
/// NOTE(review): the overflow handling lives where entries are inserted,
/// which is outside this part of the file — confirm it matches this doc.
const UNRESOLVABLE_CACHE_CAP: usize = 10_000;
91
92impl AnalysisSession {
93 /// Create a session targeting the given PHP language version.
94 pub fn new(php_version: PhpVersion) -> Self {
95 Self {
96 db: Arc::new(AnalyzerDb::new()),
97 cache: None,
98 psr4: None,
99 resolver: None,
100 php_version,
101 user_stub_files: Vec::new(),
102 user_stub_dirs: Vec::new(),
103 file_id_map: Arc::new(RwLock::new(FileIdMap::new())),
104 reverse_dep_map: Arc::new(RwLock::new(HashMap::default())),
105 stale_defined_symbols: Arc::new(RwLock::new(HashMap::default())),
106 unresolvable_fqcns: Arc::new(RwLock::new(HashMap::default())),
107 source_provider: Arc::new(crate::FsSourceProvider),
108 }
109 }
110
111 /// Swap in a custom [`crate::SourceProvider`]. LSPs install a VFS-backed
112 /// provider here so the analyzer reads from unsaved editor buffers
113 /// instead of disk.
114 pub fn with_source_provider(mut self, provider: Arc<dyn crate::SourceProvider>) -> Self {
115 self.source_provider = provider;
116 self
117 }
118
119 /// Attach a pre-built [`AnalysisCache`] (the body-analysis issue cache) and
120 /// open a sibling definition [`StubSlice`] cache under the same root, so
121 /// callers using this builder get the same speedup as `with_cache_dir`.
122 ///
123 /// Rebuilds the shared database to attach the definition cache — call
124 /// **before** any file is ingested. A debug assertion catches misuse.
125 ///
126 /// [`StubSlice`]: mir_codebase::storage::StubSlice
127 pub fn with_cache(mut self, cache: Arc<AnalysisCache>) -> Self {
128 debug_assert_eq!(
129 self.db.source_file_count(),
130 0,
131 "AnalysisSession::with_cache must be called before any file is ingested"
132 );
133 let dir = cache.cache_dir().to_path_buf();
134 self.db = Arc::new(AnalyzerDb::new().with_cache_dir(&dir));
135 self.cache = Some(cache);
136 self
137 }
138
139 /// Convenience: open a disk-backed cache at `cache_dir` and attach it.
140 ///
141 /// Attaches both the body-analysis issue cache ([`AnalysisCache`]) and the
142 /// definition [`StubSlice`] cache to the shared database. Builds a fresh
143 /// [`AnalyzerDb`] internally — call **before** any file is ingested. A
144 /// debug assertion catches misuse.
145 ///
146 /// [`StubSlice`]: mir_codebase::storage::StubSlice
147 pub fn with_cache_dir(mut self, cache_dir: &std::path::Path) -> Self {
148 debug_assert_eq!(
149 self.db.source_file_count(),
150 0,
151 "AnalysisSession::with_cache_dir must be called before any file is ingested"
152 );
153 self.db = Arc::new(AnalyzerDb::new().with_cache_dir(cache_dir));
154 self.cache = Some(Arc::new(AnalysisCache::open(cache_dir)));
155 self
156 }
157
158 /// Attach a Composer autoload map (PSR-4, PSR-0, classmap, files).
159 /// Sets the same map as the active [`crate::ClassResolver`] so
160 /// [`Self::load_class`] works out of the box.
161 pub fn with_psr4(mut self, map: Arc<Psr4Map>) -> Self {
162 let user_resolver: Arc<dyn crate::ClassResolver> = map.clone();
163 // Wrap with stub awareness so `find_class_like` / `resolve_fqcn_to_path`
164 // can map built-in PHP class FQCNs (`ArrayObject`, `Exception`, …)
165 // to their stub virtual paths.
166 let resolver: Arc<dyn crate::ClassResolver> = Arc::new(crate::ChainedClassResolver::new(
167 user_resolver,
168 Arc::new(crate::StubClassResolver),
169 ));
170 self.psr4 = Some(map);
171 self.resolver = Some(resolver.clone());
172 // Mirror into MirDbStorage so salsa-tracked resolver queries
173 // (`db::resolve_fqcn_to_path`) see the same resolver and are
174 // invalidated on swap.
175 self.db.salsa.write().set_resolver(Some(resolver));
176 self
177 }
178
179 /// Attach a generic class resolver for projects that don't use Composer
180 /// (WordPress, Drupal, custom autoloaders, workspace-walk indexes).
181 /// Replaces any previously-set Composer-backed resolver. Automatically
182 /// wrapped with stub awareness so PHP built-ins remain resolvable.
183 pub fn with_class_resolver(mut self, resolver: Arc<dyn crate::ClassResolver>) -> Self {
184 let wrapped: Arc<dyn crate::ClassResolver> = Arc::new(crate::ChainedClassResolver::new(
185 resolver,
186 Arc::new(crate::StubClassResolver),
187 ));
188 self.db.salsa.write().set_resolver(Some(wrapped.clone()));
189 self.resolver = Some(wrapped);
190 self
191 }
192
193 pub fn with_user_stubs(mut self, files: Vec<PathBuf>, dirs: Vec<PathBuf>) -> Self {
194 self.user_stub_files = files;
195 self.user_stub_dirs = dirs;
196 self
197 }
198
    /// PHP language version this session was created for.
    pub fn php_version(&self) -> PhpVersion {
        self.php_version
    }
202
    /// The attached [`AnalysisCache`], or `None` when the session was built
    /// without `with_cache` / `with_cache_dir`.
    pub fn cache(&self) -> Option<&AnalysisCache> {
        self.cache.as_deref()
    }
206
    /// The Composer autoload map installed via [`Self::with_psr4`], or `None`
    /// when no map was attached.
    pub fn psr4(&self) -> Option<&Psr4Map> {
        self.psr4.as_deref()
    }
210
    /// Deprecated — stub loading is now fully lazy per-AST.
    ///
    /// This is an alias for [`Self::ensure_all_stubs`] kept for API
    /// compatibility; despite the name it loads *every* stub, not a subset.
    /// Internal analysis paths use [`Self::prepare_ast_for_analysis`],
    /// which loads only the stubs referenced by the file under analysis.
    #[deprecated(note = "use ensure_all_stubs() or ensure_stubs_for_ast() instead")]
    pub fn ensure_essential_stubs(&self) {
        self.ensure_all_stubs();
    }
220
221 /// Load every embedded PHP stub plus any configured user stubs.
222 /// Use for batch tools (CLI, full project analysis) where comprehensive
223 /// symbol coverage matters more than cold-start latency.
224 pub fn ensure_all_stubs(&self) {
225 let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
226 self.db.ingest_stub_paths(&paths, self.php_version);
227 self.ensure_user_stubs_loaded();
228 }
229
230 /// Ensure the embedded stub that defines `name` (a function) is ingested.
231 /// Returns `true` when a matching stub exists (whether or not it was
232 /// already loaded), `false` when `name` isn't a known PHP built-in.
233 ///
234 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead —
235 /// it auto-discovers needed stubs from a parsed file.
236 #[doc(hidden)]
237 pub fn ensure_stub_for_function(&self, name: &str) -> bool {
238 match crate::stubs::stub_path_for_function(name) {
239 Some(path) => {
240 self.db.ingest_stub_paths(&[path], self.php_version);
241 true
242 }
243 None => false,
244 }
245 }
246
247 /// Ensure the embedded stub that defines `fqcn` (a class / interface /
248 /// trait / enum) is ingested. Case-insensitive lookup with optional
249 /// leading backslash.
250 ///
251 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead.
252 #[doc(hidden)]
253 pub fn ensure_stub_for_class(&self, fqcn: &str) -> bool {
254 match crate::stubs::stub_path_for_class(fqcn) {
255 Some(path) => {
256 self.db.ingest_stub_paths(&[path], self.php_version);
257 true
258 }
259 None => false,
260 }
261 }
262
263 /// Ensure the embedded stub that defines `name` (a constant) is ingested.
264 ///
265 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead.
266 #[doc(hidden)]
267 pub fn ensure_stub_for_constant(&self, name: &str) -> bool {
268 match crate::stubs::stub_path_for_constant(name) {
269 Some(path) => {
270 self.db.ingest_stub_paths(&[path], self.php_version);
271 true
272 }
273 None => false,
274 }
275 }
276
277 /// Number of distinct embedded stubs currently ingested into the session.
278 /// Useful for diagnostics and bench reporting.
279 pub fn loaded_stub_count(&self) -> usize {
280 self.db.loaded_stubs.lock().len()
281 }
282
283 /// Auto-discover and ingest the embedded stubs needed to cover every
284 /// built-in PHP function / class / constant referenced by `source`.
285 ///
286 /// Used by [`crate::FileAnalyzer::analyze`] to keep essentials-only mode
287 /// correct without forcing callers to enumerate which stubs they need.
288 /// Idempotent — already-loaded stubs are skipped via [`Self::loaded_stubs`].
289 ///
290 /// The discovery scan is a coarse identifier sweep (see
291 /// [`crate::stubs::collect_referenced_builtin_paths`]) — it may pull in
292 /// a slightly larger set than the file strictly needs, but never misses
293 /// a referenced built-in. Cost is sub-millisecond per file.
294 ///
295 /// Fast path: if every embedded stub is already loaded (e.g. after a
296 /// batch tool called [`Self::ensure_all_stubs`]), the source scan
297 /// is skipped entirely.
298 pub fn ensure_stubs_for_source(&self, source: &str) {
299 // Cheap check first: skip the scan entirely when we already know we
300 // have everything. Avoids a ~50-500µs source walk on every analyze
301 // call in batch / warm-session scenarios.
302 {
303 let loaded = self.db.loaded_stubs.lock();
304 if loaded.len() >= crate::stubs::stub_files().len() {
305 return;
306 }
307 }
308 let paths = crate::stubs::collect_referenced_builtin_paths(source);
309 if paths.is_empty() {
310 return;
311 }
312 self.db.ingest_stub_paths(&paths, self.php_version);
313 }
314
315 /// Discover and ingest stubs by walking the parsed AST of a PHP file.
316 ///
317 /// Similar to [`Self::ensure_stubs_for_source`], but takes an already-parsed
318 /// AST instead of raw source text. Produces zero false positives since it
319 /// only extracts identifiers from actual AST nodes (not from strings or
320 /// comments). Preferred over `ensure_stubs_for_source` when the AST is
321 /// already available (e.g., in [`crate::FileAnalyzer`]).
322 ///
323 /// Idempotent and skips the scan if all stubs are already loaded.
324 pub fn ensure_stubs_for_ast(&self, program: &php_ast::owned::Program) {
325 {
326 let loaded = self.db.loaded_stubs.lock();
327 if loaded.len() >= crate::stubs::stub_files().len() {
328 return;
329 }
330 }
331 let paths = crate::stubs::collect_referenced_builtin_paths_from_ast(program);
332 if paths.is_empty() {
333 return;
334 }
335 self.db.ingest_stub_paths(&paths, self.php_version);
336 }
337
    /// Returns true if this session has a configured class resolver
    /// (typically a PSR-4 / classmap autoloader chained with the stub
    /// resolver). Used by `FileAnalyzer` to skip the AST-scan preload
    /// when no resolver is wired up.
    pub fn has_resolver(&self) -> bool {
        self.resolver.is_some()
    }
355
    /// Run both pre-passes (builtin-stub loading and PSR-4 class preloading)
    /// in one call. Replaces the two separate `ensure_stubs_for_ast` /
    /// `preload_psr4_classes_for_ast` calls at every `FileAnalyzer::analyze`
    /// site.
    ///
    /// `program` is the parsed file; `file` is forwarded to the class preload
    /// pass, which uses it to resolve referenced names. Stubs are loaded
    /// first, then classes.
    pub fn prepare_ast_for_analysis(&self, program: &php_ast::owned::Program, file: &str) {
        self.ensure_stubs_for_ast(program);
        self.preload_psr4_classes_for_ast(program, file);
    }
364
365 pub fn preload_psr4_classes_for_ast(&self, program: &php_ast::owned::Program, file: &str) {
366 if self.resolver.is_none() {
367 return;
368 }
369 let refs = collect_class_refs_from_ast(program);
370 if refs.is_empty() {
371 return;
372 }
373 // Resolve names against the file's namespace/imports up front, then
374 // drop the snapshot before lazy-loading (which mutates inputs).
375 let resolved: Vec<String> = {
376 let db = self.snapshot_db();
377 refs.into_iter()
378 .map(|raw| crate::db::resolve_name(&db, file, &raw))
379 .collect()
380 };
381 for fqcn in resolved {
382 if self.contains_class(&fqcn) {
383 continue;
384 }
385 let _ = self.load_class(&fqcn);
386 }
387 }
388
    /// Hand the configured user stub files and directories to the database
    /// for ingestion.
    fn ensure_user_stubs_loaded(&self) {
        self.db
            .ingest_user_stubs(&self.user_stub_files, &self.user_stub_dirs);
    }
393
    /// Cheap clone of the salsa db for a read-only query. The lock is held
    /// only for the duration of the clone, so concurrent readers never
    /// serialize on each other or on writes for longer than the clone itself.
    ///
    /// The returned [`MirDbStorage`] is an owned value; delegates to
    /// `AnalyzerDb::snapshot_db`.
    ///
    /// **Internal API — exposes Salsa types.** Subject to change without
    /// notice. Public consumers should use the typed query methods
    /// ([`Self::definition_of`], [`Self::hover`], etc.) instead.
    #[doc(hidden)]
    pub fn snapshot_db(&self) -> MirDbStorage {
        self.db.snapshot_db()
    }
405
406 /// Commit a batch of reference locations from a db snapshot into the
407 /// session's shared maps. Called by [`crate::FileAnalyzer`] and
408 /// [`crate::BatchFileAnalyzer`] after parallel body analysis to flush the pending
409 /// buffers that accumulate in worker db clones.
410 pub(crate) fn commit_ref_locs_batch(&self, locs: Vec<RefLoc>) {
411 if locs.is_empty() {
412 return;
413 }
414 let guard = self.db.salsa.read();
415 guard.commit_reference_locations_batch(locs);
416 }
417
418 /// Run a closure with read access to a database snapshot.
419 ///
420 /// **Internal API — exposes Salsa types.** Subject to change without
421 /// notice.
422 #[doc(hidden)]
423 pub fn read<R>(&self, f: impl FnOnce(&dyn MirDatabase) -> R) -> R {
424 let db = self.snapshot_db();
425 f(&db)
426 }
427
    /// Eagerly ingest `file`: update its source text in the salsa db, run
    /// definition collection, and ingest the resulting stub slice.
    ///
    /// Calls [`Self::ensure_all_stubs`] on every invocation before touching
    /// the file. Also refreshes the reverse-dependency information for `file`
    /// so cross-file invalidation stays correct across incremental edits —
    /// without rebuilding the graph from scratch.
    ///
    /// If `file` was previously ingested, its old definitions and reference
    /// locations are removed first so renames / deletions don't leave stale
    /// state in the codebase. (Without this, long-running sessions would
    /// accumulate dead reference-location entries indefinitely.)
    ///
    /// Steps, in order:
    /// 1. snapshot the symbols `file` currently defines;
    /// 2. drop the old definitions and re-collect from `source`;
    /// 3. diff old vs. new symbols into `stale_defined_symbols`;
    /// 4. refresh reverse deps and evict matching negative-cache entries;
    /// 5. rebuild the workspace symbol index if declared names changed.
    ///
    /// The salsa lock is taken and released separately for each step, never
    /// held across the whole call.
    pub fn ingest_file(&self, file: Arc<str>, source: Arc<str>) {
        self.ensure_all_stubs();

        // Snapshot symbols defined before clearing — O(symbols_in_file) with forward index.
        let old_symbols: HashSet<Arc<str>> = {
            let guard = self.db.salsa.read();
            guard.file_defined_symbols(file.as_ref())
        };

        {
            let mut guard = self.db.salsa.write();
            guard.remove_file_definitions(file.as_ref());
        }
        // The returned definitions are not needed here; ingestion is the
        // side effect we want.
        let _file_defs =
            self.db
                .collect_and_ingest_file(file.clone(), source.as_ref(), self.php_version);

        // Snapshot symbols after ingesting — O(symbols_in_file).
        let new_symbols: HashSet<Arc<str>> = {
            let guard = self.db.salsa.read();
            guard.file_defined_symbols(file.as_ref())
        };

        // Symbols removed from this file must be tracked so dependency_graph()
        // can still produce edges to files referencing the now-gone symbols.
        // `re_added` holds every symbol present now but absent before; any of
        // those that were recorded as stale earlier are no longer stale.
        let deleted: Vec<Arc<str>> = old_symbols.difference(&new_symbols).cloned().collect();
        let re_added: Vec<Arc<str>> = new_symbols.difference(&old_symbols).cloned().collect();
        if !deleted.is_empty() || !re_added.is_empty() {
            let mut stale = self.stale_defined_symbols.write();
            let entry = stale.entry(file.as_ref().to_string()).or_default();
            for sym in deleted {
                entry.insert(sym);
            }
            for sym in &re_added {
                entry.remove(sym);
            }
            // Don't keep empty per-file sets around.
            if entry.is_empty() {
                stale.remove(file.as_ref());
            }
        }

        self.update_reverse_deps_for(&file);
        // Only evict cache entries whose resolver-mapped path equals this
        // file. FQCNs the resolver can't map (psr4 miss) stay cached — no
        // ingest could change their fate. Avoids the per-keystroke storm
        // where wholesale clearing forces every unresolved FQCN to re-hit
        // the resolver on the next FileAnalyzer iteration.
        self.evict_unresolvable_for_file(&file);

        // If the workspace symbol index singleton has already been built,
        // check whether this edit changed any declared names. If so, rebuild
        // the singleton so subsequent `find_class_like` / `find_function`
        // calls see the new names. Body-only edits skip this (name-only
        // PartialEq on FileDeclarations returns equal → no rebuild → the
        // HIGH-durability singleton dep short-circuits in O(1)).
        {
            let mut guard = self.db.salsa.write();
            if guard.workspace_symbol_index_singleton().is_some() {
                if let Some(sf) = guard.lookup_source_file(file.as_ref()) {
                    if guard.file_declarations_changed(sf) {
                        guard.rebuild_workspace_symbol_index();
                    }
                }
            }
        }
    }
504
505 /// Register `source` as the text of `file` in the salsa input layer **without**
506 /// parsing or running definition collection.
507 ///
508 /// This is the LSP-friendly bulk-population entry point: after a workspace
509 /// scan, callers can feed every discovered file's text to the session
510 /// cheaply (an Arc clone plus a HashMap insert per file). Name resolution
511 /// then happens on demand via [`Self::load_class`], which reads
512 /// the file from disk through the configured [`crate::ClassResolver`] and
513 /// runs definition collection lazily when a class FQCN actually needs to resolve.
514 ///
515 /// Contrast with [`Self::ingest_file`], which eagerly parses, runs definition collection,
516 /// and populates the symbol index. Use `ingest_file` for files the user is
517 /// actively editing (where in-memory text diverges from disk); use
518 /// `set_file_text` for files known only through the workspace scan.
519 ///
520 /// Clears the negative cache: a previously-unresolvable FQCN may now
521 /// resolve if its defining file is among the newly-registered set.
522 pub fn set_file_text(&self, file: Arc<str>, source: Arc<str>) {
523 {
524 let mut guard = self.db.salsa.write();
525 guard.upsert_source_file(file.clone(), source);
526 }
527 self.evict_unresolvable_for_file(&file);
528 }
529
530 /// Bulk-register vendor / library files with HIGH salsa durability.
531 ///
532 /// HIGH-durability files are not expected to change during the session.
533 /// When a LOW-durability project file is edited, salsa can skip O(N)
534 /// dependency verification for every HIGH-durability file, reducing
535 /// `workspace_symbol_index` re-verification cost to O(project files only).
536 ///
537 /// Definition collection runs lazily on first symbol access; no parsing at call time.
538 pub fn set_vendor_files<I>(&self, files: I)
539 where
540 I: IntoIterator<Item = (Arc<str>, Arc<str>)>,
541 {
542 let mut guard = self.db.salsa.write();
543 for (file, source) in files {
544 guard.upsert_source_file_with_durability(file, source, salsa::Durability::HIGH);
545 }
546 }
547
548 /// Build or refresh the `WorkspaceSymbolIndexSingleton` from all currently
549 /// registered files.
550 ///
551 /// After this call, `find_class_like`, `find_function`, and
552 /// `find_global_constant` read `singleton.index(db)` — a single
553 /// `Durability::HIGH` tracked dep — instead of recomputing the full
554 /// O(N_files) dep list via `workspace_symbol_index`. On subsequent
555 /// LOW-durability (project-file) body edits the dep short-circuits in O(1).
556 ///
557 /// Call this once after all vendor + stub + project files have been
558 /// ingested (end of workspace warm-up). Also called automatically by
559 /// [`Self::ingest_file`] when a file's declared names change.
560 pub fn rebuild_workspace_symbol_index(&self) {
561 self.db.salsa.write().rebuild_workspace_symbol_index();
562 }
563
564 /// Bulk variant of [`Self::set_file_text`]. Acquires the salsa write lock
565 /// once for the entire batch instead of once per file.
566 ///
567 /// The intended LSP scan loop is:
568 /// ```text
569 /// let files: Vec<_> = walk_workspace()
570 /// .map(|path| (path, fs::read(&path).unwrap()))
571 /// .collect();
572 /// session.set_workspace_files(files);
573 /// ```
574 /// After this call, every file's source text is known to salsa. No
575 /// parsing has happened yet — Definition collection runs per file on the first
576 /// `load_class` that needs to consult it.
577 pub fn set_workspace_files<I>(&self, files: I)
578 where
579 I: IntoIterator<Item = (Arc<str>, Arc<str>)>,
580 {
581 let registered_paths: Vec<Arc<str>> = {
582 let mut guard = self.db.salsa.write();
583 files
584 .into_iter()
585 .map(|(file, source)| {
586 guard.upsert_source_file(file.clone(), source);
587 file
588 })
589 .collect()
590 };
591 if !registered_paths.is_empty() && self.resolver.is_some() {
592 self.evict_unresolvable_for_files(®istered_paths);
593 }
594 }
595
596 /// Drop a file's contribution to the session: codebase definitions,
597 /// reference locations, salsa input handle, cache entry, and outgoing
598 /// reverse-dependency edges. Cache entries of *dependent* files are
599 /// also evicted (cross-file invalidation).
600 ///
601 /// Use this when a file is closed by the consumer, or before a re-ingest
602 /// of substantially changed content. (Plain re-ingest via
603 /// [`Self::ingest_file`] also drops old definitions, but does not
604 /// remove the salsa input handle — call this for full cleanup.)
605 pub fn invalidate_file(&self, file: &str) {
606 {
607 let mut guard = self.db.salsa.write();
608 guard.remove_file_definitions(file);
609 guard.remove_source_file(file);
610 }
611 // Remove this file's outgoing deps from the in-memory reverse dep map.
612 self.update_in_memory_reverse_deps(file, &HashSet::default());
613 // Clear stale symbol tracking for this file — it's fully gone.
614 self.stale_defined_symbols.write().remove(file);
615 if let Some(cache) = &self.cache {
616 cache.update_reverse_deps_for_file(file, &HashSet::default());
617 cache.evict_with_dependents(&[file.to_string()]);
618 }
619 // The file is gone; cache entries that previously mapped to it stay
620 // unresolvable until the file (or another with matching symbols) is
621 // ingested again. Selective evict mirrors the ingest path.
622 self.evict_unresolvable_for_file(file);
623 }
624
625 /// Number of files currently tracked in this session's salsa input set.
626 /// Stable across reads; useful for diagnostics and memory bounds checks.
627 pub fn tracked_file_count(&self) -> usize {
628 let guard = self.db.salsa.read();
629 guard.source_file_count()
630 }
631
    // -----------------------------------------------------------------------
    // Codebase queries
    //
    // The `*_cached` variants are read-only: they take a brief lock to clone
    // the db, then run the lookup against the owned snapshot — concurrent
    // edits proceed without blocking. The non-cached variants may first
    // lazy-load missing classes (mutating salsa inputs) and then delegate to
    // their `*_cached` counterpart.
    // -----------------------------------------------------------------------
638
639 /// Resolve a top-level symbol (class or function) to its declaration
640 /// location. Powers go-to-definition.
641 ///
642 /// **Side effects:** if the symbol isn't yet known, this may invoke the
643 /// configured [`crate::SourceProvider`] to fault in additional files and
644 /// mutate the salsa input set. Use [`Self::definition_of_cached`] for a
645 /// pure variant that only consults already-loaded state.
646 ///
647 /// Returns:
648 /// - `Ok(Location)` — symbol found with a source location
649 /// - `Err(NotFound)` — no such symbol in the codebase
650 /// - `Err(NoSourceLocation)` — symbol exists but has no recorded span
651 /// (e.g. some stub-only declarations)
652 pub fn definition_of(
653 &self,
654 symbol: &crate::Name,
655 ) -> Result<mir_types::Location, crate::SymbolLookupError> {
656 // Trigger any necessary lazy-load mutations before snapshotting.
657 match symbol {
658 crate::Name::Class(fqcn) => {
659 let _ = self.load_class(fqcn.as_ref());
660 }
661 crate::Name::Function(fqn) => {
662 let _ = self.load_class(fqn.as_ref());
663 }
664 crate::Name::Method { class, .. }
665 | crate::Name::Property { class, .. }
666 | crate::Name::ClassConstant { class, .. } => {
667 let _ = self.load_class(class.as_ref());
668 }
669 _ => {}
670 }
671 self.definition_of_cached(symbol)
672 }
673
674 /// Pure variant of [`Self::definition_of`]. Never invokes the
675 /// [`crate::SourceProvider`] and never mutates salsa inputs; resolves
676 /// only against state already loaded by `set_file_text` / `ingest_file`.
677 /// Returns `Err(NotFound)` when the symbol isn't in the loaded set, even
678 /// if a resolver could in principle map it.
679 pub fn definition_of_cached(
680 &self,
681 symbol: &crate::Name,
682 ) -> Result<mir_types::Location, crate::SymbolLookupError> {
683 let db = self.snapshot_db();
684 match symbol {
685 crate::Name::Class(fqcn) => {
686 let here = crate::db::Fqcn::from_str(&db, fqcn.as_ref());
687 let class = crate::db::find_class_like(&db, here)
688 .ok_or(crate::SymbolLookupError::NotFound)?;
689 class
690 .location()
691 .cloned()
692 .ok_or(crate::SymbolLookupError::NoSourceLocation)
693 }
694 crate::Name::Function(fqn) => {
695 let here = crate::db::Fqcn::from_str(&db, fqn.as_ref());
696 let f = crate::db::find_function(&db, here)
697 .ok_or(crate::SymbolLookupError::NotFound)?;
698 f.location
699 .clone()
700 .ok_or(crate::SymbolLookupError::NoSourceLocation)
701 }
702 crate::Name::Method { class, name }
703 | crate::Name::Property { class, name }
704 | crate::Name::ClassConstant { class, name } => {
705 crate::db::member_location(&db, class, name)
706 .ok_or(crate::SymbolLookupError::NotFound)
707 }
708 crate::Name::GlobalConstant(_) => Err(crate::SymbolLookupError::NoSourceLocation),
709 }
710 }
711
712 /// Hover information for a symbol: type, docstring, and definition location.
713 ///
714 /// Use [`crate::FileAnalysis::symbol_at`] to find the symbol at a cursor
715 /// position, then build a [`crate::Name`] from its `kind`. This method
716 /// assembles the displayable hover data.
717 ///
718 /// **Side effects:** when `symbol`'s owning class isn't yet loaded, this
719 /// may invoke the configured [`crate::SourceProvider`] to fault in
720 /// dependencies. Use [`Self::hover_cached`] for a pure variant.
721 ///
722 /// Returns `Err(NotFound)` if the symbol doesn't exist. May still return
723 /// `Ok` with `docstring: None` or `definition: None` if those specific
724 /// pieces aren't available.
725 pub fn hover(
726 &self,
727 symbol: &crate::Name,
728 ) -> Result<crate::HoverInfo, crate::SymbolLookupError> {
729 // Trigger lazy loading for class-rooted symbols before snapshotting.
730 // No-op when the class is already known; ensures inherited member
731 // lookups have the chain present.
732 match symbol {
733 crate::Name::Class(fqcn) => {
734 self.load_class(fqcn.as_ref());
735 }
736 crate::Name::Method { class, .. }
737 | crate::Name::Property { class, .. }
738 | crate::Name::ClassConstant { class, .. } => {
739 // 10 mirrors the default depth used by reanalyze_dependents.
740 self.load_class_transitive(class.as_ref(), 10);
741 }
742 _ => {}
743 }
744 self.hover_cached(symbol)
745 }
746
747 /// Pure variant of [`Self::hover`]. Never invokes the
748 /// [`crate::SourceProvider`]; consults only the already-loaded db.
749 pub fn hover_cached(
750 &self,
751 symbol: &crate::Name,
752 ) -> Result<crate::HoverInfo, crate::SymbolLookupError> {
753 use mir_types::{Atomic, Type};
754 let db = self.snapshot_db();
755 match symbol {
756 crate::Name::Function(fqn) => {
757 let here = crate::db::Fqcn::from_str(&db, fqn.as_ref());
758 let f = crate::db::find_function(&db, here)
759 .ok_or(crate::SymbolLookupError::NotFound)?;
760 let ty = f
761 .return_type
762 .as_deref()
763 .cloned()
764 .unwrap_or_else(Type::mixed);
765 let docstring = f.docstring.as_ref().map(|s| s.to_string());
766 Ok(crate::HoverInfo {
767 ty,
768 docstring,
769 definition: f.location.clone(),
770 })
771 }
772 crate::Name::Method { class, name } => {
773 let here = crate::db::Fqcn::from_str(&db, class.as_ref());
774 let (_, m) = crate::db::find_method_in_chain(&db, here, name)
775 .ok_or(crate::SymbolLookupError::NotFound)?;
776 let ty = m
777 .return_type
778 .as_deref()
779 .cloned()
780 .unwrap_or_else(Type::mixed);
781 let docstring = m.docstring.as_ref().map(|s| s.to_string());
782 Ok(crate::HoverInfo {
783 ty,
784 docstring,
785 definition: m.location.clone(),
786 })
787 }
788 crate::Name::Class(fqcn) => {
789 let here = crate::db::Fqcn::from_str(&db, fqcn.as_ref());
790 let class = crate::db::find_class_like(&db, here)
791 .ok_or(crate::SymbolLookupError::NotFound)?;
792 let ty = Type::single(Atomic::TNamedObject {
793 fqcn: mir_types::Name::from(fqcn.as_ref()),
794 type_params: mir_types::union::empty_type_params(),
795 });
796 Ok(crate::HoverInfo {
797 ty,
798 docstring: None,
799 definition: class.location().cloned(),
800 })
801 }
802 crate::Name::Property { class, name } => {
803 let here = crate::db::Fqcn::from_str(&db, class.as_ref());
804 let (_, p) = crate::db::find_property_in_chain(&db, here, name)
805 .ok_or(crate::SymbolLookupError::NotFound)?;
806 let ty = p.ty.clone().unwrap_or_else(Type::mixed);
807 Ok(crate::HoverInfo {
808 ty,
809 docstring: None,
810 definition: p.location.clone(),
811 })
812 }
813 crate::Name::ClassConstant { class, name } => {
814 let here = crate::db::Fqcn::from_str(&db, class.as_ref());
815 let (_, c) = crate::db::find_class_constant_in_chain(&db, here, name)
816 .ok_or(crate::SymbolLookupError::NotFound)?;
817 Ok(crate::HoverInfo {
818 ty: c.ty.clone(),
819 docstring: None,
820 definition: c.location.clone(),
821 })
822 }
823 crate::Name::GlobalConstant(fqn) => {
824 let here = crate::db::Fqcn::from_str(&db, fqn.as_ref());
825 let ty = crate::db::find_global_constant(&db, here)
826 .ok_or(crate::SymbolLookupError::NotFound)?;
827 Ok(crate::HoverInfo {
828 ty: (*ty).clone(),
829 docstring: None,
830 definition: None,
831 })
832 }
833 }
834 }
835
836 /// Raw reference locations indexed by string symbol key, kept for tests
837 /// that use the legacy stringly-typed API. Prefer [`Self::references_to`]
838 /// with a typed [`crate::Name`].
839 #[doc(hidden)]
840 pub fn reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
841 use crate::db::MirDatabase;
842 let db = self.snapshot_db();
843 db.reference_locations(symbol)
844 }
845
846 /// Every recorded reference to `symbol` with its source location as a Range.
847 /// Use [`crate::FileAnalysis::symbol_at`] to find the symbol at a cursor,
848 /// build a [`crate::Name`] from it, and pass it here.
849 pub fn references_to(&self, symbol: &crate::Name) -> Vec<(Arc<str>, crate::Range)> {
850 let db = self.snapshot_db();
851 let key = symbol.codebase_key();
852 db.reference_locations(&key)
853 .into_iter()
854 .map(|(file, line, col_start, col_end)| {
855 let range = crate::Range {
856 start: crate::Position {
857 line,
858 column: col_start as u32,
859 },
860 end: crate::Position {
861 line,
862 column: col_end as u32,
863 },
864 };
865 (file, range)
866 })
867 .collect()
868 }
869
870 /// Class-level issues (inheritance violations, abstract-method gaps, override
871 /// incompatibilities) for the given set of files.
872 ///
873 /// These checks are cross-file by nature and are not emitted by
874 /// [`crate::FileAnalyzer::analyze`]. Call this after ingesting or
875 /// re-analyzing a file and its dependents to get the full diagnostic picture.
876 ///
877 /// Circular-inheritance checks always run against the full workspace graph
878 /// regardless of the `files` filter — a cycle is a workspace-wide problem.
879 pub fn class_issues(&self, files: &[Arc<str>]) -> Vec<crate::Issue> {
880 let db = self.snapshot_db();
881 let file_set: HashSet<Arc<str>> = files.iter().cloned().collect();
882 let file_data: Vec<(Arc<str>, Arc<str>)> = files
883 .iter()
884 .filter_map(|f| Some((f.clone(), self.source_of(f)?)))
885 .collect();
886 crate::class::ClassAnalyzer::with_files(&db, file_set, &file_data).analyze_all()
887 }
888
889 /// All declarations defined in `file` as a **hierarchical tree**.
890 ///
891 /// Classes/interfaces/traits/enums are returned with their methods,
892 /// properties, and constants nested in `children`. Top-level functions
893 /// and constants are returned with empty `children`.
894 pub fn document_symbols(&self, file: &str) -> Vec<crate::symbol::DocumentSymbol> {
895 use crate::symbol::{DeclarationKind, DocumentSymbol};
896
897 let db = self.snapshot_db();
898 let Some(sf) = db.lookup_source_file(file) else {
899 return Vec::new();
900 };
901 let defs = crate::db::collect_file_definitions(&db, sf);
902 let mut out: Vec<DocumentSymbol> = Vec::new();
903
904 let class_children =
905 |methods: &indexmap::IndexMap<Arc<str>, Arc<mir_codebase::storage::MethodDef>>,
906 props: Option<&indexmap::IndexMap<Arc<str>, mir_codebase::storage::PropertyDef>>,
907 consts: &indexmap::IndexMap<Arc<str>, mir_codebase::storage::ConstantDef>,
908 is_enum: bool|
909 -> Vec<DocumentSymbol> {
910 let mut out: Vec<DocumentSymbol> = Vec::new();
911 for (_, m) in methods.iter() {
912 out.push(DocumentSymbol {
913 name: m.name.clone(),
914 kind: DeclarationKind::Method,
915 location: m.location.clone(),
916 children: Vec::new(),
917 });
918 }
919 if let Some(props) = props {
920 for (_, p) in props.iter() {
921 out.push(DocumentSymbol {
922 name: p.name.clone(),
923 kind: DeclarationKind::Property,
924 location: p.location.clone(),
925 children: Vec::new(),
926 });
927 }
928 }
929 let const_kind = if is_enum {
930 DeclarationKind::EnumCase
931 } else {
932 DeclarationKind::Constant
933 };
934 for (_, c) in consts.iter() {
935 out.push(DocumentSymbol {
936 name: c.name.clone(),
937 kind: const_kind,
938 location: c.location.clone(),
939 children: Vec::new(),
940 });
941 }
942 out
943 };
944
945 for c in defs.slice.classes.iter() {
946 out.push(DocumentSymbol {
947 name: c.fqcn.clone(),
948 kind: DeclarationKind::Class,
949 location: c.location.clone(),
950 children: class_children(
951 &c.own_methods,
952 Some(&c.own_properties),
953 &c.own_constants,
954 false,
955 ),
956 });
957 }
958 for i in defs.slice.interfaces.iter() {
959 out.push(DocumentSymbol {
960 name: i.fqcn.clone(),
961 kind: DeclarationKind::Interface,
962 location: i.location.clone(),
963 children: class_children(&i.own_methods, None, &i.own_constants, false),
964 });
965 }
966 for t in defs.slice.traits.iter() {
967 out.push(DocumentSymbol {
968 name: t.fqcn.clone(),
969 kind: DeclarationKind::Trait,
970 location: t.location.clone(),
971 children: class_children(
972 &t.own_methods,
973 Some(&t.own_properties),
974 &t.own_constants,
975 false,
976 ),
977 });
978 }
979 for e in defs.slice.enums.iter() {
980 let mut children = class_children(&e.own_methods, None, &e.own_constants, true);
981 for (_, case) in e.cases.iter() {
982 children.push(DocumentSymbol {
983 name: case.name.clone(),
984 kind: DeclarationKind::EnumCase,
985 location: case.location.clone(),
986 children: Vec::new(),
987 });
988 }
989 out.push(DocumentSymbol {
990 name: e.fqcn.clone(),
991 kind: DeclarationKind::Enum,
992 location: e.location.clone(),
993 children,
994 });
995 }
996 for f in defs.slice.functions.iter() {
997 out.push(DocumentSymbol {
998 name: f.fqn.clone(),
999 kind: DeclarationKind::Function,
1000 location: f.location.clone(),
1001 children: Vec::new(),
1002 });
1003 }
1004 for (name, _) in defs.slice.constants.iter() {
1005 out.push(DocumentSymbol {
1006 name: name.clone(),
1007 kind: DeclarationKind::Constant,
1008 location: None,
1009 children: Vec::new(),
1010 });
1011 }
1012 out
1013 }
1014
1015 /// Returns `true` if a function with `fqn` is registered and active in
1016 /// the codebase. Case-insensitive lookup with optional leading backslash.
1017 pub fn contains_function(&self, fqn: &str) -> bool {
1018 let db = self.snapshot_db();
1019 crate::db::function_exists(&db, fqn)
1020 }
1021
1022 /// Returns `true` if a class / interface / trait / enum with `fqcn` is
1023 /// registered and active in the codebase.
1024 pub fn contains_class(&self, fqcn: &str) -> bool {
1025 let db = self.snapshot_db();
1026 crate::db::class_exists(&db, fqcn)
1027 }
1028
1029 /// Returns `true` if `class` has a method named `name` registered. Method
1030 /// names are matched case-insensitively (PHP method dispatch semantics).
1031 pub fn contains_method(&self, class: &str, name: &str) -> bool {
1032 let db = self.snapshot_db();
1033 crate::db::has_method_in_chain(&db, class, name)
1034 }
1035
1036 /// Resolve `fqcn` via the configured [`crate::ClassResolver`] and ingest
1037 /// the mapped file. The session keeps a negative cache so repeated calls
1038 /// for an unresolvable name don't re-hit the resolver; the cache is
1039 /// invalidated on any [`Self::ingest_file`] / [`Self::invalidate_file`].
1040 ///
1041 /// This is the LSP-friendly entry point: the analyzer never touches
1042 /// `vendor/` on its own, but consumers can ask it to resolve individual
1043 /// symbols on demand. Designed to be called when a diagnostic would
1044 /// otherwise report `UndefinedClass`.
1045 ///
1046 /// Returns a [`crate::LoadOutcome`] distinguishing
1047 /// already-loaded / freshly-loaded / not-resolvable. Use
1048 /// [`crate::LoadOutcome::is_loaded`] when only success matters.
1049 pub fn load_class(&self, fqcn: &str) -> crate::LoadOutcome {
1050 if self.contains_class(fqcn) {
1051 return crate::LoadOutcome::AlreadyLoaded;
1052 }
1053 if self.unresolvable_fqcns.read().contains_key(fqcn) {
1054 return crate::LoadOutcome::NotResolvable;
1055 }
1056 if self.try_resolve_and_ingest(fqcn) {
1057 crate::LoadOutcome::Loaded
1058 } else {
1059 // Cache the failure with the resolver-mapped path (if any) so
1060 // future file edits can selectively evict.
1061 let resolved_path: Option<Arc<str>> = self
1062 .resolver
1063 .as_ref()
1064 .and_then(|r| r.resolve(fqcn))
1065 .map(|p| Arc::from(p.to_string_lossy().as_ref()));
1066 let key: Arc<str> = Arc::from(fqcn);
1067 let mut cache = self.unresolvable_fqcns.write();
1068 if cache.len() >= UNRESOLVABLE_CACHE_CAP {
1069 cache.clear();
1070 }
1071 cache.insert(key, resolved_path);
1072 crate::LoadOutcome::NotResolvable
1073 }
1074 }
1075
1076 /// Inner load path: resolver lookup + ingest, no caching. Returns `true`
1077 /// iff `fqcn` ends up registered. Failure buckets are recorded for
1078 /// telemetry.
1079 fn try_resolve_and_ingest(&self, fqcn: &str) -> bool {
1080 use crate::metrics::{record_lazy_load_failure, LazyLoadFailure};
1081 let Some(resolver) = &self.resolver else {
1082 record_lazy_load_failure(LazyLoadFailure::NoResolver, fqcn);
1083 return false;
1084 };
1085 let Some(path) = resolver.resolve(fqcn) else {
1086 record_lazy_load_failure(LazyLoadFailure::ResolverNone, fqcn);
1087 return false;
1088 };
1089 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
1090 // Prefer in-memory text from a prior `set_file_text` /
1091 // `set_workspace_files` call; fall back to disk. This makes the LSP's
1092 // unsaved-edit buffer authoritative over the on-disk content for the
1093 // same path.
1094 let src: Arc<str> = match self.source_of(&file) {
1095 Some(text) => text,
1096 None => match self.source_provider.read(&path.to_string_lossy()) {
1097 Some(text) => text,
1098 None => {
1099 record_lazy_load_failure(LazyLoadFailure::SourceUnreadable, fqcn);
1100 return false;
1101 }
1102 },
1103 };
1104 self.ingest_file(file, src);
1105 if self.contains_class(fqcn) {
1106 true
1107 } else {
1108 record_lazy_load_failure(LazyLoadFailure::IngestThenMissing, fqcn);
1109 false
1110 }
1111 }
1112
1113 /// Lazy-load every class transitively reachable from `fqcn` via parent /
1114 /// interface / trait edges. Useful when the consumer needs not just the
1115 /// requested class but enough of its inheritance chain to type-check
1116 /// member access.
1117 ///
1118 /// Walks at most `max_depth` levels (default in batch analysis is 10).
1119 /// Returns the number of classes successfully loaded (not counting
1120 /// `fqcn` itself if it was already present).
1121 pub fn load_class_transitive(&self, fqcn: &str, max_depth: usize) -> usize {
1122 if self.resolver.is_none() {
1123 return 0;
1124 }
1125 let mut loaded = 0;
1126 let mut frontier: Vec<String> = vec![fqcn.to_string()];
1127 let mut visited: std::collections::HashSet<String> = std::collections::HashSet::default();
1128
1129 for _ in 0..max_depth {
1130 if frontier.is_empty() {
1131 break;
1132 }
1133 let mut next: Vec<String> = Vec::new();
1134 for name in frontier.drain(..) {
1135 if !visited.insert(name.clone()) {
1136 continue;
1137 }
1138 let was_present = self.contains_class(&name);
1139 let resolved = self.load_class(&name).is_loaded();
1140 if resolved && !was_present {
1141 loaded += 1;
1142 // Walk the new class's parent / interfaces / traits via pull.
1143 let db = self.snapshot_db();
1144 let here = crate::db::Fqcn::from_str(&db, name.as_str());
1145 if let Some(class) = crate::db::find_class_like(&db, here) {
1146 if let Some(parent) = class.parent() {
1147 next.push(parent.to_string());
1148 }
1149 for iface in class.interfaces().iter() {
1150 next.push(iface.to_string());
1151 }
1152 for tr in class.class_traits().iter() {
1153 next.push(tr.to_string());
1154 }
1155 for ext in class.extends().iter() {
1156 next.push(ext.to_string());
1157 }
1158 }
1159 }
1160 }
1161 frontier = next;
1162 }
1163 loaded
1164 }
1165
1166 /// Evict every negative-cache entry whose stored resolver-mapped path
1167 /// equals `file`. FQCNs cached as never-resolvable (path `None`) are left
1168 /// alone — no source-text change can make them resolvable.
1169 fn evict_unresolvable_for_file(&self, file: &str) {
1170 let mut cache = self.unresolvable_fqcns.write();
1171 if cache.is_empty() {
1172 return;
1173 }
1174 cache.retain(|_fqcn, path| path.as_deref() != Some(file));
1175 }
1176
1177 /// Bulk variant of [`Self::evict_unresolvable_for_file`]. One `HashSet`
1178 /// build + one pass over the cache; no resolver calls.
1179 fn evict_unresolvable_for_files(&self, files: &[Arc<str>]) {
1180 let mut cache = self.unresolvable_fqcns.write();
1181 if cache.is_empty() {
1182 return;
1183 }
1184 let registered: HashSet<&str> = files.iter().map(|f| f.as_ref()).collect();
1185 cache.retain(|_fqcn, path| match path {
1186 Some(p) => !registered.contains(p.as_ref()),
1187 None => true,
1188 });
1189 }
1190
1191 /// Retrieve the source text the session has registered for `file`, if
1192 /// any. Returns `None` when the file has never been ingested. Used by
1193 /// the parallel re-analysis path to re-feed dependents to body analysis without
1194 /// the caller having to track sources independently.
1195 pub fn source_of(&self, file: &str) -> Option<Arc<str>> {
1196 let db = self.snapshot_db();
1197 let sf = db.lookup_source_file(file)?;
1198 Some(sf.text(&db))
1199 }
1200
1201 /// Re-analyze every transitive dependent of `file` in parallel.
1202 ///
1203 /// When the user saves a file that other files depend on (e.g. editing
1204 /// a base class, an interface, or a trait), those dependents may have
1205 /// new diagnostics. This method computes them in parallel using rayon
1206 /// and returns the per-file analysis results so the LSP server can
1207 /// publish updated diagnostics in one batch.
1208 ///
1209 /// Source text for dependents is retrieved from the session's salsa
1210 /// inputs (set by previous `ingest_file` calls) — the caller doesn't
1211 /// need to track or re-read files. Files for which the session has no
1212 /// source are silently skipped (returns the analyzable subset).
1213 ///
1214 /// Cross-file inferred return types are resolved on demand via salsa.
1215 pub fn reanalyze_dependents(&self, file: &str) -> Vec<(Arc<str>, crate::FileAnalysis)> {
1216 use rayon::prelude::*;
1217
1218 // Phase 1: compute dependents + gather their sources outside the
1219 // analysis loop so each worker has everything it needs.
1220 let dependents = self.dependency_graph().transitive_dependents(file);
1221 if dependents.is_empty() {
1222 return Vec::new();
1223 }
1224 let with_source: Vec<(Arc<str>, Arc<str>)> = dependents
1225 .into_iter()
1226 .filter_map(|path| {
1227 let arc_path: Arc<str> = Arc::from(path.as_str());
1228 let src = self.source_of(&path)?;
1229 Some((arc_path, src))
1230 })
1231 .collect();
1232 if with_source.is_empty() {
1233 return Vec::new();
1234 }
1235
1236 // Phase 2: parallel parse + analyze. Each rayon worker gets its own
1237 // database snapshot via FileAnalyzer; writes are isolated to the
1238 // session's canonical db (none happen here since we only run body analysis).
1239 with_source
1240 .into_par_iter()
1241 .map(|(file, source)| {
1242 let parsed = php_rs_parser::parse(source.as_ref());
1243 let analyzer = crate::FileAnalyzer::new(self);
1244 let analysis = analyzer.analyze(
1245 file.clone(),
1246 source.as_ref(),
1247 &parsed.program,
1248 &parsed.source_map,
1249 );
1250 (file, analysis)
1251 })
1252 .collect()
1253 }
1254
1255 /// FQCNs that `file` imports via `use` statements but that aren't yet
1256 /// loaded in the session.
1257 ///
1258 /// Designed as the input to background prefetching: after the LSP server
1259 /// ingests an open buffer, it can call this and lazy-load the returned
1260 /// FQCNs on a worker thread so the user's first Cmd+Click into vendor
1261 /// code doesn't pay the file-read+parse cost.
1262 ///
1263 /// Returns an empty Vec if the file hasn't been ingested or has no
1264 /// unresolved imports.
1265 pub fn pending_lazy_loads(&self, file: &str) -> Vec<Arc<str>> {
1266 let db = self.snapshot_db();
1267 let imports = db.file_imports(file);
1268 if imports.is_empty() {
1269 return Vec::new();
1270 }
1271 let mut out = Vec::new();
1272 for fqcn in imports.values() {
1273 let here = crate::db::Fqcn::new(&db, *fqcn);
1274 if crate::db::find_class_like(&db, here).is_some() {
1275 continue;
1276 }
1277 if let Some(resolver) = &self.resolver {
1278 if resolver.resolve(fqcn.as_str()).is_some() {
1279 out.push(Arc::from(fqcn.as_str()));
1280 }
1281 }
1282 }
1283 out
1284 }
1285
1286 /// Convenience: synchronously lazy-load every import of `file` that
1287 /// isn't already in the codebase. Returns the number successfully loaded.
1288 ///
1289 /// For non-blocking prefetch, call this from a worker thread:
1290 ///
1291 /// ```ignore
1292 /// let s = session.clone(); // AnalysisSession is wrapped in Arc by callers
1293 /// std::thread::spawn(move || {
1294 /// s.prefetch_imports(&file_path);
1295 /// });
1296 /// ```
1297 ///
1298 /// Internally walks the inheritance chain of each loaded class to a
1299 /// shallow depth so member access on imported types type-checks without
1300 /// the user paying the cost on their first navigation.
1301 pub fn prefetch_imports(&self, file: &str) -> usize {
1302 let pending = self.pending_lazy_loads(file);
1303 let mut loaded = 0;
1304 for fqcn in pending {
1305 // Use the transitive walker with a small depth so we pick up
1306 // parent classes / interfaces needed for member resolution, but
1307 // don't recursively pull in the entire vendor tree.
1308 loaded += self.load_class_transitive(&fqcn, 2);
1309 }
1310 loaded
1311 }
1312
1313 /// All class / interface / trait / enum FQCNs currently known to the
1314 /// session, each paired with the file that defines them when available.
1315 ///
1316 /// Use this to build workspace-wide views (outline, fuzzy search, etc.).
1317 /// Consumers implement their own search/match logic on top — the analyzer
1318 /// only exposes the iterator.
1319 pub fn all_classes(&self) -> Vec<(Arc<str>, Option<mir_types::Location>)> {
1320 let db = self.snapshot_db();
1321 crate::db::workspace_classes(&db)
1322 .iter()
1323 .filter_map(|fqcn| {
1324 let here = crate::db::Fqcn::from_str(&db, fqcn.as_ref());
1325 crate::db::find_class_like(&db, here)
1326 .map(|class| (fqcn.clone(), class.location().cloned()))
1327 })
1328 .collect()
1329 }
1330
1331 /// All global function FQNs currently known to the session, each paired
1332 /// with their declaration location when available.
1333 pub fn all_functions(&self) -> Vec<(Arc<str>, Option<mir_types::Location>)> {
1334 let db = self.snapshot_db();
1335 crate::db::workspace_functions(&db)
1336 .iter()
1337 .filter_map(|fqn| {
1338 let here = crate::db::Fqcn::from_str(&db, fqn.as_ref());
1339 crate::db::find_function(&db, here).map(|f| (fqn.clone(), f.location.clone()))
1340 })
1341 .collect()
1342 }
1343
1344 /// Compute `file`'s outgoing dependency edges and update both the in-memory
1345 /// reverse-dep map (always) and the disk cache's reverse-dep graph (if configured).
1346 fn update_reverse_deps_for(&self, file: &str) {
1347 let db = self.snapshot_db();
1348 let targets = file_outgoing_dependencies(&db, file);
1349
1350 // Always update the in-memory map.
1351 self.update_in_memory_reverse_deps(file, &targets);
1352
1353 // Also persist to disk cache if configured.
1354 if let Some(cache) = self.cache.as_deref() {
1355 cache.update_reverse_deps_for_file(file, &targets);
1356 }
1357 }
1358
1359 /// Update the in-memory reverse dependency map for `file` with `new_targets`.
1360 /// Removes `file` from all existing entries, then adds it as a dependent of
1361 /// each target in `new_targets` (excluding self-edges).
1362 fn update_in_memory_reverse_deps(&self, file: &str, new_targets: &HashSet<String>) {
1363 let file_id = self.file_id_map.write().assign_or_get(file);
1364 let target_ids: Vec<FileId> = {
1365 let mut id_map = self.file_id_map.write();
1366 new_targets
1367 .iter()
1368 .map(|t| id_map.assign_or_get(t))
1369 .collect()
1370 };
1371
1372 let mut map = self.reverse_dep_map.write();
1373 for dependents in map.values_mut() {
1374 dependents.remove(&file_id);
1375 }
1376 map.retain(|_, dependents| !dependents.is_empty());
1377 for target_id in target_ids {
1378 if target_id != file_id {
1379 map.entry(target_id).or_default().insert(file_id);
1380 }
1381 }
1382 }
1383
1384 /// BFS transitive dependents of `file` using the in-memory reverse dep map.
1385 ///
1386 /// O(D) where D is the number of transitive dependents — faster than
1387 /// [`Self::dependency_graph().transitive_dependents()`] which rebuilds the
1388 /// full graph on every call. Only covers structural dependencies from definition collection
1389 /// (imports, class hierarchy, type hints); does not include bare FQN body
1390 /// references recorded during body analysis. For full fidelity, use
1391 /// `dependency_graph().transitive_dependents()` after body analysis is complete.
1392 pub fn structural_dependents(&self, file: &str) -> Vec<String> {
1393 let Some(start_id) = self.file_id_map.read().get(file) else {
1394 return Vec::new();
1395 };
1396 let map = self.reverse_dep_map.read();
1397 let mut visited: HashSet<FileId> = HashSet::default();
1398 let mut queue = vec![start_id];
1399 let mut result_ids = Vec::new();
1400 while let Some(current_id) = queue.pop() {
1401 if !visited.insert(current_id) {
1402 continue;
1403 }
1404 if let Some(deps) = map.get(¤t_id) {
1405 for &dep_id in deps {
1406 if !visited.contains(&dep_id) {
1407 queue.push(dep_id);
1408 result_ids.push(dep_id);
1409 }
1410 }
1411 }
1412 }
1413 drop(map);
1414 let id_map = self.file_id_map.read();
1415 result_ids
1416 .iter()
1417 .filter_map(|&id| id_map.path(id))
1418 .map(|s| s.to_string())
1419 .collect()
1420 }
1421
1422 /// File dependency graph: which files depend on which other files.
1423 /// Used for incremental invalidation in LSP servers and build systems.
1424 ///
1425 /// File dependency graph: which files depend on which other files.
1426 /// Used for incremental invalidation in LSP servers and build systems.
1427 ///
1428 /// O(edges) — iterates the `file_references` forward index (file → symbol
1429 /// keys it references) which is always current, then resolves each symbol
1430 /// to its defining file via O(1) lookup. Total cost is O(E) where E is the
1431 /// number of (file, symbol) reference edges, vs. the old O(F × S × R) scan.
1432 pub fn dependency_graph(&self) -> crate::DependencyGraph {
1433 let db = self.snapshot_db();
1434
1435 let all_files: Vec<String> = db
1436 .source_file_paths()
1437 .iter()
1438 .map(|f| f.as_ref().to_string())
1439 .collect();
1440
1441 let mut dependencies: HashMap<String, Vec<String>> = HashMap::default();
1442 let mut dependents: HashMap<String, Vec<String>> = HashMap::default();
1443
1444 for file in &all_files {
1445 // O(degree(file)) — forward index lookup, no full-table scan.
1446 let symbol_keys = db.file_referenced_symbols(file);
1447 let mut file_deps: HashSet<String> = HashSet::default();
1448 for symbol_key in &symbol_keys {
1449 let lookup: &str = match symbol_key.split_once("::") {
1450 Some((class, _)) => class,
1451 None => symbol_key.as_ref(),
1452 };
1453 if let Some(def_file) = db.symbol_defining_file(lookup) {
1454 let def = def_file.as_ref().to_string();
1455 if &def != file {
1456 file_deps.insert(def);
1457 }
1458 }
1459 }
1460 for dep in &file_deps {
1461 dependents
1462 .entry(dep.clone())
1463 .or_default()
1464 .push(file.clone());
1465 dependencies
1466 .entry(file.clone())
1467 .or_default()
1468 .push(dep.clone());
1469 }
1470 }
1471
1472 // Merge structural deps from definition collection from the incremental reverse_dep_map.
1473 // dependency_graph() above only captures bare-FQN references recorded during body analysis;
1474 // the reverse_dep_map covers imports, class hierarchy (extends/implements/use),
1475 // and type-hint-only references that never appear in file_referenced_symbols.
1476 // Together they give a complete picture without requiring body analysis on every file.
1477 {
1478 let id_map = self.file_id_map.read();
1479 let rev = self.reverse_dep_map.read();
1480 for (&target_id, dep_set) in rev.iter() {
1481 let Some(target) = id_map.path(target_id) else {
1482 continue;
1483 };
1484 let target = target.to_string();
1485 for &dep_id in dep_set {
1486 let Some(dep) = id_map.path(dep_id) else {
1487 continue;
1488 };
1489 let dep = dep.to_string();
1490 if dep != target {
1491 dependents
1492 .entry(target.clone())
1493 .or_default()
1494 .push(dep.clone());
1495 dependencies
1496 .entry(dep.clone())
1497 .or_default()
1498 .push(target.clone());
1499 }
1500 }
1501 }
1502 }
1503
1504 for deps in dependents.values_mut() {
1505 deps.sort();
1506 deps.dedup();
1507 }
1508 for deps in dependencies.values_mut() {
1509 deps.sort();
1510 deps.dedup();
1511 }
1512
1513 // Augment with stale dependents: files referencing symbols that were
1514 // deleted from their defining file. These edges disappear from the
1515 // symbol_defining_file lookup but the referencing file still needs
1516 // re-analysis to surface the now-broken reference.
1517 {
1518 let stale = self.stale_defined_symbols.read();
1519 if !stale.is_empty() {
1520 for (file, deleted_syms) in stale.iter() {
1521 for sym in deleted_syms {
1522 let lookup: &str = match sym.split_once("::") {
1523 Some((class, _)) => class,
1524 None => sym.as_ref(),
1525 };
1526 for referencing_file in db.symbol_referencers_of(lookup) {
1527 let ref_file = referencing_file.as_ref().to_string();
1528 if &ref_file != file {
1529 dependents
1530 .entry(file.clone())
1531 .or_default()
1532 .push(ref_file.clone());
1533 dependencies.entry(ref_file).or_default().push(file.clone());
1534 }
1535 }
1536 }
1537 }
1538 // Re-sort and dedup since we may have added entries.
1539 for deps in dependents.values_mut() {
1540 deps.sort();
1541 deps.dedup();
1542 }
1543 for deps in dependencies.values_mut() {
1544 deps.sort();
1545 deps.dedup();
1546 }
1547 }
1548 }
1549
1550 crate::DependencyGraph {
1551 dependencies,
1552 dependents,
1553 }
1554 }
1555}
1556
1557/// Compute the set of files `file` depends on: defining files of its imports,
1558/// plus parent / interfaces / traits' defining files for any classes declared
1559/// in `file`. Self-edges are excluded.
1560fn file_outgoing_dependencies(db: &dyn MirDatabase, file: &str) -> HashSet<String> {
1561 let mut targets: HashSet<String> = HashSet::default();
1562
1563 let mut add_target = |symbol: &str| {
1564 if let Some(defining_file) = db.symbol_defining_file(symbol) {
1565 let def = defining_file.as_ref().to_string();
1566 if def != file {
1567 targets.insert(def);
1568 }
1569 }
1570 };
1571
1572 let extract_named_objects = |union: &mir_types::Type| {
1573 union
1574 .types
1575 .iter()
1576 .filter_map(|atomic| match atomic {
1577 mir_types::atomic::Atomic::TNamedObject { fqcn, .. } => Some(*fqcn),
1578 _ => None,
1579 })
1580 .collect::<Vec<_>>()
1581 };
1582
1583 let imports = db.file_imports(file);
1584 for fqcn in imports.values() {
1585 add_target(fqcn.as_str());
1586 }
1587
1588 // Walk every class/interface/trait/enum/function defined in this file
1589 // via the pull-path slice. Push-path lookup_*_node have been retired.
1590 if let Some(sf) = db.lookup_source_file(file) {
1591 let defs = crate::db::collect_file_definitions(db, sf);
1592 for c in defs.slice.classes.iter() {
1593 if let Some(p) = &c.parent {
1594 add_target(p);
1595 }
1596 for iface in c.interfaces.iter() {
1597 add_target(iface);
1598 }
1599 for tr in c.traits.iter() {
1600 add_target(tr);
1601 }
1602 for prop in c.own_properties.values() {
1603 if let Some(ty) = &prop.ty {
1604 for named in extract_named_objects(ty) {
1605 add_target(named.as_ref());
1606 }
1607 }
1608 }
1609 for method in c.own_methods.values() {
1610 for param in method.params.iter() {
1611 if let Some(ty) = ¶m.ty {
1612 for named in extract_named_objects(ty.as_ref()) {
1613 add_target(named.as_ref());
1614 }
1615 }
1616 }
1617 if let Some(rt) = method.return_type.as_deref() {
1618 for named in extract_named_objects(rt) {
1619 add_target(named.as_ref());
1620 }
1621 }
1622 }
1623 }
1624 for i in defs.slice.interfaces.iter() {
1625 for ext in i.extends.iter() {
1626 add_target(ext);
1627 }
1628 for method in i.own_methods.values() {
1629 for param in method.params.iter() {
1630 if let Some(ty) = ¶m.ty {
1631 for named in extract_named_objects(ty.as_ref()) {
1632 add_target(named.as_ref());
1633 }
1634 }
1635 }
1636 if let Some(rt) = method.return_type.as_deref() {
1637 for named in extract_named_objects(rt) {
1638 add_target(named.as_ref());
1639 }
1640 }
1641 }
1642 }
1643 for t in defs.slice.traits.iter() {
1644 for tr in t.traits.iter() {
1645 add_target(tr);
1646 }
1647 }
1648 for f in defs.slice.functions.iter() {
1649 for param in f.params.iter() {
1650 if let Some(ty) = ¶m.ty {
1651 for named in extract_named_objects(ty.as_ref()) {
1652 add_target(named.as_ref());
1653 }
1654 }
1655 }
1656 if let Some(rt) = f.return_type.as_deref() {
1657 for named in extract_named_objects(rt) {
1658 add_target(named.as_ref());
1659 }
1660 }
1661 }
1662 }
1663
1664 // Also track bare-FQN references recorded during body analysis (new \Foo(), \Foo::method(),
1665 // \foo()) that do not appear in use-import statements.
1666 for symbol_key in db.file_referenced_symbols(file) {
1667 let lookup: &str = match symbol_key.split_once("::") {
1668 Some((class, _)) => class,
1669 None => &symbol_key,
1670 };
1671 add_target(lookup);
1672 }
1673
1674 targets
1675}
1676
1677/// AST visitor that collects class FQCN references for PSR-4 preloading.
1678/// Captures identifiers from `new X`, static calls / property / constant
1679/// access, type hints, and `instanceof`. Does *not* normalize via PSR-4 /
1680/// imports — callers run the raw string through `resolve_name`.
1681fn collect_class_refs_from_ast(program: &php_ast::owned::Program) -> Vec<String> {
1682 use php_ast::ast::BinaryOp;
1683 use php_ast::owned::visitor::{
1684 walk_owned_catch_clause, walk_owned_expr, walk_owned_program, walk_owned_type_hint,
1685 OwnedVisitor,
1686 };
1687 use php_ast::owned::{ExprKind, TypeHintKind};
1688 use std::ops::ControlFlow;
1689
1690 fn owned_name_str(name: &php_ast::owned::Name) -> String {
1691 let joined: String = name
1692 .parts
1693 .iter()
1694 .map(|p| p.as_ref())
1695 .collect::<Vec<&str>>()
1696 .join("\\");
1697 if name.kind == php_ast::ast::NameKind::FullyQualified {
1698 format!("\\{joined}")
1699 } else {
1700 joined
1701 }
1702 }
1703
1704 struct V {
1705 names: std::collections::HashSet<String>,
1706 }
1707 impl OwnedVisitor for V {
1708 fn visit_expr(&mut self, expr: &php_ast::owned::Expr) -> ControlFlow<()> {
1709 match &expr.kind {
1710 ExprKind::New(n) => {
1711 if let ExprKind::Identifier(name) = &n.class.kind {
1712 self.names.insert(name.as_ref().to_string());
1713 }
1714 }
1715 ExprKind::StaticMethodCall(c) => {
1716 if let ExprKind::Identifier(name) = &c.class.kind {
1717 self.names.insert(name.as_ref().to_string());
1718 }
1719 }
1720 ExprKind::StaticPropertyAccess(a) => {
1721 if let ExprKind::Identifier(name) = &a.class.kind {
1722 self.names.insert(name.as_ref().to_string());
1723 }
1724 }
1725 ExprKind::ClassConstAccess(a) => {
1726 if let ExprKind::Identifier(name) = &a.class.kind {
1727 self.names.insert(name.as_ref().to_string());
1728 }
1729 }
1730 ExprKind::Binary(b) if b.op == BinaryOp::Instanceof => {
1731 if let ExprKind::Identifier(name) = &b.right.kind {
1732 self.names.insert(name.as_ref().to_string());
1733 }
1734 }
1735 _ => {}
1736 }
1737 walk_owned_expr(self, expr)
1738 }
1739
1740 fn visit_type_hint(&mut self, hint: &php_ast::owned::TypeHint) -> ControlFlow<()> {
1741 if let TypeHintKind::Named(name) = &hint.kind {
1742 let s = owned_name_str(name);
1743 if !s.is_empty() {
1744 self.names.insert(s);
1745 }
1746 }
1747 walk_owned_type_hint(self, hint)
1748 }
1749
1750 fn visit_catch_clause(&mut self, catch: &php_ast::owned::CatchClause) -> ControlFlow<()> {
1751 for ty in catch.types.iter() {
1752 self.names.insert(owned_name_str(ty));
1753 }
1754 walk_owned_catch_clause(self, catch)
1755 }
1756 }
1757 let mut v = V {
1758 names: std::collections::HashSet::default(),
1759 };
1760 let _ = walk_owned_program(&mut v, program);
1761 v.names.into_iter().collect()
1762}