mir_analyzer/session.rs
1//! Session-based analysis API for incremental, per-file analysis.
2//!
3//! [`AnalysisSession`] owns the salsa database and per-session caches for a
4//! long-running analysis context shared across many per-file analyses. Reads
5//! clone the database under a brief lock, then run lock-free; writes hold the
6//! lock briefly to mutate canonical state. `MirDbStorage::clone()` is cheap
7//! (Arc-wrapped registries), so this pattern gives parallel readers without
8//! blocking on concurrent writes for longer than the clone itself.
9//!
10//! See [`crate::file_analyzer::FileAnalyzer`] for the per-file analysis
11//! entry point that operates against a session.
12
13use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
14use std::path::PathBuf;
15use std::sync::Arc;
16
17use parking_lot::RwLock;
18
19use mir_codebase::{FileId, FileIdMap};
20
21use crate::analyzer_db::AnalyzerDb;
22use crate::cache::AnalysisCache;
23use crate::composer::Psr4Map;
24use crate::db::{MirDatabase, MirDbStorage, RefLoc};
25use crate::php_version::PhpVersion;
26
/// Long-lived analysis context. Owns the salsa database and tracks which
/// stubs have been loaded.
///
/// Cheap to clone the inner db for parallel reads; writes funnel through
/// [`Self::ingest_file`], [`Self::invalidate_file`], and the crate-internal
/// [`Self::with_db_mut`].
///
/// `Clone` is derived: the database, cache, resolver, source provider and
/// the bookkeeping maps are all held behind `Arc`, so a cloned session
/// shares them with the original. Only `php_version` and the user-stub path
/// lists are copied by value.
#[derive(Clone)]
pub struct AnalysisSession {
    /// Shared database management (salsa, file registry, stub tracking).
    pub(crate) db: Arc<AnalyzerDb>,
    /// Optional analysis cache. `None` until [`Self::with_cache`] or
    /// [`Self::with_cache_dir`] attaches one.
    pub(crate) cache: Option<Arc<AnalysisCache>>,
    /// PSR-4 / Composer autoload map. Retained alongside `resolver` so the
    /// `psr4()` accessor can still return a typed `Psr4Map` for callers that
    /// need Composer-specific data (project_files / vendor_files / etc.).
    pub(crate) psr4: Option<Arc<Psr4Map>>,
    /// Generic class resolver used for on-demand lazy loading. When `psr4`
    /// is set via [`Self::with_psr4`], this is populated with the same map
    /// re-typed as `dyn ClassResolver`. Consumers can also supply their own
    /// resolver via [`Self::with_class_resolver`] without going through
    /// Composer.
    resolver: Option<Arc<dyn crate::ClassResolver>>,
    /// PHP language version this session targets; set in [`Self::new`] and
    /// passed along to stub and file ingestion.
    pub(crate) php_version: PhpVersion,
    /// User-supplied stub files configured via [`Self::with_user_stubs`].
    pub(crate) user_stub_files: Vec<PathBuf>,
    /// User-supplied stub directories configured via
    /// [`Self::with_user_stubs`].
    pub(crate) user_stub_dirs: Vec<PathBuf>,
    /// Path ↔ FileId mapping shared with `reverse_dep_map`.
    file_id_map: Arc<RwLock<FileIdMap>>,
    /// In-memory reverse dependency map: target_file → set of files that
    /// depend on it. Always maintained (not gated on disk cache presence),
    /// enabling `reanalyze_dependents` and `dependency_graph()` without a
    /// disk cache. Updated in `ingest_file` and `invalidate_file`.
    reverse_dep_map: Arc<RwLock<HashMap<FileId, HashSet<FileId>>>>,
    /// Tracks symbols that were previously defined in a file but have since
    /// been removed (deleted or renamed). When `ingest_file` detects that
    /// a symbol disappears, it records it here so `dependency_graph()` can
    /// still produce edges to files that reference the now-gone symbol.
    ///
    /// Keyed by the file that used to define the symbols. Symbols are removed
    /// from the set when re-added to the same file on a subsequent ingest.
    /// The set may contain symbols with no current referencers; those are
    /// harmless — the `symbol_referencers_of` lookup returns empty.
    stale_defined_symbols: Arc<RwLock<HashMap<String, HashSet<Arc<str>>>>>,
    /// Negative cache: FQCNs that `load_class` already failed on.
    /// The value is the resolver-mapped path (when known) so eviction on
    /// `set_file_text` / `ingest_file` is a path equality check rather than
    /// re-running the resolver per entry. `None` means the resolver itself
    /// couldn't map the FQCN; those entries survive file edits (no source
    /// change makes a never-resolvable name resolvable).
    /// Bounded to `UNRESOLVABLE_CACHE_CAP`; clears on overflow.
    unresolvable_fqcns: UnresolvableCache,
    /// Pluggable source-text provider for lazy-load. Defaults to filesystem
    /// reads ([`crate::FsSourceProvider`]); LSPs swap in a VFS-backed
    /// implementation so unsaved buffers override on-disk content.
    source_provider: Arc<dyn crate::SourceProvider>,
}
81
/// FQCN → optional resolver-mapped path. See the field doc on
/// `AnalysisSession::unresolvable_fqcns`.
///
/// The map sits behind an `Arc<RwLock<..>>`, so every clone of a session
/// observes the same negative cache.
type UnresolvableCache = Arc<RwLock<HashMap<Arc<str>, Option<Arc<str>>>>>;
85
/// Cap on the negative-resolution cache. Sized to accommodate a large
/// workspace's worth of genuinely-missing references without unbounded
/// growth. On overflow the cache is cleared; the cost is a few extra
/// resolver calls until it re-fills.
///
/// NOTE(review): the code that enforces this cap is not in this part of the
/// file — confirm the clear-on-overflow behaviour against the insert path.
const UNRESOLVABLE_CACHE_CAP: usize = 10_000;
91
92impl AnalysisSession {
93 /// Create a session targeting the given PHP language version.
94 pub fn new(php_version: PhpVersion) -> Self {
95 let db = Arc::new(AnalyzerDb::new());
96 db.salsa
97 .write()
98 .set_php_version(Arc::from(php_version.to_string().as_str()));
99 Self {
100 db,
101 cache: None,
102 psr4: None,
103 resolver: None,
104 php_version,
105 user_stub_files: Vec::new(),
106 user_stub_dirs: Vec::new(),
107 file_id_map: Arc::new(RwLock::new(FileIdMap::new())),
108 reverse_dep_map: Arc::new(RwLock::new(HashMap::default())),
109 stale_defined_symbols: Arc::new(RwLock::new(HashMap::default())),
110 unresolvable_fqcns: Arc::new(RwLock::new(HashMap::default())),
111 source_provider: Arc::new(crate::FsSourceProvider),
112 }
113 }
114
/// Swap in a custom [`crate::SourceProvider`]. LSPs install a VFS-backed
/// provider here so the analyzer reads from unsaved editor buffers
/// instead of disk.
///
/// Builder-style: consumes the session and returns it with the provider
/// replaced. The previous provider (by default the
/// [`crate::FsSourceProvider`] installed by [`Self::new`]) is dropped.
pub fn with_source_provider(mut self, provider: Arc<dyn crate::SourceProvider>) -> Self {
    self.source_provider = provider;
    self
}
122
123 /// Attach a pre-built [`AnalysisCache`] (the body-analysis issue cache) and
124 /// open a sibling definition [`StubSlice`] cache under the same root, so
125 /// callers using this builder get the same speedup as `with_cache_dir`.
126 ///
127 /// Rebuilds the shared database to attach the definition cache — call
128 /// **before** any file is ingested. A debug assertion catches misuse.
129 ///
130 /// [`StubSlice`]: mir_codebase::storage::StubSlice
131 pub fn with_cache(mut self, cache: Arc<AnalysisCache>) -> Self {
132 debug_assert_eq!(
133 self.db.source_file_count(),
134 0,
135 "AnalysisSession::with_cache must be called before any file is ingested"
136 );
137 let dir = cache.cache_dir().to_path_buf();
138 self.db = Arc::new(AnalyzerDb::new().with_cache_dir(&dir));
139 self.db
140 .salsa
141 .write()
142 .set_php_version(Arc::from(self.php_version.to_string().as_str()));
143 self.cache = Some(cache);
144 self
145 }
146
147 /// Convenience: open a disk-backed cache at `cache_dir` and attach it.
148 ///
149 /// Attaches both the body-analysis issue cache ([`AnalysisCache`]) and the
150 /// definition [`StubSlice`] cache to the shared database. Builds a fresh
151 /// [`AnalyzerDb`] internally — call **before** any file is ingested. A
152 /// debug assertion catches misuse.
153 ///
154 /// [`StubSlice`]: mir_codebase::storage::StubSlice
155 pub fn with_cache_dir(mut self, cache_dir: &std::path::Path) -> Self {
156 debug_assert_eq!(
157 self.db.source_file_count(),
158 0,
159 "AnalysisSession::with_cache_dir must be called before any file is ingested"
160 );
161 self.db = Arc::new(AnalyzerDb::new().with_cache_dir(cache_dir));
162 self.db
163 .salsa
164 .write()
165 .set_php_version(Arc::from(self.php_version.to_string().as_str()));
166 self.cache = Some(Arc::new(AnalysisCache::open(cache_dir)));
167 self
168 }
169
170 /// Attach a Composer autoload map (PSR-4, PSR-0, classmap, files).
171 /// Sets the same map as the active [`crate::ClassResolver`] so
172 /// [`Self::load_class`] works out of the box.
173 pub fn with_psr4(mut self, map: Arc<Psr4Map>) -> Self {
174 let user_resolver: Arc<dyn crate::ClassResolver> = map.clone();
175 // Wrap with stub awareness so `find_class_like` / `resolve_fqcn_to_path`
176 // can map built-in PHP class FQCNs (`ArrayObject`, `Exception`, …)
177 // to their stub virtual paths.
178 let resolver: Arc<dyn crate::ClassResolver> = Arc::new(crate::ChainedClassResolver::new(
179 user_resolver,
180 Arc::new(crate::StubClassResolver),
181 ));
182 self.psr4 = Some(map);
183 self.resolver = Some(resolver.clone());
184 // Mirror into MirDbStorage so salsa-tracked resolver queries
185 // (`db::resolve_fqcn_to_path`) see the same resolver and are
186 // invalidated on swap.
187 self.db.salsa.write().set_resolver(Some(resolver));
188 self
189 }
190
191 /// Attach a generic class resolver for projects that don't use Composer
192 /// (WordPress, Drupal, custom autoloaders, workspace-walk indexes).
193 /// Replaces any previously-set Composer-backed resolver. Automatically
194 /// wrapped with stub awareness so PHP built-ins remain resolvable.
195 pub fn with_class_resolver(mut self, resolver: Arc<dyn crate::ClassResolver>) -> Self {
196 let wrapped: Arc<dyn crate::ClassResolver> = Arc::new(crate::ChainedClassResolver::new(
197 resolver,
198 Arc::new(crate::StubClassResolver),
199 ));
200 self.db.salsa.write().set_resolver(Some(wrapped.clone()));
201 self.resolver = Some(wrapped);
202 self
203 }
204
/// Configure user-supplied stub files and stub directories.
///
/// Builder-style: consumes the session and returns it. Both lists replace
/// (rather than extend) anything configured earlier. The paths are only
/// recorded here; they are handed to the database's `ingest_user_stubs`
/// when [`Self::ensure_all_stubs`] runs.
pub fn with_user_stubs(mut self, files: Vec<PathBuf>, dirs: Vec<PathBuf>) -> Self {
    self.user_stub_files = files;
    self.user_stub_dirs = dirs;
    self
}
210
/// PHP language version this session was created for (see [`Self::new`]).
pub fn php_version(&self) -> PhpVersion {
    self.php_version
}
214
/// The attached [`AnalysisCache`], if any. `None` unless
/// [`Self::with_cache`] or [`Self::with_cache_dir`] was used.
pub fn cache(&self) -> Option<&AnalysisCache> {
    self.cache.as_deref()
}
218
/// The Composer autoload map installed via [`Self::with_psr4`], if any.
pub fn psr4(&self) -> Option<&Psr4Map> {
    self.psr4.as_deref()
}
222
/// Deprecated — stub loading is now fully lazy per-AST.
///
/// This is an alias for [`Self::ensure_all_stubs`] kept for API
/// compatibility. Internal analysis paths use [`Self::prepare_ast_for_analysis`]
/// which loads only the stubs referenced by the file under analysis.
///
/// Note that, despite the name, calling this loads *every* embedded stub
/// (plus configured user stubs), because it simply forwards to
/// `ensure_all_stubs`.
#[deprecated(note = "use ensure_all_stubs() or ensure_stubs_for_ast() instead")]
pub fn ensure_essential_stubs(&self) {
    self.ensure_all_stubs();
}
232
233 /// Load every embedded PHP stub plus any configured user stubs.
234 /// Use for batch tools (CLI, full project analysis) where comprehensive
235 /// symbol coverage matters more than cold-start latency.
236 pub fn ensure_all_stubs(&self) {
237 let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
238 self.db.ingest_stub_paths(&paths, self.php_version);
239 self.ensure_user_stubs_loaded();
240 }
241
242 /// Ensure the embedded stub that defines `name` (a function) is ingested.
243 /// Returns `true` when a matching stub exists (whether or not it was
244 /// already loaded), `false` when `name` isn't a known PHP built-in.
245 ///
246 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead —
247 /// it auto-discovers needed stubs from a parsed file.
248 #[doc(hidden)]
249 pub fn ensure_stub_for_function(&self, name: &str) -> bool {
250 match crate::stubs::stub_path_for_function(name) {
251 Some(path) => {
252 self.db.ingest_stub_paths(&[path], self.php_version);
253 true
254 }
255 None => false,
256 }
257 }
258
259 /// Ensure the embedded stub that defines `fqcn` (a class / interface /
260 /// trait / enum) is ingested. Case-insensitive lookup with optional
261 /// leading backslash.
262 ///
263 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead.
264 #[doc(hidden)]
265 pub fn ensure_stub_for_class(&self, fqcn: &str) -> bool {
266 match crate::stubs::stub_path_for_class(fqcn) {
267 Some(path) => {
268 self.db.ingest_stub_paths(&[path], self.php_version);
269 true
270 }
271 None => false,
272 }
273 }
274
275 /// Ensure the embedded stub that defines `name` (a constant) is ingested.
276 ///
277 /// Most callers should use [`Self::ensure_stubs_for_ast`] instead.
278 #[doc(hidden)]
279 pub fn ensure_stub_for_constant(&self, name: &str) -> bool {
280 match crate::stubs::stub_path_for_constant(name) {
281 Some(path) => {
282 self.db.ingest_stub_paths(&[path], self.php_version);
283 true
284 }
285 None => false,
286 }
287 }
288
/// Number of distinct embedded stubs currently ingested into the session.
/// Useful for diagnostics and bench reporting.
///
/// Reads the length of the database's `loaded_stubs` collection, holding
/// its lock only for the duration of the call.
pub fn loaded_stub_count(&self) -> usize {
    self.db.loaded_stubs.lock().len()
}
294
295 /// Auto-discover and ingest the embedded stubs needed to cover every
296 /// built-in PHP function / class / constant referenced by `source`.
297 ///
298 /// Used by [`crate::FileAnalyzer::analyze`] to keep essentials-only mode
299 /// correct without forcing callers to enumerate which stubs they need.
300 /// Idempotent — already-loaded stubs are skipped via [`Self::loaded_stubs`].
301 ///
302 /// The discovery scan is a coarse identifier sweep (see
303 /// [`crate::stubs::collect_referenced_builtin_paths`]) — it may pull in
304 /// a slightly larger set than the file strictly needs, but never misses
305 /// a referenced built-in. Cost is sub-millisecond per file.
306 ///
307 /// Fast path: if every embedded stub is already loaded (e.g. after a
308 /// batch tool called [`Self::ensure_all_stubs`]), the source scan
309 /// is skipped entirely.
310 pub fn ensure_stubs_for_source(&self, source: &str) {
311 // Cheap check first: skip the scan entirely when we already know we
312 // have everything. Avoids a ~50-500µs source walk on every analyze
313 // call in batch / warm-session scenarios.
314 {
315 let loaded = self.db.loaded_stubs.lock();
316 if loaded.len() >= crate::stubs::stub_files().len() {
317 return;
318 }
319 }
320 let paths = crate::stubs::collect_referenced_builtin_paths(source);
321 if paths.is_empty() {
322 return;
323 }
324 self.db.ingest_stub_paths(&paths, self.php_version);
325 }
326
327 /// Discover and ingest stubs by walking the parsed AST of a PHP file.
328 ///
329 /// Similar to [`Self::ensure_stubs_for_source`], but takes an already-parsed
330 /// AST instead of raw source text. Produces zero false positives since it
331 /// only extracts identifiers from actual AST nodes (not from strings or
332 /// comments). Preferred over `ensure_stubs_for_source` when the AST is
333 /// already available (e.g., in [`crate::FileAnalyzer`]).
334 ///
335 /// Idempotent and skips the scan if all stubs are already loaded.
336 pub fn ensure_stubs_for_ast(&self, program: &php_ast::owned::Program) {
337 {
338 let loaded = self.db.loaded_stubs.lock();
339 if loaded.len() >= crate::stubs::stub_files().len() {
340 return;
341 }
342 }
343 let paths = crate::stubs::collect_referenced_builtin_paths_from_ast(program);
344 if paths.is_empty() {
345 return;
346 }
347 self.db.ingest_stub_paths(&paths, self.php_version);
348 }
349
/// Returns true if this session has a configured class resolver
/// (typically a PSR-4 / classmap autoloader chained with the stub
/// resolver). Used by `FileAnalyzer` to skip the AST-scan preload
/// when no resolver is wired up.
///
/// A resolver is present once [`Self::with_psr4`] or
/// [`Self::with_class_resolver`] has been called.
pub fn has_resolver(&self) -> bool {
    self.resolver.is_some()
}
357
/// Run both pre-passes (builtin-stub loading and PSR-4 class preloading)
/// in one call. Replaces the two separate `ensure_stubs_for_ast` /
/// `preload_psr4_classes_for_ast` calls at every `FileAnalyzer::analyze`
/// site.
///
/// `program` is the parsed AST of the file about to be analysed; `file` is
/// that file's path, which the preload step uses to resolve class names
/// against the file's namespace and imports. Stubs are loaded first, then
/// classes are preloaded.
pub fn prepare_ast_for_analysis(&self, program: &php_ast::owned::Program, file: &str) {
    self.ensure_stubs_for_ast(program);
    self.preload_psr4_classes_for_ast(program, file);
}
366
367 /// Scan a parsed AST for class references and lazy-load any that are
368 /// PSR-4-resolvable but not yet registered as `SourceFile` inputs, together
369 /// with their full declared-type closure (see [`Self::load_class_transitive`]).
370 /// After this call, `find_class_like` can pull-resolve not just the
371 /// referenced classes but the types named in their signatures and
372 /// inheritance chains — so open-file diagnostics are as complete as the
373 /// batch path, without a post-analysis retry loop.
374 pub fn preload_psr4_classes_for_ast(&self, program: &php_ast::owned::Program, file: &str) {
375 if self.resolver.is_none() {
376 return;
377 }
378 let refs = collect_class_refs_from_ast(program);
379 if refs.is_empty() {
380 return;
381 }
382 // Resolve names against the file's namespace/imports up front, then
383 // drop the snapshot before lazy-loading (which mutates inputs).
384 let resolved: Vec<String> = {
385 let db = self.snapshot_db();
386 refs.into_iter()
387 .map(|raw| crate::db::resolve_name(&db, file, &raw))
388 .collect()
389 };
390 for fqcn in resolved {
391 // Load each referenced class together with its declared-type closure
392 // (inheritance chain + signature types) so member access and chained
393 // calls on lazily-loaded types type-check in a single pass.
394 //
395 // Depth 3 is empirically the knee of the curve: on Laravel hub files
396 // it produces diagnostics identical to the unbounded (depth-10)
397 // closure while loading ~2-3× fewer classes (e.g. 117 vs 314 for
398 // Builder.php), roughly halving cold-open latency. Depth 2 is cheaper
399 // still but measurably less complete (leaves more types unresolved).
400 // `load_class_transitive` short-circuits classes already present.
401 self.load_class_transitive(&fqcn, 3);
402 }
403 }
404
/// Hand the configured user stub files and directories (see
/// [`Self::with_user_stubs`]) to the database's `ingest_user_stubs`.
fn ensure_user_stubs_loaded(&self) {
    self.db
        .ingest_user_stubs(&self.user_stub_files, &self.user_stub_dirs);
}
409
/// Cheap clone of the salsa db for a read-only query. The lock is held
/// only for the duration of the clone, so concurrent readers never
/// serialize on each other or on writes for longer than the clone itself.
///
/// Returns an owned [`MirDbStorage`]; this method simply forwards to
/// `AnalyzerDb::snapshot_db`.
///
/// **Internal API — exposes Salsa types.** Subject to change without
/// notice. Public consumers should use the typed query methods
/// ([`Self::definition_of`], [`Self::hover`], etc.) instead.
#[doc(hidden)]
pub fn snapshot_db(&self) -> MirDbStorage {
    self.db.snapshot_db()
}
421
422 /// Commit a batch of reference locations from a db snapshot into the
423 /// session's shared maps. Called by [`crate::FileAnalyzer`] and
424 /// [`crate::BatchFileAnalyzer`] after parallel body analysis to flush the pending
425 /// buffers that accumulate in worker db clones.
426 pub(crate) fn commit_ref_locs_batch(&self, locs: Vec<RefLoc>) {
427 if locs.is_empty() {
428 return;
429 }
430 let guard = self.db.salsa.read();
431 guard.commit_reference_locations_batch(locs);
432 }
433
434 /// Run a closure with read access to a database snapshot.
435 ///
436 /// **Internal API — exposes Salsa types.** Subject to change without
437 /// notice.
438 #[doc(hidden)]
439 pub fn read<R>(&self, f: impl FnOnce(&dyn MirDatabase) -> R) -> R {
440 let db = self.snapshot_db();
441 f(&db)
442 }
443
444 /// definition-collection ingestion. Updates the file's source text in the salsa db,
445 /// runs definition collection, and ingests the resulting stub slice.
446 /// Triggers stub loading on first call. Also updates the cache's reverse-
447 /// dependency graph for `file` so cross-file invalidation stays correct
448 /// across incremental edits — without rebuilding the graph from scratch.
449 ///
450 /// If `file` was previously ingested, its old definitions and reference
451 /// locations are removed first so renames / deletions don't leave stale
452 /// state in the codebase. (Without this, long-running sessions would
453 /// accumulate dead reference-location entries indefinitely.)
454 pub fn ingest_file(&self, file: Arc<str>, source: Arc<str>) {
455 self.ensure_all_stubs();
456
457 // Snapshot symbols defined before clearing — O(symbols_in_file) with forward index.
458 let old_symbols: HashSet<Arc<str>> = {
459 let guard = self.db.salsa.read();
460 guard.file_defined_symbols(file.as_ref())
461 };
462
463 {
464 let mut guard = self.db.salsa.write();
465 guard.remove_file_definitions(file.as_ref());
466 }
467 let _file_defs =
468 self.db
469 .collect_and_ingest_file(file.clone(), source.as_ref(), self.php_version);
470
471 // Snapshot symbols after ingesting — O(symbols_in_file).
472 let new_symbols: HashSet<Arc<str>> = {
473 let guard = self.db.salsa.read();
474 guard.file_defined_symbols(file.as_ref())
475 };
476
477 // Symbols removed from this file must be tracked so dependency_graph()
478 // can still produce edges to files referencing the now-gone symbols.
479 let deleted: Vec<Arc<str>> = old_symbols.difference(&new_symbols).cloned().collect();
480 let re_added: Vec<Arc<str>> = new_symbols.difference(&old_symbols).cloned().collect();
481 if !deleted.is_empty() || !re_added.is_empty() {
482 let mut stale = self.stale_defined_symbols.write();
483 let entry = stale.entry(file.as_ref().to_string()).or_default();
484 for sym in deleted {
485 entry.insert(sym);
486 }
487 for sym in &re_added {
488 entry.remove(sym);
489 }
490 if entry.is_empty() {
491 stale.remove(file.as_ref());
492 }
493 }
494
495 self.update_reverse_deps_for(&file);
496 // Evict cached analysis results for files that depend on this one so
497 // that the next re_analyze_file call re-analyses them rather than
498 // replaying a stale cache entry. Mirrors the eviction in
499 // `re_analyze_file` (batch.rs) but applies to the ingest path used by
500 // LSP servers that edit a single file without re-analysing it.
501 if let Some(cache) = self.cache.as_deref() {
502 cache.evict_with_dependents(&[file.to_string()]);
503 }
504 // Only evict cache entries whose resolver-mapped path equals this
505 // file. FQCNs the resolver can't map (psr4 miss) stay cached — no
506 // ingest could change their fate. Avoids the per-keystroke storm
507 // where wholesale clearing forces every unresolved FQCN to re-hit
508 // the resolver on the next FileAnalyzer iteration.
509 self.evict_unresolvable_for_file(&file);
510
511 // If the workspace symbol index singleton has already been built,
512 // check whether this edit changed any declared names. If so, rebuild
513 // the singleton so subsequent `find_class_like` / `find_function`
514 // calls see the new names. Body-only edits skip this (name-only
515 // PartialEq on FileDeclarations returns equal → no rebuild → the
516 // HIGH-durability singleton dep short-circuits in O(1)).
517 {
518 let mut guard = self.db.salsa.write();
519 if guard.workspace_symbol_index_singleton().is_some() {
520 if let Some(sf) = guard.lookup_source_file(file.as_ref()) {
521 if guard.file_declarations_changed(sf) {
522 guard.rebuild_workspace_symbol_index();
523 }
524 }
525 }
526 }
527 }
528
529 /// Register `source` as the text of `file` in the salsa input layer **without**
530 /// parsing or running definition collection.
531 ///
532 /// This is the LSP-friendly bulk-population entry point: after a workspace
533 /// scan, callers can feed every discovered file's text to the session
534 /// cheaply (an Arc clone plus a HashMap insert per file). Name resolution
535 /// then happens on demand via [`Self::load_class`], which reads
536 /// the file from disk through the configured [`crate::ClassResolver`] and
537 /// runs definition collection lazily when a class FQCN actually needs to resolve.
538 ///
539 /// Contrast with [`Self::ingest_file`], which eagerly parses, runs definition collection,
540 /// and populates the symbol index. Use `ingest_file` for files the user is
541 /// actively editing (where in-memory text diverges from disk); use
542 /// `set_file_text` for files known only through the workspace scan.
543 ///
544 /// Clears the negative cache: a previously-unresolvable FQCN may now
545 /// resolve if its defining file is among the newly-registered set.
546 pub fn set_file_text(&self, file: Arc<str>, source: Arc<str>) {
547 {
548 let mut guard = self.db.salsa.write();
549 guard.upsert_source_file(file.clone(), source);
550 }
551 self.evict_unresolvable_for_file(&file);
552 }
553
554 /// Bulk-register vendor / library files with HIGH salsa durability.
555 ///
556 /// HIGH-durability files are not expected to change during the session.
557 /// When a LOW-durability project file is edited, salsa can skip O(N)
558 /// dependency verification for every HIGH-durability file, reducing
559 /// `workspace_symbol_index` re-verification cost to O(project files only).
560 ///
561 /// Definition collection runs lazily on first symbol access; no parsing at call time.
562 pub fn set_vendor_files<I>(&self, files: I)
563 where
564 I: IntoIterator<Item = (Arc<str>, Arc<str>)>,
565 {
566 let mut guard = self.db.salsa.write();
567 for (file, source) in files {
568 guard.upsert_source_file_with_durability(file, source, salsa::Durability::HIGH);
569 }
570 }
571
/// Build or refresh the `WorkspaceSymbolIndexSingleton` from all currently
/// registered files.
///
/// After this call, `find_class_like`, `find_function`, and
/// `find_global_constant` read `singleton.index(db)` — a single
/// `Durability::HIGH` tracked dep — instead of recomputing the full
/// O(N_files) dep list via `workspace_symbol_index`. On subsequent
/// LOW-durability (project-file) body edits the dep short-circuits in O(1).
///
/// Call this once after all vendor + stub + project files have been
/// ingested (end of workspace warm-up). Also called automatically by
/// [`Self::ingest_file`] when a file's declared names change.
///
/// Takes the salsa write lock for the duration of the rebuild.
pub fn rebuild_workspace_symbol_index(&self) {
    self.db.salsa.write().rebuild_workspace_symbol_index();
}
587
588 /// Bulk variant of [`Self::set_file_text`]. Acquires the salsa write lock
589 /// once for the entire batch instead of once per file.
590 ///
591 /// The intended LSP scan loop is:
592 /// ```text
593 /// let files: Vec<_> = walk_workspace()
594 /// .map(|path| (path, fs::read(&path).unwrap()))
595 /// .collect();
596 /// session.set_workspace_files(files);
597 /// ```
598 /// After this call, every file's source text is known to salsa. No
599 /// parsing has happened yet — Definition collection runs per file on the first
600 /// `load_class` that needs to consult it.
601 pub fn set_workspace_files<I>(&self, files: I)
602 where
603 I: IntoIterator<Item = (Arc<str>, Arc<str>)>,
604 {
605 let registered_paths: Vec<Arc<str>> = {
606 let mut guard = self.db.salsa.write();
607 files
608 .into_iter()
609 .map(|(file, source)| {
610 guard.upsert_source_file(file.clone(), source);
611 file
612 })
613 .collect()
614 };
615 if !registered_paths.is_empty() && self.resolver.is_some() {
616 self.evict_unresolvable_for_files(®istered_paths);
617 }
618 }
619
620 /// Drop a file's contribution to the session: codebase definitions,
621 /// reference locations, salsa input handle, cache entry, and outgoing
622 /// reverse-dependency edges. Cache entries of *dependent* files are
623 /// also evicted (cross-file invalidation).
624 ///
625 /// Use this when a file is closed by the consumer, or before a re-ingest
626 /// of substantially changed content. (Plain re-ingest via
627 /// [`Self::ingest_file`] also drops old definitions, but does not
628 /// remove the salsa input handle — call this for full cleanup.)
629 pub fn invalidate_file(&self, file: &str) {
630 {
631 let mut guard = self.db.salsa.write();
632 guard.remove_file_definitions(file);
633 guard.remove_source_file(file);
634 }
635 // Remove this file's outgoing deps from the in-memory reverse dep map.
636 self.update_in_memory_reverse_deps(file, &HashSet::default());
637 // Clear stale symbol tracking for this file — it's fully gone.
638 self.stale_defined_symbols.write().remove(file);
639 if let Some(cache) = &self.cache {
640 cache.update_reverse_deps_for_file(file, &HashSet::default());
641 cache.evict_with_dependents(&[file.to_string()]);
642 }
643 // The file is gone; cache entries that previously mapped to it stay
644 // unresolvable until the file (or another with matching symbols) is
645 // ingested again. Selective evict mirrors the ingest path.
646 self.evict_unresolvable_for_file(file);
647 }
648
649 /// Number of files currently tracked in this session's salsa input set.
650 /// Stable across reads; useful for diagnostics and memory bounds checks.
651 pub fn tracked_file_count(&self) -> usize {
652 let guard = self.db.salsa.read();
653 guard.source_file_count()
654 }
655
656 // -----------------------------------------------------------------------
657 // Read-only codebase queries
658 //
659 // All take a brief lock to clone the db, then run the lookup against the
660 // owned snapshot — concurrent edits proceed without blocking.
661 // -----------------------------------------------------------------------
662
663 /// Resolve a top-level symbol (class or function) to its declaration
664 /// location. Powers go-to-definition.
665 ///
666 /// **Side effects:** if the symbol isn't yet known, this may invoke the
667 /// configured [`crate::SourceProvider`] to fault in additional files and
668 /// mutate the salsa input set. Use [`Self::definition_of_cached`] for a
669 /// pure variant that only consults already-loaded state.
670 ///
671 /// Returns:
672 /// - `Ok(Location)` — symbol found with a source location
673 /// - `Err(NotFound)` — no such symbol in the codebase
674 /// - `Err(NoSourceLocation)` — symbol exists but has no recorded span
675 /// (e.g. some stub-only declarations)
676 pub fn definition_of(
677 &self,
678 symbol: &crate::Name,
679 ) -> Result<mir_types::Location, crate::SymbolLookupError> {
680 // Trigger any necessary lazy-load mutations before snapshotting.
681 match symbol {
682 crate::Name::Class(fqcn) => {
683 let _ = self.load_class(fqcn.as_ref());
684 }
685 crate::Name::Function(fqn) => {
686 let _ = self.load_class(fqn.as_ref());
687 }
688 crate::Name::Method { class, .. }
689 | crate::Name::Property { class, .. }
690 | crate::Name::ClassConstant { class, .. } => {
691 let _ = self.load_class(class.as_ref());
692 }
693 _ => {}
694 }
695 self.definition_of_cached(symbol)
696 }
697
698 /// Pure variant of [`Self::definition_of`]. Never invokes the
699 /// [`crate::SourceProvider`] and never mutates salsa inputs; resolves
700 /// only against state already loaded by `set_file_text` / `ingest_file`.
701 /// Returns `Err(NotFound)` when the symbol isn't in the loaded set, even
702 /// if a resolver could in principle map it.
703 pub fn definition_of_cached(
704 &self,
705 symbol: &crate::Name,
706 ) -> Result<mir_types::Location, crate::SymbolLookupError> {
707 let db = self.snapshot_db();
708 match symbol {
709 crate::Name::Class(fqcn) => {
710 let here = crate::db::Fqcn::from_str(&db, fqcn.as_ref());
711 let class = crate::db::find_class_like(&db, here)
712 .ok_or(crate::SymbolLookupError::NotFound)?;
713 class
714 .location()
715 .cloned()
716 .ok_or(crate::SymbolLookupError::NoSourceLocation)
717 }
718 crate::Name::Function(fqn) => {
719 let here = crate::db::Fqcn::from_str(&db, fqn.as_ref());
720 let f = crate::db::find_function(&db, here)
721 .ok_or(crate::SymbolLookupError::NotFound)?;
722 f.location
723 .clone()
724 .ok_or(crate::SymbolLookupError::NoSourceLocation)
725 }
726 crate::Name::Method { class, name }
727 | crate::Name::Property { class, name }
728 | crate::Name::ClassConstant { class, name } => {
729 crate::db::member_location(&db, class, name)
730 .ok_or(crate::SymbolLookupError::NotFound)
731 }
732 crate::Name::GlobalConstant(_) => Err(crate::SymbolLookupError::NoSourceLocation),
733 }
734 }
735
736 /// Hover information for a symbol: type, docstring, and definition location.
737 ///
738 /// Use [`crate::FileAnalysis::symbol_at`] to find the symbol at a cursor
739 /// position, then build a [`crate::Name`] from its `kind`. This method
740 /// assembles the displayable hover data.
741 ///
742 /// **Side effects:** when `symbol`'s owning class isn't yet loaded, this
743 /// may invoke the configured [`crate::SourceProvider`] to fault in
744 /// dependencies. Use [`Self::hover_cached`] for a pure variant.
745 ///
746 /// Returns `Err(NotFound)` if the symbol doesn't exist. May still return
747 /// `Ok` with `docstring: None` or `definition: None` if those specific
748 /// pieces aren't available.
749 pub fn hover(
750 &self,
751 symbol: &crate::Name,
752 ) -> Result<crate::HoverInfo, crate::SymbolLookupError> {
753 // Trigger lazy loading for class-rooted symbols before snapshotting.
754 // No-op when the class is already known; ensures inherited member
755 // lookups have the chain present.
756 match symbol {
757 crate::Name::Class(fqcn) => {
758 self.load_class(fqcn.as_ref());
759 }
760 crate::Name::Method { class, .. }
761 | crate::Name::Property { class, .. }
762 | crate::Name::ClassConstant { class, .. } => {
763 // 10 mirrors the default depth used by reanalyze_dependents.
764 self.load_class_transitive(class.as_ref(), 10);
765 }
766 _ => {}
767 }
768 self.hover_cached(symbol)
769 }
770
771 /// Pure variant of [`Self::hover`]. Never invokes the
772 /// [`crate::SourceProvider`]; consults only the already-loaded db.
773 pub fn hover_cached(
774 &self,
775 symbol: &crate::Name,
776 ) -> Result<crate::HoverInfo, crate::SymbolLookupError> {
777 use mir_types::{Atomic, Type};
778 let db = self.snapshot_db();
779 match symbol {
780 crate::Name::Function(fqn) => {
781 let here = crate::db::Fqcn::from_str(&db, fqn.as_ref());
782 let f = crate::db::find_function(&db, here)
783 .ok_or(crate::SymbolLookupError::NotFound)?;
784 let ty = f
785 .return_type
786 .as_deref()
787 .cloned()
788 .unwrap_or_else(Type::mixed);
789 let docstring = f.docstring.as_ref().map(|s| s.to_string());
790 Ok(crate::HoverInfo {
791 ty,
792 docstring,
793 definition: f.location.clone(),
794 })
795 }
796 crate::Name::Method { class, name } => {
797 let here = crate::db::Fqcn::from_str(&db, class.as_ref());
798 let (_, m) = crate::db::find_method_in_chain(&db, here, name)
799 .ok_or(crate::SymbolLookupError::NotFound)?;
800 let ty = m
801 .return_type
802 .as_deref()
803 .cloned()
804 .unwrap_or_else(Type::mixed);
805 let docstring = m.docstring.as_ref().map(|s| s.to_string());
806 Ok(crate::HoverInfo {
807 ty,
808 docstring,
809 definition: m.location.clone(),
810 })
811 }
812 crate::Name::Class(fqcn) => {
813 let here = crate::db::Fqcn::from_str(&db, fqcn.as_ref());
814 let class = crate::db::find_class_like(&db, here)
815 .ok_or(crate::SymbolLookupError::NotFound)?;
816 let ty = Type::single(Atomic::TNamedObject {
817 fqcn: mir_types::Name::from(fqcn.as_ref()),
818 type_params: mir_types::union::empty_type_params(),
819 });
820 Ok(crate::HoverInfo {
821 ty,
822 docstring: None,
823 definition: class.location().cloned(),
824 })
825 }
826 crate::Name::Property { class, name } => {
827 let here = crate::db::Fqcn::from_str(&db, class.as_ref());
828 let (_, p) = crate::db::find_property_in_chain(&db, here, name)
829 .ok_or(crate::SymbolLookupError::NotFound)?;
830 let ty = p.ty.clone().unwrap_or_else(Type::mixed);
831 Ok(crate::HoverInfo {
832 ty,
833 docstring: None,
834 definition: p.location.clone(),
835 })
836 }
837 crate::Name::ClassConstant { class, name } => {
838 let here = crate::db::Fqcn::from_str(&db, class.as_ref());
839 let (_, c) = crate::db::find_class_constant_in_chain(&db, here, name)
840 .ok_or(crate::SymbolLookupError::NotFound)?;
841 Ok(crate::HoverInfo {
842 ty: c.ty.clone(),
843 docstring: None,
844 definition: c.location.clone(),
845 })
846 }
847 crate::Name::GlobalConstant(fqn) => {
848 let here = crate::db::Fqcn::from_str(&db, fqn.as_ref());
849 let ty = crate::db::find_global_constant(&db, here)
850 .ok_or(crate::SymbolLookupError::NotFound)?;
851 Ok(crate::HoverInfo {
852 ty: (*ty).clone(),
853 docstring: None,
854 definition: None,
855 })
856 }
857 }
858 }
859
860 /// Raw reference locations indexed by string symbol key, kept for tests
861 /// that use the legacy stringly-typed API. Prefer [`Self::references_to`]
862 /// with a typed [`crate::Name`].
863 #[doc(hidden)]
864 pub fn reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
865 use crate::db::MirDatabase;
866 let db = self.snapshot_db();
867 db.reference_locations(symbol)
868 }
869
870 /// Every recorded reference to `symbol` with its source location as a Range.
871 /// Use [`crate::FileAnalysis::symbol_at`] to find the symbol at a cursor,
872 /// build a [`crate::Name`] from it, and pass it here.
873 pub fn references_to(&self, symbol: &crate::Name) -> Vec<(Arc<str>, crate::Range)> {
874 let db = self.snapshot_db();
875 let key = symbol.codebase_key();
876 db.reference_locations(&key)
877 .into_iter()
878 .map(|(file, line, col_start, col_end)| {
879 let range = crate::Range {
880 start: crate::Position {
881 line,
882 column: col_start as u32,
883 },
884 end: crate::Position {
885 line,
886 column: col_end as u32,
887 },
888 };
889 (file, range)
890 })
891 .collect()
892 }
893
894 /// Class-level issues (inheritance violations, abstract-method gaps, override
895 /// incompatibilities) for the given set of files.
896 ///
897 /// These checks are cross-file by nature and are not emitted by
898 /// [`crate::FileAnalyzer::analyze`]. Call this after ingesting or
899 /// re-analyzing a file and its dependents to get the full diagnostic picture.
900 ///
901 /// Circular-inheritance checks always run against the full workspace graph
902 /// regardless of the `files` filter — a cycle is a workspace-wide problem.
903 pub fn class_issues(&self, files: &[Arc<str>]) -> Vec<crate::Issue> {
904 let db = self.snapshot_db();
905 let file_set: HashSet<Arc<str>> = files.iter().cloned().collect();
906 let file_data: Vec<(Arc<str>, Arc<str>)> = files
907 .iter()
908 .filter_map(|f| Some((f.clone(), self.source_of(f)?)))
909 .collect();
910 crate::class::ClassAnalyzer::with_files(&db, file_set, &file_data).analyze_all()
911 }
912
913 /// All declarations defined in `file` as a **hierarchical tree**.
914 ///
915 /// Classes/interfaces/traits/enums are returned with their methods,
916 /// properties, and constants nested in `children`. Top-level functions
917 /// and constants are returned with empty `children`.
918 pub fn document_symbols(&self, file: &str) -> Vec<crate::symbol::DocumentSymbol> {
919 use crate::symbol::{DeclarationKind, DocumentSymbol};
920
921 let db = self.snapshot_db();
922 let Some(sf) = db.lookup_source_file(file) else {
923 return Vec::new();
924 };
925 let defs = crate::db::collect_file_definitions(&db, sf);
926 let mut out: Vec<DocumentSymbol> = Vec::new();
927
928 let class_children =
929 |methods: &indexmap::IndexMap<Arc<str>, Arc<mir_codebase::storage::MethodDef>>,
930 props: Option<&indexmap::IndexMap<Arc<str>, mir_codebase::storage::PropertyDef>>,
931 consts: &indexmap::IndexMap<Arc<str>, mir_codebase::storage::ConstantDef>,
932 is_enum: bool|
933 -> Vec<DocumentSymbol> {
934 let mut out: Vec<DocumentSymbol> = Vec::new();
935 for (_, m) in methods.iter() {
936 out.push(DocumentSymbol {
937 name: m.name.clone(),
938 kind: DeclarationKind::Method,
939 location: m.location.clone(),
940 children: Vec::new(),
941 });
942 }
943 if let Some(props) = props {
944 for (_, p) in props.iter() {
945 out.push(DocumentSymbol {
946 name: p.name.clone(),
947 kind: DeclarationKind::Property,
948 location: p.location.clone(),
949 children: Vec::new(),
950 });
951 }
952 }
953 let const_kind = if is_enum {
954 DeclarationKind::EnumCase
955 } else {
956 DeclarationKind::Constant
957 };
958 for (_, c) in consts.iter() {
959 out.push(DocumentSymbol {
960 name: c.name.clone(),
961 kind: const_kind,
962 location: c.location.clone(),
963 children: Vec::new(),
964 });
965 }
966 out
967 };
968
969 for c in defs.slice.classes.iter() {
970 out.push(DocumentSymbol {
971 name: c.fqcn.clone(),
972 kind: DeclarationKind::Class,
973 location: c.location.clone(),
974 children: class_children(
975 &c.own_methods,
976 Some(&c.own_properties),
977 &c.own_constants,
978 false,
979 ),
980 });
981 }
982 for i in defs.slice.interfaces.iter() {
983 out.push(DocumentSymbol {
984 name: i.fqcn.clone(),
985 kind: DeclarationKind::Interface,
986 location: i.location.clone(),
987 children: class_children(&i.own_methods, None, &i.own_constants, false),
988 });
989 }
990 for t in defs.slice.traits.iter() {
991 out.push(DocumentSymbol {
992 name: t.fqcn.clone(),
993 kind: DeclarationKind::Trait,
994 location: t.location.clone(),
995 children: class_children(
996 &t.own_methods,
997 Some(&t.own_properties),
998 &t.own_constants,
999 false,
1000 ),
1001 });
1002 }
1003 for e in defs.slice.enums.iter() {
1004 let mut children = class_children(&e.own_methods, None, &e.own_constants, true);
1005 for (_, case) in e.cases.iter() {
1006 children.push(DocumentSymbol {
1007 name: case.name.clone(),
1008 kind: DeclarationKind::EnumCase,
1009 location: case.location.clone(),
1010 children: Vec::new(),
1011 });
1012 }
1013 out.push(DocumentSymbol {
1014 name: e.fqcn.clone(),
1015 kind: DeclarationKind::Enum,
1016 location: e.location.clone(),
1017 children,
1018 });
1019 }
1020 for f in defs.slice.functions.iter() {
1021 out.push(DocumentSymbol {
1022 name: f.fqn.clone(),
1023 kind: DeclarationKind::Function,
1024 location: f.location.clone(),
1025 children: Vec::new(),
1026 });
1027 }
1028 for (name, _) in defs.slice.constants.iter() {
1029 out.push(DocumentSymbol {
1030 name: name.clone(),
1031 kind: DeclarationKind::Constant,
1032 location: None,
1033 children: Vec::new(),
1034 });
1035 }
1036 out
1037 }
1038
1039 /// Returns `true` if a function with `fqn` is registered and active in
1040 /// the codebase. Case-insensitive lookup with optional leading backslash.
1041 pub fn contains_function(&self, fqn: &str) -> bool {
1042 let db = self.snapshot_db();
1043 crate::db::function_exists(&db, fqn)
1044 }
1045
1046 /// Returns `true` if a class / interface / trait / enum with `fqcn` is
1047 /// registered and active in the codebase.
1048 pub fn contains_class(&self, fqcn: &str) -> bool {
1049 let db = self.snapshot_db();
1050 crate::db::class_exists(&db, fqcn)
1051 }
1052
1053 /// Returns `true` if `class` has a method named `name` registered. Method
1054 /// names are matched case-insensitively (PHP method dispatch semantics).
1055 pub fn contains_method(&self, class: &str, name: &str) -> bool {
1056 let db = self.snapshot_db();
1057 crate::db::has_method_in_chain(&db, class, name)
1058 }
1059
1060 /// Resolve `fqcn` via the configured [`crate::ClassResolver`] and ingest
1061 /// the mapped file. The session keeps a negative cache so repeated calls
1062 /// for an unresolvable name don't re-hit the resolver; the cache is
1063 /// invalidated on any [`Self::ingest_file`] / [`Self::invalidate_file`].
1064 ///
1065 /// This is the LSP-friendly entry point: the analyzer never touches
1066 /// `vendor/` on its own, but consumers can ask it to resolve individual
1067 /// symbols on demand. Designed to be called when a diagnostic would
1068 /// otherwise report `UndefinedClass`.
1069 ///
1070 /// Returns a [`crate::LoadOutcome`] distinguishing
1071 /// already-loaded / freshly-loaded / not-resolvable. Use
1072 /// [`crate::LoadOutcome::is_loaded`] when only success matters.
1073 pub fn load_class(&self, fqcn: &str) -> crate::LoadOutcome {
1074 if self.contains_class(fqcn) {
1075 return crate::LoadOutcome::AlreadyLoaded;
1076 }
1077 if self.unresolvable_fqcns.read().contains_key(fqcn) {
1078 return crate::LoadOutcome::NotResolvable;
1079 }
1080 if self.try_resolve_and_ingest(fqcn) {
1081 crate::LoadOutcome::Loaded
1082 } else {
1083 // Cache the failure with the resolver-mapped path (if any) so
1084 // future file edits can selectively evict.
1085 let resolved_path: Option<Arc<str>> = self
1086 .resolver
1087 .as_ref()
1088 .and_then(|r| r.resolve(fqcn))
1089 .map(|p| Arc::from(p.to_string_lossy().as_ref()));
1090 let key: Arc<str> = Arc::from(fqcn);
1091 let mut cache = self.unresolvable_fqcns.write();
1092 if cache.len() >= UNRESOLVABLE_CACHE_CAP {
1093 cache.clear();
1094 }
1095 cache.insert(key, resolved_path);
1096 crate::LoadOutcome::NotResolvable
1097 }
1098 }
1099
1100 /// Inner load path: resolver lookup + ingest, no caching. Returns `true`
1101 /// iff `fqcn` ends up registered. Failure buckets are recorded for
1102 /// telemetry.
1103 fn try_resolve_and_ingest(&self, fqcn: &str) -> bool {
1104 use crate::metrics::{record_lazy_load_failure, LazyLoadFailure};
1105 let Some(resolver) = &self.resolver else {
1106 record_lazy_load_failure(LazyLoadFailure::NoResolver, fqcn);
1107 return false;
1108 };
1109 let Some(path) = resolver.resolve(fqcn) else {
1110 record_lazy_load_failure(LazyLoadFailure::ResolverNone, fqcn);
1111 return false;
1112 };
1113 let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
1114 // Prefer in-memory text from a prior `set_file_text` /
1115 // `set_workspace_files` call; fall back to disk. This makes the LSP's
1116 // unsaved-edit buffer authoritative over the on-disk content for the
1117 // same path.
1118 let src: Arc<str> = match self.source_of(&file) {
1119 Some(text) => text,
1120 None => match self.source_provider.read(&path.to_string_lossy()) {
1121 Some(text) => text,
1122 None => {
1123 record_lazy_load_failure(LazyLoadFailure::SourceUnreadable, fqcn);
1124 return false;
1125 }
1126 },
1127 };
1128 self.ingest_file(file, src);
1129 if self.contains_class(fqcn) {
1130 true
1131 } else {
1132 record_lazy_load_failure(LazyLoadFailure::IngestThenMissing, fqcn);
1133 false
1134 }
1135 }
1136
1137 /// Lazy-load the full *declared-type closure* transitively reachable from
1138 /// `fqcn`: not just its parent / interface / trait inheritance chain, but
1139 /// also the classes named in its members' signatures (method return /
1140 /// parameter types, property types, constant types, generic args, mixins,
1141 /// `@throws`). This mirrors the batch path's
1142 /// [`crate::batch::collect_class_referenced_fqcns`] closure, so an open
1143 /// buffer gets the same complete diagnostics the CLI produces: a value
1144 /// whose type comes from a vendor method's return type, or a member
1145 /// inherited from a vendor parent, resolves instead of degrading to `mixed`.
1146 ///
1147 /// Walks at most `max_depth` levels (default in batch analysis is 10).
1148 /// Returns the number of classes successfully loaded (not counting classes
1149 /// that were already present).
1150 pub fn load_class_transitive(&self, fqcn: &str, max_depth: usize) -> usize {
1151 if self.resolver.is_none() {
1152 return 0;
1153 }
1154 let mut loaded = 0;
1155 let mut frontier: Vec<String> = vec![fqcn.to_string()];
1156 let mut visited: std::collections::HashSet<String> = std::collections::HashSet::default();
1157
1158 for _ in 0..max_depth {
1159 if frontier.is_empty() {
1160 break;
1161 }
1162 // Phase 1: load every class in this frontier level (mutates inputs).
1163 let mut to_expand: Vec<String> = Vec::with_capacity(frontier.len());
1164 for name in frontier.drain(..) {
1165 if !visited.insert(name.clone()) {
1166 continue;
1167 }
1168 let was_present = self.contains_class(&name);
1169 if !self.load_class(&name).is_loaded() {
1170 continue;
1171 }
1172 if !was_present {
1173 loaded += 1;
1174 }
1175 to_expand.push(name);
1176 }
1177
1178 // Phase 2: follow the declared-type closure of every resolved class
1179 // — including already-present ones, so a class loaded shallowly
1180 // elsewhere still gets its references expanded. One db snapshot for
1181 // the whole level (all loads above are committed); `visited` and
1182 // `contains_class` keep re-walks cheap and loop-free.
1183 let mut next: Vec<String> = Vec::new();
1184 if !to_expand.is_empty() {
1185 let db = self.snapshot_db();
1186 for name in &to_expand {
1187 let here = crate::db::Fqcn::from_str(&db, name.as_str());
1188 if let Some(class) = crate::db::find_class_like(&db, here) {
1189 crate::batch::collect_class_referenced_fqcns(&class, &mut next);
1190 }
1191 }
1192 }
1193 frontier = next;
1194 }
1195 loaded
1196 }
1197
1198 /// Evict every negative-cache entry whose stored resolver-mapped path
1199 /// equals `file`. FQCNs cached as never-resolvable (path `None`) are left
1200 /// alone — no source-text change can make them resolvable.
1201 fn evict_unresolvable_for_file(&self, file: &str) {
1202 let mut cache = self.unresolvable_fqcns.write();
1203 if cache.is_empty() {
1204 return;
1205 }
1206 cache.retain(|_fqcn, path| path.as_deref() != Some(file));
1207 }
1208
1209 /// Bulk variant of [`Self::evict_unresolvable_for_file`]. One `HashSet`
1210 /// build + one pass over the cache; no resolver calls.
1211 fn evict_unresolvable_for_files(&self, files: &[Arc<str>]) {
1212 let mut cache = self.unresolvable_fqcns.write();
1213 if cache.is_empty() {
1214 return;
1215 }
1216 let registered: HashSet<&str> = files.iter().map(|f| f.as_ref()).collect();
1217 cache.retain(|_fqcn, path| match path {
1218 Some(p) => !registered.contains(p.as_ref()),
1219 None => true,
1220 });
1221 }
1222
1223 /// Retrieve the source text the session has registered for `file`, if
1224 /// any. Returns `None` when the file has never been ingested. Used by
1225 /// the parallel re-analysis path to re-feed dependents to body analysis without
1226 /// the caller having to track sources independently.
1227 pub fn source_of(&self, file: &str) -> Option<Arc<str>> {
1228 let db = self.snapshot_db();
1229 let sf = db.lookup_source_file(file)?;
1230 Some(sf.text(&db))
1231 }
1232
1233 /// Re-analyze every transitive dependent of `file` in parallel.
1234 ///
1235 /// When the user saves a file that other files depend on (e.g. editing
1236 /// a base class, an interface, or a trait), those dependents may have
1237 /// new diagnostics. This method computes them in parallel using rayon
1238 /// and returns the per-file analysis results so the LSP server can
1239 /// publish updated diagnostics in one batch.
1240 ///
1241 /// Source text for dependents is retrieved from the session's salsa
1242 /// inputs (set by previous `ingest_file` calls) — the caller doesn't
1243 /// need to track or re-read files. Files for which the session has no
1244 /// source are silently skipped (returns the analyzable subset).
1245 ///
1246 /// Cross-file inferred return types are resolved on demand via salsa.
1247 pub fn reanalyze_dependents(&self, file: &str) -> Vec<(Arc<str>, crate::FileAnalysis)> {
1248 use rayon::prelude::*;
1249
1250 // Phase 1: compute dependents + gather their sources outside the
1251 // analysis loop so each worker has everything it needs.
1252 let dependents = self.dependency_graph().transitive_dependents(file);
1253 if dependents.is_empty() {
1254 return Vec::new();
1255 }
1256 let with_source: Vec<(Arc<str>, Arc<str>)> = dependents
1257 .into_iter()
1258 .filter_map(|path| {
1259 let arc_path: Arc<str> = Arc::from(path.as_str());
1260 let src = self.source_of(&path)?;
1261 Some((arc_path, src))
1262 })
1263 .collect();
1264 if with_source.is_empty() {
1265 return Vec::new();
1266 }
1267
1268 // Phase 2: parallel parse + analyze. Each rayon worker gets its own
1269 // database snapshot via FileAnalyzer; writes are isolated to the
1270 // session's canonical db (none happen here since we only run body analysis).
1271 with_source
1272 .into_par_iter()
1273 .map(|(file, source)| {
1274 let parsed = php_rs_parser::parse(source.as_ref());
1275 let analyzer = crate::FileAnalyzer::new(self);
1276 let analysis = analyzer.analyze(
1277 file.clone(),
1278 source.as_ref(),
1279 &parsed.program,
1280 &parsed.source_map,
1281 );
1282 (file, analysis)
1283 })
1284 .collect()
1285 }
1286
1287 /// FQCNs that `file` imports via `use` statements but that aren't yet
1288 /// loaded in the session.
1289 ///
1290 /// Designed as the input to background prefetching: after the LSP server
1291 /// ingests an open buffer, it can call this and lazy-load the returned
1292 /// FQCNs on a worker thread so the user's first Cmd+Click into vendor
1293 /// code doesn't pay the file-read+parse cost.
1294 ///
1295 /// Returns an empty Vec if the file hasn't been ingested or has no
1296 /// unresolved imports.
1297 pub fn pending_lazy_loads(&self, file: &str) -> Vec<Arc<str>> {
1298 let db = self.snapshot_db();
1299 let imports = db.file_imports(file);
1300 if imports.is_empty() {
1301 return Vec::new();
1302 }
1303 let mut out = Vec::new();
1304 for fqcn in imports.values() {
1305 let here = crate::db::Fqcn::new(&db, *fqcn);
1306 if crate::db::find_class_like(&db, here).is_some() {
1307 continue;
1308 }
1309 if let Some(resolver) = &self.resolver {
1310 if resolver.resolve(fqcn.as_str()).is_some() {
1311 out.push(Arc::from(fqcn.as_str()));
1312 }
1313 }
1314 }
1315 out
1316 }
1317
1318 /// Convenience: synchronously lazy-load every import of `file` that
1319 /// isn't already in the codebase. Returns the number successfully loaded.
1320 ///
1321 /// For non-blocking prefetch, call this from a worker thread:
1322 ///
1323 /// ```ignore
1324 /// let s = session.clone(); // AnalysisSession is wrapped in Arc by callers
1325 /// std::thread::spawn(move || {
1326 /// s.prefetch_imports(&file_path);
1327 /// });
1328 /// ```
1329 ///
1330 /// Internally walks the inheritance chain of each loaded class to a
1331 /// shallow depth so member access on imported types type-checks without
1332 /// the user paying the cost on their first navigation.
1333 pub fn prefetch_imports(&self, file: &str) -> usize {
1334 let pending = self.pending_lazy_loads(file);
1335 let mut loaded = 0;
1336 for fqcn in pending {
1337 // Use the transitive walker with a small depth so we pick up
1338 // parent classes / interfaces needed for member resolution, but
1339 // don't recursively pull in the entire vendor tree.
1340 loaded += self.load_class_transitive(&fqcn, 2);
1341 }
1342 loaded
1343 }
1344
1345 /// All class / interface / trait / enum FQCNs currently known to the
1346 /// session, each paired with the file that defines them when available.
1347 ///
1348 /// Use this to build workspace-wide views (outline, fuzzy search, etc.).
1349 /// Consumers implement their own search/match logic on top — the analyzer
1350 /// only exposes the iterator.
1351 pub fn all_classes(&self) -> Vec<(Arc<str>, Option<mir_types::Location>)> {
1352 let db = self.snapshot_db();
1353 crate::db::workspace_classes(&db)
1354 .iter()
1355 .filter_map(|fqcn| {
1356 let here = crate::db::Fqcn::from_str(&db, fqcn.as_ref());
1357 crate::db::find_class_like(&db, here)
1358 .map(|class| (fqcn.clone(), class.location().cloned()))
1359 })
1360 .collect()
1361 }
1362
1363 /// All global function FQNs currently known to the session, each paired
1364 /// with their declaration location when available.
1365 pub fn all_functions(&self) -> Vec<(Arc<str>, Option<mir_types::Location>)> {
1366 let db = self.snapshot_db();
1367 crate::db::workspace_functions(&db)
1368 .iter()
1369 .filter_map(|fqn| {
1370 let here = crate::db::Fqcn::from_str(&db, fqn.as_ref());
1371 crate::db::find_function(&db, here).map(|f| (fqn.clone(), f.location.clone()))
1372 })
1373 .collect()
1374 }
1375
1376 /// Compute `file`'s outgoing dependency edges and update both the in-memory
1377 /// reverse-dep map (always) and the disk cache's reverse-dep graph (if configured).
1378 fn update_reverse_deps_for(&self, file: &str) {
1379 let db = self.snapshot_db();
1380 let targets = file_outgoing_dependencies(&db, file);
1381
1382 // Always update the in-memory map.
1383 self.update_in_memory_reverse_deps(file, &targets);
1384
1385 // Also persist to disk cache if configured.
1386 if let Some(cache) = self.cache.as_deref() {
1387 cache.update_reverse_deps_for_file(file, &targets);
1388 }
1389 }
1390
1391 /// Update the in-memory reverse dependency map for `file` with `new_targets`.
1392 /// Removes `file` from all existing entries, then adds it as a dependent of
1393 /// each target in `new_targets` (excluding self-edges).
1394 fn update_in_memory_reverse_deps(&self, file: &str, new_targets: &HashSet<String>) {
1395 let file_id = self.file_id_map.write().assign_or_get(file);
1396 let target_ids: Vec<FileId> = {
1397 let mut id_map = self.file_id_map.write();
1398 new_targets
1399 .iter()
1400 .map(|t| id_map.assign_or_get(t))
1401 .collect()
1402 };
1403
1404 let mut map = self.reverse_dep_map.write();
1405 for dependents in map.values_mut() {
1406 dependents.remove(&file_id);
1407 }
1408 map.retain(|_, dependents| !dependents.is_empty());
1409 for target_id in target_ids {
1410 if target_id != file_id {
1411 map.entry(target_id).or_default().insert(file_id);
1412 }
1413 }
1414 }
1415
1416 /// BFS transitive dependents of `file` using the in-memory reverse dep map.
1417 ///
1418 /// O(D) where D is the number of transitive dependents — faster than
1419 /// [`Self::dependency_graph().transitive_dependents()`] which rebuilds the
1420 /// full graph on every call. Only covers structural dependencies from definition collection
1421 /// (imports, class hierarchy, type hints); does not include bare FQN body
1422 /// references recorded during body analysis. For full fidelity, use
1423 /// `dependency_graph().transitive_dependents()` after body analysis is complete.
1424 pub fn structural_dependents(&self, file: &str) -> Vec<String> {
1425 let Some(start_id) = self.file_id_map.read().get(file) else {
1426 return Vec::new();
1427 };
1428 let map = self.reverse_dep_map.read();
1429 let mut visited: HashSet<FileId> = HashSet::default();
1430 let mut queue = vec![start_id];
1431 let mut result_ids = Vec::new();
1432 while let Some(current_id) = queue.pop() {
1433 if !visited.insert(current_id) {
1434 continue;
1435 }
1436 if let Some(deps) = map.get(¤t_id) {
1437 for &dep_id in deps {
1438 if !visited.contains(&dep_id) {
1439 queue.push(dep_id);
1440 result_ids.push(dep_id);
1441 }
1442 }
1443 }
1444 }
1445 drop(map);
1446 let id_map = self.file_id_map.read();
1447 result_ids
1448 .iter()
1449 .filter_map(|&id| id_map.path(id))
1450 .map(|s| s.to_string())
1451 .collect()
1452 }
1453
1454 /// File dependency graph: which files depend on which other files.
1455 /// Used for incremental invalidation in LSP servers and build systems.
1456 ///
1457 /// File dependency graph: which files depend on which other files.
1458 /// Used for incremental invalidation in LSP servers and build systems.
1459 ///
1460 /// O(edges) — iterates the `file_references` forward index (file → symbol
1461 /// keys it references) which is always current, then resolves each symbol
1462 /// to its defining file via O(1) lookup. Total cost is O(E) where E is the
1463 /// number of (file, symbol) reference edges, vs. the old O(F × S × R) scan.
1464 pub fn dependency_graph(&self) -> crate::DependencyGraph {
1465 let db = self.snapshot_db();
1466
1467 let all_files: Vec<String> = db
1468 .source_file_paths()
1469 .iter()
1470 .map(|f| f.as_ref().to_string())
1471 .collect();
1472
1473 let mut dependencies: HashMap<String, Vec<String>> = HashMap::default();
1474 let mut dependents: HashMap<String, Vec<String>> = HashMap::default();
1475
1476 for file in &all_files {
1477 // O(degree(file)) — forward index lookup, no full-table scan.
1478 let symbol_keys = db.file_referenced_symbols(file);
1479 let mut file_deps: HashSet<String> = HashSet::default();
1480 for symbol_key in &symbol_keys {
1481 let lookup: &str = match symbol_key.split_once("::") {
1482 Some((class, _)) => class,
1483 None => symbol_key.as_ref(),
1484 };
1485 if let Some(def_file) = db.symbol_defining_file(lookup) {
1486 let def = def_file.as_ref().to_string();
1487 if &def != file {
1488 file_deps.insert(def);
1489 }
1490 }
1491 }
1492 for dep in &file_deps {
1493 dependents
1494 .entry(dep.clone())
1495 .or_default()
1496 .push(file.clone());
1497 dependencies
1498 .entry(file.clone())
1499 .or_default()
1500 .push(dep.clone());
1501 }
1502 }
1503
1504 // Merge structural deps from definition collection from the incremental reverse_dep_map.
1505 // dependency_graph() above only captures bare-FQN references recorded during body analysis;
1506 // the reverse_dep_map covers imports, class hierarchy (extends/implements/use),
1507 // and type-hint-only references that never appear in file_referenced_symbols.
1508 // Together they give a complete picture without requiring body analysis on every file.
1509 {
1510 let id_map = self.file_id_map.read();
1511 let rev = self.reverse_dep_map.read();
1512 for (&target_id, dep_set) in rev.iter() {
1513 let Some(target) = id_map.path(target_id) else {
1514 continue;
1515 };
1516 let target = target.to_string();
1517 for &dep_id in dep_set {
1518 let Some(dep) = id_map.path(dep_id) else {
1519 continue;
1520 };
1521 let dep = dep.to_string();
1522 if dep != target {
1523 dependents
1524 .entry(target.clone())
1525 .or_default()
1526 .push(dep.clone());
1527 dependencies
1528 .entry(dep.clone())
1529 .or_default()
1530 .push(target.clone());
1531 }
1532 }
1533 }
1534 }
1535
1536 for deps in dependents.values_mut() {
1537 deps.sort();
1538 deps.dedup();
1539 }
1540 for deps in dependencies.values_mut() {
1541 deps.sort();
1542 deps.dedup();
1543 }
1544
1545 // Augment with stale dependents: files referencing symbols that were
1546 // deleted from their defining file. These edges disappear from the
1547 // symbol_defining_file lookup but the referencing file still needs
1548 // re-analysis to surface the now-broken reference.
1549 {
1550 let stale = self.stale_defined_symbols.read();
1551 if !stale.is_empty() {
1552 for (file, deleted_syms) in stale.iter() {
1553 for sym in deleted_syms {
1554 let lookup: &str = match sym.split_once("::") {
1555 Some((class, _)) => class,
1556 None => sym.as_ref(),
1557 };
1558 for referencing_file in db.symbol_referencers_of(lookup) {
1559 let ref_file = referencing_file.as_ref().to_string();
1560 if &ref_file != file {
1561 dependents
1562 .entry(file.clone())
1563 .or_default()
1564 .push(ref_file.clone());
1565 dependencies.entry(ref_file).or_default().push(file.clone());
1566 }
1567 }
1568 }
1569 }
1570 // Re-sort and dedup since we may have added entries.
1571 for deps in dependents.values_mut() {
1572 deps.sort();
1573 deps.dedup();
1574 }
1575 for deps in dependencies.values_mut() {
1576 deps.sort();
1577 deps.dedup();
1578 }
1579 }
1580 }
1581
1582 crate::DependencyGraph {
1583 dependencies,
1584 dependents,
1585 }
1586 }
1587}
1588
1589/// Compute the set of files `file` depends on: defining files of its imports,
1590/// plus parent / interfaces / traits' defining files for any classes declared
1591/// in `file`. Self-edges are excluded.
1592fn file_outgoing_dependencies(db: &dyn MirDatabase, file: &str) -> HashSet<String> {
1593 let mut targets: HashSet<String> = HashSet::default();
1594
1595 let mut add_target = |symbol: &str| {
1596 if let Some(defining_file) = db.symbol_defining_file(symbol) {
1597 let def = defining_file.as_ref().to_string();
1598 if def != file {
1599 targets.insert(def);
1600 }
1601 }
1602 };
1603
1604 let extract_named_objects = |union: &mir_types::Type| {
1605 union
1606 .types
1607 .iter()
1608 .filter_map(|atomic| match atomic {
1609 mir_types::atomic::Atomic::TNamedObject { fqcn, .. } => Some(*fqcn),
1610 _ => None,
1611 })
1612 .collect::<Vec<_>>()
1613 };
1614
1615 let imports = db.file_imports(file);
1616 for fqcn in imports.values() {
1617 add_target(fqcn.as_str());
1618 }
1619
1620 // Walk every class/interface/trait/enum/function defined in this file
1621 // via the pull-path slice. Push-path lookup_*_node have been retired.
1622 if let Some(sf) = db.lookup_source_file(file) {
1623 let defs = crate::db::collect_file_definitions(db, sf);
1624 for c in defs.slice.classes.iter() {
1625 if let Some(p) = &c.parent {
1626 add_target(p);
1627 }
1628 for iface in c.interfaces.iter() {
1629 add_target(iface);
1630 }
1631 for tr in c.traits.iter() {
1632 add_target(tr);
1633 }
1634 for prop in c.own_properties.values() {
1635 if let Some(ty) = &prop.ty {
1636 for named in extract_named_objects(ty) {
1637 add_target(named.as_ref());
1638 }
1639 }
1640 }
1641 for method in c.own_methods.values() {
1642 for param in method.params.iter() {
1643 if let Some(ty) = ¶m.ty {
1644 for named in extract_named_objects(ty.as_ref()) {
1645 add_target(named.as_ref());
1646 }
1647 }
1648 }
1649 if let Some(rt) = method.return_type.as_deref() {
1650 for named in extract_named_objects(rt) {
1651 add_target(named.as_ref());
1652 }
1653 }
1654 }
1655 }
1656 for i in defs.slice.interfaces.iter() {
1657 for ext in i.extends.iter() {
1658 add_target(ext);
1659 }
1660 for method in i.own_methods.values() {
1661 for param in method.params.iter() {
1662 if let Some(ty) = ¶m.ty {
1663 for named in extract_named_objects(ty.as_ref()) {
1664 add_target(named.as_ref());
1665 }
1666 }
1667 }
1668 if let Some(rt) = method.return_type.as_deref() {
1669 for named in extract_named_objects(rt) {
1670 add_target(named.as_ref());
1671 }
1672 }
1673 }
1674 }
1675 for t in defs.slice.traits.iter() {
1676 for tr in t.traits.iter() {
1677 add_target(tr);
1678 }
1679 }
1680 for f in defs.slice.functions.iter() {
1681 for param in f.params.iter() {
1682 if let Some(ty) = ¶m.ty {
1683 for named in extract_named_objects(ty.as_ref()) {
1684 add_target(named.as_ref());
1685 }
1686 }
1687 }
1688 if let Some(rt) = f.return_type.as_deref() {
1689 for named in extract_named_objects(rt) {
1690 add_target(named.as_ref());
1691 }
1692 }
1693 }
1694 }
1695
1696 // Also track bare-FQN references recorded during body analysis (new \Foo(), \Foo::method(),
1697 // \foo()) that do not appear in use-import statements.
1698 for symbol_key in db.file_referenced_symbols(file) {
1699 let lookup: &str = match symbol_key.split_once("::") {
1700 Some((class, _)) => class,
1701 None => &symbol_key,
1702 };
1703 add_target(lookup);
1704 }
1705
1706 targets
1707}
1708
1709/// AST visitor that collects class FQCN references for PSR-4 preloading.
1710/// Captures identifiers from `new X`, static calls / property / constant
1711/// access, type hints, and `instanceof`. Does *not* normalize via PSR-4 /
1712/// imports — callers run the raw string through `resolve_name`.
1713fn collect_class_refs_from_ast(program: &php_ast::owned::Program) -> Vec<String> {
1714 use php_ast::ast::BinaryOp;
1715 use php_ast::owned::visitor::{
1716 walk_owned_catch_clause, walk_owned_expr, walk_owned_program, walk_owned_type_hint,
1717 OwnedVisitor,
1718 };
1719 use php_ast::owned::{ExprKind, TypeHintKind};
1720 use std::ops::ControlFlow;
1721
1722 fn owned_name_str(name: &php_ast::owned::Name) -> String {
1723 let joined: String = name
1724 .parts
1725 .iter()
1726 .map(|p| p.as_ref())
1727 .collect::<Vec<&str>>()
1728 .join("\\");
1729 if name.kind == php_ast::ast::NameKind::FullyQualified {
1730 format!("\\{joined}")
1731 } else {
1732 joined
1733 }
1734 }
1735
1736 struct V {
1737 names: std::collections::HashSet<String>,
1738 }
1739 impl OwnedVisitor for V {
1740 fn visit_expr(&mut self, expr: &php_ast::owned::Expr) -> ControlFlow<()> {
1741 match &expr.kind {
1742 ExprKind::New(n) => {
1743 if let ExprKind::Identifier(name) = &n.class.kind {
1744 self.names.insert(name.as_ref().to_string());
1745 }
1746 }
1747 ExprKind::StaticMethodCall(c) => {
1748 if let ExprKind::Identifier(name) = &c.class.kind {
1749 self.names.insert(name.as_ref().to_string());
1750 }
1751 }
1752 ExprKind::StaticPropertyAccess(a) => {
1753 if let ExprKind::Identifier(name) = &a.class.kind {
1754 self.names.insert(name.as_ref().to_string());
1755 }
1756 }
1757 ExprKind::ClassConstAccess(a) => {
1758 if let ExprKind::Identifier(name) = &a.class.kind {
1759 self.names.insert(name.as_ref().to_string());
1760 }
1761 }
1762 ExprKind::Binary(b) if b.op == BinaryOp::Instanceof => {
1763 if let ExprKind::Identifier(name) = &b.right.kind {
1764 self.names.insert(name.as_ref().to_string());
1765 }
1766 }
1767 _ => {}
1768 }
1769 walk_owned_expr(self, expr)
1770 }
1771
1772 fn visit_type_hint(&mut self, hint: &php_ast::owned::TypeHint) -> ControlFlow<()> {
1773 if let TypeHintKind::Named(name) = &hint.kind {
1774 let s = owned_name_str(name);
1775 if !s.is_empty() {
1776 self.names.insert(s);
1777 }
1778 }
1779 walk_owned_type_hint(self, hint)
1780 }
1781
1782 fn visit_catch_clause(&mut self, catch: &php_ast::owned::CatchClause) -> ControlFlow<()> {
1783 for ty in catch.types.iter() {
1784 self.names.insert(owned_name_str(ty));
1785 }
1786 walk_owned_catch_clause(self, catch)
1787 }
1788 }
1789 let mut v = V {
1790 names: std::collections::HashSet::default(),
1791 };
1792 let _ = walk_owned_program(&mut v, program);
1793 v.names.into_iter().collect()
1794}