Skip to main content

mir_analyzer/
session.rs

1//! Session-based analysis API for incremental, per-file analysis.
2//!
3//! [`AnalysisSession`] owns the salsa database and per-session caches for a
4//! long-running analysis context shared across many per-file analyses. Reads
5//! clone the database under a brief lock, then run lock-free; writes hold the
6//! lock briefly to mutate canonical state. `MirDb::clone()` is cheap
7//! (Arc-wrapped registries), so this pattern gives parallel readers without
8//! blocking on concurrent writes for longer than the clone itself.
9//!
10//! See [`crate::file_analyzer::FileAnalyzer`] for the per-file Pass 2 entry
11//! point that operates against a session.
12
13use std::collections::{HashMap, HashSet};
14use std::path::PathBuf;
15use std::sync::Arc;
16
17use parking_lot::RwLock;
18
19use crate::cache::AnalysisCache;
20use crate::composer::Psr4Map;
21use crate::db::{MirDatabase, MirDb};
22use crate::php_version::PhpVersion;
23use crate::shared_db::SharedDb;
24
/// Long-lived analysis context. Owns the salsa database and tracks which
/// stubs have been loaded.
///
/// Cheap to clone the inner db for parallel reads; writes funnel through
/// [`Self::ingest_file`], [`Self::invalidate_file`], and the crate-internal
/// [`Self::with_db_mut`].
pub struct AnalysisSession {
    /// Shared database management (salsa, file registry, stub tracking).
    /// Extracted to allow code sharing with ProjectAnalyzer.
    shared_db: Arc<SharedDb>,
    /// Optional analysis cache, attached via [`Self::with_cache`] or
    /// [`Self::with_cache_dir`]. `None` until one of those is called.
    cache: Option<Arc<AnalysisCache>>,
    /// PSR-4 / Composer autoload map. Retained alongside `resolver` so the
    /// `psr4()` accessor can still return a typed `Psr4Map` for callers that
    /// need Composer-specific data (project_files / vendor_files / etc.).
    psr4: Option<Arc<Psr4Map>>,
    /// Generic class resolver used for on-demand lazy loading. When `psr4`
    /// is set via [`Self::with_psr4`], this is populated with the same map
    /// re-typed as `dyn ClassResolver`. Consumers can also supply their own
    /// resolver via [`Self::with_class_resolver`] without going through
    /// Composer.
    resolver: Option<Arc<dyn crate::ClassResolver>>,
    /// PHP language version this session targets; passed along whenever
    /// embedded stubs are ingested.
    php_version: PhpVersion,
    /// Individual user stub files, set via [`Self::with_user_stubs`].
    user_stub_files: Vec<PathBuf>,
    /// Directories of user stubs, set via [`Self::with_user_stubs`].
    user_stub_dirs: Vec<PathBuf>,
    /// In-memory reverse dependency map: target_file → set of files that
    /// depend on it. Always maintained (not gated on disk cache presence),
    /// enabling `analyze_dependents_of` and `dependency_graph()` without a
    /// disk cache. Updated in `ingest_file` and `invalidate_file`.
    reverse_dep_map: Arc<RwLock<HashMap<String, HashSet<String>>>>,
}
55
56impl AnalysisSession {
57    /// Create a session targeting the given PHP language version.
58    pub fn new(php_version: PhpVersion) -> Self {
59        Self {
60            shared_db: Arc::new(SharedDb::new()),
61            cache: None,
62            psr4: None,
63            resolver: None,
64            php_version,
65            user_stub_files: Vec::new(),
66            user_stub_dirs: Vec::new(),
67            reverse_dep_map: Arc::new(RwLock::new(HashMap::new())),
68        }
69    }
70
71    pub fn with_cache(mut self, cache: Arc<AnalysisCache>) -> Self {
72        self.cache = Some(cache);
73        self
74    }
75
76    /// Convenience: open a disk-backed cache at `cache_dir` and attach it.
77    /// Avoids forcing callers to wrap [`AnalysisCache`] in `Arc` themselves.
78    pub fn with_cache_dir(self, cache_dir: &std::path::Path) -> Self {
79        self.with_cache(Arc::new(AnalysisCache::open(cache_dir)))
80    }
81
82    /// Attach a Composer autoload map (PSR-4, PSR-0, classmap, files).
83    /// Sets the same map as the active [`crate::ClassResolver`] so
84    /// [`Self::lazy_load_class`] works out of the box.
85    pub fn with_psr4(mut self, map: Arc<Psr4Map>) -> Self {
86        let resolver: Arc<dyn crate::ClassResolver> = map.clone();
87        self.psr4 = Some(map);
88        self.resolver = Some(resolver);
89        self
90    }
91
92    /// Attach a generic class resolver for projects that don't use Composer
93    /// (WordPress, Drupal, custom autoloaders, workspace-walk indexes).
94    /// Replaces any previously-set Composer-backed resolver.
95    pub fn with_class_resolver(mut self, resolver: Arc<dyn crate::ClassResolver>) -> Self {
96        self.resolver = Some(resolver);
97        self
98    }
99
100    pub fn with_user_stubs(mut self, files: Vec<PathBuf>, dirs: Vec<PathBuf>) -> Self {
101        self.user_stub_files = files;
102        self.user_stub_dirs = dirs;
103        self
104    }
105
    /// The PHP language version this session was created with.
    pub fn php_version(&self) -> PhpVersion {
        self.php_version
    }
109
110    pub fn cache(&self) -> Option<&AnalysisCache> {
111        self.cache.as_deref()
112    }
113
114    pub fn psr4(&self) -> Option<&Psr4Map> {
115        self.psr4.as_deref()
116    }
117
    /// Load every PHP built-in stub plus any configured user stubs.
    ///
    /// This is a plain alias: it forwards to [`Self::ensure_all_stubs_loaded`]
    /// and does nothing else.
    ///
    /// **Deprecated**: prefer [`Self::ensure_all_stubs_loaded`] (explicit
    /// "comprehensive") or [`Self::ensure_essential_stubs_loaded`] (fast
    /// cold-start with auto-discovery on demand).
    #[doc(hidden)]
    pub fn ensure_stubs_loaded(&self) {
        self.ensure_all_stubs_loaded();
    }
127
128    /// Load only the curated set of essential stubs (Core, standard, SPL,
129    /// date) plus any configured user stubs. About 25 of 120 stub files;
130    /// covers types and functions used by virtually all PHP code.
131    ///
132    /// Other extension stubs (Reflection, gd, openssl, …) can be brought in
133    /// on demand via [`Self::ensure_stubs_for_symbol`] when user code
134    /// references them. Idempotent — already-loaded stubs are skipped.
135    pub fn ensure_essential_stubs_loaded(&self) {
136        self.shared_db
137            .ingest_stub_paths(crate::stubs::ESSENTIAL_STUB_PATHS, self.php_version);
138        self.ensure_user_stubs_loaded();
139    }
140
141    /// Load every embedded PHP stub plus any configured user stubs.
142    /// Use for batch tools (CLI, full project analysis) where comprehensive
143    /// symbol coverage matters more than cold-start latency.
144    pub fn ensure_all_stubs_loaded(&self) {
145        let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
146        self.shared_db.ingest_stub_paths(&paths, self.php_version);
147        self.ensure_user_stubs_loaded();
148    }
149
150    /// Ensure the embedded stub that defines `name` (a function) is ingested.
151    /// Returns `true` when a matching stub exists (whether or not it was
152    /// already loaded), `false` when `name` isn't a known PHP built-in.
153    ///
154    /// Most callers should use [`Self::ensure_stubs_for_ast`] instead —
155    /// it auto-discovers needed stubs from a parsed file.
156    #[doc(hidden)]
157    pub fn ensure_stub_for_function(&self, name: &str) -> bool {
158        match crate::stubs::stub_path_for_function(name) {
159            Some(path) => {
160                self.shared_db.ingest_stub_paths(&[path], self.php_version);
161                true
162            }
163            None => false,
164        }
165    }
166
167    /// Ensure the embedded stub that defines `fqcn` (a class / interface /
168    /// trait / enum) is ingested. Case-insensitive lookup with optional
169    /// leading backslash.
170    ///
171    /// Most callers should use [`Self::ensure_stubs_for_ast`] instead.
172    #[doc(hidden)]
173    pub fn ensure_stub_for_class(&self, fqcn: &str) -> bool {
174        match crate::stubs::stub_path_for_class(fqcn) {
175            Some(path) => {
176                self.shared_db.ingest_stub_paths(&[path], self.php_version);
177                true
178            }
179            None => false,
180        }
181    }
182
183    /// Ensure the embedded stub that defines `name` (a constant) is ingested.
184    ///
185    /// Most callers should use [`Self::ensure_stubs_for_ast`] instead.
186    #[doc(hidden)]
187    pub fn ensure_stub_for_constant(&self, name: &str) -> bool {
188        match crate::stubs::stub_path_for_constant(name) {
189            Some(path) => {
190                self.shared_db.ingest_stub_paths(&[path], self.php_version);
191                true
192            }
193            None => false,
194        }
195    }
196
197    /// Number of distinct embedded stubs currently ingested into the session.
198    /// Useful for diagnostics and bench reporting.
199    pub fn loaded_stub_count(&self) -> usize {
200        self.shared_db.loaded_stubs.lock().len()
201    }
202
203    /// Auto-discover and ingest the embedded stubs needed to cover every
204    /// built-in PHP function / class / constant referenced by `source`.
205    ///
206    /// Used by [`crate::FileAnalyzer::analyze`] to keep essentials-only mode
207    /// correct without forcing callers to enumerate which stubs they need.
208    /// Idempotent — already-loaded stubs are skipped via [`Self::loaded_stubs`].
209    ///
210    /// The discovery scan is a coarse identifier sweep (see
211    /// [`crate::stubs::collect_referenced_builtin_paths`]) — it may pull in
212    /// a slightly larger set than the file strictly needs, but never misses
213    /// a referenced built-in. Cost is sub-millisecond per file.
214    ///
215    /// Fast path: if every embedded stub is already loaded (e.g. after a
216    /// batch tool called [`Self::ensure_all_stubs_loaded`]), the source scan
217    /// is skipped entirely.
218    pub fn ensure_stubs_for_source(&self, source: &str) {
219        // Cheap check first: skip the scan entirely when we already know we
220        // have everything. Avoids a ~50-500µs source walk on every analyze
221        // call in batch / warm-session scenarios.
222        {
223            let loaded = self.shared_db.loaded_stubs.lock();
224            if loaded.len() >= crate::stubs::stub_files().len() {
225                return;
226            }
227        }
228        let paths = crate::stubs::collect_referenced_builtin_paths(source);
229        if paths.is_empty() {
230            return;
231        }
232        self.shared_db.ingest_stub_paths(&paths, self.php_version);
233    }
234
235    /// Discover and ingest stubs by walking the parsed AST of a PHP file.
236    ///
237    /// Similar to [`Self::ensure_stubs_for_source`], but takes an already-parsed
238    /// AST instead of raw source text. Produces zero false positives since it
239    /// only extracts identifiers from actual AST nodes (not from strings or
240    /// comments). Preferred over `ensure_stubs_for_source` when the AST is
241    /// already available (e.g., in [`crate::FileAnalyzer`]).
242    ///
243    /// Idempotent and skips the scan if all stubs are already loaded.
244    pub fn ensure_stubs_for_ast(&self, program: &php_ast::ast::Program<'_, '_>) {
245        {
246            let loaded = self.shared_db.loaded_stubs.lock();
247            if loaded.len() >= crate::stubs::stub_files().len() {
248                return;
249            }
250        }
251        let paths = crate::stubs::collect_referenced_builtin_paths_from_ast(program);
252        if paths.is_empty() {
253            return;
254        }
255        self.shared_db.ingest_stub_paths(&paths, self.php_version);
256    }
257
258    fn ensure_user_stubs_loaded(&self) {
259        self.shared_db
260            .ingest_user_stubs(&self.user_stub_files, &self.user_stub_dirs);
261    }
262
    /// Cheap clone of the salsa db for a read-only query. The lock is held
    /// only for the duration of the clone, so concurrent readers never
    /// serialize on each other or on writes for longer than the clone itself.
    ///
    /// This method only forwards to `SharedDb::snapshot_db`; the locking and
    /// cloning described above happen there.
    ///
    /// **Internal API — exposes Salsa types.** Subject to change without
    /// notice. Public consumers should use the typed query methods
    /// ([`Self::definition_of`], [`Self::hover`], etc.) instead.
    #[doc(hidden)]
    pub fn snapshot_db(&self) -> MirDb {
        self.shared_db.snapshot_db()
    }
274
275    /// Run a closure with read access to a database snapshot.
276    ///
277    /// **Internal API — exposes Salsa types.** Subject to change without
278    /// notice.
279    #[doc(hidden)]
280    pub fn read<R>(&self, f: impl FnOnce(&dyn MirDatabase) -> R) -> R {
281        let db = self.snapshot_db();
282        f(&db)
283    }
284
285    /// Pass 1 ingestion. Updates the file's source text in the salsa db,
286    /// runs definition collection, and ingests the resulting stub slice.
287    /// Triggers stub loading on first call. Also updates the cache's reverse-
288    /// dependency graph for `file` so cross-file invalidation stays correct
289    /// across incremental edits — without rebuilding the graph from scratch.
290    ///
291    /// If `file` was previously ingested, its old definitions and reference
292    /// locations are removed first so renames / deletions don't leave stale
293    /// state in the codebase. (Without this, long-running sessions would
294    /// accumulate dead reference-location entries indefinitely.)
295    pub fn ingest_file(&self, file: Arc<str>, source: Arc<str>) {
296        self.ensure_stubs_loaded();
297        {
298            let mut guard = self.shared_db.salsa.write();
299            guard.remove_file_definitions(file.as_ref());
300        }
301        let _file_defs = self
302            .shared_db
303            .collect_and_ingest_file(file.clone(), source.as_ref());
304        self.update_reverse_deps_for(&file);
305    }
306
307    /// Drop a file's contribution to the session: codebase definitions,
308    /// reference locations, salsa input handle, cache entry, and outgoing
309    /// reverse-dependency edges. Cache entries of *dependent* files are
310    /// also evicted (cross-file invalidation).
311    ///
312    /// Use this when a file is closed by the consumer, or before a re-ingest
313    /// of substantially changed content. (Plain re-ingest via
314    /// [`Self::ingest_file`] also drops old definitions, but does not
315    /// remove the salsa input handle — call this for full cleanup.)
316    pub fn invalidate_file(&self, file: &str) {
317        {
318            let mut guard = self.shared_db.salsa.write();
319            guard.remove_file_definitions(file);
320            guard.remove_source_file(file);
321        }
322        // Remove this file's outgoing deps from the in-memory reverse dep map.
323        self.update_in_memory_reverse_deps(file, &HashSet::new());
324        if let Some(cache) = &self.cache {
325            cache.update_reverse_deps_for_file(file, &HashSet::new());
326            cache.evict_with_dependents(&[file.to_string()]);
327        }
328    }
329
330    /// Number of files currently tracked in this session's salsa input set.
331    /// Stable across reads; useful for diagnostics and memory bounds checks.
332    pub fn tracked_file_count(&self) -> usize {
333        let guard = self.shared_db.salsa.read();
334        guard.source_file_count()
335    }
336
337    // -----------------------------------------------------------------------
338    // Read-only codebase queries
339    //
340    // All take a brief lock to clone the db, then run the lookup against the
341    // owned snapshot — concurrent edits proceed without blocking.
342    // -----------------------------------------------------------------------
343
344    /// Resolve a top-level symbol (class or function) to its declaration
345    /// location. Powers go-to-definition.
346    ///
347    /// Returns:
348    /// - `Ok(Location)` — symbol found with a source location
349    /// - `Err(NotFound)` — no such symbol in the codebase
350    /// - `Err(NoSourceLocation)` — symbol exists but has no recorded span
351    ///   (e.g. some stub-only declarations)
352    pub fn definition_of(
353        &self,
354        symbol: &crate::Symbol,
355    ) -> Result<mir_codebase::storage::Location, crate::SymbolLookupError> {
356        let db = self.snapshot_db();
357        match symbol {
358            crate::Symbol::Class(fqcn) => {
359                let node = db
360                    .lookup_class_node(fqcn.as_ref())
361                    .filter(|n| n.active(&db))
362                    .ok_or(crate::SymbolLookupError::NotFound)?;
363                node.location(&db)
364                    .ok_or(crate::SymbolLookupError::NoSourceLocation)
365            }
366            crate::Symbol::Function(fqn) => {
367                let node = db
368                    .lookup_function_node(fqn.as_ref())
369                    .filter(|n| n.active(&db))
370                    .ok_or(crate::SymbolLookupError::NotFound)?;
371                node.location(&db)
372                    .ok_or(crate::SymbolLookupError::NoSourceLocation)
373            }
374            crate::Symbol::Method { class, name }
375            | crate::Symbol::Property { class, name }
376            | crate::Symbol::ClassConstant { class, name } => {
377                crate::db::member_location_via_db(&db, class, name)
378                    .ok_or(crate::SymbolLookupError::NotFound)
379            }
380            crate::Symbol::GlobalConstant(_) => {
381                // Global constants don't currently store location info
382                Err(crate::SymbolLookupError::NoSourceLocation)
383            }
384        }
385    }
386
387    /// Hover information for a symbol: type, docstring, and definition location.
388    ///
389    /// Use [`crate::FileAnalysis::symbol_at`] to find the symbol at a cursor
390    /// position, then build a [`crate::Symbol`] from its `kind`. This method
391    /// assembles the displayable hover data.
392    ///
393    /// Returns `Err(NotFound)` if the symbol doesn't exist. May still return
394    /// `Ok` with `docstring: None` or `definition: None` if those specific
395    /// pieces aren't available.
396    pub fn hover(
397        &self,
398        symbol: &crate::Symbol,
399    ) -> Result<crate::HoverInfo, crate::SymbolLookupError> {
400        use mir_types::{Atomic, Union};
401        let db = self.snapshot_db();
402        match symbol {
403            crate::Symbol::Function(fqn) => {
404                let node = db
405                    .lookup_function_node(fqn.as_ref())
406                    .filter(|n| n.active(&db))
407                    .ok_or(crate::SymbolLookupError::NotFound)?;
408                let ty = node
409                    .return_type(&db)
410                    .map(|t| (*t).clone())
411                    .unwrap_or_else(Union::mixed);
412                let docstring = node.docstring(&db).map(|s| s.to_string());
413                let definition = node.location(&db);
414                Ok(crate::HoverInfo {
415                    ty,
416                    docstring,
417                    definition,
418                })
419            }
420            crate::Symbol::Method { class, name } => {
421                let node = db
422                    .lookup_method_node(class.as_ref(), name.as_ref())
423                    .filter(|n| n.active(&db))
424                    .ok_or(crate::SymbolLookupError::NotFound)?;
425                let ty = node
426                    .return_type(&db)
427                    .map(|t| (*t).clone())
428                    .unwrap_or_else(Union::mixed);
429                let docstring = node.docstring(&db).map(|s| s.to_string());
430                let definition = node.location(&db);
431                Ok(crate::HoverInfo {
432                    ty,
433                    docstring,
434                    definition,
435                })
436            }
437            crate::Symbol::Class(fqcn) => {
438                let node = db
439                    .lookup_class_node(fqcn.as_ref())
440                    .filter(|n| n.active(&db))
441                    .ok_or(crate::SymbolLookupError::NotFound)?;
442                let ty = Union::single(Atomic::TNamedObject {
443                    fqcn: fqcn.clone(),
444                    type_params: Vec::new(),
445                });
446                let definition = node.location(&db);
447                Ok(crate::HoverInfo {
448                    ty,
449                    docstring: None,
450                    definition,
451                })
452            }
453            crate::Symbol::Property { class, name } => {
454                let node = db
455                    .lookup_property_node(class.as_ref(), name.as_ref())
456                    .filter(|n| n.active(&db))
457                    .ok_or(crate::SymbolLookupError::NotFound)?;
458                let ty = node.ty(&db).unwrap_or_else(Union::mixed);
459                let definition = node.location(&db);
460                Ok(crate::HoverInfo {
461                    ty,
462                    docstring: None,
463                    definition,
464                })
465            }
466            crate::Symbol::ClassConstant { class, name } => {
467                let node = db
468                    .lookup_class_constant_node(class.as_ref(), name.as_ref())
469                    .filter(|n| n.active(&db))
470                    .ok_or(crate::SymbolLookupError::NotFound)?;
471                let ty = node.ty(&db);
472                let definition = node.location(&db);
473                Ok(crate::HoverInfo {
474                    ty,
475                    docstring: None,
476                    definition,
477                })
478            }
479            crate::Symbol::GlobalConstant(fqn) => {
480                let node = db
481                    .lookup_global_constant_node(fqn.as_ref())
482                    .filter(|n| n.active(&db))
483                    .ok_or(crate::SymbolLookupError::NotFound)?;
484                let ty = node.ty(&db);
485                Ok(crate::HoverInfo {
486                    ty,
487                    docstring: None,
488                    definition: None,
489                })
490            }
491        }
492    }
493
494    /// Every recorded reference to `symbol` with its source location as a Range.
495    /// Use [`crate::FileAnalysis::symbol_at`] to find the symbol at a cursor,
496    /// build a [`crate::Symbol`] from it, and pass it here.
497    pub fn references_to(&self, symbol: &crate::Symbol) -> Vec<(Arc<str>, crate::Range)> {
498        let db = self.snapshot_db();
499        let key = symbol.codebase_key();
500        db.reference_locations(&key)
501            .into_iter()
502            .map(|(file, line, col_start, col_end)| {
503                let range = crate::Range {
504                    start: crate::Position {
505                        line,
506                        column: col_start as u32,
507                    },
508                    end: crate::Position {
509                        line,
510                        column: col_end as u32,
511                    },
512                };
513                (file, range)
514            })
515            .collect()
516    }
517
518    /// Class-level issues (inheritance violations, abstract-method gaps, override
519    /// incompatibilities) for the given set of files.
520    ///
521    /// These checks are cross-file by nature and are not emitted by
522    /// [`crate::FileAnalyzer::analyze`]. Call this after ingesting or
523    /// re-analyzing a file and its dependents to get the full diagnostic picture.
524    ///
525    /// Circular-inheritance checks always run against the full workspace graph
526    /// regardless of the `files` filter — a cycle is a workspace-wide problem.
527    pub fn class_issues_for(&self, files: &[Arc<str>]) -> Vec<crate::Issue> {
528        let db = self.snapshot_db();
529        let file_set: HashSet<Arc<str>> = files.iter().cloned().collect();
530        let file_data: Vec<(Arc<str>, Arc<str>)> = files
531            .iter()
532            .filter_map(|f| Some((f.clone(), self.source_of(f)?)))
533            .collect();
534        crate::class::ClassAnalyzer::with_files(&db, file_set, &file_data).analyze_all()
535    }
536
537    /// All declarations defined in `file` as a **hierarchical tree**.
538    ///
539    /// Classes/interfaces/traits/enums are returned with their methods,
540    /// properties, and constants nested in `children`. Top-level functions
541    /// and constants are returned with empty `children`.
542    pub fn document_symbols(&self, file: &str) -> Vec<crate::symbol::DocumentSymbol> {
543        use crate::symbol::{DocumentSymbol, DocumentSymbolKind};
544
545        let db = self.snapshot_db();
546        let mut out = Vec::new();
547        for symbol in db.symbols_defined_in_file(file) {
548            // Try class side first — covers Class / Interface / Trait / Enum.
549            if let Some(class_node) = db.lookup_class_node(symbol.as_ref()) {
550                if !class_node.active(&db) {
551                    continue;
552                }
553                let (kind, is_enum) = crate::db::class_kind_via_db(&db, symbol.as_ref())
554                    .map(|k| {
555                        let kind = if k.is_interface {
556                            DocumentSymbolKind::Interface
557                        } else if k.is_trait {
558                            DocumentSymbolKind::Trait
559                        } else if k.is_enum {
560                            DocumentSymbolKind::Enum
561                        } else {
562                            DocumentSymbolKind::Class
563                        };
564                        (kind, k.is_enum)
565                    })
566                    .unwrap_or((DocumentSymbolKind::Class, false));
567
568                // Build children: methods, properties, and class constants.
569                let mut children: Vec<DocumentSymbol> = Vec::new();
570                for m in db.class_own_methods(symbol.as_ref()) {
571                    if !m.active(&db) {
572                        continue;
573                    }
574                    children.push(DocumentSymbol {
575                        name: m.name(&db),
576                        kind: DocumentSymbolKind::Method,
577                        location: m.location(&db),
578                        children: Vec::new(),
579                    });
580                }
581                for p in db.class_own_properties(symbol.as_ref()) {
582                    if !p.active(&db) {
583                        continue;
584                    }
585                    children.push(DocumentSymbol {
586                        name: p.name(&db),
587                        kind: DocumentSymbolKind::Property,
588                        location: p.location(&db),
589                        children: Vec::new(),
590                    });
591                }
592                for c in db.class_own_constants(symbol.as_ref()) {
593                    if !c.active(&db) {
594                        continue;
595                    }
596                    let const_kind = if is_enum {
597                        DocumentSymbolKind::EnumCase
598                    } else {
599                        DocumentSymbolKind::Constant
600                    };
601                    children.push(DocumentSymbol {
602                        name: c.name(&db),
603                        kind: const_kind,
604                        location: c.location(&db),
605                        children: Vec::new(),
606                    });
607                }
608
609                out.push(DocumentSymbol {
610                    name: symbol.clone(),
611                    kind,
612                    location: class_node.location(&db),
613                    children,
614                });
615                continue;
616            }
617            if let Some(fn_node) = db.lookup_function_node(symbol.as_ref()) {
618                if !fn_node.active(&db) {
619                    continue;
620                }
621                out.push(DocumentSymbol {
622                    name: symbol.clone(),
623                    kind: DocumentSymbolKind::Function,
624                    location: fn_node.location(&db),
625                    children: Vec::new(),
626                });
627                continue;
628            }
629            // Constants and other top-level declarations: emit with no
630            // location info; consumers can still surface them in an outline.
631            out.push(DocumentSymbol {
632                name: symbol,
633                kind: DocumentSymbolKind::Constant,
634                location: None,
635                children: Vec::new(),
636            });
637        }
638        out
639    }
640
641    /// Returns `true` if a function with `fqn` is registered and active in
642    /// the codebase. Case-insensitive lookup with optional leading backslash.
643    pub fn contains_function(&self, fqn: &str) -> bool {
644        let db = self.snapshot_db();
645        db.lookup_function_node(fqn).is_some_and(|n| n.active(&db))
646    }
647
648    /// Returns `true` if a class / interface / trait / enum with `fqcn` is
649    /// registered and active in the codebase.
650    pub fn contains_class(&self, fqcn: &str) -> bool {
651        let db = self.snapshot_db();
652        db.lookup_class_node(fqcn).is_some_and(|n| n.active(&db))
653    }
654
655    /// Returns `true` if `class` has a method named `name` registered. Method
656    /// names are matched case-insensitively (PHP method dispatch semantics).
657    pub fn contains_method(&self, class: &str, name: &str) -> bool {
658        let db = self.snapshot_db();
659        let name_lower = name.to_ascii_lowercase();
660        db.lookup_method_node(class, &name_lower)
661            .is_some_and(|n| n.active(&db))
662    }
663
664    /// Try to resolve `fqcn` via PSR-4 and ingest the mapped file, returning
665    /// a detailed outcome distinguishing "already there" from "freshly loaded".
666    pub fn lazy_load_class_with_outcome(&self, fqcn: &str) -> crate::LazyLoadOutcome {
667        if self.contains_class(fqcn) {
668            return crate::LazyLoadOutcome::AlreadyLoaded;
669        }
670        if self.lazy_load_class(fqcn) {
671            crate::LazyLoadOutcome::Loaded
672        } else {
673            crate::LazyLoadOutcome::NotResolvable
674        }
675    }
676
677    /// Try to resolve `fqcn` via the configured [`crate::ClassResolver`] and
678    /// ingest the mapped file.
679    ///
680    /// This is the LSP-friendly lazy-load entry point: the analyzer never
681    /// touches `vendor/` on its own, but consumers can ask it to resolve
682    /// individual symbols on demand. Designed to be called when a diagnostic
683    /// would otherwise report `UndefinedClass`.
684    ///
685    /// Returns `true` if either the class is already known or a matching
686    /// file was found and successfully ingested. Returns `false` if:
687    /// - No resolver is configured (neither `with_psr4` nor `with_class_resolver` called),
688    /// - The resolver can't map `fqcn` to a file,
689    /// - The file can't be read, or
690    /// - The file parsed but did not define `fqcn`.
691    pub fn lazy_load_class(&self, fqcn: &str) -> bool {
692        if self.contains_class(fqcn) {
693            return true;
694        }
695        let Some(resolver) = &self.resolver else {
696            return false;
697        };
698        let Some(path) = resolver.resolve(fqcn) else {
699            return false;
700        };
701        let Ok(src) = std::fs::read_to_string(&path) else {
702            return false;
703        };
704        let file: Arc<str> = Arc::from(path.to_string_lossy().as_ref());
705        self.ingest_file(file, Arc::from(src));
706        self.contains_class(fqcn)
707    }
708
709    /// Lazy-load every class transitively reachable from `fqcn` via parent /
710    /// interface / trait edges. Useful when the consumer needs not just the
711    /// requested class but enough of its inheritance chain to type-check
712    /// member access.
713    ///
714    /// Walks at most `max_depth` levels (default in batch analysis is 10).
715    /// Returns the number of classes successfully loaded (not counting
716    /// `fqcn` itself if it was already present).
717    pub fn lazy_load_class_transitive(&self, fqcn: &str, max_depth: usize) -> usize {
718        if self.resolver.is_none() {
719            return 0;
720        }
721        let mut loaded = 0;
722        let mut frontier: Vec<String> = vec![fqcn.to_string()];
723        let mut visited: std::collections::HashSet<String> = std::collections::HashSet::new();
724
725        for _ in 0..max_depth {
726            if frontier.is_empty() {
727                break;
728            }
729            let mut next: Vec<String> = Vec::new();
730            for name in frontier.drain(..) {
731                if !visited.insert(name.clone()) {
732                    continue;
733                }
734                let was_present = self.contains_class(&name);
735                let resolved = self.lazy_load_class(&name);
736                if resolved && !was_present {
737                    loaded += 1;
738                    // Walk the new class's parent / interfaces / traits.
739                    let db = self.snapshot_db();
740                    if let Some(node) = db.lookup_class_node(&name) {
741                        if let Some(parent) = node.parent(&db) {
742                            next.push(parent.to_string());
743                        }
744                        for iface in node.interfaces(&db).iter() {
745                            next.push(iface.to_string());
746                        }
747                        for tr in node.traits(&db).iter() {
748                            next.push(tr.to_string());
749                        }
750                        for ext in node.extends(&db).iter() {
751                            next.push(ext.to_string());
752                        }
753                    }
754                }
755            }
756            frontier = next;
757        }
758        loaded
759    }
760
761    /// Retrieve the source text the session has registered for `file`, if
762    /// any. Returns `None` when the file has never been ingested. Used by
763    /// the parallel re-analysis path to re-feed dependents to Pass 2 without
764    /// the caller having to track sources independently.
765    pub fn source_of(&self, file: &str) -> Option<Arc<str>> {
766        let db = self.snapshot_db();
767        let sf = db.lookup_source_file(file)?;
768        Some(sf.text(&db))
769    }
770
771    /// Re-analyze every transitive dependent of `file` in parallel.
772    ///
773    /// When the user saves a file that other files depend on (e.g. editing
774    /// a base class, an interface, or a trait), those dependents may have
775    /// new diagnostics. This method computes them in parallel using rayon
776    /// and returns the per-file analysis results so the LSP server can
777    /// publish updated diagnostics in one batch.
778    ///
779    /// Source text for dependents is retrieved from the session's salsa
780    /// inputs (set by previous `ingest_file` calls) — the caller doesn't
781    /// need to track or re-read files. Files for which the session has no
782    /// source are silently skipped (returns the analyzable subset).
783    ///
784    /// Does not run inference sweeps. For full-fidelity cross-file inferred
785    /// return types, follow up with [`Self::run_inference_sweep`] over the
786    /// affected file set.
787    pub fn analyze_dependents_of(&self, file: &str) -> Vec<(Arc<str>, crate::FileAnalysis)> {
788        use rayon::prelude::*;
789
790        // Phase 1: compute dependents + gather their sources outside the
791        // analysis loop so each worker has everything it needs.
792        let dependents = self.dependency_graph().transitive_dependents(file);
793        if dependents.is_empty() {
794            return Vec::new();
795        }
796        let with_source: Vec<(Arc<str>, Arc<str>)> = dependents
797            .into_iter()
798            .filter_map(|path| {
799                let arc_path: Arc<str> = Arc::from(path.as_str());
800                let src = self.source_of(&path)?;
801                Some((arc_path, src))
802            })
803            .collect();
804        if with_source.is_empty() {
805            return Vec::new();
806        }
807
808        // Phase 2: parallel parse + analyze. Each rayon worker gets its own
809        // database snapshot via FileAnalyzer; writes are isolated to the
810        // session's canonical db (none happen here since we only run Pass 2).
811        with_source
812            .into_par_iter()
813            .map(|(file, source)| {
814                let arena = crate::arena::create_parse_arena(source.len());
815                let parsed = php_rs_parser::parse(&arena, source.as_ref());
816                let analyzer = crate::FileAnalyzer::new(self);
817                let analysis = analyzer.analyze(
818                    file.clone(),
819                    source.as_ref(),
820                    &parsed.program,
821                    &parsed.source_map,
822                );
823                (file, analysis)
824            })
825            .collect()
826    }
827
828    /// FQCNs that `file` imports via `use` statements but that aren't yet
829    /// loaded in the session.
830    ///
831    /// Designed as the input to background prefetching: after the LSP server
832    /// ingests an open buffer, it can call this and lazy-load the returned
833    /// FQCNs on a worker thread so the user's first Cmd+Click into vendor
834    /// code doesn't pay the file-read+parse cost.
835    ///
836    /// Returns an empty Vec if the file hasn't been ingested or has no
837    /// unresolved imports.
838    pub fn pending_lazy_loads(&self, file: &str) -> Vec<Arc<str>> {
839        let db = self.snapshot_db();
840        let imports = db.file_imports(file);
841        if imports.is_empty() {
842            return Vec::new();
843        }
844        let mut out = Vec::new();
845        for fqcn in imports.values() {
846            // Cheap check: skip imports already in the codebase.
847            if db.lookup_class_node(fqcn).is_some_and(|n| n.active(&db)) {
848                continue;
849            }
850            // Only worth queueing if the resolver could in principle find it.
851            if let Some(resolver) = &self.resolver {
852                if resolver.resolve(fqcn).is_some() {
853                    out.push(Arc::from(fqcn.as_str()));
854                }
855            }
856        }
857        out
858    }
859
860    /// Convenience: synchronously lazy-load every import of `file` that
861    /// isn't already in the codebase. Returns the number successfully loaded.
862    ///
863    /// For non-blocking prefetch, call this from a worker thread:
864    ///
865    /// ```ignore
866    /// let s = session.clone();  // AnalysisSession is wrapped in Arc by callers
867    /// std::thread::spawn(move || {
868    ///     s.prefetch_imports(&file_path);
869    /// });
870    /// ```
871    ///
872    /// Internally walks the inheritance chain of each loaded class to a
873    /// shallow depth so member access on imported types type-checks without
874    /// the user paying the cost on their first navigation.
875    pub fn prefetch_imports(&self, file: &str) -> usize {
876        let pending = self.pending_lazy_loads(file);
877        let mut loaded = 0;
878        for fqcn in pending {
879            // Use the transitive walker with a small depth so we pick up
880            // parent classes / interfaces needed for member resolution, but
881            // don't recursively pull in the entire vendor tree.
882            loaded += self.lazy_load_class_transitive(&fqcn, 2);
883        }
884        loaded
885    }
886
887    /// All class / interface / trait / enum FQCNs currently known to the
888    /// session, each paired with the file that defines them when available.
889    ///
890    /// Use this to build workspace-wide views (outline, fuzzy search, etc.).
891    /// Consumers implement their own search/match logic on top — the analyzer
892    /// only exposes the iterator.
893    pub fn all_classes(&self) -> Vec<(Arc<str>, Option<mir_codebase::storage::Location>)> {
894        let db = self.snapshot_db();
895        db.active_class_node_fqcns()
896            .into_iter()
897            .filter_map(|fqcn| {
898                let node = db.lookup_class_node(fqcn.as_ref())?;
899                if !node.active(&db) {
900                    return None;
901                }
902                Some((fqcn, node.location(&db)))
903            })
904            .collect()
905    }
906
907    /// All global function FQNs currently known to the session, each paired
908    /// with their declaration location when available.
909    pub fn all_functions(&self) -> Vec<(Arc<str>, Option<mir_codebase::storage::Location>)> {
910        let db = self.snapshot_db();
911        db.active_function_node_fqns()
912            .into_iter()
913            .filter_map(|fqn| {
914                let node = db.lookup_function_node(fqn.as_ref())?;
915                if !node.active(&db) {
916                    return None;
917                }
918                Some((fqn, node.location(&db)))
919            })
920            .collect()
921    }
922
923    /// Compute `file`'s outgoing dependency edges and update both the in-memory
924    /// reverse-dep map (always) and the disk cache's reverse-dep graph (if configured).
925    fn update_reverse_deps_for(&self, file: &str) {
926        let db = self.snapshot_db();
927        let targets = file_outgoing_dependencies(&db, file);
928
929        // Always update the in-memory map.
930        self.update_in_memory_reverse_deps(file, &targets);
931
932        // Also persist to disk cache if configured.
933        if let Some(cache) = self.cache.as_deref() {
934            cache.update_reverse_deps_for_file(file, &targets);
935        }
936    }
937
938    /// Update the in-memory reverse dependency map for `file` with `new_targets`.
939    /// Removes `file` from all existing entries, then adds it as a dependent of
940    /// each target in `new_targets` (excluding self-edges).
941    fn update_in_memory_reverse_deps(&self, file: &str, new_targets: &HashSet<String>) {
942        let mut map = self.reverse_dep_map.write();
943        for dependents in map.values_mut() {
944            dependents.remove(file);
945        }
946        map.retain(|_, dependents| !dependents.is_empty());
947        for target in new_targets {
948            if target != file {
949                map.entry(target.clone())
950                    .or_default()
951                    .insert(file.to_string());
952            }
953        }
954    }
955
956    /// BFS transitive dependents of `file` using the in-memory reverse dep map.
957    ///
958    /// O(D) where D is the number of transitive dependents — faster than
959    /// [`Self::dependency_graph().transitive_dependents()`] which rebuilds the
960    /// full graph on every call. Only covers Pass 1 structural dependencies
961    /// (imports, class hierarchy, type hints); does not include bare FQN body
962    /// references recorded during Pass 2. For full fidelity, use
963    /// `dependency_graph().transitive_dependents()` after Pass 2 is complete.
964    pub fn structural_dependents_of(&self, file: &str) -> Vec<String> {
965        let map = self.reverse_dep_map.read();
966        let mut visited: HashSet<String> = HashSet::new();
967        let mut queue = vec![file.to_string()];
968        let mut result = Vec::new();
969        while let Some(current) = queue.pop() {
970            if !visited.insert(current.clone()) {
971                continue;
972            }
973            if let Some(deps) = map.get(&current) {
974                for dep in deps {
975                    if !visited.contains(dep) {
976                        queue.push(dep.clone());
977                        result.push(dep.clone());
978                    }
979                }
980            }
981        }
982        result
983    }
984
985    /// Cross-file inference sweep. For each `(file, source)` pair, calls the
986    /// Salsa-tracked `infer_file_return_types` query in parallel, then commits
987    /// the collected inferred return types to INPUT fields.
988    ///
989    /// Files must already be ingested via [`Self::ingest_file`] before calling
990    /// this method. Subsequent [`FileAnalyzer::analyze`] calls read the committed
991    /// INPUT fields via O(1) lookups with no lock contention.
992    pub fn run_inference_sweep(&self, files: &[(Arc<str>, Arc<str>)]) {
993        use rayon::prelude::*;
994        let db_priming = self.snapshot_db();
995        let inferred_results: Vec<crate::db::InferredFileTypes> = files
996            .par_iter()
997            .map_with(db_priming, |db, (path, _src)| {
998                if let Some(sf) = db.lookup_source_file(path) {
999                    crate::db::infer_file_return_types(db, sf)
1000                } else {
1001                    crate::db::InferredFileTypes::empty()
1002                }
1003            })
1004            .collect();
1005        let mut functions = Vec::new();
1006        let mut methods = Vec::new();
1007        for result in inferred_results {
1008            for (fqn, ty) in result.functions.iter() {
1009                functions.push((fqn.clone(), (**ty).clone()));
1010            }
1011            for ((fqcn, name), ty) in result.methods.iter() {
1012                methods.push((fqcn.clone(), name.clone(), (**ty).clone()));
1013            }
1014        }
1015        let mut guard = self.shared_db.salsa.write();
1016        guard.commit_inferred_return_types(functions, methods);
1017    }
1018
1019    /// File dependency graph: which files depend on which other files.
1020    /// Used for incremental invalidation in LSP servers and build systems.
1021    ///
1022    /// File dependency graph: which files depend on which other files.
1023    /// Used for incremental invalidation in LSP servers and build systems.
1024    ///
1025    /// O(edges) — iterates the `file_references` forward index (file → symbol
1026    /// keys it references) which is always current, then resolves each symbol
1027    /// to its defining file via O(1) lookup.  Total cost is O(E) where E is the
1028    /// number of (file, symbol) reference edges, vs. the old O(F × S × R) scan.
1029    pub fn dependency_graph(&self) -> crate::DependencyGraph {
1030        let db = self.snapshot_db();
1031
1032        let all_files: Vec<String> = db
1033            .source_file_paths()
1034            .iter()
1035            .map(|f| f.as_ref().to_string())
1036            .collect();
1037
1038        let mut dependencies: HashMap<String, Vec<String>> = HashMap::new();
1039        let mut dependents: HashMap<String, Vec<String>> = HashMap::new();
1040
1041        for file in &all_files {
1042            // O(degree(file)) — forward index lookup, no full-table scan.
1043            let symbol_keys = db.file_referenced_symbols(file);
1044            let mut file_deps: HashSet<String> = HashSet::new();
1045            for symbol_key in &symbol_keys {
1046                let lookup: &str = match symbol_key.split_once("::") {
1047                    Some((class, _)) => class,
1048                    None => symbol_key.as_ref(),
1049                };
1050                if let Some(def_file) = db.symbol_defining_file(lookup) {
1051                    let def = def_file.as_ref().to_string();
1052                    if &def != file {
1053                        file_deps.insert(def);
1054                    }
1055                }
1056            }
1057            for dep in &file_deps {
1058                dependents
1059                    .entry(dep.clone())
1060                    .or_default()
1061                    .push(file.clone());
1062                dependencies
1063                    .entry(file.clone())
1064                    .or_default()
1065                    .push(dep.clone());
1066            }
1067        }
1068
1069        for deps in dependents.values_mut() {
1070            deps.sort();
1071        }
1072
1073        crate::DependencyGraph {
1074            dependencies,
1075            dependents,
1076        }
1077    }
1078}
1079
1080/// Compute the set of files `file` depends on: defining files of its imports,
1081/// plus parent / interfaces / traits' defining files for any classes declared
1082/// in `file`. Self-edges are excluded.
1083fn file_outgoing_dependencies(db: &dyn MirDatabase, file: &str) -> HashSet<String> {
1084    let mut targets: HashSet<String> = HashSet::new();
1085
1086    let mut add_target = |symbol: &str| {
1087        if let Some(defining_file) = db.symbol_defining_file(symbol) {
1088            let def = defining_file.as_ref().to_string();
1089            if def != file {
1090                targets.insert(def);
1091            }
1092        }
1093    };
1094
1095    let extract_named_objects = |union: &mir_types::Union| {
1096        union
1097            .types
1098            .iter()
1099            .filter_map(|atomic| match atomic {
1100                mir_types::atomic::Atomic::TNamedObject { fqcn, .. } => Some(fqcn.clone()),
1101                _ => None,
1102            })
1103            .collect::<Vec<_>>()
1104    };
1105
1106    let imports = db.file_imports(file);
1107    for fqcn in imports.values() {
1108        add_target(fqcn);
1109    }
1110
1111    for fqcn in db.symbols_defined_in_file(file) {
1112        let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
1113            continue;
1114        };
1115        if let Some(parent) = node.parent(db) {
1116            add_target(parent.as_ref());
1117        }
1118        for iface in node.interfaces(db).iter() {
1119            add_target(iface.as_ref());
1120        }
1121        for tr in node.traits(db).iter() {
1122            add_target(tr.as_ref());
1123        }
1124
1125        // Add types from properties
1126        for prop in db.class_own_properties(fqcn.as_ref()).iter() {
1127            if let Some(ty) = prop.ty(db) {
1128                for named in extract_named_objects(&ty) {
1129                    add_target(named.as_ref());
1130                }
1131            }
1132        }
1133
1134        // Add types from methods
1135        for method in db.class_own_methods(fqcn.as_ref()).iter() {
1136            // Parameter types
1137            for param in method.params(db).iter() {
1138                if let Some(ty) = &param.ty {
1139                    for named in extract_named_objects(ty.as_ref()) {
1140                        add_target(named.as_ref());
1141                    }
1142                }
1143            }
1144            // Return type
1145            if let Some(rt) = method.return_type(db) {
1146                for named in extract_named_objects(rt.as_ref()) {
1147                    add_target(named.as_ref());
1148                }
1149            }
1150        }
1151    }
1152
1153    // Add types from global functions
1154    for fqn in db.active_function_node_fqns() {
1155        let Some(node) = db.lookup_function_node(fqn.as_ref()) else {
1156            continue;
1157        };
1158        if let Some(file_of_fn) = db.symbol_defining_file(fqn.as_ref()) {
1159            if file_of_fn.as_ref() != file {
1160                continue;
1161            }
1162        } else {
1163            continue;
1164        }
1165
1166        // Parameter types
1167        for param in node.params(db).iter() {
1168            if let Some(ty) = &param.ty {
1169                for named in extract_named_objects(ty.as_ref()) {
1170                    add_target(named.as_ref());
1171                }
1172            }
1173        }
1174        // Return type
1175        if let Some(rt) = node.return_type(db) {
1176            for named in extract_named_objects(rt.as_ref()) {
1177                add_target(named.as_ref());
1178            }
1179        }
1180    }
1181
1182    // Also track bare-FQN references recorded during Pass 2 (new \Foo(), \Foo::method(),
1183    // \foo()) that do not appear in use-import statements.
1184    for symbol_key in db.file_referenced_symbols(file) {
1185        let lookup: &str = match symbol_key.split_once("::") {
1186            Some((class, _)) => class,
1187            None => &symbol_key,
1188        };
1189        add_target(lookup);
1190    }
1191
1192    targets
1193}