Skip to main content

mir_analyzer/
session.rs

1//! Session-based analysis API for incremental, per-file analysis.
2//!
3//! [`AnalysisSession`] owns the salsa database and per-session caches for a
4//! long-running analysis context shared across many per-file analyses. Reads
5//! clone the database under a brief lock, then run lock-free; writes hold the
6//! lock briefly to mutate canonical state. `MirDb::clone()` is cheap
7//! (Arc-wrapped registries), so this pattern gives parallel readers without
8//! blocking on concurrent writes for longer than the clone itself.
9//!
10//! See [`crate::file_analyzer::FileAnalyzer`] for the per-file Pass 2 entry
11//! point that operates against a session.
12
13use std::collections::{HashMap, HashSet};
14use std::path::PathBuf;
15use std::sync::{Arc, Mutex};
16
17use rayon::prelude::*;
18use salsa::Setter as _;
19
20use crate::cache::AnalysisCache;
21use crate::composer::Psr4Map;
22use crate::db::{collect_file_definitions, FileDefinitions, MirDatabase, MirDb, SourceFile};
23use crate::php_version::PhpVersion;
24
25/// Long-lived analysis context. Owns the salsa database and tracks which
26/// stubs have been loaded.
27///
28/// Cheap to clone the inner db for parallel reads; writes funnel through
29/// [`Self::ingest_file`], [`Self::invalidate_file`], and the crate-internal
30/// [`Self::with_db_mut`].
31pub struct AnalysisSession {
32    salsa: Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)>,
33    cache: Option<Arc<AnalysisCache>>,
34    psr4: Option<Arc<Psr4Map>>,
35    /// Set of stub virtual paths that have already been ingested. Replaces an
36    /// older `AtomicBool stubs_loaded` flag — tracking individual paths lets
37    /// us lazy-load extension stubs on demand without re-ingesting essentials.
38    loaded_stubs: Mutex<HashSet<&'static str>>,
39    /// True once user stubs (configured via [`Self::with_user_stubs`]) have
40    /// been ingested. They are loaded together with the essential set on the
41    /// first call to a stubs-loading method.
42    user_stubs_loaded: Mutex<bool>,
43    php_version: PhpVersion,
44    user_stub_files: Vec<PathBuf>,
45    user_stub_dirs: Vec<PathBuf>,
46}
47
48impl AnalysisSession {
49    /// Create a session targeting the given PHP language version.
50    pub fn new(php_version: PhpVersion) -> Self {
51        Self {
52            salsa: Mutex::new((MirDb::default(), HashMap::new())),
53            cache: None,
54            psr4: None,
55            loaded_stubs: Mutex::new(HashSet::new()),
56            user_stubs_loaded: Mutex::new(false),
57            php_version,
58            user_stub_files: Vec::new(),
59            user_stub_dirs: Vec::new(),
60        }
61    }
62
63    pub fn with_cache(mut self, cache: Arc<AnalysisCache>) -> Self {
64        self.cache = Some(cache);
65        self
66    }
67
68    pub fn with_psr4(mut self, map: Arc<Psr4Map>) -> Self {
69        self.psr4 = Some(map);
70        self
71    }
72
73    pub fn with_user_stubs(mut self, files: Vec<PathBuf>, dirs: Vec<PathBuf>) -> Self {
74        self.user_stub_files = files;
75        self.user_stub_dirs = dirs;
76        self
77    }
78
79    pub fn php_version(&self) -> PhpVersion {
80        self.php_version
81    }
82
83    pub fn cache(&self) -> Option<&AnalysisCache> {
84        self.cache.as_deref()
85    }
86
87    pub fn psr4(&self) -> Option<&Psr4Map> {
88        self.psr4.as_deref()
89    }
90
91    /// Load every PHP built-in stub plus any configured user stubs.
92    /// Idempotent. Equivalent to the legacy "load everything" behavior; use
93    /// [`Self::ensure_essential_stubs_loaded`] in incremental scenarios where
94    /// cold-start latency matters more than comprehensive stub coverage.
95    pub fn ensure_stubs_loaded(&self) {
96        self.ensure_all_stubs_loaded();
97    }
98
99    /// Load only the curated set of essential stubs (Core, standard, SPL,
100    /// date) plus any configured user stubs. About 25 of 120 stub files;
101    /// covers types and functions used by virtually all PHP code.
102    ///
103    /// Other extension stubs (Reflection, gd, openssl, …) can be brought in
104    /// on demand via [`Self::ensure_stubs_for_symbol`] when user code
105    /// references them. Idempotent — already-loaded stubs are skipped.
106    pub fn ensure_essential_stubs_loaded(&self) {
107        self.ingest_stub_paths(crate::stubs::ESSENTIAL_STUB_PATHS);
108        self.ensure_user_stubs_loaded();
109    }
110
111    /// Load every embedded PHP stub plus any configured user stubs.
112    /// Use for batch tools (CLI, full project analysis) where comprehensive
113    /// symbol coverage matters more than cold-start latency.
114    pub fn ensure_all_stubs_loaded(&self) {
115        let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
116        self.ingest_stub_paths(&paths);
117        self.ensure_user_stubs_loaded();
118    }
119
120    /// Ensure the embedded stub that defines `name` (a function) is ingested.
121    /// Returns `true` when a matching stub exists (whether or not it was
122    /// already loaded), `false` when `name` isn't a known PHP built-in.
123    pub fn ensure_stub_for_function(&self, name: &str) -> bool {
124        match crate::stubs::stub_path_for_function(name) {
125            Some(path) => {
126                self.ingest_stub_paths(&[path]);
127                true
128            }
129            None => false,
130        }
131    }
132
133    /// Ensure the embedded stub that defines `fqcn` (a class / interface /
134    /// trait / enum) is ingested. Case-insensitive lookup with optional
135    /// leading backslash.
136    pub fn ensure_stub_for_class(&self, fqcn: &str) -> bool {
137        match crate::stubs::stub_path_for_class(fqcn) {
138            Some(path) => {
139                self.ingest_stub_paths(&[path]);
140                true
141            }
142            None => false,
143        }
144    }
145
146    /// Ensure the embedded stub that defines `name` (a constant) is ingested.
147    pub fn ensure_stub_for_constant(&self, name: &str) -> bool {
148        match crate::stubs::stub_path_for_constant(name) {
149            Some(path) => {
150                self.ingest_stub_paths(&[path]);
151                true
152            }
153            None => false,
154        }
155    }
156
157    /// Number of distinct embedded stubs currently ingested into the session.
158    /// Useful for diagnostics and bench reporting.
159    pub fn loaded_stub_count(&self) -> usize {
160        self.loaded_stubs.lock().expect("loaded_stubs lock").len()
161    }
162
163    /// Auto-discover and ingest the embedded stubs needed to cover every
164    /// built-in PHP function / class / constant referenced by `source`.
165    ///
166    /// Used by [`crate::FileAnalyzer::analyze`] to keep essentials-only mode
167    /// correct without forcing callers to enumerate which stubs they need.
168    /// Idempotent — already-loaded stubs are skipped via [`Self::loaded_stubs`].
169    ///
170    /// The discovery scan is a coarse identifier sweep (see
171    /// [`crate::stubs::collect_referenced_builtin_paths`]) — it may pull in
172    /// a slightly larger set than the file strictly needs, but never misses
173    /// a referenced built-in. Cost is sub-millisecond per file.
174    ///
175    /// Fast path: if every embedded stub is already loaded (e.g. after a
176    /// batch tool called [`Self::ensure_all_stubs_loaded`]), the source scan
177    /// is skipped entirely.
178    pub fn ensure_stubs_for_source(&self, source: &str) {
179        // Cheap check first: skip the scan entirely when we already know we
180        // have everything. Avoids a ~50-500µs source walk on every analyze
181        // call in batch / warm-session scenarios.
182        {
183            let loaded = self.loaded_stubs.lock().expect("loaded_stubs lock");
184            if loaded.len() >= crate::stubs::stub_files().len() {
185                return;
186            }
187        }
188        let paths = crate::stubs::collect_referenced_builtin_paths(source);
189        if paths.is_empty() {
190            return;
191        }
192        self.ingest_stub_paths(&paths);
193    }
194
195    /// Internal: parse + ingest each path in `paths` that hasn't already been
196    /// ingested. Holds the salsa write lock per file (brief), and the
197    /// `loaded_stubs` set lock briefly to record paths.
198    fn ingest_stub_paths(&self, paths: &[&'static str]) {
199        // Pick out the not-yet-loaded paths first to avoid redundant parsing.
200        let needed: Vec<&'static str> = {
201            let loaded = self.loaded_stubs.lock().expect("loaded_stubs lock");
202            paths
203                .iter()
204                .copied()
205                .filter(|p| !loaded.contains(p))
206                .collect()
207        };
208        if needed.is_empty() {
209            return;
210        }
211
212        let php_version = self.php_version;
213        // Parse in parallel; ingest serially under the salsa write lock.
214        let slices: Vec<(&'static str, mir_codebase::storage::StubSlice)> = needed
215            .par_iter()
216            .filter_map(|&path| {
217                crate::stubs::stub_content_for_path(path).map(|content| {
218                    let slice =
219                        crate::stubs::stub_slice_from_source(path, content, Some(php_version));
220                    (path, slice)
221                })
222            })
223            .collect();
224
225        let mut guard = self.salsa.lock().expect("salsa lock poisoned");
226        let mut loaded = self.loaded_stubs.lock().expect("loaded_stubs lock");
227        for (path, slice) in slices {
228            if loaded.insert(path) {
229                guard.0.ingest_stub_slice(&slice);
230            }
231        }
232    }
233
234    fn ensure_user_stubs_loaded(&self) {
235        if self.user_stub_files.is_empty() && self.user_stub_dirs.is_empty() {
236            return;
237        }
238        let mut guard = self.user_stubs_loaded.lock().expect("user_stubs lock");
239        if *guard {
240            return;
241        }
242        let slices = crate::stubs::user_stub_slices(&self.user_stub_files, &self.user_stub_dirs);
243        let mut salsa = self.salsa.lock().expect("salsa lock poisoned");
244        for slice in slices {
245            salsa.0.ingest_stub_slice(&slice);
246        }
247        *guard = true;
248    }
249
250    /// Cheap clone of the salsa db for a read-only query. The lock is held
251    /// only for the duration of the clone, so concurrent readers never
252    /// serialize on each other or on writes for longer than the clone itself.
253    pub fn snapshot_db(&self) -> MirDb {
254        let guard = self.salsa.lock().expect("salsa lock poisoned");
255        guard.0.clone()
256    }
257
258    /// Run a closure with read access to a database snapshot. The snapshot is
259    /// taken under a brief lock, then the closure runs without holding it.
260    pub fn read<R>(&self, f: impl FnOnce(&dyn MirDatabase) -> R) -> R {
261        let db = self.snapshot_db();
262        f(&db)
263    }
264
265    /// Pass 1 ingestion. Updates the file's source text in the salsa db,
266    /// runs definition collection, and ingests the resulting stub slice.
267    /// Triggers stub loading on first call. Also updates the cache's reverse-
268    /// dependency graph for `file` so cross-file invalidation stays correct
269    /// across incremental edits — without rebuilding the graph from scratch.
270    ///
271    /// If `file` was previously ingested, its old definitions and reference
272    /// locations are removed first so renames / deletions don't leave stale
273    /// state in the codebase. (Without this, long-running sessions would
274    /// accumulate dead reference-location entries indefinitely.)
275    pub fn ingest_file(&self, file: Arc<str>, source: Arc<str>) -> FileDefinitions {
276        self.ensure_stubs_loaded();
277        let file_defs = {
278            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
279            let (ref mut db, ref mut files) = *guard;
280            let salsa_file = match files.get(&file) {
281                Some(&sf) => {
282                    // Re-ingestion: drop old definitions + reference locations
283                    // before collecting fresh ones. Mirrors what
284                    // ProjectAnalyzer::re_analyze_file does.
285                    db.remove_file_definitions(file.as_ref());
286                    if sf.text(db).as_ref() != source.as_ref() {
287                        sf.set_text(db).to(source.clone());
288                    }
289                    sf
290                }
291                None => {
292                    let sf = SourceFile::new(db, file.clone(), source.clone());
293                    files.insert(file.clone(), sf);
294                    sf
295                }
296            };
297            collect_file_definitions(db, salsa_file)
298        };
299        {
300            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
301            guard.0.ingest_stub_slice(&file_defs.slice);
302        }
303        self.update_reverse_deps_for(&file);
304        file_defs
305    }
306
307    /// Drop a file's contribution to the session: codebase definitions,
308    /// reference locations, salsa input handle, cache entry, and outgoing
309    /// reverse-dependency edges. Cache entries of *dependent* files are
310    /// also evicted (cross-file invalidation).
311    ///
312    /// Use this when a file is closed by the consumer, or before a re-ingest
313    /// of substantially changed content. (Plain re-ingest via
314    /// [`Self::ingest_file`] also drops old definitions, but does not
315    /// remove the salsa input handle — call this for full cleanup.)
316    pub fn invalidate_file(&self, file: &str) {
317        {
318            let mut guard = self.salsa.lock().expect("salsa lock poisoned");
319            let (ref mut db, ref mut files) = *guard;
320            db.remove_file_definitions(file);
321            files.remove(file);
322        }
323        if let Some(cache) = &self.cache {
324            cache.update_reverse_deps_for_file(file, &HashSet::new());
325            cache.evict_with_dependents(&[file.to_string()]);
326        }
327    }
328
329    /// Number of files currently tracked in this session's salsa input set.
330    /// Stable across reads; useful for diagnostics and memory bounds checks.
331    pub fn tracked_file_count(&self) -> usize {
332        let guard = self.salsa.lock().expect("salsa lock poisoned");
333        guard.1.len()
334    }
335
336    // -----------------------------------------------------------------------
337    // Read-only codebase queries
338    //
339    // All take a brief lock to clone the db, then run the lookup against the
340    // owned snapshot — concurrent edits proceed without blocking.
341    // -----------------------------------------------------------------------
342
343    /// Resolve `symbol` (a class FQCN or function FQN) to its declaration
344    /// location. Powers go-to-definition for top-level symbols. Returns
345    /// `None` if the symbol isn't known to the codebase or has no recorded
346    /// source span (e.g. some stub-only declarations).
347    pub fn definition_of(&self, symbol: &str) -> Option<mir_codebase::storage::Location> {
348        let db = self.snapshot_db();
349        db.lookup_class_node(symbol)
350            .filter(|n| n.active(&db))
351            .and_then(|n| n.location(&db))
352            .or_else(|| {
353                db.lookup_function_node(symbol)
354                    .filter(|n| n.active(&db))
355                    .and_then(|n| n.location(&db))
356            })
357    }
358
359    /// Resolve a class member (method / property / class constant / enum case)
360    /// to its declaration location, walking the inheritance chain.
361    pub fn member_definition(
362        &self,
363        fqcn: &str,
364        member_name: &str,
365    ) -> Option<mir_codebase::storage::Location> {
366        let db = self.snapshot_db();
367        crate::db::member_location_via_db(&db, fqcn, member_name)
368    }
369
370    /// Every recorded reference to `symbol` (as `(file, line, col_start,
371    /// col_end)`). Use [`crate::symbol::ResolvedSymbol::codebase_key`] to
372    /// build the lookup key from a `ResolvedSymbol` returned by
373    /// [`crate::FileAnalysis::symbol_at`].
374    pub fn references_to(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
375        let db = self.snapshot_db();
376        db.reference_locations(symbol)
377    }
378
379    /// All declarations defined in `file` (classes, interfaces, traits, enums,
380    /// functions, constants). Powers outline / document-symbols views and any
381    /// other consumer that needs the file's top-level symbol set. Returns an
382    /// empty Vec if `file` hasn't been ingested.
383    pub fn document_symbols(&self, file: &str) -> Vec<crate::symbol::DocumentSymbol> {
384        use crate::symbol::{DocumentSymbol, DocumentSymbolKind};
385
386        let db = self.snapshot_db();
387        let mut out = Vec::new();
388        for symbol in db.symbols_defined_in_file(file) {
389            // Try class side first — covers Class / Interface / Trait / Enum.
390            if let Some(class_node) = db.lookup_class_node(symbol.as_ref()) {
391                if !class_node.active(&db) {
392                    continue;
393                }
394                let kind = crate::db::class_kind_via_db(&db, symbol.as_ref())
395                    .map(|k| {
396                        if k.is_interface {
397                            DocumentSymbolKind::Interface
398                        } else if k.is_trait {
399                            DocumentSymbolKind::Trait
400                        } else if k.is_enum {
401                            DocumentSymbolKind::Enum
402                        } else {
403                            DocumentSymbolKind::Class
404                        }
405                    })
406                    .unwrap_or(DocumentSymbolKind::Class);
407                out.push(DocumentSymbol {
408                    name: symbol.clone(),
409                    kind,
410                    location: class_node.location(&db),
411                });
412                continue;
413            }
414            if let Some(fn_node) = db.lookup_function_node(symbol.as_ref()) {
415                if !fn_node.active(&db) {
416                    continue;
417                }
418                out.push(DocumentSymbol {
419                    name: symbol.clone(),
420                    kind: DocumentSymbolKind::Function,
421                    location: fn_node.location(&db),
422                });
423                continue;
424            }
425            // Constants and other top-level declarations: emit with no
426            // location info; consumers can still surface them in an outline.
427            out.push(DocumentSymbol {
428                name: symbol,
429                kind: DocumentSymbolKind::Constant,
430                location: None,
431            });
432        }
433        out
434    }
435
436    /// Compute `file`'s outgoing dependency edges and update the cache's
437    /// reverse-dep graph in place. No-op if no cache is configured.
438    fn update_reverse_deps_for(&self, file: &str) {
439        let Some(cache) = self.cache.as_deref() else {
440            return;
441        };
442        let db = self.snapshot_db();
443        let targets = file_outgoing_dependencies(&db, file);
444        cache.update_reverse_deps_for_file(file, &targets);
445    }
446
447    /// Cross-file inference sweep. For each `(file, source)` pair, runs the
448    /// Pass 2 inference-only mode on a cloned db (parallel via rayon), then
449    /// commits the collected inferred return types to the canonical db.
450    ///
451    /// Call this on idle / save / explicit user request, **not** on every
452    /// keystroke — [`crate::FileAnalyzer::analyze`] deliberately skips
453    /// inference sweep on the hot path. Files whose source contains parse
454    /// errors are silently skipped.
455    pub fn run_inference_sweep(&self, files: &[(Arc<str>, Arc<str>)]) {
456        self.ensure_stubs_loaded();
457
458        // The priming db lives only inside `gather_inferred_types`. After it
459        // returns, all rayon-clone references to the salsa storage are dropped
460        // — required so that the subsequent `commit_inferred_return_types`
461        // call (which calls salsa's `cancel_others`) doesn't deadlock waiting
462        // for outstanding db references.
463        let (functions, methods) =
464            gather_inferred_types(self.snapshot_db(), files, self.php_version);
465
466        let mut guard = self.salsa.lock().expect("salsa lock poisoned");
467        guard.0.commit_inferred_return_types(functions, methods);
468    }
469}
470
471/// Drive Pass 2 inference-only mode in parallel across `files`, accumulating
472/// inferred function and method return types. The `db_priming` MirDb is
473/// consumed (cloned per spawned task and dropped on return), so the caller's
474/// canonical db can subsequently take exclusive access without deadlock.
475///
476/// Crate-internal so [`crate::project::ProjectAnalyzer`] can use the same
477/// deadlock-safe helper for its lazy-load reanalysis sweep.
478#[allow(clippy::type_complexity)]
479pub(crate) fn gather_inferred_types(
480    db_priming: MirDb,
481    files: &[(Arc<str>, Arc<str>)],
482    php_version: PhpVersion,
483) -> (
484    Vec<(Arc<str>, mir_types::Union)>,
485    Vec<(Arc<str>, Arc<str>, mir_types::Union)>,
486) {
487    use crate::pass2::Pass2Driver;
488    use mir_types::Union;
489    use std::sync::Mutex as StdMutex;
490
491    type Functions = Vec<(Arc<str>, Union)>;
492    type Methods = Vec<(Arc<str>, Arc<str>, Union)>;
493    let functions: Arc<StdMutex<Functions>> = Arc::new(StdMutex::new(Vec::new()));
494    let methods: Arc<StdMutex<Methods>> = Arc::new(StdMutex::new(Vec::new()));
495
496    rayon::in_place_scope(|s| {
497        for (file, source) in files {
498            let db = db_priming.clone();
499            let functions = Arc::clone(&functions);
500            let methods = Arc::clone(&methods);
501            let file = file.clone();
502            let source = source.clone();
503
504            s.spawn(move |_| {
505                let arena = bumpalo::Bump::new();
506                let parsed = php_rs_parser::parse(&arena, source.as_ref());
507                if !parsed.errors.is_empty() {
508                    return;
509                }
510                let driver = Pass2Driver::new_inference_only(&db as &dyn MirDatabase, php_version);
511                driver.analyze_bodies(&parsed.program, file, source.as_ref(), &parsed.source_map);
512                let inferred = driver.take_inferred_types();
513                if let Ok(mut f) = functions.lock() {
514                    f.extend(inferred.functions);
515                }
516                if let Ok(mut m) = methods.lock() {
517                    m.extend(inferred.methods);
518                }
519            });
520        }
521    });
522
523    let functions = Arc::try_unwrap(functions)
524        .map(|m| m.into_inner().unwrap_or_default())
525        .unwrap_or_else(|arc| arc.lock().unwrap().clone());
526    let methods = Arc::try_unwrap(methods)
527        .map(|m| m.into_inner().unwrap_or_default())
528        .unwrap_or_else(|arc| arc.lock().unwrap().clone());
529
530    (functions, methods)
531}
532
533/// Compute the set of files `file` depends on: defining files of its imports,
534/// plus parent / interfaces / traits' defining files for any classes declared
535/// in `file`. Self-edges are excluded.
536fn file_outgoing_dependencies(db: &dyn MirDatabase, file: &str) -> HashSet<String> {
537    let mut targets: HashSet<String> = HashSet::new();
538
539    let mut add_target = |symbol: &str| {
540        if let Some(defining_file) = db.symbol_defining_file(symbol) {
541            let def = defining_file.as_ref().to_string();
542            if def != file {
543                targets.insert(def);
544            }
545        }
546    };
547
548    let imports = db.file_imports(file);
549    for fqcn in imports.values() {
550        add_target(fqcn);
551    }
552
553    for fqcn in db.symbols_defined_in_file(file) {
554        let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
555            continue;
556        };
557        if let Some(parent) = node.parent(db) {
558            add_target(parent.as_ref());
559        }
560        for iface in node.interfaces(db).iter() {
561            add_target(iface.as_ref());
562        }
563        for tr in node.traits(db).iter() {
564            add_target(tr.as_ref());
565        }
566    }
567
568    targets
569}