Skip to main content

mir_analyzer/
session.rs

1//! Session-based analysis API for incremental, per-file analysis.
2//!
3//! [`AnalysisSession`] owns the salsa database and per-session caches for a
4//! long-running analysis context shared across many per-file analyses. Reads
5//! clone the database under a brief lock, then run lock-free; writes hold the
6//! lock briefly to mutate canonical state. `MirDb::clone()` is cheap
7//! (Arc-wrapped registries), so this pattern gives parallel readers without
8//! blocking on concurrent writes for longer than the clone itself.
9//!
10//! See [`crate::file_analyzer::FileAnalyzer`] for the per-file Pass 2 entry
11//! point that operates against a session.
12
13use std::collections::{HashMap, HashSet};
14use std::path::PathBuf;
15use std::sync::atomic::{AtomicBool, Ordering};
16use std::sync::Arc;
17
18use parking_lot::Mutex;
19
20use rayon::prelude::*;
21use salsa::Setter as _;
22
23use crate::cache::AnalysisCache;
24use crate::composer::Psr4Map;
25use crate::db::{collect_file_definitions, FileDefinitions, MirDatabase, MirDb, SourceFile};
26use crate::php_version::PhpVersion;
27
/// Long-lived analysis context. Owns the salsa database and tracks which
/// stubs have been loaded.
///
/// Cheap to clone the inner db for parallel reads; writes funnel through
/// [`Self::ingest_file`], [`Self::invalidate_file`], and the crate-internal
/// [`Self::with_db_mut`].
pub struct AnalysisSession {
    /// Canonical database paired with the map from file path to that file's
    /// salsa `SourceFile` input handle. Both are guarded by a single mutex.
    salsa: Mutex<(MirDb, HashMap<Arc<str>, SourceFile>)>,
    /// Optional analysis cache, installed via [`Self::with_cache`]. When
    /// present, its reverse-dependency graph is updated on ingest and
    /// invalidation.
    cache: Option<Arc<AnalysisCache>>,
    /// Optional PSR-4 map, installed via [`Self::with_psr4`].
    psr4: Option<Arc<Psr4Map>>,
    /// Set of stub virtual paths that have already been ingested. Replaces an
    /// older `AtomicBool stubs_loaded` flag — tracking individual paths lets
    /// us lazy-load extension stubs on demand without re-ingesting essentials.
    loaded_stubs: Mutex<HashSet<&'static str>>,
    /// True once user stubs (configured via [`Self::with_user_stubs`]) have
    /// been ingested. They are loaded together with the essential set on the
    /// first call to a stubs-loading method.
    user_stubs_loaded: AtomicBool,
    /// PHP language version this session targets; forwarded to stub parsing
    /// and to the inference sweep.
    php_version: PhpVersion,
    /// Individual user stub files, installed via [`Self::with_user_stubs`].
    user_stub_files: Vec<PathBuf>,
    /// Directories of user stubs, installed via [`Self::with_user_stubs`].
    user_stub_dirs: Vec<PathBuf>,
}
50
51impl AnalysisSession {
52    /// Create a session targeting the given PHP language version.
53    pub fn new(php_version: PhpVersion) -> Self {
54        Self {
55            salsa: Mutex::new((MirDb::default(), HashMap::new())),
56            cache: None,
57            psr4: None,
58            loaded_stubs: Mutex::new(HashSet::new()),
59            user_stubs_loaded: AtomicBool::new(false),
60            php_version,
61            user_stub_files: Vec::new(),
62            user_stub_dirs: Vec::new(),
63        }
64    }
65
    /// Builder-style setter: attach a shared analysis cache to the session,
    /// replacing any cache set earlier. Returns the session for chaining.
    pub fn with_cache(mut self, cache: Arc<AnalysisCache>) -> Self {
        self.cache = Some(cache);
        self
    }
70
    /// Builder-style setter: attach a shared PSR-4 map to the session,
    /// replacing any map set earlier. Returns the session for chaining.
    pub fn with_psr4(mut self, map: Arc<Psr4Map>) -> Self {
        self.psr4 = Some(map);
        self
    }
75
    /// Builder-style setter: configure the user stub `files` and `dirs`,
    /// replacing any previously configured lists. Nothing is read here; the
    /// stubs are ingested later by the stub-loading methods.
    pub fn with_user_stubs(mut self, files: Vec<PathBuf>, dirs: Vec<PathBuf>) -> Self {
        self.user_stub_files = files;
        self.user_stub_dirs = dirs;
        self
    }
81
    /// The PHP language version this session was created with.
    pub fn php_version(&self) -> PhpVersion {
        self.php_version
    }
85
86    pub fn cache(&self) -> Option<&AnalysisCache> {
87        self.cache.as_deref()
88    }
89
90    pub fn psr4(&self) -> Option<&Psr4Map> {
91        self.psr4.as_deref()
92    }
93
    /// Load every PHP built-in stub plus any configured user stubs.
    /// Idempotent. Equivalent to the legacy "load everything" behavior; use
    /// [`Self::ensure_essential_stubs_loaded`] in incremental scenarios where
    /// cold-start latency matters more than comprehensive stub coverage.
    ///
    /// This is a thin alias for [`Self::ensure_all_stubs_loaded`].
    pub fn ensure_stubs_loaded(&self) {
        self.ensure_all_stubs_loaded();
    }
101
    /// Load only the curated set of essential stubs (Core, standard, SPL,
    /// date) plus any configured user stubs. About 25 of 120 stub files;
    /// covers types and functions used by virtually all PHP code.
    ///
    /// Other extension stubs (Reflection, gd, openssl, …) can be brought in
    /// on demand via [`Self::ensure_stub_for_function`],
    /// [`Self::ensure_stub_for_class`], [`Self::ensure_stub_for_constant`],
    /// or [`Self::ensure_stubs_for_source`] when user code references them.
    /// Idempotent — already-loaded stubs are skipped.
    pub fn ensure_essential_stubs_loaded(&self) {
        // Embedded essentials first, then the user-configured stubs.
        self.ingest_stub_paths(crate::stubs::ESSENTIAL_STUB_PATHS);
        self.ensure_user_stubs_loaded();
    }
113
114    /// Load every embedded PHP stub plus any configured user stubs.
115    /// Use for batch tools (CLI, full project analysis) where comprehensive
116    /// symbol coverage matters more than cold-start latency.
117    pub fn ensure_all_stubs_loaded(&self) {
118        let paths: Vec<&'static str> = crate::stubs::stub_files().iter().map(|&(p, _)| p).collect();
119        self.ingest_stub_paths(&paths);
120        self.ensure_user_stubs_loaded();
121    }
122
123    /// Ensure the embedded stub that defines `name` (a function) is ingested.
124    /// Returns `true` when a matching stub exists (whether or not it was
125    /// already loaded), `false` when `name` isn't a known PHP built-in.
126    pub fn ensure_stub_for_function(&self, name: &str) -> bool {
127        match crate::stubs::stub_path_for_function(name) {
128            Some(path) => {
129                self.ingest_stub_paths(&[path]);
130                true
131            }
132            None => false,
133        }
134    }
135
136    /// Ensure the embedded stub that defines `fqcn` (a class / interface /
137    /// trait / enum) is ingested. Case-insensitive lookup with optional
138    /// leading backslash.
139    pub fn ensure_stub_for_class(&self, fqcn: &str) -> bool {
140        match crate::stubs::stub_path_for_class(fqcn) {
141            Some(path) => {
142                self.ingest_stub_paths(&[path]);
143                true
144            }
145            None => false,
146        }
147    }
148
149    /// Ensure the embedded stub that defines `name` (a constant) is ingested.
150    pub fn ensure_stub_for_constant(&self, name: &str) -> bool {
151        match crate::stubs::stub_path_for_constant(name) {
152            Some(path) => {
153                self.ingest_stub_paths(&[path]);
154                true
155            }
156            None => false,
157        }
158    }
159
160    /// Number of distinct embedded stubs currently ingested into the session.
161    /// Useful for diagnostics and bench reporting.
162    pub fn loaded_stub_count(&self) -> usize {
163        self.loaded_stubs.lock().len()
164    }
165
166    /// Auto-discover and ingest the embedded stubs needed to cover every
167    /// built-in PHP function / class / constant referenced by `source`.
168    ///
169    /// Used by [`crate::FileAnalyzer::analyze`] to keep essentials-only mode
170    /// correct without forcing callers to enumerate which stubs they need.
171    /// Idempotent — already-loaded stubs are skipped via [`Self::loaded_stubs`].
172    ///
173    /// The discovery scan is a coarse identifier sweep (see
174    /// [`crate::stubs::collect_referenced_builtin_paths`]) — it may pull in
175    /// a slightly larger set than the file strictly needs, but never misses
176    /// a referenced built-in. Cost is sub-millisecond per file.
177    ///
178    /// Fast path: if every embedded stub is already loaded (e.g. after a
179    /// batch tool called [`Self::ensure_all_stubs_loaded`]), the source scan
180    /// is skipped entirely.
181    pub fn ensure_stubs_for_source(&self, source: &str) {
182        // Cheap check first: skip the scan entirely when we already know we
183        // have everything. Avoids a ~50-500µs source walk on every analyze
184        // call in batch / warm-session scenarios.
185        {
186            let loaded = self.loaded_stubs.lock();
187            if loaded.len() >= crate::stubs::stub_files().len() {
188                return;
189            }
190        }
191        let paths = crate::stubs::collect_referenced_builtin_paths(source);
192        if paths.is_empty() {
193            return;
194        }
195        self.ingest_stub_paths(&paths);
196    }
197
198    /// Discover and ingest stubs by walking the parsed AST of a PHP file.
199    ///
200    /// Similar to [`Self::ensure_stubs_for_source`], but takes an already-parsed
201    /// AST instead of raw source text. Produces zero false positives since it
202    /// only extracts identifiers from actual AST nodes (not from strings or
203    /// comments). Preferred over `ensure_stubs_for_source` when the AST is
204    /// already available (e.g., in [`crate::FileAnalyzer`]).
205    ///
206    /// Idempotent and skips the scan if all stubs are already loaded.
207    pub fn ensure_stubs_for_ast(&self, program: &php_ast::ast::Program<'_, '_>) {
208        {
209            let loaded = self.loaded_stubs.lock();
210            if loaded.len() >= crate::stubs::stub_files().len() {
211                return;
212            }
213        }
214        let paths = crate::stubs::collect_referenced_builtin_paths_from_ast(program);
215        if paths.is_empty() {
216            return;
217        }
218        self.ingest_stub_paths(&paths);
219    }
220
221    /// Internal: parse + ingest each path in `paths` that hasn't already been
222    /// ingested. Holds the salsa write lock per file (brief), and the
223    /// `loaded_stubs` set lock briefly to record paths.
224    fn ingest_stub_paths(&self, paths: &[&'static str]) {
225        // Pick out the not-yet-loaded paths first to avoid redundant parsing.
226        let needed: Vec<&'static str> = {
227            let loaded = self.loaded_stubs.lock();
228            paths
229                .iter()
230                .copied()
231                .filter(|p| !loaded.contains(p))
232                .collect()
233        };
234        if needed.is_empty() {
235            return;
236        }
237
238        let php_version = self.php_version;
239        // Parse in parallel; ingest serially under the salsa write lock.
240        let slices: Vec<(&'static str, mir_codebase::storage::StubSlice)> = needed
241            .par_iter()
242            .filter_map(|&path| {
243                crate::stubs::stub_content_for_path(path).map(|content| {
244                    let slice =
245                        crate::stubs::stub_slice_from_source(path, content, Some(php_version));
246                    (path, slice)
247                })
248            })
249            .collect();
250
251        let mut guard = self.salsa.lock();
252        let mut loaded = self.loaded_stubs.lock();
253        for (path, slice) in slices {
254            if loaded.insert(path) {
255                guard.0.ingest_stub_slice(&slice);
256            }
257        }
258    }
259
260    fn ensure_user_stubs_loaded(&self) {
261        if self.user_stub_files.is_empty() && self.user_stub_dirs.is_empty() {
262            return;
263        }
264        let was_loaded = self.user_stubs_loaded.load(Ordering::Relaxed);
265        if was_loaded {
266            return;
267        }
268        let slices = crate::stubs::user_stub_slices(&self.user_stub_files, &self.user_stub_dirs);
269        let mut salsa = self.salsa.lock();
270        for slice in slices {
271            salsa.0.ingest_stub_slice(&slice);
272        }
273        self.user_stubs_loaded.store(true, Ordering::Relaxed);
274    }
275
276    /// Cheap clone of the salsa db for a read-only query. The lock is held
277    /// only for the duration of the clone, so concurrent readers never
278    /// serialize on each other or on writes for longer than the clone itself.
279    pub fn snapshot_db(&self) -> MirDb {
280        let guard = self.salsa.lock();
281        guard.0.clone()
282    }
283
284    /// Run a closure with read access to a database snapshot. The snapshot is
285    /// taken under a brief lock, then the closure runs without holding it.
286    pub fn read<R>(&self, f: impl FnOnce(&dyn MirDatabase) -> R) -> R {
287        let db = self.snapshot_db();
288        f(&db)
289    }
290
291    /// Pass 1 ingestion. Updates the file's source text in the salsa db,
292    /// runs definition collection, and ingests the resulting stub slice.
293    /// Triggers stub loading on first call. Also updates the cache's reverse-
294    /// dependency graph for `file` so cross-file invalidation stays correct
295    /// across incremental edits — without rebuilding the graph from scratch.
296    ///
297    /// If `file` was previously ingested, its old definitions and reference
298    /// locations are removed first so renames / deletions don't leave stale
299    /// state in the codebase. (Without this, long-running sessions would
300    /// accumulate dead reference-location entries indefinitely.)
301    pub fn ingest_file(&self, file: Arc<str>, source: Arc<str>) -> FileDefinitions {
302        self.ensure_stubs_loaded();
303        let file_defs = {
304            let mut guard = self.salsa.lock();
305            let (ref mut db, ref mut files) = *guard;
306            let salsa_file = match files.get(&file) {
307                Some(&sf) => {
308                    // Re-ingestion: drop old definitions + reference locations
309                    // before collecting fresh ones. Mirrors what
310                    // ProjectAnalyzer::re_analyze_file does.
311                    db.remove_file_definitions(file.as_ref());
312                    if sf.text(db).as_ref() != source.as_ref() {
313                        sf.set_text(db).to(source.clone());
314                    }
315                    sf
316                }
317                None => {
318                    let file_cloned = file.clone();
319                    let sf = SourceFile::new(db, file_cloned.clone(), source.clone());
320                    files.insert(file_cloned, sf);
321                    sf
322                }
323            };
324            collect_file_definitions(db, salsa_file)
325        };
326        {
327            let mut guard = self.salsa.lock();
328            guard.0.ingest_stub_slice(&file_defs.slice);
329        }
330        self.update_reverse_deps_for(&file);
331        file_defs
332    }
333
334    /// Drop a file's contribution to the session: codebase definitions,
335    /// reference locations, salsa input handle, cache entry, and outgoing
336    /// reverse-dependency edges. Cache entries of *dependent* files are
337    /// also evicted (cross-file invalidation).
338    ///
339    /// Use this when a file is closed by the consumer, or before a re-ingest
340    /// of substantially changed content. (Plain re-ingest via
341    /// [`Self::ingest_file`] also drops old definitions, but does not
342    /// remove the salsa input handle — call this for full cleanup.)
343    pub fn invalidate_file(&self, file: &str) {
344        {
345            let mut guard = self.salsa.lock();
346            let (ref mut db, ref mut files) = *guard;
347            db.remove_file_definitions(file);
348            files.remove(file);
349        }
350        if let Some(cache) = &self.cache {
351            cache.update_reverse_deps_for_file(file, &HashSet::new());
352            cache.evict_with_dependents(&[file.to_string()]);
353        }
354    }
355
356    /// Number of files currently tracked in this session's salsa input set.
357    /// Stable across reads; useful for diagnostics and memory bounds checks.
358    pub fn tracked_file_count(&self) -> usize {
359        let guard = self.salsa.lock();
360        guard.1.len()
361    }
362
363    // -----------------------------------------------------------------------
364    // Read-only codebase queries
365    //
366    // All take a brief lock to clone the db, then run the lookup against the
367    // owned snapshot — concurrent edits proceed without blocking.
368    // -----------------------------------------------------------------------
369
370    /// Resolve `symbol` (a class FQCN or function FQN) to its declaration
371    /// location. Powers go-to-definition for top-level symbols. Returns
372    /// `None` if the symbol isn't known to the codebase or has no recorded
373    /// source span (e.g. some stub-only declarations).
374    pub fn definition_of(&self, symbol: &str) -> Option<mir_codebase::storage::Location> {
375        let db = self.snapshot_db();
376        db.lookup_class_node(symbol)
377            .filter(|n| n.active(&db))
378            .and_then(|n| n.location(&db))
379            .or_else(|| {
380                db.lookup_function_node(symbol)
381                    .filter(|n| n.active(&db))
382                    .and_then(|n| n.location(&db))
383            })
384    }
385
386    /// Resolve a class member (method / property / class constant / enum case)
387    /// to its declaration location, walking the inheritance chain.
388    pub fn member_definition(
389        &self,
390        fqcn: &str,
391        member_name: &str,
392    ) -> Option<mir_codebase::storage::Location> {
393        let db = self.snapshot_db();
394        crate::db::member_location_via_db(&db, fqcn, member_name)
395    }
396
397    /// Every recorded reference to `symbol` (as `(file, line, col_start,
398    /// col_end)`). Use [`crate::symbol::ResolvedSymbol::codebase_key`] to
399    /// build the lookup key from a `ResolvedSymbol` returned by
400    /// [`crate::FileAnalysis::symbol_at`].
401    pub fn references_to(&self, symbol: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
402        let db = self.snapshot_db();
403        db.reference_locations(symbol)
404    }
405
406    /// All declarations defined in `file` (classes, interfaces, traits, enums,
407    /// functions, constants). Powers outline / document-symbols views and any
408    /// other consumer that needs the file's top-level symbol set. Returns an
409    /// empty Vec if `file` hasn't been ingested.
410    pub fn document_symbols(&self, file: &str) -> Vec<crate::symbol::DocumentSymbol> {
411        use crate::symbol::{DocumentSymbol, DocumentSymbolKind};
412
413        let db = self.snapshot_db();
414        let mut out = Vec::new();
415        for symbol in db.symbols_defined_in_file(file) {
416            // Try class side first — covers Class / Interface / Trait / Enum.
417            if let Some(class_node) = db.lookup_class_node(symbol.as_ref()) {
418                if !class_node.active(&db) {
419                    continue;
420                }
421                let kind = crate::db::class_kind_via_db(&db, symbol.as_ref())
422                    .map(|k| {
423                        if k.is_interface {
424                            DocumentSymbolKind::Interface
425                        } else if k.is_trait {
426                            DocumentSymbolKind::Trait
427                        } else if k.is_enum {
428                            DocumentSymbolKind::Enum
429                        } else {
430                            DocumentSymbolKind::Class
431                        }
432                    })
433                    .unwrap_or(DocumentSymbolKind::Class);
434                out.push(DocumentSymbol {
435                    name: symbol.clone(),
436                    kind,
437                    location: class_node.location(&db),
438                });
439                continue;
440            }
441            if let Some(fn_node) = db.lookup_function_node(symbol.as_ref()) {
442                if !fn_node.active(&db) {
443                    continue;
444                }
445                out.push(DocumentSymbol {
446                    name: symbol.clone(),
447                    kind: DocumentSymbolKind::Function,
448                    location: fn_node.location(&db),
449                });
450                continue;
451            }
452            // Constants and other top-level declarations: emit with no
453            // location info; consumers can still surface them in an outline.
454            out.push(DocumentSymbol {
455                name: symbol,
456                kind: DocumentSymbolKind::Constant,
457                location: None,
458            });
459        }
460        out
461    }
462
463    /// Compute `file`'s outgoing dependency edges and update the cache's
464    /// reverse-dep graph in place. No-op if no cache is configured.
465    fn update_reverse_deps_for(&self, file: &str) {
466        let Some(cache) = self.cache.as_deref() else {
467            return;
468        };
469        let db = self.snapshot_db();
470        let targets = file_outgoing_dependencies(&db, file);
471        cache.update_reverse_deps_for_file(file, &targets);
472    }
473
474    /// Cross-file inference sweep. For each `(file, source)` pair, runs the
475    /// Pass 2 inference-only mode on a cloned db (parallel via rayon), then
476    /// commits the collected inferred return types to the canonical db.
477    ///
478    /// Call this on idle / save / explicit user request, **not** on every
479    /// keystroke — [`crate::FileAnalyzer::analyze`] deliberately skips
480    /// inference sweep on the hot path. Files whose source contains parse
481    /// errors are silently skipped.
482    pub fn run_inference_sweep(&self, files: &[(Arc<str>, Arc<str>)]) {
483        self.ensure_stubs_loaded();
484
485        // The priming db lives only inside `gather_inferred_types`. After it
486        // returns, all rayon-clone references to the salsa storage are dropped
487        // — required so that the subsequent `commit_inferred_return_types`
488        // call (which calls salsa's `cancel_others`) doesn't deadlock waiting
489        // for outstanding db references.
490        let (functions, methods) =
491            gather_inferred_types(self.snapshot_db(), files, self.php_version);
492
493        let mut guard = self.salsa.lock();
494        guard.0.commit_inferred_return_types(functions, methods);
495    }
496}
497
498/// Drive Pass 2 inference-only mode in parallel across `files`, accumulating
499/// inferred function and method return types. The `db_priming` MirDb is
500/// consumed (cloned per spawned task and dropped on return), so the caller's
501/// canonical db can subsequently take exclusive access without deadlock.
502///
503/// Crate-internal so [`crate::project::ProjectAnalyzer`] can use the same
504/// deadlock-safe helper for its lazy-load reanalysis sweep.
505#[allow(clippy::type_complexity)]
506pub(crate) fn gather_inferred_types(
507    db_priming: MirDb,
508    files: &[(Arc<str>, Arc<str>)],
509    php_version: PhpVersion,
510) -> (
511    Vec<(Arc<str>, mir_types::Union)>,
512    Vec<(Arc<str>, Arc<str>, mir_types::Union)>,
513) {
514    use crate::pass2::Pass2Driver;
515    use mir_types::Union;
516
517    type Functions = Vec<(Arc<str>, Union)>;
518    type Methods = Vec<(Arc<str>, Arc<str>, Union)>;
519    let functions: Arc<Mutex<Functions>> = Arc::new(Mutex::new(Vec::new()));
520    let methods: Arc<Mutex<Methods>> = Arc::new(Mutex::new(Vec::new()));
521
522    rayon::in_place_scope(|s| {
523        for (file, source) in files {
524            let db = db_priming.clone();
525            let functions = Arc::clone(&functions);
526            let methods = Arc::clone(&methods);
527            let file = file.clone();
528            let source = source.clone();
529
530            s.spawn(move |_| {
531                let arena = crate::arena::create_parse_arena(source.len());
532                let parsed = php_rs_parser::parse(&arena, source.as_ref());
533                if !parsed.errors.is_empty() {
534                    return;
535                }
536                let driver = Pass2Driver::new_inference_only(&db as &dyn MirDatabase, php_version);
537                driver.analyze_bodies(&parsed.program, file, source.as_ref(), &parsed.source_map);
538                let inferred = driver.take_inferred_types();
539                {
540                    let mut f = functions.lock();
541                    f.extend(inferred.functions);
542                }
543                {
544                    let mut m = methods.lock();
545                    m.extend(inferred.methods);
546                }
547            });
548        }
549    });
550
551    let functions = Arc::try_unwrap(functions)
552        .map(|m| m.into_inner())
553        .unwrap_or_else(|arc| arc.lock().clone());
554    let methods = Arc::try_unwrap(methods)
555        .map(|m| m.into_inner())
556        .unwrap_or_else(|arc| arc.lock().clone());
557
558    (functions, methods)
559}
560
561/// Compute the set of files `file` depends on: defining files of its imports,
562/// plus parent / interfaces / traits' defining files for any classes declared
563/// in `file`. Self-edges are excluded.
564fn file_outgoing_dependencies(db: &dyn MirDatabase, file: &str) -> HashSet<String> {
565    let mut targets: HashSet<String> = HashSet::new();
566
567    let mut add_target = |symbol: &str| {
568        if let Some(defining_file) = db.symbol_defining_file(symbol) {
569            let def = defining_file.as_ref().to_string();
570            if def != file {
571                targets.insert(def);
572            }
573        }
574    };
575
576    let imports = db.file_imports(file);
577    for fqcn in imports.values() {
578        add_target(fqcn);
579    }
580
581    for fqcn in db.symbols_defined_in_file(file) {
582        let Some(node) = db.lookup_class_node(fqcn.as_ref()) else {
583            continue;
584        };
585        if let Some(parent) = node.parent(db) {
586            add_target(parent.as_ref());
587        }
588        for iface in node.interfaces(db).iter() {
589            add_target(iface.as_ref());
590        }
591        for tr in node.traits(db).iter() {
592            add_target(tr.as_ref());
593        }
594    }
595
596    targets
597}