Skip to main content

mir_analyzer/session/
mod.rs

1//! Session-based analysis API for incremental, per-file analysis.
2//!
3//! [`AnalysisSession`] owns the salsa database and per-session caches for a
4//! long-running analysis context shared across many per-file analyses. Reads
5//! clone the database under a brief lock, then run lock-free; writes hold the
6//! lock briefly to mutate canonical state. `MirDbStorage::clone()` is cheap
7//! (Arc-wrapped registries), so this pattern gives parallel readers without
8//! blocking on concurrent writes for longer than the clone itself.
9//!
10//! See [`crate::file_analyzer::FileAnalyzer`] for the per-file analysis
11//! entry point that operates against a session.
12
13use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
14use std::path::PathBuf;
15use std::sync::Arc;
16
17use parking_lot::RwLock;
18
19use crate::analyzer_db::AnalyzerDb;
20use crate::cache::AnalysisCache;
21use crate::composer::Psr4Map;
22use crate::db::{MirDatabase, MirDbStorage, RefLoc};
23use crate::php_version::PhpVersion;
24
25/// Long-lived analysis context. Owns the salsa database and tracks which
26/// stubs have been loaded.
27///
28/// Cheap to clone the inner db for parallel reads; writes funnel through
29/// [`Self::ingest_file`], [`Self::invalidate_file`], and the crate-internal
30/// [`Self::with_db_mut`].
///
/// `Clone` is derived. The database handle and the lock-guarded caches are
/// `Arc`-wrapped, so a cloned session shares them with the original rather
/// than copying their contents.
31#[derive(Clone)]
32pub struct AnalysisSession {
33    /// Shared database management (salsa, file registry, stub tracking).
34    pub(crate) db: Arc<AnalyzerDb>,
    /// Body-analysis issue cache, if one was attached via
    /// [`Self::with_cache`] or [`Self::with_cache_dir`]; `None` otherwise.
35    pub(crate) cache: Option<Arc<AnalysisCache>>,
36    /// PSR-4 / Composer autoload map. Retained alongside `resolver` so the
37    /// `psr4()` accessor can still return a typed `Psr4Map` for callers that
38    /// need Composer-specific data (project_files / vendor_files / etc.).
39    pub(crate) psr4: Option<Arc<Psr4Map>>,
40    /// Generic class resolver used for on-demand lazy loading. When `psr4`
41    /// is set via [`Self::with_psr4`], this is populated with the same map
42    /// re-typed as `dyn ClassResolver` and chained with a stub resolver.
43    /// Consumers can also supply their own resolver via
44    /// [`Self::with_class_resolver`] without going through Composer.
45    resolver: Option<Arc<dyn crate::ClassResolver>>,
    /// PHP language version this session targets. Its string form is also
    /// written into the salsa database when the session is constructed.
46    pub(crate) php_version: PhpVersion,
    /// Individual user stub files supplied via [`Self::with_user_stubs`].
    /// Together with `user_stub_dirs` they feed the user-stub fingerprint
    /// that [`Self::with_cache_dir`] passes to the analysis cache.
47    pub(crate) user_stub_files: Vec<PathBuf>,
    /// User stub directories supplied via [`Self::with_user_stubs`].
48    pub(crate) user_stub_dirs: Vec<PathBuf>,
49    /// Tracks symbols that were previously defined in a file but have since
50    /// been removed (deleted or renamed). When `ingest_file` detects that
51    /// a symbol disappears, it records it here so `dependency_graph()` can
52    /// still produce edges to files that reference the now-gone symbol.
53    ///
54    /// Keyed by the file that used to define the symbols. Symbols are removed
55    /// from the set when re-added to the same file on a subsequent ingest.
56    /// The set may contain symbols with no current referencers; those are
57    /// harmless — the `symbol_referencers_of` lookup returns empty.
58    stale_defined_symbols: Arc<RwLock<HashMap<String, HashSet<Arc<str>>>>>,
59    /// Negative cache: FQCNs that `load_class` already failed on.
60    /// The value is the resolver-mapped path (when known) so eviction on
61    /// `set_file_text` / `ingest_file` is a path equality check rather than
62    /// re-running the resolver per entry. `None` means the resolver itself
63    /// couldn't map the FQCN; those entries survive file edits (no source
64    /// change makes a never-resolvable name resolvable).
65    /// Bounded to `UNRESOLVABLE_CACHE_CAP`; clears on overflow.
66    unresolvable_fqcns: UnresolvableCache,
67    /// Pluggable source-text provider for lazy-load. Defaults to filesystem
68    /// reads ([`crate::FsSourceProvider`]); LSPs swap in a VFS-backed
69    /// implementation so unsaved buffers override on-disk content.
70    source_provider: Arc<dyn crate::SourceProvider>,
71    /// Vendor `autoload.files` entries not yet indexed. `Some(paths)` means
72    /// pending; `None` means the load has already run (idempotent). Populated
73    /// by [`Self::with_psr4`]; drained by [`Self::ensure_vendor_eager_functions`],
74    /// which is called automatically from [`Self::prepare_ast_for_analysis`].
75    ///
76    /// The mutex is held for the full duration of the load so concurrent callers
77    /// block until indexing is complete rather than proceeding with a stale
78    /// workspace snapshot.
    ///
    /// A session built by [`Self::new`] starts with `Some` of an empty list,
    /// i.e. "pending, but nothing to load".
79    pub(crate) pending_eager_function_files: Arc<parking_lot::Mutex<Option<Vec<PathBuf>>>>,
80}
81
82/// FQCN → optional resolver-mapped path. See the field doc on
83/// `AnalysisSession::unresolvable_fqcns`.
///
/// The map sits behind `Arc<RwLock<..>>`, so cloning the alias's value
/// yields another handle to the same underlying map.
84type UnresolvableCache = Arc<RwLock<HashMap<Arc<str>, Option<Arc<str>>>>>;
85
86/// Cap on the negative-resolution cache. Sized to accommodate a large
87/// workspace's worth of genuinely-missing references without unbounded
88/// growth. On overflow the cache is cleared; the cost is a few extra
89/// resolver calls until it re-fills.
///
/// NOTE(review): the code that compares against this cap and clears the
/// cache is not in this part of the file — presumably it lives in one of the
/// `session` submodules; confirm before changing the value.
90const UNRESOLVABLE_CACHE_CAP: usize = 10_000;
91
92impl AnalysisSession {
93    /// Create a session targeting the given PHP language version.
94    pub fn new(php_version: PhpVersion) -> Self {
95        let db = Arc::new(AnalyzerDb::new());
96        db.salsa
97            .write()
98            .set_php_version(Arc::from(php_version.to_string().as_str()));
99        Self {
100            db,
101            cache: None,
102            psr4: None,
103            resolver: None,
104            php_version,
105            user_stub_files: Vec::new(),
106            user_stub_dirs: Vec::new(),
107            stale_defined_symbols: Arc::new(RwLock::new(HashMap::default())),
108            unresolvable_fqcns: Arc::new(RwLock::new(HashMap::default())),
109            source_provider: Arc::new(crate::FsSourceProvider),
110            pending_eager_function_files: Arc::new(parking_lot::Mutex::new(Some(Vec::new()))),
111        }
112    }
113
114    /// Swap in a custom [`crate::SourceProvider`]. LSPs install a VFS-backed
115    /// provider here so the analyzer reads from unsaved editor buffers
116    /// instead of disk.
117    pub fn with_source_provider(mut self, provider: Arc<dyn crate::SourceProvider>) -> Self {
118        self.source_provider = provider;
119        self
120    }
121
122    /// Attach a pre-built [`AnalysisCache`] (the body-analysis issue cache) and
123    /// open a sibling definition [`StubSlice`] cache under the same root, so
124    /// callers using this builder get the same speedup as `with_cache_dir`.
125    ///
126    /// Rebuilds the shared database to attach the definition cache — call
127    /// **before** any file is ingested. A debug assertion catches misuse.
128    ///
129    /// [`StubSlice`]: mir_codebase::storage::StubSlice
130    pub fn with_cache(mut self, cache: Arc<AnalysisCache>) -> Self {
131        debug_assert_eq!(
132            self.db.source_file_count(),
133            0,
134            "AnalysisSession::with_cache must be called before any file is ingested"
135        );
136        let dir = cache.cache_dir().to_path_buf();
137        self.db = Arc::new(AnalyzerDb::new().with_cache_dir(&dir));
138        self.db
139            .salsa
140            .write()
141            .set_php_version(Arc::from(self.php_version.to_string().as_str()));
142        self.cache = Some(cache);
143        self
144    }
145
146    /// Convenience: open a disk-backed cache at `cache_dir` and attach it.
147    ///
148    /// Attaches both the body-analysis issue cache ([`AnalysisCache`]) and the
149    /// definition [`StubSlice`] cache to the shared database. Builds a fresh
150    /// [`AnalyzerDb`] internally — call **before** any file is ingested. A
151    /// debug assertion catches misuse.
152    ///
153    /// [`StubSlice`]: mir_codebase::storage::StubSlice
154    pub fn with_cache_dir(mut self, cache_dir: &std::path::Path) -> Self {
155        debug_assert_eq!(
156            self.db.source_file_count(),
157            0,
158            "AnalysisSession::with_cache_dir must be called before any file is ingested"
159        );
160        self.db = Arc::new(AnalyzerDb::new().with_cache_dir(cache_dir));
161        self.db
162            .salsa
163            .write()
164            .set_php_version(Arc::from(self.php_version.to_string().as_str()));
165        // Fold the user-stub fingerprint into the cache epoch. `with_user_stubs`
166        // must run before this for it to be picked up (it does in `build_session`);
167        // sessions without user stubs get 0, which is correct.
168        let user_stub_fp =
169            crate::stubs::user_stub_fingerprint(&self.user_stub_files, &self.user_stub_dirs);
170        self.cache = Some(Arc::new(AnalysisCache::open(
171            cache_dir,
172            self.php_version.cache_byte(),
173            user_stub_fp,
174        )));
175        self
176    }
177
178    /// Attach a Composer autoload map (PSR-4, PSR-0, classmap, files).
179    /// Sets the same map as the active [`crate::ClassResolver`] so
180    /// [`Self::load_class`] works out of the box.
181    pub fn with_psr4(mut self, map: Arc<Psr4Map>) -> Self {
182        let user_resolver: Arc<dyn crate::ClassResolver> = map.clone();
183        // Wrap with stub awareness so `find_class_like` / `resolve_fqcn_to_path`
184        // can map built-in PHP class FQCNs (`ArrayObject`, `Exception`, …)
185        // to their stub virtual paths.
186        let resolver: Arc<dyn crate::ClassResolver> = Arc::new(crate::ChainedClassResolver::new(
187            user_resolver,
188            Arc::new(crate::StubClassResolver),
189        ));
190        self.psr4 = Some(map.clone());
191        self.resolver = Some(resolver.clone());
192        // Mirror into MirDbStorage so salsa-tracked resolver queries
193        // (`db::resolve_fqcn_to_path`) see the same resolver and are
194        // invalidated on swap.
195        self.db.salsa.write().set_resolver(Some(resolver));
196        // Register vendor autoload.files for lazy loading. They define global
197        // functions and constants that the class resolver cannot discover.
198        // `ensure_vendor_eager_functions` will index them on first analysis call.
199        *self.pending_eager_function_files.lock() = Some(map.vendor_eager_files());
200        self
201    }
202
203    /// Attach a generic class resolver for projects that don't use Composer
204    /// (WordPress, Drupal, custom autoloaders, workspace-walk indexes).
205    /// Replaces any previously-set Composer-backed resolver. Automatically
206    /// wrapped with stub awareness so PHP built-ins remain resolvable.
207    pub fn with_class_resolver(mut self, resolver: Arc<dyn crate::ClassResolver>) -> Self {
208        let wrapped: Arc<dyn crate::ClassResolver> = Arc::new(crate::ChainedClassResolver::new(
209            resolver,
210            Arc::new(crate::StubClassResolver),
211        ));
212        self.db.salsa.write().set_resolver(Some(wrapped.clone()));
213        self.resolver = Some(wrapped);
214        self
215    }
216
217    pub fn with_user_stubs(mut self, files: Vec<PathBuf>, dirs: Vec<PathBuf>) -> Self {
218        self.user_stub_files = files;
219        self.user_stub_dirs = dirs;
220        self
221    }
222
223    pub fn php_version(&self) -> PhpVersion {
224        self.php_version
225    }
226
227    pub fn cache(&self) -> Option<&AnalysisCache> {
228        self.cache.as_deref()
229    }
230
231    pub fn psr4(&self) -> Option<&Psr4Map> {
232        self.psr4.as_deref()
233    }
234}
235
236mod incremental;
237mod ingest;
238mod loading;
239mod queries;
240mod stubs;
241
242/// Compute the full set of files `file` depends on: structural edges from
243/// the memoized [`crate::db::file_structural_deps`] tracked query, plus
244/// bare-FQN references recorded during body analysis (which live in the
245/// reference index and are not visible to salsa). Self-edges are excluded.
246/// Used to persist the disk cache's reverse-dep graph.
247fn file_outgoing_dependencies(db: &dyn MirDatabase, file: &str) -> HashSet<String> {
248    let mut targets: HashSet<String> = HashSet::default();
249
250    if let Some(sf) = db.lookup_source_file(file) {
251        for target in crate::db::file_structural_deps(db, sf).iter() {
252            targets.insert(target.as_ref().to_string());
253        }
254    }
255
256    // Bare-FQN references recorded during body analysis (new \Foo(),
257    // \Foo::method(), \foo()) that do not appear in use-import statements.
258    for symbol_key in db.file_referenced_symbols(file) {
259        let lookup: &str = match symbol_key.split_once("::") {
260            Some((class, _)) => class,
261            None => &symbol_key,
262        };
263        if let Some(defining_file) = db.symbol_defining_file(lookup) {
264            if defining_file.as_ref() != file {
265                targets.insert(defining_file.as_ref().to_string());
266            }
267        }
268    }
269
270    targets
271}
272
273/// AST visitor that collects class FQCN references for PSR-4 preloading.
274/// Captures identifiers from `new X`, static calls / property / constant
275/// access, type hints, `instanceof`, and `@param`/`@return`/`@var`/`@extends`/
276/// `@implements` docblock annotations. Does *not* normalize via PSR-4 /
277/// imports — callers run the raw string through `resolve_name`.
278fn collect_class_refs_from_ast(program: &php_ast::owned::Program) -> Vec<String> {
279    use php_ast::ast::BinaryOp;
280    use php_ast::owned::visitor::{
281        walk_owned_catch_clause, walk_owned_class_member, walk_owned_expr, walk_owned_program,
282        walk_owned_stmt, walk_owned_type_hint, OwnedVisitor,
283    };
284    use php_ast::owned::{ClassMemberKind, ExprKind, TypeHintKind};
285    use std::ops::ControlFlow;
286
287    fn owned_name_str(name: &php_ast::owned::Name) -> String {
288        let joined: String = name
289            .parts
290            .iter()
291            .map(|p| p.as_ref())
292            .collect::<Vec<&str>>()
293            .join("\\");
294        if name.kind == php_ast::ast::NameKind::FullyQualified {
295            format!("\\{joined}")
296        } else {
297            joined
298        }
299    }
300
301    /// Recursively collect all `TNamedObject` FQCNs from a mir type, including
302    /// those nested inside generic type parameters (e.g. `Collection<Item>`).
303    fn collect_from_type(ty: &mir_types::Type, out: &mut std::collections::HashSet<String>) {
304        for atomic in ty.types.iter() {
305            if let mir_types::Atomic::TNamedObject { fqcn, type_params } = atomic {
306                out.insert(fqcn.as_ref().to_string());
307                for tp in type_params.iter() {
308                    collect_from_type(tp, out);
309                }
310            }
311        }
312    }
313
314    /// Parse a docblock and collect class names from `@param`, `@return`,
315    /// `@var`, `@extends`, and `@implements` annotations.
316    fn collect_from_docblock(text: &str, out: &mut std::collections::HashSet<String>) {
317        let parsed = crate::parser::DocblockParser::parse(text);
318        for (_, ty) in &parsed.params {
319            collect_from_type(ty, out);
320        }
321        if let Some(ret) = &parsed.return_type {
322            collect_from_type(ret, out);
323        }
324        if let Some(var) = &parsed.var_type {
325            collect_from_type(var, out);
326        }
327        if let Some(ext) = &parsed.extends {
328            collect_from_type(ext, out);
329        }
330        for impl_ty in &parsed.implements {
331            collect_from_type(impl_ty, out);
332        }
333    }
334
335    struct V {
336        names: std::collections::HashSet<String>,
337    }
338    impl OwnedVisitor for V {
339        fn visit_stmt(&mut self, stmt: &php_ast::owned::Stmt) -> ControlFlow<()> {
340            if let Some(doc) = stmt.leading_doc_comment() {
341                collect_from_docblock(&doc.text, &mut self.names);
342            }
343            walk_owned_stmt(self, stmt)
344        }
345
346        fn visit_class_member(&mut self, member: &php_ast::owned::ClassMember) -> ControlFlow<()> {
347            match &member.kind {
348                ClassMemberKind::Method(m) => {
349                    if let Some(doc) = &m.doc_comment {
350                        collect_from_docblock(&doc.text, &mut self.names);
351                    }
352                }
353                ClassMemberKind::Property(p) => {
354                    if let Some(doc) = &p.doc_comment {
355                        collect_from_docblock(&doc.text, &mut self.names);
356                    }
357                }
358                _ => {}
359            }
360            walk_owned_class_member(self, member)
361        }
362
363        fn visit_expr(&mut self, expr: &php_ast::owned::Expr) -> ControlFlow<()> {
364            match &expr.kind {
365                ExprKind::New(n) => {
366                    if let ExprKind::Identifier(name) = &n.class.kind {
367                        self.names.insert(name.as_ref().to_string());
368                    }
369                }
370                ExprKind::StaticMethodCall(c) => {
371                    if let ExprKind::Identifier(name) = &c.class.kind {
372                        self.names.insert(name.as_ref().to_string());
373                    }
374                }
375                ExprKind::StaticPropertyAccess(a) => {
376                    if let ExprKind::Identifier(name) = &a.class.kind {
377                        self.names.insert(name.as_ref().to_string());
378                    }
379                }
380                ExprKind::ClassConstAccess(a) => {
381                    if let ExprKind::Identifier(name) = &a.class.kind {
382                        self.names.insert(name.as_ref().to_string());
383                    }
384                }
385                ExprKind::Binary(b) if b.op == BinaryOp::Instanceof => {
386                    if let ExprKind::Identifier(name) = &b.right.kind {
387                        self.names.insert(name.as_ref().to_string());
388                    }
389                }
390                _ => {}
391            }
392            walk_owned_expr(self, expr)
393        }
394
395        fn visit_type_hint(&mut self, hint: &php_ast::owned::TypeHint) -> ControlFlow<()> {
396            if let TypeHintKind::Named(name) = &hint.kind {
397                let s = owned_name_str(name);
398                if !s.is_empty() {
399                    self.names.insert(s);
400                }
401            }
402            walk_owned_type_hint(self, hint)
403        }
404
405        fn visit_catch_clause(&mut self, catch: &php_ast::owned::CatchClause) -> ControlFlow<()> {
406            for ty in catch.types.iter() {
407                self.names.insert(owned_name_str(ty));
408            }
409            walk_owned_catch_clause(self, catch)
410        }
411    }
412    let mut v = V {
413        names: std::collections::HashSet::default(),
414    };
415    let _ = walk_owned_program(&mut v, program);
416    v.names.into_iter().collect()
417}