mir_analyzer/session/mod.rs
1//! Session-based analysis API for incremental, per-file analysis.
2//!
3//! [`AnalysisSession`] owns the salsa database and per-session caches for a
4//! long-running analysis context shared across many per-file analyses. Reads
5//! clone the database under a brief lock, then run lock-free; writes hold the
6//! lock briefly to mutate canonical state. `MirDbStorage::clone()` is cheap
7//! (Arc-wrapped registries), so this pattern gives parallel readers without
8//! blocking on concurrent writes for longer than the clone itself.
9//!
10//! See [`crate::file_analyzer::FileAnalyzer`] for the per-file analysis
11//! entry point that operates against a session.
12
13use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet};
14use std::path::PathBuf;
15use std::sync::Arc;
16
17use parking_lot::RwLock;
18
19use crate::analyzer_db::AnalyzerDb;
20use crate::cache::AnalysisCache;
21use crate::composer::Psr4Map;
22use crate::db::{MirDatabase, MirDbStorage, RefLoc};
23use crate::php_version::PhpVersion;
24
25/// Long-lived analysis context. Owns the salsa database and tracks which
26/// stubs have been loaded.
27///
28/// Cheap to clone the inner db for parallel reads; writes funnel through
29/// [`Self::ingest_file`], [`Self::invalidate_file`], and the crate-internal
30/// [`Self::with_db_mut`].
31#[derive(Clone)]
32pub struct AnalysisSession {
33 /// Shared database management (salsa, file registry, stub tracking).
34 pub(crate) db: Arc<AnalyzerDb>,
35 pub(crate) cache: Option<Arc<AnalysisCache>>,
36 /// PSR-4 / Composer autoload map. Retained alongside `resolver` so the
37 /// `psr4()` accessor can still return a typed `Psr4Map` for callers that
38 /// need Composer-specific data (project_files / vendor_files / etc.).
39 pub(crate) psr4: Option<Arc<Psr4Map>>,
40 /// Generic class resolver used for on-demand lazy loading. When `psr4`
41 /// is set via [`Self::with_psr4`], this is populated with the same map
42 /// re-typed as `dyn ClassResolver`. Consumers can also supply their own
43 /// resolver via [`Self::with_class_resolver`] without going through
44 /// Composer.
45 resolver: Option<Arc<dyn crate::ClassResolver>>,
46 pub(crate) php_version: PhpVersion,
47 pub(crate) user_stub_files: Vec<PathBuf>,
48 pub(crate) user_stub_dirs: Vec<PathBuf>,
49 /// Tracks symbols that were previously defined in a file but have since
50 /// been removed (deleted or renamed). When `ingest_file` detects that
51 /// a symbol disappears, it records it here so `dependency_graph()` can
52 /// still produce edges to files that reference the now-gone symbol.
53 ///
54 /// Keyed by the file that used to define the symbols. Symbols are removed
55 /// from the set when re-added to the same file on a subsequent ingest.
56 /// The set may contain symbols with no current referencers; those are
57 /// harmless — the `symbol_referencers_of` lookup returns empty.
58 stale_defined_symbols: Arc<RwLock<HashMap<String, HashSet<Arc<str>>>>>,
59 /// Negative cache: FQCNs that `load_class` already failed on.
60 /// The value is the resolver-mapped path (when known) so eviction on
61 /// `set_file_text` / `ingest_file` is a path equality check rather than
62 /// re-running the resolver per entry. `None` means the resolver itself
63 /// couldn't map the FQCN; those entries survive file edits (no source
64 /// change makes a never-resolvable name resolvable).
65 /// Bounded to `UNRESOLVABLE_CACHE_CAP`; clears on overflow.
66 unresolvable_fqcns: UnresolvableCache,
67 /// Pluggable source-text provider for lazy-load. Defaults to filesystem
68 /// reads ([`crate::FsSourceProvider`]); LSPs swap in a VFS-backed
69 /// implementation so unsaved buffers override on-disk content.
70 source_provider: Arc<dyn crate::SourceProvider>,
71}
72
/// Shared, lock-guarded map from an FQCN to the path the resolver mapped it
/// to, or `None` when no path is known. The field doc on
/// `AnalysisSession::unresolvable_fqcns` describes how it is used.
type UnresolvableCache = Arc<RwLock<HashMap<Arc<str>, Option<Arc<str>>>>>;
76
/// Maximum number of entries kept in the negative-resolution cache.
///
/// Large enough to hold a big workspace's genuinely-missing references
/// while still preventing unbounded growth. When the cap is exceeded the
/// cache is cleared; the only cost is a handful of extra resolver calls
/// until it fills up again.
const UNRESOLVABLE_CACHE_CAP: usize = 10_000;
82
83impl AnalysisSession {
84 /// Create a session targeting the given PHP language version.
85 pub fn new(php_version: PhpVersion) -> Self {
86 let db = Arc::new(AnalyzerDb::new());
87 db.salsa
88 .write()
89 .set_php_version(Arc::from(php_version.to_string().as_str()));
90 Self {
91 db,
92 cache: None,
93 psr4: None,
94 resolver: None,
95 php_version,
96 user_stub_files: Vec::new(),
97 user_stub_dirs: Vec::new(),
98 stale_defined_symbols: Arc::new(RwLock::new(HashMap::default())),
99 unresolvable_fqcns: Arc::new(RwLock::new(HashMap::default())),
100 source_provider: Arc::new(crate::FsSourceProvider),
101 }
102 }
103
104 /// Swap in a custom [`crate::SourceProvider`]. LSPs install a VFS-backed
105 /// provider here so the analyzer reads from unsaved editor buffers
106 /// instead of disk.
107 pub fn with_source_provider(mut self, provider: Arc<dyn crate::SourceProvider>) -> Self {
108 self.source_provider = provider;
109 self
110 }
111
112 /// Attach a pre-built [`AnalysisCache`] (the body-analysis issue cache) and
113 /// open a sibling definition [`StubSlice`] cache under the same root, so
114 /// callers using this builder get the same speedup as `with_cache_dir`.
115 ///
116 /// Rebuilds the shared database to attach the definition cache — call
117 /// **before** any file is ingested. A debug assertion catches misuse.
118 ///
119 /// [`StubSlice`]: mir_codebase::storage::StubSlice
120 pub fn with_cache(mut self, cache: Arc<AnalysisCache>) -> Self {
121 debug_assert_eq!(
122 self.db.source_file_count(),
123 0,
124 "AnalysisSession::with_cache must be called before any file is ingested"
125 );
126 let dir = cache.cache_dir().to_path_buf();
127 self.db = Arc::new(AnalyzerDb::new().with_cache_dir(&dir));
128 self.db
129 .salsa
130 .write()
131 .set_php_version(Arc::from(self.php_version.to_string().as_str()));
132 self.cache = Some(cache);
133 self
134 }
135
136 /// Convenience: open a disk-backed cache at `cache_dir` and attach it.
137 ///
138 /// Attaches both the body-analysis issue cache ([`AnalysisCache`]) and the
139 /// definition [`StubSlice`] cache to the shared database. Builds a fresh
140 /// [`AnalyzerDb`] internally — call **before** any file is ingested. A
141 /// debug assertion catches misuse.
142 ///
143 /// [`StubSlice`]: mir_codebase::storage::StubSlice
144 pub fn with_cache_dir(mut self, cache_dir: &std::path::Path) -> Self {
145 debug_assert_eq!(
146 self.db.source_file_count(),
147 0,
148 "AnalysisSession::with_cache_dir must be called before any file is ingested"
149 );
150 self.db = Arc::new(AnalyzerDb::new().with_cache_dir(cache_dir));
151 self.db
152 .salsa
153 .write()
154 .set_php_version(Arc::from(self.php_version.to_string().as_str()));
155 // Fold the user-stub fingerprint into the cache epoch. `with_user_stubs`
156 // must run before this for it to be picked up (it does in `build_session`);
157 // sessions without user stubs get 0, which is correct.
158 let user_stub_fp =
159 crate::stubs::user_stub_fingerprint(&self.user_stub_files, &self.user_stub_dirs);
160 self.cache = Some(Arc::new(AnalysisCache::open(
161 cache_dir,
162 self.php_version.cache_byte(),
163 user_stub_fp,
164 )));
165 self
166 }
167
168 /// Attach a Composer autoload map (PSR-4, PSR-0, classmap, files).
169 /// Sets the same map as the active [`crate::ClassResolver`] so
170 /// [`Self::load_class`] works out of the box.
171 pub fn with_psr4(mut self, map: Arc<Psr4Map>) -> Self {
172 let user_resolver: Arc<dyn crate::ClassResolver> = map.clone();
173 // Wrap with stub awareness so `find_class_like` / `resolve_fqcn_to_path`
174 // can map built-in PHP class FQCNs (`ArrayObject`, `Exception`, …)
175 // to their stub virtual paths.
176 let resolver: Arc<dyn crate::ClassResolver> = Arc::new(crate::ChainedClassResolver::new(
177 user_resolver,
178 Arc::new(crate::StubClassResolver),
179 ));
180 self.psr4 = Some(map);
181 self.resolver = Some(resolver.clone());
182 // Mirror into MirDbStorage so salsa-tracked resolver queries
183 // (`db::resolve_fqcn_to_path`) see the same resolver and are
184 // invalidated on swap.
185 self.db.salsa.write().set_resolver(Some(resolver));
186 self
187 }
188
189 /// Attach a generic class resolver for projects that don't use Composer
190 /// (WordPress, Drupal, custom autoloaders, workspace-walk indexes).
191 /// Replaces any previously-set Composer-backed resolver. Automatically
192 /// wrapped with stub awareness so PHP built-ins remain resolvable.
193 pub fn with_class_resolver(mut self, resolver: Arc<dyn crate::ClassResolver>) -> Self {
194 let wrapped: Arc<dyn crate::ClassResolver> = Arc::new(crate::ChainedClassResolver::new(
195 resolver,
196 Arc::new(crate::StubClassResolver),
197 ));
198 self.db.salsa.write().set_resolver(Some(wrapped.clone()));
199 self.resolver = Some(wrapped);
200 self
201 }
202
203 pub fn with_user_stubs(mut self, files: Vec<PathBuf>, dirs: Vec<PathBuf>) -> Self {
204 self.user_stub_files = files;
205 self.user_stub_dirs = dirs;
206 self
207 }
208
209 pub fn php_version(&self) -> PhpVersion {
210 self.php_version
211 }
212
213 pub fn cache(&self) -> Option<&AnalysisCache> {
214 self.cache.as_deref()
215 }
216
217 pub fn psr4(&self) -> Option<&Psr4Map> {
218 self.psr4.as_deref()
219 }
220}
221
222mod incremental;
223mod ingest;
224mod loading;
225mod queries;
226mod stubs;
227
228/// Compute the full set of files `file` depends on: structural edges from
229/// the memoized [`crate::db::file_structural_deps`] tracked query, plus
230/// bare-FQN references recorded during body analysis (which live in the
231/// reference index and are not visible to salsa). Self-edges are excluded.
232/// Used to persist the disk cache's reverse-dep graph.
233fn file_outgoing_dependencies(db: &dyn MirDatabase, file: &str) -> HashSet<String> {
234 let mut targets: HashSet<String> = HashSet::default();
235
236 if let Some(sf) = db.lookup_source_file(file) {
237 for target in crate::db::file_structural_deps(db, sf).iter() {
238 targets.insert(target.as_ref().to_string());
239 }
240 }
241
242 // Bare-FQN references recorded during body analysis (new \Foo(),
243 // \Foo::method(), \foo()) that do not appear in use-import statements.
244 for symbol_key in db.file_referenced_symbols(file) {
245 let lookup: &str = match symbol_key.split_once("::") {
246 Some((class, _)) => class,
247 None => &symbol_key,
248 };
249 if let Some(defining_file) = db.symbol_defining_file(lookup) {
250 if defining_file.as_ref() != file {
251 targets.insert(defining_file.as_ref().to_string());
252 }
253 }
254 }
255
256 targets
257}
258
259/// AST visitor that collects class FQCN references for PSR-4 preloading.
260/// Captures identifiers from `new X`, static calls / property / constant
261/// access, type hints, and `instanceof`. Does *not* normalize via PSR-4 /
262/// imports — callers run the raw string through `resolve_name`.
263fn collect_class_refs_from_ast(program: &php_ast::owned::Program) -> Vec<String> {
264 use php_ast::ast::BinaryOp;
265 use php_ast::owned::visitor::{
266 walk_owned_catch_clause, walk_owned_expr, walk_owned_program, walk_owned_type_hint,
267 OwnedVisitor,
268 };
269 use php_ast::owned::{ExprKind, TypeHintKind};
270 use std::ops::ControlFlow;
271
272 fn owned_name_str(name: &php_ast::owned::Name) -> String {
273 let joined: String = name
274 .parts
275 .iter()
276 .map(|p| p.as_ref())
277 .collect::<Vec<&str>>()
278 .join("\\");
279 if name.kind == php_ast::ast::NameKind::FullyQualified {
280 format!("\\{joined}")
281 } else {
282 joined
283 }
284 }
285
286 struct V {
287 names: std::collections::HashSet<String>,
288 }
289 impl OwnedVisitor for V {
290 fn visit_expr(&mut self, expr: &php_ast::owned::Expr) -> ControlFlow<()> {
291 match &expr.kind {
292 ExprKind::New(n) => {
293 if let ExprKind::Identifier(name) = &n.class.kind {
294 self.names.insert(name.as_ref().to_string());
295 }
296 }
297 ExprKind::StaticMethodCall(c) => {
298 if let ExprKind::Identifier(name) = &c.class.kind {
299 self.names.insert(name.as_ref().to_string());
300 }
301 }
302 ExprKind::StaticPropertyAccess(a) => {
303 if let ExprKind::Identifier(name) = &a.class.kind {
304 self.names.insert(name.as_ref().to_string());
305 }
306 }
307 ExprKind::ClassConstAccess(a) => {
308 if let ExprKind::Identifier(name) = &a.class.kind {
309 self.names.insert(name.as_ref().to_string());
310 }
311 }
312 ExprKind::Binary(b) if b.op == BinaryOp::Instanceof => {
313 if let ExprKind::Identifier(name) = &b.right.kind {
314 self.names.insert(name.as_ref().to_string());
315 }
316 }
317 _ => {}
318 }
319 walk_owned_expr(self, expr)
320 }
321
322 fn visit_type_hint(&mut self, hint: &php_ast::owned::TypeHint) -> ControlFlow<()> {
323 if let TypeHintKind::Named(name) = &hint.kind {
324 let s = owned_name_str(name);
325 if !s.is_empty() {
326 self.names.insert(s);
327 }
328 }
329 walk_owned_type_hint(self, hint)
330 }
331
332 fn visit_catch_clause(&mut self, catch: &php_ast::owned::CatchClause) -> ControlFlow<()> {
333 for ty in catch.types.iter() {
334 self.names.insert(owned_name_str(ty));
335 }
336 walk_owned_catch_clause(self, catch)
337 }
338 }
339 let mut v = V {
340 names: std::collections::HashSet::default(),
341 };
342 let _ = walk_owned_program(&mut v, program);
343 v.names.into_iter().collect()
344}