mir_analyzer/session/ingest.rs
1use super::*;
2
3impl AnalysisSession {
4 /// Cheap clone of the salsa db for a read-only query. The lock is held
5 /// only for the duration of the clone, so concurrent readers never
6 /// serialize on each other or on writes for longer than the clone itself.
7 ///
8 /// **Internal API — exposes Salsa types.** Subject to change without
9 /// notice. Public consumers should use the typed query methods
10 /// ([`Self::definition_of`], [`Self::hover`], etc.) instead.
11 #[doc(hidden)]
12 pub fn snapshot_db(&self) -> MirDbStorage {
13 self.db.snapshot_db()
14 }
15
16 /// Commit a batch of reference locations from a db snapshot into the
17 /// session's shared maps. Called by [`crate::FileAnalyzer`] and
18 /// [`crate::BatchFileAnalyzer`] after parallel body analysis to flush the pending
19 /// buffers that accumulate in worker db clones.
20 pub(crate) fn commit_ref_locs_batch(&self, locs: Vec<RefLoc>) {
21 if locs.is_empty() {
22 return;
23 }
24 let guard = self.db.salsa.read();
25 guard.commit_reference_locations_batch(locs);
26 }
27
28 /// Run a closure with read access to a database snapshot.
29 ///
30 /// **Internal API — exposes Salsa types.** Subject to change without
31 /// notice.
32 #[doc(hidden)]
33 pub fn read<R>(&self, f: impl FnOnce(&dyn MirDatabase) -> R) -> R {
34 let db = self.snapshot_db();
35 f(&db)
36 }
37
38 /// definition-collection ingestion. Updates the file's source text in the salsa db,
39 /// runs definition collection, and ingests the resulting stub slice.
40 /// Triggers stub loading on first call. Also updates the cache's reverse-
41 /// dependency graph for `file` so cross-file invalidation stays correct
42 /// across incremental edits — without rebuilding the graph from scratch.
43 ///
44 /// If `file` was previously ingested, its old definitions and reference
45 /// locations are removed first so renames / deletions don't leave stale
46 /// state in the codebase. (Without this, long-running sessions would
47 /// accumulate dead reference-location entries indefinitely.)
48 pub fn ingest_file(&self, file: Arc<str>, source: Arc<str>) {
49 self.ensure_all_stubs();
50
51 // Snapshot symbols defined before clearing — O(symbols_in_file) with forward index.
52 let old_symbols: HashSet<Arc<str>> = {
53 let guard = self.db.salsa.read();
54 guard.file_defined_symbols(file.as_ref())
55 };
56
57 {
58 let mut guard = self.db.salsa.write();
59 guard.remove_file_definitions(file.as_ref());
60 }
61 let _file_defs =
62 self.db
63 .collect_and_ingest_file(file.clone(), source.as_ref(), self.php_version);
64
65 // Snapshot symbols after ingesting — O(symbols_in_file).
66 let new_symbols: HashSet<Arc<str>> = {
67 let guard = self.db.salsa.read();
68 guard.file_defined_symbols(file.as_ref())
69 };
70
71 // Symbols removed from this file must be tracked so dependency_graph()
72 // can still produce edges to files referencing the now-gone symbols.
73 let deleted: Vec<Arc<str>> = old_symbols.difference(&new_symbols).cloned().collect();
74 let re_added: Vec<Arc<str>> = new_symbols.difference(&old_symbols).cloned().collect();
75 if !deleted.is_empty() || !re_added.is_empty() {
76 let mut stale = self.stale_defined_symbols.write();
77 let entry = stale.entry(file.as_ref().to_string()).or_default();
78 for sym in deleted {
79 entry.insert(sym);
80 }
81 for sym in &re_added {
82 entry.remove(sym);
83 }
84 if entry.is_empty() {
85 stale.remove(file.as_ref());
86 }
87 }
88
89 self.update_reverse_deps_for(&file);
90 // Evict cached analysis results for files that depend on this one so
91 // that the next re_analyze_file call re-analyses them rather than
92 // replaying a stale cache entry. Mirrors the eviction in
93 // `re_analyze_file` (batch.rs) but applies to the ingest path used by
94 // LSP servers that edit a single file without re-analysing it.
95 if let Some(cache) = self.cache.as_deref() {
96 cache.evict_with_dependents(&[file.to_string()]);
97 }
98 // Only evict cache entries whose resolver-mapped path equals this
99 // file. FQCNs the resolver can't map (psr4 miss) stay cached — no
100 // ingest could change their fate. Avoids the per-keystroke storm
101 // where wholesale clearing forces every unresolved FQCN to re-hit
102 // the resolver on the next FileAnalyzer iteration.
103 self.evict_unresolvable_for_file(&file);
104
105 // If the workspace symbol index singleton has already been built, keep
106 // it consistent with this edit *incrementally*: subtract the file's old
107 // declarations and add its new ones (tier-aware). Body-only edits are a
108 // no-op inside `update_workspace_index_for_file` (name-only
109 // FileDeclarations equality → no singleton write → the HIGH-durability
110 // dep does not invalidate body-analysis memos). Only the rare ambiguous
111 // case (a removed name still declared by another file, where this file
112 // owned the winning entry) falls back to a full O(N) rebuild.
113 {
114 let mut guard = self.db.salsa.write();
115 if guard.workspace_symbol_index_singleton().is_some() {
116 if let Some(sf) = guard.lookup_source_file(file.as_ref()) {
117 if !guard.update_workspace_index_for_file(sf) {
118 guard.rebuild_workspace_symbol_index();
119 }
120 }
121 }
122 }
123 }
124
125 /// Register `source` as the text of `file` in the salsa input layer **without**
126 /// parsing or running definition collection.
127 ///
128 /// This is the LSP-friendly bulk-population entry point: after a workspace
129 /// scan, callers can feed every discovered file's text to the session
130 /// cheaply (an Arc clone plus a HashMap insert per file). Name resolution
131 /// then happens on demand via [`Self::load_class`], which reads
132 /// the file from disk through the configured [`crate::ClassResolver`] and
133 /// runs definition collection lazily when a class FQCN actually needs to resolve.
134 ///
135 /// Contrast with [`Self::ingest_file`], which eagerly parses, runs definition collection,
136 /// and populates the symbol index. Use `ingest_file` for files the user is
137 /// actively editing (where in-memory text diverges from disk); use
138 /// `set_file_text` for files known only through the workspace scan.
139 ///
140 /// Clears the negative cache: a previously-unresolvable FQCN may now
141 /// resolve if its defining file is among the newly-registered set.
142 pub fn set_file_text(&self, file: Arc<str>, source: Arc<str>) {
143 {
144 let mut guard = self.db.salsa.write();
145 guard.upsert_source_file(file.clone(), source);
146 }
147 self.evict_unresolvable_for_file(&file);
148 }
149
150 /// Bulk-register vendor / library files with HIGH salsa durability.
151 ///
152 /// HIGH-durability files are not expected to change during the session.
153 /// When a LOW-durability project file is edited, salsa can skip O(N)
154 /// dependency verification for every HIGH-durability file, reducing
155 /// `workspace_symbol_index` re-verification cost to O(project files only).
156 ///
157 /// Definition collection runs lazily on first symbol access; no parsing at call time.
158 pub fn set_vendor_files<I>(&self, files: I)
159 where
160 I: IntoIterator<Item = (Arc<str>, Arc<str>)>,
161 {
162 let mut guard = self.db.salsa.write();
163 for (file, source) in files {
164 guard.upsert_source_file_with_durability(file, source, salsa::Durability::HIGH);
165 }
166 }
167
168 /// Build or refresh the `WorkspaceSymbolIndexSingleton` from all currently
169 /// registered files.
170 ///
171 /// After this call, `find_class_like`, `find_function`, and
172 /// `find_global_constant` read `singleton.index(db)` — a single
173 /// `Durability::HIGH` tracked dep — instead of recomputing the full
174 /// O(N_files) dep list via `workspace_symbol_index`. On subsequent
175 /// LOW-durability (project-file) body edits the dep short-circuits in O(1).
176 ///
177 /// Call this once after all vendor + stub + project files have been
178 /// ingested (end of workspace warm-up). Also called automatically by
179 /// [`Self::ingest_file`] when a file's declared names change.
180 pub fn rebuild_workspace_symbol_index(&self) {
181 self.db.salsa.write().rebuild_workspace_symbol_index();
182 }
183
184 /// Bulk variant of [`Self::set_file_text`]. Acquires the salsa write lock
185 /// once for the entire batch instead of once per file.
186 ///
187 /// The intended LSP scan loop is:
188 /// ```text
189 /// let files: Vec<_> = walk_workspace()
190 /// .map(|path| (path, fs::read(&path).unwrap()))
191 /// .collect();
192 /// session.set_workspace_files(files);
193 /// ```
194 /// After this call, every file's source text is known to salsa. No
195 /// parsing has happened yet — Definition collection runs per file on the first
196 /// `load_class` that needs to consult it.
197 pub fn set_workspace_files<I>(&self, files: I)
198 where
199 I: IntoIterator<Item = (Arc<str>, Arc<str>)>,
200 {
201 let registered_paths: Vec<Arc<str>> = {
202 let mut guard = self.db.salsa.write();
203 files
204 .into_iter()
205 .map(|(file, source)| {
206 guard.upsert_source_file(file.clone(), source);
207 file
208 })
209 .collect()
210 };
211 if !registered_paths.is_empty() && self.resolver.is_some() {
212 self.evict_unresolvable_for_files(®istered_paths);
213 }
214 }
215
216 /// The workspace generation epoch — the rust-analyzer-style "are we up to
217 /// date" counter. Bumped whenever a file is added or removed. A consumer
218 /// records this alongside the diagnostics it publishes for a file; when the
219 /// value later advances (background indexing registered more files), those
220 /// files become candidates for re-analysis + re-publish.
221 pub fn index_generation(&self) -> u64 {
222 self.db.salsa.read().workspace_revision_value()
223 }
224
225 /// Index one bounded chunk of `(path, text)` files — the chunked background
226 /// indexing primitive.
227 ///
228 /// For each chunk this: (1) registers the files as `Durability::HIGH` salsa
229 /// inputs in one short write window, (2) parses them to prime the in-process
230 /// and on-disk declaration caches (in parallel when `parallelism ==
231 /// `[`IndexParallelism::Rayon`]; sequentially for wasm / single-thread
232 /// consumers), and (3) merges their declarations into the workspace symbol
233 /// index singleton **incrementally** (no full rebuild) so partially-indexed
234 /// symbols resolve immediately.
235 ///
236 /// The library spawns no thread: the consumer pumps chunks from its own
237 /// driver (LSP worker thread, or one chunk per wasm event-loop tick),
238 /// re-checking higher-priority work between calls. `cancel` is honoured at
239 /// chunk boundaries so an edit can abandon queued indexing cheaply.
240 ///
241 /// **Contract:** index the workspace *incrementally* through this method;
242 /// don't bulk-register the entire file set up front and then index — the
243 /// first call lazily seeds the singleton from the currently-registered set
244 /// (built-in stubs + this chunk), so keeping that initial set small keeps
245 /// the first call cheap. Call [`Self::finalize_index`] once after the last
246 /// chunk to reconcile authoritatively.
247 ///
248 /// **Responsiveness:** parsing / declaration collection happens off the
249 /// salsa write lock (on a snapshot); only the cheap symbol-map merge runs
250 /// under the lock, so the write window per chunk is short and an interactive
251 /// read on another thread blocks at most that long. Note that, per salsa's
252 /// snapshot model, a *cancellable query* in flight on another thread (e.g.
253 /// `hover`, `definition_of`, `FileAnalyzer::analyze`) when this batch takes
254 /// the write lock may unwind with `salsa::Cancelled`; a multi-threaded
255 /// consumer should catch that and retry the request (the rust-analyzer
256 /// pattern). A single-threaded consumer that interleaves requests *between*
257 /// `index_batch` calls never observes cancellation.
258 pub fn index_batch(
259 &self,
260 files: &[(Arc<str>, Arc<str>)],
261 parallelism: crate::IndexParallelism,
262 cancel: &crate::IndexCancel,
263 ) -> crate::IndexBatchOutcome {
264 if files.is_empty() || cancel.is_cancelled() {
265 return crate::IndexBatchOutcome {
266 registered: 0,
267 cancelled: cancel.is_cancelled(),
268 generation: self.index_generation(),
269 };
270 }
271 self.ensure_all_stubs();
272
273 // 1. Register the chunk as HIGH-durability inputs — one short write
274 // window, then release the lock so interactive requests interleave.
275 let sources: Vec<crate::db::SourceFile> = {
276 let mut guard = self.db.salsa.write();
277 files
278 .iter()
279 .map(|(file, source)| {
280 guard.upsert_source_file_with_durability(
281 file.clone(),
282 source.clone(),
283 salsa::Durability::HIGH,
284 )
285 })
286 .collect()
287 };
288 let registered = sources.len();
289
290 if cancel.is_cancelled() {
291 return crate::IndexBatchOutcome {
292 registered,
293 cancelled: true,
294 generation: self.index_generation(),
295 };
296 }
297
298 // Is this the seed chunk (no singleton yet)? If so we must collect decls
299 // for the whole currently-registered set (stubs + this chunk); otherwise
300 // just this chunk.
301 let seed = self
302 .db
303 .salsa
304 .read()
305 .workspace_symbol_index_singleton()
306 .is_none();
307 let snap = self.db.snapshot_db();
308 let to_collect: Vec<crate::db::SourceFile> = if seed {
309 snap.all_source_files()
310 } else {
311 sources.clone()
312 };
313
314 // 2. Collect per-file declarations OFF the write lock (on a snapshot).
315 // This is where parsing happens — crucially NOT while holding the
316 // write lock, so concurrent interactive reads are not blocked for the
317 // parse duration. Also primes the shared parse/disk caches.
318 let collect_one = |db: &crate::db::MirDbStorage, sf: crate::db::SourceFile| {
319 (sf, crate::db::collect_file_declarations(db, sf))
320 };
321 let decls: Vec<(crate::db::SourceFile, crate::db::FileDeclarations)> =
322 if parallelism == crate::IndexParallelism::Rayon {
323 use rayon::prelude::*;
324 to_collect
325 .par_iter()
326 .map_with(snap.clone(), |db, &sf| collect_one(db, sf))
327 .collect()
328 } else {
329 to_collect
330 .iter()
331 .map(|&sf| collect_one(&snap, sf))
332 .collect()
333 };
334 drop(snap);
335
336 if cancel.is_cancelled() {
337 return crate::IndexBatchOutcome {
338 registered,
339 cancelled: true,
340 generation: self.index_generation(),
341 };
342 }
343
344 // 3. Apply to the singleton under a SHORT write window — only cheap map
345 // construction / merge runs here (no parse).
346 {
347 let mut guard = self.db.salsa.write();
348 if guard.workspace_symbol_index_singleton().is_none() {
349 guard.build_workspace_index_from_decls(decls);
350 } else {
351 guard.merge_precomputed_into_workspace_index(&decls);
352 }
353 }
354
355 crate::IndexBatchOutcome {
356 registered,
357 cancelled: cancel.is_cancelled(),
358 generation: self.index_generation(),
359 }
360 }
361
362 /// Authoritative full rebuild of the workspace symbol index. Call once
363 /// after the consumer has pumped every [`Self::index_batch`] chunk (end of
364 /// warm-up) to reconcile the incrementally-merged index against the full
365 /// registered set. Cheap after indexing — every file's declarations are
366 /// already cached.
367 pub fn finalize_index(&self) {
368 self.db.salsa.write().rebuild_workspace_symbol_index();
369 }
370
371 /// Drop a file's contribution to the session: codebase definitions,
372 /// reference locations, salsa input handle, cache entry, and outgoing
373 /// reverse-dependency edges. Cache entries of *dependent* files are
374 /// also evicted (cross-file invalidation).
375 ///
376 /// Use this when a file is closed by the consumer, or before a re-ingest
377 /// of substantially changed content. (Plain re-ingest via
378 /// [`Self::ingest_file`] also drops old definitions, but does not
379 /// remove the salsa input handle — call this for full cleanup.)
380 pub fn invalidate_file(&self, file: &str) {
381 {
382 let mut guard = self.db.salsa.write();
383 guard.remove_file_definitions(file);
384 guard.remove_source_file(file);
385 }
386 // Outgoing structural edges disappear from the derived graph
387 // automatically: the file is no longer in `source_file_paths()`, so
388 // `dependency_graph()` stops iterating it.
389 // Clear stale symbol tracking for this file — it's fully gone.
390 self.stale_defined_symbols.write().remove(file);
391 if let Some(cache) = &self.cache {
392 cache.update_reverse_deps_for_file(file, &HashSet::default());
393 cache.evict_with_dependents(&[file.to_string()]);
394 }
395 // The file is gone; cache entries that previously mapped to it stay
396 // unresolvable until the file (or another with matching symbols) is
397 // ingested again. Selective evict mirrors the ingest path.
398 self.evict_unresolvable_for_file(file);
399 // Vendor files are static in the eager-index model — closing a project
400 // buffer never evicts them (no per-file pinning). Memory is bounded by
401 // the LRU on `collect_file_definitions` and the parse cache instead.
402 }
403
404 /// Number of files currently tracked in this session's salsa input set.
405 /// Stable across reads; useful for diagnostics and memory bounds checks.
406 pub fn tracked_file_count(&self) -> usize {
407 let guard = self.db.salsa.read();
408 guard.source_file_count()
409 }
410
411 // -----------------------------------------------------------------------
412 // Read-only codebase queries
413 //
414 // All take a brief lock to clone the db, then run the lookup against the
415 // owned snapshot — concurrent edits proceed without blocking.
416 // -----------------------------------------------------------------------
417}