mir_analyzer/session/ingest.rs
1use super::*;
2
3impl AnalysisSession {
4 /// Cheap clone of the salsa db for a read-only query. The lock is held
5 /// only for the duration of the clone, so concurrent readers never
6 /// serialize on each other or on writes for longer than the clone itself.
7 ///
8 /// **Internal API — exposes Salsa types.** Subject to change without
9 /// notice. Public consumers should use the typed query methods
10 /// ([`Self::definition_of`], [`Self::hover`], etc.) instead.
11 #[doc(hidden)]
12 pub fn snapshot_db(&self) -> MirDbStorage {
13 self.db.snapshot_db()
14 }
15
16 /// Register or update a [`crate::db::SourceFile`] salsa input and return its
17 /// handle, without running definition collection or reference recording.
18 ///
19 /// The write-path entry point for a host that drives this db's salsa inputs
20 /// directly (the LSP database-convergence path) and pulls definitions
21 /// lazily via tracked queries, rather than the eager [`Self::ingest_file`].
22 ///
23 /// **Internal API — exposes Salsa types.** Subject to change without notice.
24 #[doc(hidden)]
25 pub fn upsert_source_file(
26 &self,
27 path: Arc<str>,
28 text: Arc<str>,
29 durability: salsa::Durability,
30 ) -> crate::db::SourceFile {
31 self.db.upsert_source_file(path, text, durability)
32 }
33
34 /// Look up an existing [`crate::db::SourceFile`] handle by path.
35 ///
36 /// **Internal API — exposes Salsa types.** Subject to change without notice.
37 #[doc(hidden)]
38 pub fn lookup_source_file(&self, path: &str) -> Option<crate::db::SourceFile> {
39 self.db.lookup_source_file(path)
40 }
41
42 /// Mark a [`crate::db::SourceFile`] as removed from the workspace.
43 ///
44 /// **Internal API — exposes Salsa types.** Subject to change without notice.
45 #[doc(hidden)]
46 pub fn remove_source_file_input(&self, path: &str) {
47 self.db.remove_source_file(path);
48 }
49
50 /// Run `f` with exclusive `&mut` access to the shared salsa db, for a host
51 /// that owns additional salsa ingredients (inputs/tracked fns) on this db
52 /// and needs to create or mutate them. Held under the db write lock, so it
53 /// serialises with all other writers.
54 ///
55 /// **Internal API — exposes Salsa types.** Subject to change without notice.
56 #[doc(hidden)]
57 pub fn with_db_mut<R>(&self, f: impl FnOnce(&mut MirDbStorage) -> R) -> R {
58 let mut guard = self.db.salsa.write();
59 f(&mut guard)
60 }
61
62 /// Run `f` with shared access to the canonical (non-snapshot) salsa db,
63 /// under the read lock. For host-owned read-only queries that must observe
64 /// the live db rather than a clone.
65 ///
66 /// **Internal API — exposes Salsa types.** Subject to change without notice.
67 #[doc(hidden)]
68 pub fn with_db_ref<R>(&self, f: impl FnOnce(&MirDbStorage) -> R) -> R {
69 let guard = self.db.salsa.read();
70 f(&guard)
71 }
72
73 /// Commit a batch of reference locations from a db snapshot into the
74 /// session's shared maps. Called by [`crate::FileAnalyzer`] and
75 /// [`crate::BatchFileAnalyzer`] after parallel body analysis to flush the pending
76 /// buffers that accumulate in worker db clones.
77 pub(crate) fn commit_ref_locs_batch(&self, locs: Vec<RefLoc>) {
78 if locs.is_empty() {
79 return;
80 }
81 let guard = self.db.salsa.read();
82 guard.commit_reference_locations_batch(locs);
83 }
84
85 /// Run a closure with read access to a database snapshot.
86 ///
87 /// **Internal API — exposes Salsa types.** Subject to change without
88 /// notice.
89 #[doc(hidden)]
90 pub fn read<R>(&self, f: impl FnOnce(&dyn MirDatabase) -> R) -> R {
91 let db = self.snapshot_db();
92 f(&db)
93 }
94
95 /// definition-collection ingestion. Updates the file's source text in the salsa db,
96 /// runs definition collection, and ingests the resulting stub slice.
97 /// Triggers stub loading on first call. Also updates the cache's reverse-
98 /// dependency graph for `file` so cross-file invalidation stays correct
99 /// across incremental edits — without rebuilding the graph from scratch.
100 ///
101 /// If `file` was previously ingested, its old definitions and reference
102 /// locations are removed first so renames / deletions don't leave stale
103 /// state in the codebase. (Without this, long-running sessions would
104 /// accumulate dead reference-location entries indefinitely.)
105 pub fn ingest_file(&self, file: Arc<str>, source: Arc<str>) {
106 self.ensure_all_stubs();
107
108 // The symbols this file defined as of its last ingest. Read from the
109 // explicit `last_ingested_symbols` map rather than re-deriving via
110 // `file_defined_symbols` (a salsa query on the `SourceFile` input):
111 // when a host drives the db directly it may have already updated that
112 // input to the new text, which would make a re-derived "old" set equal
113 // the new set and silently drop deletions.
114 let old_symbols: HashSet<Arc<str>> = self
115 .last_ingested_symbols
116 .read()
117 .get(file.as_ref())
118 .cloned()
119 .unwrap_or_default();
120
121 {
122 let mut guard = self.db.salsa.write();
123 guard.remove_file_definitions(file.as_ref());
124 }
125 let _file_defs =
126 self.db
127 .collect_and_ingest_file(file.clone(), source.as_ref(), self.php_version);
128
129 // Snapshot symbols after ingesting — O(symbols_in_file).
130 let new_symbols: HashSet<Arc<str>> = {
131 let guard = self.db.salsa.read();
132 guard.file_defined_symbols(file.as_ref())
133 };
134 self.last_ingested_symbols
135 .write()
136 .insert(file.as_ref().to_string(), new_symbols.clone());
137
138 // Symbols removed from this file must be tracked so dependency_graph()
139 // can still produce edges to files referencing the now-gone symbols.
140 let deleted: Vec<Arc<str>> = old_symbols.difference(&new_symbols).cloned().collect();
141 let re_added: Vec<Arc<str>> = new_symbols.difference(&old_symbols).cloned().collect();
142 if !deleted.is_empty() || !re_added.is_empty() {
143 let mut stale = self.stale_defined_symbols.write();
144 let entry = stale.entry(file.as_ref().to_string()).or_default();
145 for sym in deleted {
146 entry.insert(sym);
147 }
148 for sym in &re_added {
149 entry.remove(sym);
150 }
151 if entry.is_empty() {
152 stale.remove(file.as_ref());
153 }
154 }
155
156 self.update_reverse_deps_for(&file);
157 // Evict cached analysis results for files that depend on this one so
158 // that the next re_analyze_file call re-analyses them rather than
159 // replaying a stale cache entry. Mirrors the eviction in
160 // `re_analyze_file` (batch.rs) but applies to the ingest path used by
161 // LSP servers that edit a single file without re-analysing it.
162 if let Some(cache) = self.cache.as_deref() {
163 cache.evict_with_dependents(&[file.to_string()]);
164 }
165 // Only evict cache entries whose resolver-mapped path equals this
166 // file. FQCNs the resolver can't map (psr4 miss) stay cached — no
167 // ingest could change their fate. Avoids the per-keystroke storm
168 // where wholesale clearing forces every unresolved FQCN to re-hit
169 // the resolver on the next FileAnalyzer iteration.
170 self.evict_unresolvable_for_file(&file);
171
172 // If the workspace symbol index singleton has already been built, keep
173 // it consistent with this edit *incrementally*: subtract the file's old
174 // declarations and add its new ones (tier-aware). Body-only edits are a
175 // no-op inside `update_workspace_index_for_file` (name-only
176 // FileDeclarations equality → no singleton write → the HIGH-durability
177 // dep does not invalidate body-analysis memos). Only the rare ambiguous
178 // case (a removed name still declared by another file, where this file
179 // owned the winning entry) falls back to a full O(N) rebuild.
180 {
181 let mut guard = self.db.salsa.write();
182 if guard.workspace_symbol_index_singleton().is_some() {
183 if let Some(sf) = guard.lookup_source_file(file.as_ref()) {
184 if !guard.update_workspace_index_for_file(sf) {
185 guard.rebuild_workspace_symbol_index();
186 }
187 }
188 }
189 }
190 }
191
192 /// Register `source` as the text of `file` in the salsa input layer **without**
193 /// parsing or running definition collection.
194 ///
195 /// This is the LSP-friendly bulk-population entry point: after a workspace
196 /// scan, callers can feed every discovered file's text to the session
197 /// cheaply (an Arc clone plus a HashMap insert per file). Name resolution
198 /// then happens on demand via [`Self::load_class`], which reads
199 /// the file from disk through the configured [`crate::ClassResolver`] and
200 /// runs definition collection lazily when a class FQCN actually needs to resolve.
201 ///
202 /// Contrast with [`Self::ingest_file`], which eagerly parses, runs definition collection,
203 /// and populates the symbol index. Use `ingest_file` for files the user is
204 /// actively editing (where in-memory text diverges from disk); use
205 /// `set_file_text` for files known only through the workspace scan.
206 ///
207 /// Clears the negative cache: a previously-unresolvable FQCN may now
208 /// resolve if its defining file is among the newly-registered set.
209 pub fn set_file_text(&self, file: Arc<str>, source: Arc<str>) {
210 {
211 let mut guard = self.db.salsa.write();
212 guard.upsert_source_file(file.clone(), source);
213 }
214 self.evict_unresolvable_for_file(&file);
215 }
216
217 /// Bulk-register vendor / library files with HIGH salsa durability.
218 ///
219 /// HIGH-durability files are not expected to change during the session.
220 /// When a LOW-durability project file is edited, salsa can skip O(N)
221 /// dependency verification for every HIGH-durability file, reducing
222 /// `workspace_symbol_index` re-verification cost to O(project files only).
223 ///
224 /// Definition collection runs lazily on first symbol access; no parsing at call time.
225 pub fn set_vendor_files<I>(&self, files: I)
226 where
227 I: IntoIterator<Item = (Arc<str>, Arc<str>)>,
228 {
229 let mut guard = self.db.salsa.write();
230 for (file, source) in files {
231 guard.upsert_source_file_with_durability(file, source, salsa::Durability::HIGH);
232 }
233 }
234
235 /// Build or refresh the `WorkspaceSymbolIndexSingleton` from all currently
236 /// registered files.
237 ///
238 /// After this call, `find_class_like`, `find_function`, and
239 /// `find_global_constant` read `singleton.index(db)` — a single
240 /// `Durability::HIGH` tracked dep — instead of recomputing the full
241 /// O(N_files) dep list via `workspace_symbol_index`. On subsequent
242 /// LOW-durability (project-file) body edits the dep short-circuits in O(1).
243 ///
244 /// Call this once after all vendor + stub + project files have been
245 /// ingested (end of workspace warm-up). Also called automatically by
246 /// [`Self::ingest_file`] when a file's declared names change.
247 pub fn rebuild_workspace_symbol_index(&self) {
248 self.db.salsa.write().rebuild_workspace_symbol_index();
249 }
250
251 /// Bulk variant of [`Self::set_file_text`]. Acquires the salsa write lock
252 /// once for the entire batch instead of once per file.
253 ///
254 /// The intended LSP scan loop is:
255 /// ```text
256 /// let files: Vec<_> = walk_workspace()
257 /// .map(|path| (path, fs::read(&path).unwrap()))
258 /// .collect();
259 /// session.set_workspace_files(files);
260 /// ```
261 /// After this call, every file's source text is known to salsa. No
262 /// parsing has happened yet — Definition collection runs per file on the first
263 /// `load_class` that needs to consult it.
264 pub fn set_workspace_files<I>(&self, files: I)
265 where
266 I: IntoIterator<Item = (Arc<str>, Arc<str>)>,
267 {
268 let registered_paths: Vec<Arc<str>> = {
269 let mut guard = self.db.salsa.write();
270 files
271 .into_iter()
272 .map(|(file, source)| {
273 guard.upsert_source_file(file.clone(), source);
274 file
275 })
276 .collect()
277 };
278 if !registered_paths.is_empty() && self.resolver.is_some() {
279 self.evict_unresolvable_for_files(®istered_paths);
280 }
281 }
282
283 /// The workspace generation epoch — the rust-analyzer-style "are we up to
284 /// date" counter. Bumped whenever a file is added or removed. A consumer
285 /// records this alongside the diagnostics it publishes for a file; when the
286 /// value later advances (background indexing registered more files), those
287 /// files become candidates for re-analysis + re-publish.
288 pub fn index_generation(&self) -> u64 {
289 self.db.salsa.read().workspace_revision_value()
290 }
291
292 /// Index one bounded chunk of `(path, text)` files — the chunked background
293 /// indexing primitive.
294 ///
295 /// For each chunk this: (1) registers the files as `Durability::HIGH` salsa
296 /// inputs in one short write window, (2) parses them to prime the in-process
297 /// and on-disk declaration caches (in parallel when `parallelism ==
298 /// `[`IndexParallelism::Rayon`]; sequentially for wasm / single-thread
299 /// consumers), and (3) merges their declarations into the workspace symbol
300 /// index singleton **incrementally** (no full rebuild) so partially-indexed
301 /// symbols resolve immediately.
302 ///
303 /// The library spawns no thread: the consumer pumps chunks from its own
304 /// driver (LSP worker thread, or one chunk per wasm event-loop tick),
305 /// re-checking higher-priority work between calls. `cancel` is honoured at
306 /// chunk boundaries so an edit can abandon queued indexing cheaply.
307 ///
308 /// **Contract:** index the workspace *incrementally* through this method;
309 /// don't bulk-register the entire file set up front and then index — the
310 /// first call lazily seeds the singleton from the currently-registered set
311 /// (built-in stubs + this chunk), so keeping that initial set small keeps
312 /// the first call cheap. Call [`Self::finalize_index`] once after the last
313 /// chunk to reconcile authoritatively.
314 ///
315 /// **Responsiveness:** parsing / declaration collection happens off the
316 /// salsa write lock (on a snapshot); only the cheap symbol-map merge runs
317 /// under the lock, so the write window per chunk is short and an interactive
318 /// read on another thread blocks at most that long. Note that, per salsa's
319 /// snapshot model, a *cancellable query* in flight on another thread (e.g.
320 /// `hover`, `definition_of`, `FileAnalyzer::analyze`) when this batch takes
321 /// the write lock may unwind with `salsa::Cancelled`; a multi-threaded
322 /// consumer should catch that and retry the request (the rust-analyzer
323 /// pattern). A single-threaded consumer that interleaves requests *between*
324 /// `index_batch` calls never observes cancellation.
325 pub fn index_batch(
326 &self,
327 files: &[(Arc<str>, Arc<str>)],
328 parallelism: crate::IndexParallelism,
329 cancel: &crate::IndexCancel,
330 ) -> crate::IndexBatchOutcome {
331 if files.is_empty() || cancel.is_cancelled() {
332 return crate::IndexBatchOutcome {
333 registered: 0,
334 cancelled: cancel.is_cancelled(),
335 generation: self.index_generation(),
336 };
337 }
338 self.ensure_all_stubs();
339
340 // 1. Register the chunk as HIGH-durability inputs — one short write
341 // window, then release the lock so interactive requests interleave.
342 let sources: Vec<crate::db::SourceFile> = {
343 let mut guard = self.db.salsa.write();
344 files
345 .iter()
346 .map(|(file, source)| {
347 guard.upsert_source_file_with_durability(
348 file.clone(),
349 source.clone(),
350 salsa::Durability::HIGH,
351 )
352 })
353 .collect()
354 };
355 let registered = sources.len();
356
357 if cancel.is_cancelled() {
358 return crate::IndexBatchOutcome {
359 registered,
360 cancelled: true,
361 generation: self.index_generation(),
362 };
363 }
364
365 // Is this the seed chunk (no singleton yet)? If so we must collect decls
366 // for the whole currently-registered set (stubs + this chunk); otherwise
367 // just this chunk.
368 let seed = self
369 .db
370 .salsa
371 .read()
372 .workspace_symbol_index_singleton()
373 .is_none();
374 let snap = self.db.snapshot_db();
375 let to_collect: Vec<crate::db::SourceFile> = if seed {
376 snap.all_source_files()
377 } else {
378 sources.clone()
379 };
380
381 // 2. Collect per-file declarations OFF the write lock (on a snapshot).
382 // This is where parsing happens — crucially NOT while holding the
383 // write lock, so concurrent interactive reads are not blocked for the
384 // parse duration. Also primes the shared parse/disk caches.
385 let collect_one = |db: &crate::db::MirDbStorage, sf: crate::db::SourceFile| {
386 (sf, crate::db::collect_file_declarations(db, sf))
387 };
388 let decls: Vec<(crate::db::SourceFile, crate::db::FileDeclarations)> =
389 if parallelism == crate::IndexParallelism::Rayon {
390 use rayon::prelude::*;
391 to_collect
392 .par_iter()
393 .map_with(snap.clone(), |db, &sf| collect_one(db, sf))
394 .collect()
395 } else {
396 to_collect
397 .iter()
398 .map(|&sf| collect_one(&snap, sf))
399 .collect()
400 };
401 drop(snap);
402
403 if cancel.is_cancelled() {
404 return crate::IndexBatchOutcome {
405 registered,
406 cancelled: true,
407 generation: self.index_generation(),
408 };
409 }
410
411 // 3. Apply to the singleton under a SHORT write window — only cheap map
412 // construction / merge runs here (no parse).
413 {
414 let mut guard = self.db.salsa.write();
415 if guard.workspace_symbol_index_singleton().is_none() {
416 guard.build_workspace_index_from_decls(decls);
417 } else {
418 guard.merge_precomputed_into_workspace_index(&decls);
419 }
420 }
421
422 crate::IndexBatchOutcome {
423 registered,
424 cancelled: cancel.is_cancelled(),
425 generation: self.index_generation(),
426 }
427 }
428
429 /// Authoritative full rebuild of the workspace symbol index. Call once
430 /// after the consumer has pumped every [`Self::index_batch`] chunk (end of
431 /// warm-up) to reconcile the incrementally-merged index against the full
432 /// registered set. Cheap after indexing — every file's declarations are
433 /// already cached.
434 pub fn finalize_index(&self) {
435 self.db.salsa.write().rebuild_workspace_symbol_index();
436 }
437
438 /// Drop a file's contribution to the session: codebase definitions,
439 /// reference locations, salsa input handle, cache entry, and outgoing
440 /// reverse-dependency edges. Cache entries of *dependent* files are
441 /// also evicted (cross-file invalidation).
442 ///
443 /// Use this when a file is closed by the consumer, or before a re-ingest
444 /// of substantially changed content. (Plain re-ingest via
445 /// [`Self::ingest_file`] also drops old definitions, but does not
446 /// remove the salsa input handle — call this for full cleanup.)
447 pub fn invalidate_file(&self, file: &str) {
448 {
449 let mut guard = self.db.salsa.write();
450 guard.remove_file_definitions(file);
451 guard.remove_source_file(file);
452 }
453 // Outgoing structural edges disappear from the derived graph
454 // automatically: the file is no longer in `source_file_paths()`, so
455 // `dependency_graph()` stops iterating it.
456 // Clear stale symbol tracking for this file — it's fully gone.
457 self.stale_defined_symbols.write().remove(file);
458 self.last_ingested_symbols.write().remove(file);
459 if let Some(cache) = &self.cache {
460 cache.update_reverse_deps_for_file(file, &HashSet::default());
461 cache.evict_with_dependents(&[file.to_string()]);
462 }
463 // The file is gone; cache entries that previously mapped to it stay
464 // unresolvable until the file (or another with matching symbols) is
465 // ingested again. Selective evict mirrors the ingest path.
466 self.evict_unresolvable_for_file(file);
467 // Vendor files are static in the eager-index model — closing a project
468 // buffer never evicts them (no per-file pinning). Memory is bounded by
469 // the LRU on `collect_file_definitions` and the parse cache instead.
470 }
471
472 /// Number of files currently tracked in this session's salsa input set.
473 /// Stable across reads; useful for diagnostics and memory bounds checks.
474 pub fn tracked_file_count(&self) -> usize {
475 let guard = self.db.salsa.read();
476 guard.source_file_count()
477 }
478
479 // -----------------------------------------------------------------------
480 // Read-only codebase queries
481 //
482 // All take a brief lock to clone the db, then run the lookup against the
483 // owned snapshot — concurrent edits proceed without blocking.
484 // -----------------------------------------------------------------------
485}