Skip to main content

mir_analyzer/session/
ingest.rs

1use super::*;
2
3impl AnalysisSession {
4    /// Cheap clone of the salsa db for a read-only query. The lock is held
5    /// only for the duration of the clone, so concurrent readers never
6    /// serialize on each other or on writes for longer than the clone itself.
7    ///
8    /// **Internal API — exposes Salsa types.** Subject to change without
9    /// notice. Public consumers should use the typed query methods
10    /// ([`Self::definition_of`], [`Self::hover`], etc.) instead.
11    #[doc(hidden)]
12    pub fn snapshot_db(&self) -> MirDbStorage {
13        self.db.snapshot_db()
14    }
15
16    /// Commit a batch of reference locations from a db snapshot into the
17    /// session's shared maps.  Called by [`crate::FileAnalyzer`] and
18    /// [`crate::BatchFileAnalyzer`] after parallel body analysis to flush the pending
19    /// buffers that accumulate in worker db clones.
20    pub(crate) fn commit_ref_locs_batch(&self, locs: Vec<RefLoc>) {
21        if locs.is_empty() {
22            return;
23        }
24        let guard = self.db.salsa.read();
25        guard.commit_reference_locations_batch(locs);
26    }
27
28    /// Run a closure with read access to a database snapshot.
29    ///
30    /// **Internal API — exposes Salsa types.** Subject to change without
31    /// notice.
32    #[doc(hidden)]
33    pub fn read<R>(&self, f: impl FnOnce(&dyn MirDatabase) -> R) -> R {
34        let db = self.snapshot_db();
35        f(&db)
36    }
37
38    /// definition-collection ingestion. Updates the file's source text in the salsa db,
39    /// runs definition collection, and ingests the resulting stub slice.
40    /// Triggers stub loading on first call. Also updates the cache's reverse-
41    /// dependency graph for `file` so cross-file invalidation stays correct
42    /// across incremental edits — without rebuilding the graph from scratch.
43    ///
44    /// If `file` was previously ingested, its old definitions and reference
45    /// locations are removed first so renames / deletions don't leave stale
46    /// state in the codebase. (Without this, long-running sessions would
47    /// accumulate dead reference-location entries indefinitely.)
48    pub fn ingest_file(&self, file: Arc<str>, source: Arc<str>) {
49        self.ensure_all_stubs();
50
51        // Snapshot symbols defined before clearing — O(symbols_in_file) with forward index.
52        let old_symbols: HashSet<Arc<str>> = {
53            let guard = self.db.salsa.read();
54            guard.file_defined_symbols(file.as_ref())
55        };
56
57        {
58            let mut guard = self.db.salsa.write();
59            guard.remove_file_definitions(file.as_ref());
60        }
61        let _file_defs =
62            self.db
63                .collect_and_ingest_file(file.clone(), source.as_ref(), self.php_version);
64
65        // Snapshot symbols after ingesting — O(symbols_in_file).
66        let new_symbols: HashSet<Arc<str>> = {
67            let guard = self.db.salsa.read();
68            guard.file_defined_symbols(file.as_ref())
69        };
70
71        // Symbols removed from this file must be tracked so dependency_graph()
72        // can still produce edges to files referencing the now-gone symbols.
73        let deleted: Vec<Arc<str>> = old_symbols.difference(&new_symbols).cloned().collect();
74        let re_added: Vec<Arc<str>> = new_symbols.difference(&old_symbols).cloned().collect();
75        if !deleted.is_empty() || !re_added.is_empty() {
76            let mut stale = self.stale_defined_symbols.write();
77            let entry = stale.entry(file.as_ref().to_string()).or_default();
78            for sym in deleted {
79                entry.insert(sym);
80            }
81            for sym in &re_added {
82                entry.remove(sym);
83            }
84            if entry.is_empty() {
85                stale.remove(file.as_ref());
86            }
87        }
88
89        self.update_reverse_deps_for(&file);
90        // Evict cached analysis results for files that depend on this one so
91        // that the next re_analyze_file call re-analyses them rather than
92        // replaying a stale cache entry. Mirrors the eviction in
93        // `re_analyze_file` (batch.rs) but applies to the ingest path used by
94        // LSP servers that edit a single file without re-analysing it.
95        if let Some(cache) = self.cache.as_deref() {
96            cache.evict_with_dependents(&[file.to_string()]);
97        }
98        // Only evict cache entries whose resolver-mapped path equals this
99        // file. FQCNs the resolver can't map (psr4 miss) stay cached — no
100        // ingest could change their fate. Avoids the per-keystroke storm
101        // where wholesale clearing forces every unresolved FQCN to re-hit
102        // the resolver on the next FileAnalyzer iteration.
103        self.evict_unresolvable_for_file(&file);
104
105        // If the workspace symbol index singleton has already been built, keep
106        // it consistent with this edit *incrementally*: subtract the file's old
107        // declarations and add its new ones (tier-aware). Body-only edits are a
108        // no-op inside `update_workspace_index_for_file` (name-only
109        // FileDeclarations equality → no singleton write → the HIGH-durability
110        // dep does not invalidate body-analysis memos). Only the rare ambiguous
111        // case (a removed name still declared by another file, where this file
112        // owned the winning entry) falls back to a full O(N) rebuild.
113        {
114            let mut guard = self.db.salsa.write();
115            if guard.workspace_symbol_index_singleton().is_some() {
116                if let Some(sf) = guard.lookup_source_file(file.as_ref()) {
117                    if !guard.update_workspace_index_for_file(sf) {
118                        guard.rebuild_workspace_symbol_index();
119                    }
120                }
121            }
122        }
123    }
124
125    /// Register `source` as the text of `file` in the salsa input layer **without**
126    /// parsing or running definition collection.
127    ///
128    /// This is the LSP-friendly bulk-population entry point: after a workspace
129    /// scan, callers can feed every discovered file's text to the session
130    /// cheaply (an Arc clone plus a HashMap insert per file). Name resolution
131    /// then happens on demand via [`Self::load_class`], which reads
132    /// the file from disk through the configured [`crate::ClassResolver`] and
133    /// runs definition collection lazily when a class FQCN actually needs to resolve.
134    ///
135    /// Contrast with [`Self::ingest_file`], which eagerly parses, runs definition collection,
136    /// and populates the symbol index. Use `ingest_file` for files the user is
137    /// actively editing (where in-memory text diverges from disk); use
138    /// `set_file_text` for files known only through the workspace scan.
139    ///
140    /// Clears the negative cache: a previously-unresolvable FQCN may now
141    /// resolve if its defining file is among the newly-registered set.
142    pub fn set_file_text(&self, file: Arc<str>, source: Arc<str>) {
143        {
144            let mut guard = self.db.salsa.write();
145            guard.upsert_source_file(file.clone(), source);
146        }
147        self.evict_unresolvable_for_file(&file);
148    }
149
150    /// Bulk-register vendor / library files with HIGH salsa durability.
151    ///
152    /// HIGH-durability files are not expected to change during the session.
153    /// When a LOW-durability project file is edited, salsa can skip O(N)
154    /// dependency verification for every HIGH-durability file, reducing
155    /// `workspace_symbol_index` re-verification cost to O(project files only).
156    ///
157    /// Definition collection runs lazily on first symbol access; no parsing at call time.
158    pub fn set_vendor_files<I>(&self, files: I)
159    where
160        I: IntoIterator<Item = (Arc<str>, Arc<str>)>,
161    {
162        let mut guard = self.db.salsa.write();
163        for (file, source) in files {
164            guard.upsert_source_file_with_durability(file, source, salsa::Durability::HIGH);
165        }
166    }
167
168    /// Build or refresh the `WorkspaceSymbolIndexSingleton` from all currently
169    /// registered files.
170    ///
171    /// After this call, `find_class_like`, `find_function`, and
172    /// `find_global_constant` read `singleton.index(db)` — a single
173    /// `Durability::HIGH` tracked dep — instead of recomputing the full
174    /// O(N_files) dep list via `workspace_symbol_index`. On subsequent
175    /// LOW-durability (project-file) body edits the dep short-circuits in O(1).
176    ///
177    /// Call this once after all vendor + stub + project files have been
178    /// ingested (end of workspace warm-up). Also called automatically by
179    /// [`Self::ingest_file`] when a file's declared names change.
180    pub fn rebuild_workspace_symbol_index(&self) {
181        self.db.salsa.write().rebuild_workspace_symbol_index();
182    }
183
184    /// Bulk variant of [`Self::set_file_text`]. Acquires the salsa write lock
185    /// once for the entire batch instead of once per file.
186    ///
187    /// The intended LSP scan loop is:
188    /// ```text
189    /// let files: Vec<_> = walk_workspace()
190    ///     .map(|path| (path, fs::read(&path).unwrap()))
191    ///     .collect();
192    /// session.set_workspace_files(files);
193    /// ```
194    /// After this call, every file's source text is known to salsa. No
195    /// parsing has happened yet — Definition collection runs per file on the first
196    /// `load_class` that needs to consult it.
197    pub fn set_workspace_files<I>(&self, files: I)
198    where
199        I: IntoIterator<Item = (Arc<str>, Arc<str>)>,
200    {
201        let registered_paths: Vec<Arc<str>> = {
202            let mut guard = self.db.salsa.write();
203            files
204                .into_iter()
205                .map(|(file, source)| {
206                    guard.upsert_source_file(file.clone(), source);
207                    file
208                })
209                .collect()
210        };
211        if !registered_paths.is_empty() && self.resolver.is_some() {
212            self.evict_unresolvable_for_files(&registered_paths);
213        }
214    }
215
216    /// The workspace generation epoch — the rust-analyzer-style "are we up to
217    /// date" counter. Bumped whenever a file is added or removed. A consumer
218    /// records this alongside the diagnostics it publishes for a file; when the
219    /// value later advances (background indexing registered more files), those
220    /// files become candidates for re-analysis + re-publish.
221    pub fn index_generation(&self) -> u64 {
222        self.db.salsa.read().workspace_revision_value()
223    }
224
225    /// Index one bounded chunk of `(path, text)` files — the chunked background
226    /// indexing primitive.
227    ///
228    /// For each chunk this: (1) registers the files as `Durability::HIGH` salsa
229    /// inputs in one short write window, (2) parses them to prime the in-process
230    /// and on-disk declaration caches (in parallel when `parallelism ==
231    /// `[`IndexParallelism::Rayon`]; sequentially for wasm / single-thread
232    /// consumers), and (3) merges their declarations into the workspace symbol
233    /// index singleton **incrementally** (no full rebuild) so partially-indexed
234    /// symbols resolve immediately.
235    ///
236    /// The library spawns no thread: the consumer pumps chunks from its own
237    /// driver (LSP worker thread, or one chunk per wasm event-loop tick),
238    /// re-checking higher-priority work between calls. `cancel` is honoured at
239    /// chunk boundaries so an edit can abandon queued indexing cheaply.
240    ///
241    /// **Contract:** index the workspace *incrementally* through this method;
242    /// don't bulk-register the entire file set up front and then index — the
243    /// first call lazily seeds the singleton from the currently-registered set
244    /// (built-in stubs + this chunk), so keeping that initial set small keeps
245    /// the first call cheap. Call [`Self::finalize_index`] once after the last
246    /// chunk to reconcile authoritatively.
247    ///
248    /// **Responsiveness:** parsing / declaration collection happens off the
249    /// salsa write lock (on a snapshot); only the cheap symbol-map merge runs
250    /// under the lock, so the write window per chunk is short and an interactive
251    /// read on another thread blocks at most that long. Note that, per salsa's
252    /// snapshot model, a *cancellable query* in flight on another thread (e.g.
253    /// `hover`, `definition_of`, `FileAnalyzer::analyze`) when this batch takes
254    /// the write lock may unwind with `salsa::Cancelled`; a multi-threaded
255    /// consumer should catch that and retry the request (the rust-analyzer
256    /// pattern). A single-threaded consumer that interleaves requests *between*
257    /// `index_batch` calls never observes cancellation.
258    pub fn index_batch(
259        &self,
260        files: &[(Arc<str>, Arc<str>)],
261        parallelism: crate::IndexParallelism,
262        cancel: &crate::IndexCancel,
263    ) -> crate::IndexBatchOutcome {
264        if files.is_empty() || cancel.is_cancelled() {
265            return crate::IndexBatchOutcome {
266                registered: 0,
267                cancelled: cancel.is_cancelled(),
268                generation: self.index_generation(),
269            };
270        }
271        self.ensure_all_stubs();
272
273        // 1. Register the chunk as HIGH-durability inputs — one short write
274        //    window, then release the lock so interactive requests interleave.
275        let sources: Vec<crate::db::SourceFile> = {
276            let mut guard = self.db.salsa.write();
277            files
278                .iter()
279                .map(|(file, source)| {
280                    guard.upsert_source_file_with_durability(
281                        file.clone(),
282                        source.clone(),
283                        salsa::Durability::HIGH,
284                    )
285                })
286                .collect()
287        };
288        let registered = sources.len();
289
290        if cancel.is_cancelled() {
291            return crate::IndexBatchOutcome {
292                registered,
293                cancelled: true,
294                generation: self.index_generation(),
295            };
296        }
297
298        // Is this the seed chunk (no singleton yet)? If so we must collect decls
299        // for the whole currently-registered set (stubs + this chunk); otherwise
300        // just this chunk.
301        let seed = self
302            .db
303            .salsa
304            .read()
305            .workspace_symbol_index_singleton()
306            .is_none();
307        let snap = self.db.snapshot_db();
308        let to_collect: Vec<crate::db::SourceFile> = if seed {
309            snap.all_source_files()
310        } else {
311            sources.clone()
312        };
313
314        // 2. Collect per-file declarations OFF the write lock (on a snapshot).
315        //    This is where parsing happens — crucially NOT while holding the
316        //    write lock, so concurrent interactive reads are not blocked for the
317        //    parse duration. Also primes the shared parse/disk caches.
318        let collect_one = |db: &crate::db::MirDbStorage, sf: crate::db::SourceFile| {
319            (sf, crate::db::collect_file_declarations(db, sf))
320        };
321        let decls: Vec<(crate::db::SourceFile, crate::db::FileDeclarations)> =
322            if parallelism == crate::IndexParallelism::Rayon {
323                use rayon::prelude::*;
324                to_collect
325                    .par_iter()
326                    .map_with(snap.clone(), |db, &sf| collect_one(db, sf))
327                    .collect()
328            } else {
329                to_collect
330                    .iter()
331                    .map(|&sf| collect_one(&snap, sf))
332                    .collect()
333            };
334        drop(snap);
335
336        if cancel.is_cancelled() {
337            return crate::IndexBatchOutcome {
338                registered,
339                cancelled: true,
340                generation: self.index_generation(),
341            };
342        }
343
344        // 3. Apply to the singleton under a SHORT write window — only cheap map
345        //    construction / merge runs here (no parse).
346        {
347            let mut guard = self.db.salsa.write();
348            if guard.workspace_symbol_index_singleton().is_none() {
349                guard.build_workspace_index_from_decls(decls);
350            } else {
351                guard.merge_precomputed_into_workspace_index(&decls);
352            }
353        }
354
355        crate::IndexBatchOutcome {
356            registered,
357            cancelled: cancel.is_cancelled(),
358            generation: self.index_generation(),
359        }
360    }
361
362    /// Authoritative full rebuild of the workspace symbol index. Call once
363    /// after the consumer has pumped every [`Self::index_batch`] chunk (end of
364    /// warm-up) to reconcile the incrementally-merged index against the full
365    /// registered set. Cheap after indexing — every file's declarations are
366    /// already cached.
367    pub fn finalize_index(&self) {
368        self.db.salsa.write().rebuild_workspace_symbol_index();
369    }
370
371    /// Drop a file's contribution to the session: codebase definitions,
372    /// reference locations, salsa input handle, cache entry, and outgoing
373    /// reverse-dependency edges. Cache entries of *dependent* files are
374    /// also evicted (cross-file invalidation).
375    ///
376    /// Use this when a file is closed by the consumer, or before a re-ingest
377    /// of substantially changed content. (Plain re-ingest via
378    /// [`Self::ingest_file`] also drops old definitions, but does not
379    /// remove the salsa input handle — call this for full cleanup.)
380    pub fn invalidate_file(&self, file: &str) {
381        {
382            let mut guard = self.db.salsa.write();
383            guard.remove_file_definitions(file);
384            guard.remove_source_file(file);
385        }
386        // Outgoing structural edges disappear from the derived graph
387        // automatically: the file is no longer in `source_file_paths()`, so
388        // `dependency_graph()` stops iterating it.
389        // Clear stale symbol tracking for this file — it's fully gone.
390        self.stale_defined_symbols.write().remove(file);
391        if let Some(cache) = &self.cache {
392            cache.update_reverse_deps_for_file(file, &HashSet::default());
393            cache.evict_with_dependents(&[file.to_string()]);
394        }
395        // The file is gone; cache entries that previously mapped to it stay
396        // unresolvable until the file (or another with matching symbols) is
397        // ingested again. Selective evict mirrors the ingest path.
398        self.evict_unresolvable_for_file(file);
399        // Vendor files are static in the eager-index model — closing a project
400        // buffer never evicts them (no per-file pinning). Memory is bounded by
401        // the LRU on `collect_file_definitions` and the parse cache instead.
402    }
403
404    /// Number of files currently tracked in this session's salsa input set.
405    /// Stable across reads; useful for diagnostics and memory bounds checks.
406    pub fn tracked_file_count(&self) -> usize {
407        let guard = self.db.salsa.read();
408        guard.source_file_count()
409    }
410
411    // -----------------------------------------------------------------------
412    // Read-only codebase queries
413    //
414    // All take a brief lock to clone the db, then run the lookup against the
415    // owned snapshot — concurrent edits proceed without blocking.
416    // -----------------------------------------------------------------------
417}