Skip to main content

mir_analyzer/session/
ingest.rs

1use super::*;
2
3impl AnalysisSession {
4    /// Cheap clone of the salsa db for a read-only query. The lock is held
5    /// only for the duration of the clone, so concurrent readers never
6    /// serialize on each other or on writes for longer than the clone itself.
7    ///
8    /// **Internal API — exposes Salsa types.** Subject to change without
9    /// notice. Public consumers should use the typed query methods
10    /// ([`Self::definition_of`], [`Self::hover`], etc.) instead.
11    #[doc(hidden)]
12    pub fn snapshot_db(&self) -> MirDbStorage {
13        self.db.snapshot_db()
14    }
15
16    /// Register or update a [`crate::db::SourceFile`] salsa input and return its
17    /// handle, without running definition collection or reference recording.
18    ///
19    /// The write-path entry point for a host that drives this db's salsa inputs
20    /// directly (the LSP database-convergence path) and pulls definitions
21    /// lazily via tracked queries, rather than the eager [`Self::ingest_file`].
22    ///
23    /// **Internal API — exposes Salsa types.** Subject to change without notice.
24    #[doc(hidden)]
25    pub fn upsert_source_file(
26        &self,
27        path: Arc<str>,
28        text: Arc<str>,
29        durability: salsa::Durability,
30    ) -> crate::db::SourceFile {
31        self.db.upsert_source_file(path, text, durability)
32    }
33
34    /// Look up an existing [`crate::db::SourceFile`] handle by path.
35    ///
36    /// **Internal API — exposes Salsa types.** Subject to change without notice.
37    #[doc(hidden)]
38    pub fn lookup_source_file(&self, path: &str) -> Option<crate::db::SourceFile> {
39        self.db.lookup_source_file(path)
40    }
41
42    /// Mark a [`crate::db::SourceFile`] as removed from the workspace.
43    ///
44    /// **Internal API — exposes Salsa types.** Subject to change without notice.
45    #[doc(hidden)]
46    pub fn remove_source_file_input(&self, path: &str) {
47        self.db.remove_source_file(path);
48    }
49
50    /// Run `f` with exclusive `&mut` access to the shared salsa db, for a host
51    /// that owns additional salsa ingredients (inputs/tracked fns) on this db
52    /// and needs to create or mutate them. Held under the db write lock, so it
53    /// serialises with all other writers.
54    ///
55    /// **Internal API — exposes Salsa types.** Subject to change without notice.
56    #[doc(hidden)]
57    pub fn with_db_mut<R>(&self, f: impl FnOnce(&mut MirDbStorage) -> R) -> R {
58        let mut guard = self.db.salsa.write();
59        f(&mut guard)
60    }
61
62    /// Run `f` with shared access to the canonical (non-snapshot) salsa db,
63    /// under the read lock. For host-owned read-only queries that must observe
64    /// the live db rather than a clone.
65    ///
66    /// **Internal API — exposes Salsa types.** Subject to change without notice.
67    #[doc(hidden)]
68    pub fn with_db_ref<R>(&self, f: impl FnOnce(&MirDbStorage) -> R) -> R {
69        let guard = self.db.salsa.read();
70        f(&guard)
71    }
72
73    /// Commit a batch of reference locations from a db snapshot into the
74    /// session's shared maps.  Called by [`crate::FileAnalyzer`] and
75    /// [`crate::BatchFileAnalyzer`] after parallel body analysis to flush the pending
76    /// buffers that accumulate in worker db clones.
77    pub(crate) fn commit_ref_locs_batch(&self, locs: Vec<RefLoc>) {
78        if locs.is_empty() {
79            return;
80        }
81        let guard = self.db.salsa.read();
82        guard.commit_reference_locations_batch(locs);
83    }
84
85    /// Run a closure with read access to a database snapshot.
86    ///
87    /// **Internal API — exposes Salsa types.** Subject to change without
88    /// notice.
89    #[doc(hidden)]
90    pub fn read<R>(&self, f: impl FnOnce(&dyn MirDatabase) -> R) -> R {
91        let db = self.snapshot_db();
92        f(&db)
93    }
94
95    /// definition-collection ingestion. Updates the file's source text in the salsa db,
96    /// runs definition collection, and ingests the resulting stub slice.
97    /// Triggers stub loading on first call. Also updates the cache's reverse-
98    /// dependency graph for `file` so cross-file invalidation stays correct
99    /// across incremental edits — without rebuilding the graph from scratch.
100    ///
101    /// If `file` was previously ingested, its old definitions and reference
102    /// locations are removed first so renames / deletions don't leave stale
103    /// state in the codebase. (Without this, long-running sessions would
104    /// accumulate dead reference-location entries indefinitely.)
105    pub fn ingest_file(&self, file: Arc<str>, source: Arc<str>) {
106        self.ensure_all_stubs();
107
108        // The symbols this file defined as of its last ingest. Read from the
109        // explicit `last_ingested_symbols` map rather than re-deriving via
110        // `file_defined_symbols` (a salsa query on the `SourceFile` input):
111        // when a host drives the db directly it may have already updated that
112        // input to the new text, which would make a re-derived "old" set equal
113        // the new set and silently drop deletions.
114        let old_symbols: HashSet<Arc<str>> = self
115            .last_ingested_symbols
116            .read()
117            .get(file.as_ref())
118            .cloned()
119            .unwrap_or_default();
120
121        {
122            let mut guard = self.db.salsa.write();
123            guard.remove_file_definitions(file.as_ref());
124        }
125        let _file_defs =
126            self.db
127                .collect_and_ingest_file(file.clone(), source.as_ref(), self.php_version);
128
129        // Snapshot symbols after ingesting — O(symbols_in_file).
130        let new_symbols: HashSet<Arc<str>> = {
131            let guard = self.db.salsa.read();
132            guard.file_defined_symbols(file.as_ref())
133        };
134        self.last_ingested_symbols
135            .write()
136            .insert(file.as_ref().to_string(), new_symbols.clone());
137
138        // Symbols removed from this file must be tracked so dependency_graph()
139        // can still produce edges to files referencing the now-gone symbols.
140        let deleted: Vec<Arc<str>> = old_symbols.difference(&new_symbols).cloned().collect();
141        let re_added: Vec<Arc<str>> = new_symbols.difference(&old_symbols).cloned().collect();
142        if !deleted.is_empty() || !re_added.is_empty() {
143            let mut stale = self.stale_defined_symbols.write();
144            let entry = stale.entry(file.as_ref().to_string()).or_default();
145            for sym in deleted {
146                entry.insert(sym);
147            }
148            for sym in &re_added {
149                entry.remove(sym);
150            }
151            if entry.is_empty() {
152                stale.remove(file.as_ref());
153            }
154        }
155
156        self.update_reverse_deps_for(&file);
157        // Evict cached analysis results for files that depend on this one so
158        // that the next re_analyze_file call re-analyses them rather than
159        // replaying a stale cache entry. Mirrors the eviction in
160        // `re_analyze_file` (batch.rs) but applies to the ingest path used by
161        // LSP servers that edit a single file without re-analysing it.
162        if let Some(cache) = self.cache.as_deref() {
163            cache.evict_with_dependents(&[file.to_string()]);
164        }
165        // Only evict cache entries whose resolver-mapped path equals this
166        // file. FQCNs the resolver can't map (psr4 miss) stay cached — no
167        // ingest could change their fate. Avoids the per-keystroke storm
168        // where wholesale clearing forces every unresolved FQCN to re-hit
169        // the resolver on the next FileAnalyzer iteration.
170        self.evict_unresolvable_for_file(&file);
171
172        // If the workspace symbol index singleton has already been built, keep
173        // it consistent with this edit *incrementally*: subtract the file's old
174        // declarations and add its new ones (tier-aware). Body-only edits are a
175        // no-op inside `update_workspace_index_for_file` (name-only
176        // FileDeclarations equality → no singleton write → the HIGH-durability
177        // dep does not invalidate body-analysis memos). Only the rare ambiguous
178        // case (a removed name still declared by another file, where this file
179        // owned the winning entry) falls back to a full O(N) rebuild.
180        {
181            let mut guard = self.db.salsa.write();
182            if guard.workspace_symbol_index_singleton().is_some() {
183                if let Some(sf) = guard.lookup_source_file(file.as_ref()) {
184                    if !guard.update_workspace_index_for_file(sf) {
185                        guard.rebuild_workspace_symbol_index();
186                    }
187                }
188            }
189        }
190    }
191
192    /// Register `source` as the text of `file` in the salsa input layer **without**
193    /// parsing or running definition collection.
194    ///
195    /// This is the LSP-friendly bulk-population entry point: after a workspace
196    /// scan, callers can feed every discovered file's text to the session
197    /// cheaply (an Arc clone plus a HashMap insert per file). Name resolution
198    /// then happens on demand via [`Self::load_class`], which reads
199    /// the file from disk through the configured [`crate::ClassResolver`] and
200    /// runs definition collection lazily when a class FQCN actually needs to resolve.
201    ///
202    /// Contrast with [`Self::ingest_file`], which eagerly parses, runs definition collection,
203    /// and populates the symbol index. Use `ingest_file` for files the user is
204    /// actively editing (where in-memory text diverges from disk); use
205    /// `set_file_text` for files known only through the workspace scan.
206    ///
207    /// Clears the negative cache: a previously-unresolvable FQCN may now
208    /// resolve if its defining file is among the newly-registered set.
209    pub fn set_file_text(&self, file: Arc<str>, source: Arc<str>) {
210        {
211            let mut guard = self.db.salsa.write();
212            guard.upsert_source_file(file.clone(), source);
213        }
214        self.evict_unresolvable_for_file(&file);
215    }
216
217    /// Bulk-register vendor / library files with HIGH salsa durability.
218    ///
219    /// HIGH-durability files are not expected to change during the session.
220    /// When a LOW-durability project file is edited, salsa can skip O(N)
221    /// dependency verification for every HIGH-durability file, reducing
222    /// `workspace_symbol_index` re-verification cost to O(project files only).
223    ///
224    /// Definition collection runs lazily on first symbol access; no parsing at call time.
225    pub fn set_vendor_files<I>(&self, files: I)
226    where
227        I: IntoIterator<Item = (Arc<str>, Arc<str>)>,
228    {
229        let mut guard = self.db.salsa.write();
230        for (file, source) in files {
231            guard.upsert_source_file_with_durability(file, source, salsa::Durability::HIGH);
232        }
233    }
234
235    /// Build or refresh the `WorkspaceSymbolIndexSingleton` from all currently
236    /// registered files.
237    ///
238    /// After this call, `find_class_like`, `find_function`, and
239    /// `find_global_constant` read `singleton.index(db)` — a single
240    /// `Durability::HIGH` tracked dep — instead of recomputing the full
241    /// O(N_files) dep list via `workspace_symbol_index`. On subsequent
242    /// LOW-durability (project-file) body edits the dep short-circuits in O(1).
243    ///
244    /// Call this once after all vendor + stub + project files have been
245    /// ingested (end of workspace warm-up). Also called automatically by
246    /// [`Self::ingest_file`] when a file's declared names change.
247    pub fn rebuild_workspace_symbol_index(&self) {
248        self.db.salsa.write().rebuild_workspace_symbol_index();
249    }
250
251    /// Bulk variant of [`Self::set_file_text`]. Acquires the salsa write lock
252    /// once for the entire batch instead of once per file.
253    ///
254    /// The intended LSP scan loop is:
255    /// ```text
256    /// let files: Vec<_> = walk_workspace()
257    ///     .map(|path| (path, fs::read(&path).unwrap()))
258    ///     .collect();
259    /// session.set_workspace_files(files);
260    /// ```
261    /// After this call, every file's source text is known to salsa. No
262    /// parsing has happened yet — Definition collection runs per file on the first
263    /// `load_class` that needs to consult it.
264    pub fn set_workspace_files<I>(&self, files: I)
265    where
266        I: IntoIterator<Item = (Arc<str>, Arc<str>)>,
267    {
268        let registered_paths: Vec<Arc<str>> = {
269            let mut guard = self.db.salsa.write();
270            files
271                .into_iter()
272                .map(|(file, source)| {
273                    guard.upsert_source_file(file.clone(), source);
274                    file
275                })
276                .collect()
277        };
278        if !registered_paths.is_empty() && self.resolver.is_some() {
279            self.evict_unresolvable_for_files(&registered_paths);
280        }
281    }
282
283    /// The workspace generation epoch — the rust-analyzer-style "are we up to
284    /// date" counter. Bumped whenever a file is added or removed. A consumer
285    /// records this alongside the diagnostics it publishes for a file; when the
286    /// value later advances (background indexing registered more files), those
287    /// files become candidates for re-analysis + re-publish.
288    pub fn index_generation(&self) -> u64 {
289        self.db.salsa.read().workspace_revision_value()
290    }
291
292    /// Index one bounded chunk of `(path, text)` files — the chunked background
293    /// indexing primitive.
294    ///
295    /// For each chunk this: (1) registers the files as `Durability::HIGH` salsa
296    /// inputs in one short write window, (2) parses them to prime the in-process
297    /// and on-disk declaration caches (in parallel when `parallelism ==
298    /// `[`IndexParallelism::Rayon`]; sequentially for wasm / single-thread
299    /// consumers), and (3) merges their declarations into the workspace symbol
300    /// index singleton **incrementally** (no full rebuild) so partially-indexed
301    /// symbols resolve immediately.
302    ///
303    /// The library spawns no thread: the consumer pumps chunks from its own
304    /// driver (LSP worker thread, or one chunk per wasm event-loop tick),
305    /// re-checking higher-priority work between calls. `cancel` is honoured at
306    /// chunk boundaries so an edit can abandon queued indexing cheaply.
307    ///
308    /// **Contract:** index the workspace *incrementally* through this method;
309    /// don't bulk-register the entire file set up front and then index — the
310    /// first call lazily seeds the singleton from the currently-registered set
311    /// (built-in stubs + this chunk), so keeping that initial set small keeps
312    /// the first call cheap. Call [`Self::finalize_index`] once after the last
313    /// chunk to reconcile authoritatively.
314    ///
315    /// **Responsiveness:** parsing / declaration collection happens off the
316    /// salsa write lock (on a snapshot); only the cheap symbol-map merge runs
317    /// under the lock, so the write window per chunk is short and an interactive
318    /// read on another thread blocks at most that long. Note that, per salsa's
319    /// snapshot model, a *cancellable query* in flight on another thread (e.g.
320    /// `hover`, `definition_of`, `FileAnalyzer::analyze`) when this batch takes
321    /// the write lock may unwind with `salsa::Cancelled`; a multi-threaded
322    /// consumer should catch that and retry the request (the rust-analyzer
323    /// pattern). A single-threaded consumer that interleaves requests *between*
324    /// `index_batch` calls never observes cancellation.
325    pub fn index_batch(
326        &self,
327        files: &[(Arc<str>, Arc<str>)],
328        parallelism: crate::IndexParallelism,
329        cancel: &crate::IndexCancel,
330    ) -> crate::IndexBatchOutcome {
331        if files.is_empty() || cancel.is_cancelled() {
332            return crate::IndexBatchOutcome {
333                registered: 0,
334                cancelled: cancel.is_cancelled(),
335                generation: self.index_generation(),
336            };
337        }
338        self.ensure_all_stubs();
339
340        // 1. Register the chunk as HIGH-durability inputs — one short write
341        //    window, then release the lock so interactive requests interleave.
342        let sources: Vec<crate::db::SourceFile> = {
343            let mut guard = self.db.salsa.write();
344            files
345                .iter()
346                .map(|(file, source)| {
347                    guard.upsert_source_file_with_durability(
348                        file.clone(),
349                        source.clone(),
350                        salsa::Durability::HIGH,
351                    )
352                })
353                .collect()
354        };
355        let registered = sources.len();
356
357        if cancel.is_cancelled() {
358            return crate::IndexBatchOutcome {
359                registered,
360                cancelled: true,
361                generation: self.index_generation(),
362            };
363        }
364
365        // Is this the seed chunk (no singleton yet)? If so we must collect decls
366        // for the whole currently-registered set (stubs + this chunk); otherwise
367        // just this chunk.
368        let seed = self
369            .db
370            .salsa
371            .read()
372            .workspace_symbol_index_singleton()
373            .is_none();
374        let snap = self.db.snapshot_db();
375        let to_collect: Vec<crate::db::SourceFile> = if seed {
376            snap.all_source_files()
377        } else {
378            sources.clone()
379        };
380
381        // 2. Collect per-file declarations OFF the write lock (on a snapshot).
382        //    This is where parsing happens — crucially NOT while holding the
383        //    write lock, so concurrent interactive reads are not blocked for the
384        //    parse duration. Also primes the shared parse/disk caches.
385        let collect_one = |db: &crate::db::MirDbStorage, sf: crate::db::SourceFile| {
386            (sf, crate::db::collect_file_declarations(db, sf))
387        };
388        let decls: Vec<(crate::db::SourceFile, crate::db::FileDeclarations)> =
389            if parallelism == crate::IndexParallelism::Rayon {
390                use rayon::prelude::*;
391                to_collect
392                    .par_iter()
393                    .map_with(snap.clone(), |db, &sf| collect_one(db, sf))
394                    .collect()
395            } else {
396                to_collect
397                    .iter()
398                    .map(|&sf| collect_one(&snap, sf))
399                    .collect()
400            };
401        drop(snap);
402
403        if cancel.is_cancelled() {
404            return crate::IndexBatchOutcome {
405                registered,
406                cancelled: true,
407                generation: self.index_generation(),
408            };
409        }
410
411        // 3. Apply to the singleton under a SHORT write window — only cheap map
412        //    construction / merge runs here (no parse).
413        {
414            let mut guard = self.db.salsa.write();
415            if guard.workspace_symbol_index_singleton().is_none() {
416                guard.build_workspace_index_from_decls(decls);
417            } else {
418                guard.merge_precomputed_into_workspace_index(&decls);
419            }
420        }
421
422        crate::IndexBatchOutcome {
423            registered,
424            cancelled: cancel.is_cancelled(),
425            generation: self.index_generation(),
426        }
427    }
428
429    /// Authoritative full rebuild of the workspace symbol index. Call once
430    /// after the consumer has pumped every [`Self::index_batch`] chunk (end of
431    /// warm-up) to reconcile the incrementally-merged index against the full
432    /// registered set. Cheap after indexing — every file's declarations are
433    /// already cached.
434    pub fn finalize_index(&self) {
435        self.db.salsa.write().rebuild_workspace_symbol_index();
436    }
437
438    /// Drop a file's contribution to the session: codebase definitions,
439    /// reference locations, salsa input handle, cache entry, and outgoing
440    /// reverse-dependency edges. Cache entries of *dependent* files are
441    /// also evicted (cross-file invalidation).
442    ///
443    /// Use this when a file is closed by the consumer, or before a re-ingest
444    /// of substantially changed content. (Plain re-ingest via
445    /// [`Self::ingest_file`] also drops old definitions, but does not
446    /// remove the salsa input handle — call this for full cleanup.)
447    pub fn invalidate_file(&self, file: &str) {
448        {
449            let mut guard = self.db.salsa.write();
450            guard.remove_file_definitions(file);
451            guard.remove_source_file(file);
452        }
453        // Outgoing structural edges disappear from the derived graph
454        // automatically: the file is no longer in `source_file_paths()`, so
455        // `dependency_graph()` stops iterating it.
456        // Clear stale symbol tracking for this file — it's fully gone.
457        self.stale_defined_symbols.write().remove(file);
458        self.last_ingested_symbols.write().remove(file);
459        if let Some(cache) = &self.cache {
460            cache.update_reverse_deps_for_file(file, &HashSet::default());
461            cache.evict_with_dependents(&[file.to_string()]);
462        }
463        // The file is gone; cache entries that previously mapped to it stay
464        // unresolvable until the file (or another with matching symbols) is
465        // ingested again. Selective evict mirrors the ingest path.
466        self.evict_unresolvable_for_file(file);
467        // Vendor files are static in the eager-index model — closing a project
468        // buffer never evicts them (no per-file pinning). Memory is bounded by
469        // the LRU on `collect_file_definitions` and the parse cache instead.
470    }
471
472    /// Number of files currently tracked in this session's salsa input set.
473    /// Stable across reads; useful for diagnostics and memory bounds checks.
474    pub fn tracked_file_count(&self) -> usize {
475        let guard = self.db.salsa.read();
476        guard.source_file_count()
477    }
478
479    // -----------------------------------------------------------------------
480    // Read-only codebase queries
481    //
482    // All take a brief lock to clone the db, then run the lookup against the
483    // owned snapshot — concurrent edits proceed without blocking.
484    // -----------------------------------------------------------------------
485}