Skip to main content

php_lsp/
document_store.rs

1use std::sync::atomic::{AtomicU32, Ordering};
2use std::sync::{Arc, Mutex};
3
4use dashmap::DashMap;
5use salsa::Setter;
6use tower_lsp::lsp_types::{Diagnostic, SemanticToken, Url};
7
8use crate::ast::ParsedDoc;
9use crate::db::analysis::AnalysisHost;
10use crate::db::input::{FileId, SourceFile, Workspace};
11use crate::file_index::FileIndex;
12
13/// Upper bound on `parsed_cache` entries. Matched to the `lru = 2048` on
14/// `parsed_doc` in `src/db/parse.rs` so the secondary Arc retention can't
15/// pin more ASTs alive than salsa's memo already bounds. Exceeding this
16/// triggers probabilistic eviction (see [`DocumentStore::insert_parsed_cache`]).
17const PARSED_CACHE_CAP: usize = 2048;
18
19pub struct DocumentStore {
20    /// Cached semantic tokens per document: (result_id, tokens).
21    /// Used to compute incremental deltas for `textDocument/semanticTokens/full/delta`.
22    token_cache: DashMap<Url, (String, Vec<SemanticToken>)>,
23
24    // ── Salsa-input storage ────────────────────────────────────────────────
25    // Phase E4: `DocumentStore` is now a pure salsa-input wrapper. Open-file
26    // state (live text, version token, parse-diagnostics cache) lives on
27    // `Backend` in its `open_files` map; the set of files tracked by salsa
28    // is exactly `source_files.keys()`.
29    /// Mutex — held briefly to clone the database for reads and to mutate
30    /// it for writes. Per-thread salsa state (`zalsa_local`) is `!Sync`,
31    /// which rules out `RwLock<AnalysisHost>`. Readers instead snapshot the
32    /// db (cheap — storage is `Arc<Zalsa>`) and run queries on the clone
33    /// with the lock released, giving real read/read parallelism. Writers
34    /// during an in-flight read bump the shared revision; the reader raises
35    /// `salsa::Cancelled` on its next query call and `snapshot_query` below
36    /// retries with a fresh snapshot.
37    host: Mutex<AnalysisHost>,
38    /// `Url -> SourceFile` lookup. The `SourceFile` is a salsa-id handle; the
39    /// underlying input lives in `host.db` for the lifetime of the database.
40    source_files: DashMap<Url, SourceFile>,
41    /// G2: lock-free mirror of each `SourceFile`'s last-set text. Lets
42    /// `mirror_text` dedup repeated no-op updates (common during workspace
43    /// scan and `did_open` for already-indexed files) without taking
44    /// `host.lock()`. Updated inside the mutex whenever the salsa input is
45    /// set, so it is always consistent with the salsa revision for the
46    /// purposes of byte-equality comparison.
47    text_cache: DashMap<Url, Arc<str>>,
48    /// G3: cross-revision read-through cache for `parsed_doc`. Keyed on
49    /// `Url`, stored value is `(text_arc, Arc<ParsedDoc>)` — the text Arc
50    /// captured at parse time. On read, compare against `text_cache[uri]`
51    /// via `Arc::ptr_eq`; a match guarantees the cached ParsedDoc matches
52    /// the current salsa revision's text input, so the query can return
53    /// without snapshotting the db or invoking salsa at all. A miss
54    /// (different pointer, stale or absent entry) falls through to
55    /// `snapshot_query`. Self-evicts on text change — no writer-side
56    /// invalidation is required, which avoids the TOCTOU window where a
57    /// concurrent reader could re-insert a stale entry after a writer's
58    /// eviction.
59    ///
60    /// Size-bounded at [`PARSED_CACHE_CAP`] — see `insert_parsed_cache`.
61    /// Without this bound, every workspace file read-through would pin
62    /// its bumpalo arena alive regardless of salsa's `lru = 2048` on the
63    /// `parsed_doc` memo.
64    parsed_cache: DashMap<Url, (Arc<str>, Arc<ParsedDoc>)>,
65    /// Monotonic allocator for `FileId`s (one per ever-seen URL).
66    next_file_id: AtomicU32,
67    /// Workspace salsa input. Tracks the full set of `SourceFile`s that
68    /// participate in whole-program queries (`codebase`, `file_refs`).
69    /// Re-synced from `source_files` on demand by `sync_workspace_files`.
70    workspace: Workspace,
71}
72
73impl Default for DocumentStore {
74    fn default() -> Self {
75        Self::new()
76    }
77}
78
79impl DocumentStore {
80    pub fn new() -> Self {
81        let host = AnalysisHost::new();
82        let workspace = Workspace::new(
83            host.db(),
84            Arc::<[SourceFile]>::from(Vec::new()),
85            mir_analyzer::PhpVersion::LATEST,
86        );
87        DocumentStore {
88            token_cache: DashMap::new(),
89            host: Mutex::new(host),
90            source_files: DashMap::new(),
91            text_cache: DashMap::new(),
92            parsed_cache: DashMap::new(),
93            next_file_id: AtomicU32::new(0),
94            workspace,
95        }
96    }
97
98    /// Mirror a file's current text into the salsa layer. Creates the
99    /// `SourceFile` input on first sight, otherwise updates `text` on the
100    /// existing input (bumping the salsa revision so downstream queries
101    /// invalidate). Returns the `SourceFile` handle for this `uri`.
102    ///
103    /// B4a: called from every text-changing mutation site. Reads still come
104    /// from the legacy `map` — this mirror is not yet observed by production
105    /// code paths.
106    pub fn mirror_text(&self, uri: &Url, text: &str) -> SourceFile {
107        // G2 fast path: compare against the lock-free text cache. When the
108        // new text byte-matches what we already mirrored, skip the host
109        // mutex entirely. Common during workspace scan + `did_open` for
110        // unchanged files, where most threads would otherwise serialise on
111        // `host.lock()` just to confirm a no-op. Cache is only populated
112        // after the matching `source_files` entry, so a cache hit implies
113        // the handle exists.
114        if let Some(cached) = self.text_cache.get(uri)
115            && **cached == *text
116            && let Some(sf) = self.source_files.get(uri)
117        {
118            return *sf;
119        }
120
121        let text_arc: Arc<str> = Arc::from(text);
122        if let Some(existing) = self.source_files.get(uri) {
123            let sf = *existing;
124            drop(existing);
125            // Slow path: another writer may have raced us; re-check inside
126            // the mutex. Salsa's `set_text` unconditionally bumps the
127            // revision, so every spurious setter invalidates every
128            // downstream query.
129            let mut host = self.host.lock().unwrap();
130            let current: Arc<str> = sf.text(host.db());
131            if *current == *text_arc {
132                drop(host);
133                self.text_cache.insert(uri.clone(), current);
134                return sf;
135            }
136            sf.set_text(host.db_mut()).to(text_arc.clone());
137            // Phase K2: any text change invalidates a previously-seeded
138            // cached slice. Clearing it forces the fresh-parse branch of
139            // `file_definitions` on the next query, which is correct —
140            // the cached slice no longer matches the new text.
141            sf.set_cached_slice(host.db_mut()).to(None);
142            drop(host);
143            self.text_cache.insert(uri.clone(), text_arc);
144            sf
145        } else {
146            let id = FileId(self.next_file_id.fetch_add(1, Ordering::Relaxed));
147            let uri_arc: Arc<str> = Arc::from(uri.as_str());
148            let sf = {
149                let host = self.host.lock().unwrap();
150                SourceFile::new(host.db(), id, uri_arc, text_arc.clone(), None)
151            };
152            self.source_files.insert(uri.clone(), sf);
153            self.text_cache.insert(uri.clone(), text_arc);
154            sf
155        }
156    }
157
158    /// Return the salsa `SourceFile` handle for a URL, if one exists.
159    pub fn source_file(&self, uri: &Url) -> Option<SourceFile> {
160        self.source_files.get(uri).map(|e| *e)
161    }
162
163    /// Phase K2: pre-seed a `StubSlice` loaded from the on-disk cache
164    /// onto the `SourceFile` input for `uri`. The next `file_definitions`
165    /// call for that file returns the cached slice directly, skipping
166    /// parse + `DefinitionCollector`.
167    ///
168    /// Must be called **before** any `file_definitions(db, sf)` call for
169    /// this file — otherwise salsa has already memoized the fresh-parse
170    /// result and setting `cached_slice` now would only bump the revision
171    /// without actually using the cache. In practice the workspace-scan
172    /// path seeds immediately after `mirror_text` and before any query
173    /// runs.
174    ///
175    /// Returns `false` when `uri` was not mirrored (caller should mirror
176    /// first); returns `true` on success.
177    pub fn seed_cached_slice(
178        &self,
179        uri: &Url,
180        slice: Arc<mir_codebase::storage::StubSlice>,
181    ) -> bool {
182        let Some(sf) = self.source_files.get(uri).map(|e| *e) else {
183            return false;
184        };
185        let mut host = self.host.lock().unwrap();
186        sf.set_cached_slice(host.db_mut()).to(Some(slice));
187        true
188    }
189
190    /// Run `f` with a borrow of the `AnalysisHost`. Used by tests and by the
191    /// upcoming `*_salsa` accessors to query the salsa layer.
192    pub fn with_host<R>(&self, f: impl FnOnce(&AnalysisHost) -> R) -> R {
193        let host = self.host.lock().unwrap();
194        f(&host)
195    }
196
197    /// Phase E1: take a brief lock, clone the salsa database, release the
198    /// lock. Queries then run on the cloned `RootDatabase` without blocking
199    /// writers or other readers. Salsa's `Storage<Self>` is reference-counted
200    /// (`Arc<Zalsa>`), so the clone is cheap — it shares memoized data and
201    /// the cancellation flag with the host's db.
202    fn snapshot_db(&self) -> crate::db::analysis::RootDatabase {
203        let host = self.host.lock().unwrap();
204        host.db().clone()
205    }
206
207    /// Run a query on a fresh snapshot, catching `salsa::Cancelled` (raised
208    /// when a concurrent writer advances the revision) and retrying with a
209    /// new snapshot. Writers hold the mutex only long enough to bump input
210    /// values, so a handful of retries is more than enough in practice; we
211    /// cap at 8 to avoid pathological livelock under sustained write pressure.
212    fn snapshot_query<R>(&self, f: impl Fn(&crate::db::analysis::RootDatabase) -> R + Clone) -> R {
213        use std::panic::AssertUnwindSafe;
214        for _ in 0..8 {
215            let db = self.snapshot_db();
216            let f = f.clone();
217            match salsa::Cancelled::catch(AssertUnwindSafe(move || f(&db))) {
218                Ok(r) => return r,
219                Err(_) => continue,
220            }
221        }
222        // Last-resort attempt: take the mutex for the whole query so no
223        // writer can race us. Much slower, but guaranteed to make progress.
224        let host = self.host.lock().unwrap();
225        f(host.db())
226    }
227
228    /// Evict the semantic-tokens cache for `uri`. Called by Backend when a
229    /// file is closed; diff-based tokens computed against the old revision
230    /// are no longer meaningful.
231    pub fn evict_token_cache(&self, uri: &Url) {
232        self.token_cache.remove(uri);
233    }
234
235    /// Register a file in the salsa layer without marking it open.
236    ///
237    /// Salsa's `parsed_doc` query parses lazily on first read; diagnostics
238    /// are populated by `did_open` when the editor actually opens the file.
239    pub fn index(&self, uri: Url, text: &str) {
240        self.mirror_text(&uri, text);
241    }
242
243    /// Index a file using an already-parsed `ParsedDoc`, avoiding a second parse.
244    ///
245    /// Prefer this over [`index`] when the caller already has a `ParsedDoc` (e.g.
246    /// after running `DefinitionCollector` during workspace scan).
247    ///
248    /// `_diagnostics` is accepted for call-site compatibility; parse
249    /// diagnostics for background-indexed files are never consulted
250    /// (callers gate on `get_doc_salsa` returning `Some`).
251    pub fn index_from_doc(&self, uri: Url, doc: &ParsedDoc, _diagnostics: Vec<Diagnostic>) {
252        self.mirror_text(&uri, doc.source());
253    }
254
255    pub fn remove(&self, uri: &Url) {
256        self.token_cache.remove(uri);
257        // Also drop the Url→SourceFile mapping so the file stops contributing
258        // to the workspace codebase query. Salsa inputs themselves remain
259        // alive (salsa doesn't expose input removal in 0.26), but they're
260        // orphaned — no query keys them anymore, and re-opening the file
261        // allocates a fresh SourceFile with a new FileId. The ~40 bytes per
262        // orphan is acceptable; revisit if workspace-churn profiling hurts.
263        self.source_files.remove(uri);
264        self.text_cache.remove(uri);
265        self.parsed_cache.remove(uri);
266    }
267
268    // ── B4b salsa-backed accessors ─────────────────────────────────────────
269    //
270    // These are additive and not yet called from production code. They go
271    // through the salsa layer — reads run the memoized `parsed_doc` /
272    // `file_index` / `method_returns` queries, parsing only on first access
273    // per revision. B4c will migrate feature modules to call these instead of
274    // the legacy `get_doc` / `get_index`.
275
276    /// Salsa-backed parsed document.
277    ///
278    /// Salsa-backed parsed document for any mirrored file (open or
279    /// background-indexed). Returns `None` only when the file is not known
280    /// to the store. Callers that want "only if open" should gate on
281    /// `Backend::open_files` at the call site (see `Backend::get_doc`).
282    pub fn get_doc_salsa(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
283        self.get_parsed_cached(uri)
284    }
285
286    /// Salsa-backed compact symbol index.
287    pub fn get_index_salsa(&self, uri: &Url) -> Option<Arc<FileIndex>> {
288        let sf = self.source_file(uri)?;
289        Some(self.snapshot_query(move |db| crate::db::index::file_index(db, sf).0.clone()))
290    }
291
292    /// G3: shared implementation for `get_doc_salsa`.
293    /// Tries the `parsed_cache` (lock-free) first; validates via
294    /// `Arc::ptr_eq` against the G2 `text_cache` so a concurrent writer
295    /// that has already committed a new text input cannot be masked by a
296    /// stale cache entry. On miss, captures the text Arc and ParsedDoc
297    /// together inside a single `snapshot_query`, then publishes both.
298    fn get_parsed_cached(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
299        if let Some(current_text) = self.text_cache.get(uri)
300            && let Some(entry) = self.parsed_cache.get(uri)
301            && Arc::ptr_eq(&*current_text, &entry.0)
302        {
303            return Some(entry.1.clone());
304        }
305
306        let sf = self.source_file(uri)?;
307        let (text, doc) = self.snapshot_query(move |db| {
308            let text = sf.text(db);
309            let doc = crate::db::parse::parsed_doc(db, sf).0.clone();
310            (text, doc)
311        });
312        self.insert_parsed_cache(uri.clone(), text, doc.clone());
313        Some(doc)
314    }
315
316    /// Publish a fresh `ParsedDoc` into `parsed_cache`, shedding roughly
317    /// half of the cache first if it has grown past [`PARSED_CACHE_CAP`].
318    ///
319    /// Eviction is probabilistic (DashMap iteration order is arbitrary),
320    /// not LRU. That's fine — salsa's own `parsed_doc` memo uses
321    /// `lru = 2048` on hotness-aware storage, so a cache-miss here is
322    /// cheap: the next read goes through `snapshot_query` and
323    /// `parsed_doc`, which still short-circuits on the salsa memo.
324    /// What we're bounding here is the *secondary* Arc retention that
325    /// would otherwise pin every workspace file's bumpalo arena alive
326    /// regardless of salsa's eviction decisions.
327    fn insert_parsed_cache(&self, uri: Url, text: Arc<str>, doc: Arc<ParsedDoc>) {
328        if self.parsed_cache.len() >= PARSED_CACHE_CAP {
329            let drop_target = self.parsed_cache.len() / 2;
330            let mut dropped = 0usize;
331            self.parsed_cache.retain(|_, _| {
332                if dropped < drop_target {
333                    dropped += 1;
334                    false
335                } else {
336                    true
337                }
338            });
339        }
340        self.parsed_cache.insert(uri, (text, doc));
341    }
342
343    /// Refresh `workspace.files` to mirror the current `source_files` set.
344    ///
345    /// Called by `get_codebase_salsa`. Skips the setter when the file list
346    /// hasn't changed — salsa's `set_field` unconditionally bumps revision,
347    /// which would invalidate every downstream query (codebase, file_refs).
348    /// Dedup is essential for memoization across LSP requests.
349    pub fn sync_workspace_files(&self) {
350        let mut files: Vec<SourceFile> = self.source_files.iter().map(|e| *e.value()).collect();
351        files.sort_by_key(|sf| self.with_host(|host| sf.id(host.db()).0));
352        let mut host = self.host.lock().unwrap();
353        let current = self.workspace.files(host.db());
354        if current.len() == files.len() && current.iter().zip(files.iter()).all(|(a, b)| a == b) {
355            return;
356        }
357        let arc: Arc<[SourceFile]> = Arc::from(files);
358        self.workspace.set_files(host.db_mut()).to(arc);
359    }
360
361    /// Update the PHP version tracked by the workspace. Salsa will invalidate
362    /// all `semantic_issues` queries so diagnostics are re-evaluated.
363    /// Skips the setter when the version hasn't changed to avoid spurious
364    /// query invalidation.
365    pub fn set_php_version(&self, version: mir_analyzer::PhpVersion) {
366        let mut host = self.host.lock().unwrap();
367        if self.workspace.php_version(host.db()) == version {
368            return;
369        }
370        self.workspace.set_php_version(host.db_mut()).to(version);
371    }
372
373    /// Salsa-backed finalized Codebase. Aggregates every known file's
374    /// `StubSlice` via `codebase_from_parts`, memoized by salsa.
375    ///
376    /// Phase C step 3: this runs in parallel with Backend's imperative
377    /// `Arc<Codebase>`. Comparison tests validate parity; readers migrate in
378    /// a follow-up.
379    pub fn get_codebase_salsa(&self) -> Arc<mir_codebase::Codebase> {
380        self.sync_workspace_files();
381        let ws = self.workspace;
382        self.snapshot_query(move |db| crate::db::codebase::codebase(db, ws).0.clone())
383    }
384
385    /// Salsa-backed reference lookup — drop-in replacement for
386    /// `Codebase::get_reference_locations`. First call per `key` runs
387    /// `file_refs` over every workspace file; subsequent calls hit the
388    /// `symbol_refs` memo.
389    pub fn get_symbol_refs_salsa(&self, key: &str) -> Vec<(Arc<str>, u32, u32)> {
390        self.sync_workspace_files();
391        let ws = self.workspace;
392        let key = key.to_string();
393        self.snapshot_query(move |db| {
394            warm_file_refs_parallel(db, ws);
395            crate::db::refs::symbol_refs(db, ws, key.clone())
396                .0
397                .as_ref()
398                .clone()
399        })
400    }
401
402    /// Phase J: salsa-memoized aggregate workspace index.
403    ///
404    /// Returns the shared `Arc<WorkspaceIndexData>` with flat
405    /// `(Url, Arc<FileIndex>)` list plus pre-built `classes_by_name` and
406    /// `subtypes_of` reverse maps. Used by workspace_symbols,
407    /// prepare_type_hierarchy, supertypes_of, subtypes_of, and
408    /// find_implementations so they don't each rebuild the aggregate per
409    /// request. Invalidates automatically when any file's `file_index`
410    /// changes.
411    pub fn get_workspace_index_salsa(&self) -> Arc<crate::db::workspace_index::WorkspaceIndexData> {
412        self.sync_workspace_files();
413        let ws = self.workspace;
414        self.snapshot_query(move |db| {
415            crate::db::workspace_index::workspace_index(db, ws)
416                .0
417                .clone()
418        })
419    }
420
421    /// Phase L: force `file_refs` to run for every workspace file so that
422    /// subsequent `textDocument/references` / `prepare_rename` / call-hierarchy
423    /// lookups hit the memo instead of paying first-call latency.
424    ///
425    /// Uses parallel warming (`warm_file_refs_parallel`) so all `file_refs`
426    /// complete concurrently; `symbol_refs` then only aggregates memos.
427    pub fn warm_reference_index(&self) {
428        self.sync_workspace_files();
429        let ws = self.workspace;
430        let _ = self.snapshot_query(move |db| {
431            warm_file_refs_parallel(db, ws);
432            crate::db::refs::symbol_refs(db, ws, String::from("__phplsp_warmup__"))
433                .0
434                .clone()
435        });
436    }
437
438    /// Phase K2b: run `file_definitions` for `uri` and return the
439    /// resulting `StubSlice`. Used by the workspace-scan write path to
440    /// persist slices to disk after a cache miss.
441    pub fn slice_for(&self, uri: &Url) -> Option<Arc<mir_codebase::storage::StubSlice>> {
442        let sf = self.source_file(uri)?;
443        Some(
444            self.snapshot_query(move |db| {
445                crate::db::definitions::file_definitions(db, sf).0.clone()
446            }),
447        )
448    }
449
450    /// Salsa-backed per-file method-return-type map.
451    pub fn get_method_returns_salsa(&self, uri: &Url) -> Option<Arc<crate::ast::MethodReturnsMap>> {
452        let sf = self.source_file(uri)?;
453        Some(
454            self.snapshot_query(move |db| {
455                crate::db::method_returns::method_returns(db, sf).0.clone()
456            }),
457        )
458    }
459
460    /// Cache the semantic tokens computed for a delta response.
461    /// `result_id` is an opaque string (a hash of the token data) returned to the client.
462    pub fn store_token_cache(&self, uri: &Url, result_id: String, tokens: Vec<SemanticToken>) {
463        self.token_cache.insert(uri.clone(), (result_id, tokens));
464    }
465
466    /// Return the cached tokens if `result_id` matches the stored one.
467    pub fn get_token_cache(&self, uri: &Url, result_id: &str) -> Option<Vec<SemanticToken>> {
468        self.token_cache
469            .get(uri)
470            .filter(|e| e.0.as_str() == result_id)
471            .map(|e| e.1.clone())
472    }
473
474    /// Phase I: salsa-memoized raw semantic issues for a file. Callers apply
475    /// their own `DiagnosticsConfig` filter via
476    /// [`crate::semantic_diagnostics::issues_to_diagnostics`] — keeping the
477    /// filter outside the query preserves memoization across config toggles.
478    pub fn get_semantic_issues_salsa(&self, uri: &Url) -> Option<Arc<[mir_issues::Issue]>> {
479        let sf = self.source_file(uri)?;
480        self.sync_workspace_files();
481        let ws = self.workspace;
482        Some(
483            self.snapshot_query(move |db| {
484                crate::db::semantic::semantic_issues(db, ws, sf).0.clone()
485            }),
486        )
487    }
488
489    /// Returns `(uri, doc)` for files currently open in the editor.
490    ///
491    /// Resolve `open_urls` (from `Backend::open_urls()`) to parsed docs.
492    /// Files not mirrored in the salsa layer are filtered out silently.
493    pub fn docs_for(&self, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
494        open_urls
495            .iter()
496            .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
497            .collect()
498    }
499
500    /// `(primary, doc)` first, then every other open file's parsed doc.
501    /// The `open_urls` slice should include `uri` — this helper filters it out.
502    pub fn doc_with_others(
503        &self,
504        uri: &Url,
505        doc: Arc<ParsedDoc>,
506        open_urls: &[Url],
507    ) -> Vec<(Url, Arc<ParsedDoc>)> {
508        let mut result = vec![(uri.clone(), doc)];
509        result.extend(self.other_docs(uri, open_urls));
510        result
511    }
512
513    /// Parsed docs for every entry in `open_urls` except `uri`.
514    pub fn other_docs(&self, uri: &Url, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
515        open_urls
516            .iter()
517            .filter(|u| *u != uri)
518            .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
519            .collect()
520    }
521
522    /// Batched salsa fetch for every entry in `open_urls` except `uri`:
523    /// returns each `(uri, ParsedDoc, MethodReturnsMap)` triple in a single
524    /// `snapshot_query` so cancellation retries don't run N times.
525    pub fn other_docs_with_returns(
526        &self,
527        uri: &Url,
528        open_urls: &[Url],
529    ) -> Vec<(Url, Arc<ParsedDoc>, Arc<crate::ast::MethodReturnsMap>)> {
530        let source_files: Vec<(Url, crate::db::input::SourceFile)> = open_urls
531            .iter()
532            .filter(|u| *u != uri)
533            .filter_map(|u| self.source_file(u).map(|sf| (u.clone(), sf)))
534            .collect();
535        if source_files.is_empty() {
536            return Vec::new();
537        }
538        self.snapshot_query(move |db| {
539            source_files
540                .iter()
541                .map(|(u, sf)| {
542                    let doc = crate::db::parse::parsed_doc(db, *sf).0.clone();
543                    let mr = crate::db::method_returns::method_returns(db, *sf).0.clone();
544                    (u.clone(), doc, mr)
545                })
546                .collect()
547        })
548    }
549
550    /// Compact symbol index for every mirrored file.
551    pub fn all_indexes(&self) -> Vec<(Url, Arc<FileIndex>)> {
552        let urls: Vec<Url> = self.source_files.iter().map(|e| e.key().clone()).collect();
553        urls.into_iter()
554            .filter_map(|u| self.get_index_salsa(&u).map(|idx| (u, idx)))
555            .collect()
556    }
557
558    /// Same as `all_indexes` but excludes `uri`.
559    pub fn other_indexes(&self, uri: &Url) -> Vec<(Url, Arc<FileIndex>)> {
560        let urls: Vec<Url> = self
561            .source_files
562            .iter()
563            .filter(|e| e.key() != uri)
564            .map(|e| e.key().clone())
565            .collect();
566        urls.into_iter()
567            .filter_map(|u| self.get_index_salsa(&u).map(|idx| (u, idx)))
568            .collect()
569    }
570
571    /// Parsed documents for every mirrored file (open or background-indexed).
572    /// Suitable for full-scan operations: find-references, rename,
573    /// call_hierarchy, code_lens.
574    pub fn all_docs_for_scan(&self) -> Vec<(Url, Arc<ParsedDoc>)> {
575        let urls: Vec<Url> = self.source_files.iter().map(|e| e.key().clone()).collect();
576        urls.into_iter()
577            .filter_map(|u| self.get_doc_salsa(&u).map(|d| (u, d)))
578            .collect()
579    }
580}
581
582/// Run `file_refs` for every workspace file in parallel.
583///
584/// `db` clones are cheap (they share the same `Arc<Zalsa>` memo store), so
585/// results computed on any clone are immediately visible to all others at the
586/// same revision.  After this returns, the sequential loop inside `symbol_refs`
587/// only does cheap memo lookups instead of running `StatementsAnalyzer` on
588/// every file one-by-one.
589///
590/// Per-task `salsa::Cancelled` is caught and swallowed.  If the revision was
591/// bumped, the main thread's next salsa call inside `symbol_refs` will raise
592/// `Cancelled` too and `snapshot_query` retries the whole operation from
593/// scratch.  If the revision was not bumped, any file whose task was cancelled
594/// before completion simply has no memo entry and `symbol_refs`'s sequential
595/// loop recomputes it.
596fn warm_file_refs_parallel(
597    db: &crate::db::analysis::RootDatabase,
598    ws: crate::db::input::Workspace,
599) {
600    let files: Vec<_> = ws.files(db).iter().copied().collect();
601    // Pre-clone one snapshot per file before entering the scope.
602    // RootDatabase: Send (ZalsaLocal owns its RefCell; Arc<Zalsa> is Sync),
603    // but RootDatabase: !Sync, so we must avoid sharing &RootDatabase across
604    // threads.  Collecting owned clones first and moving each into its task
605    // requires only Send, not Sync.
606    let snaps: Vec<crate::db::analysis::RootDatabase> = files.iter().map(|_| db.clone()).collect();
607    rayon::scope(move |s| {
608        for (sf, snap) in files.into_iter().zip(snaps) {
609            s.spawn(move |_| {
610                let _ = salsa::Cancelled::catch(std::panic::AssertUnwindSafe(|| {
611                    crate::db::refs::file_refs(&snap, ws, sf);
612                }));
613            });
614        }
615    });
616}
617
618#[cfg(test)]
619mod tests {
620    use super::*;
621
622    fn uri(path: &str) -> Url {
623        Url::parse(&format!("file://{path}")).unwrap()
624    }
625
626    /// Phase E4: open-file state lives on `Backend`, not `DocumentStore`.
627    /// Tests that need to simulate "file is open" just mirror the text into
628    /// the salsa input — the open/closed distinction is enforced by the
629    /// caller (Backend) in production.
630    fn open(store: &DocumentStore, u: Url, text: String) {
631        store.mirror_text(&u, &text);
632    }
633
634    #[test]
635    fn salsa_codebase_matches_imperative_codebase() {
636        // Parity check for Phase C step 3: the salsa-built codebase should
637        // contain exactly the same class/interface/function FQNs as one
638        // built imperatively via DefinitionCollector against a fresh
639        // mir_codebase::Codebase.
640        let store = DocumentStore::new();
641        let sources = [
642            (
643                "/a.php",
644                "<?php\nnamespace A;\nclass Foo {}\ninterface IX {}",
645            ),
646            (
647                "/b.php",
648                "<?php\nnamespace B;\nfunction bar(): int { return 1; }",
649            ),
650            ("/c.php", "<?php\nnamespace C;\nenum Color { case Red; }"),
651        ];
652        for (p, src) in &sources {
653            open(&store, uri(p), src.to_string());
654        }
655
656        let salsa_cb = store.get_codebase_salsa();
657
658        let imperative_cb = mir_codebase::Codebase::new();
659        for (p, src) in &sources {
660            let (doc, _) = crate::diagnostics::parse_document(src);
661            let file: Arc<str> = Arc::from(uri(p).as_str());
662            let map = php_rs_parser::source_map::SourceMap::new(src);
663            let c =
664                mir_analyzer::collector::DefinitionCollector::new(&imperative_cb, file, src, &map);
665            let _ = c.collect(doc.program());
666        }
667        imperative_cb.finalize();
668
669        for fqn in ["A\\Foo", "A\\IX", "C\\Color"] {
670            assert_eq!(
671                salsa_cb.type_exists(fqn),
672                imperative_cb.type_exists(fqn),
673                "parity mismatch on type {fqn}"
674            );
675            assert!(salsa_cb.type_exists(fqn), "{fqn} missing from salsa cb");
676        }
677        assert_eq!(
678            salsa_cb.function_exists("B\\bar"),
679            imperative_cb.function_exists("B\\bar"),
680        );
681        assert!(salsa_cb.function_exists("B\\bar"));
682    }
683
684    #[test]
685    fn index_registers_file_in_salsa() {
686        let store = DocumentStore::new();
687        store.index(uri("/lib.php"), "<?php\nfunction lib_fn() {}");
688        let idx = store.get_index_salsa(&uri("/lib.php")).unwrap();
689        assert_eq!(idx.functions.len(), 1);
690        assert_eq!(idx.functions[0].name, "lib_fn");
691    }
692
693    #[test]
694    fn remove_drops_salsa_input() {
695        let store = DocumentStore::new();
696        store.index(uri("/lib.php"), "<?php");
697        store.remove(&uri("/lib.php"));
698        assert!(store.get_index_salsa(&uri("/lib.php")).is_none());
699    }
700
701    #[test]
702    fn all_indexes_includes_every_mirrored_file() {
703        let store = DocumentStore::new();
704        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
705        store.index(uri("/b.php"), "<?php\nfunction b() {}");
706        assert_eq!(store.all_indexes().len(), 2);
707    }
708
709    #[test]
710    fn other_indexes_excludes_current_uri() {
711        let store = DocumentStore::new();
712        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
713        open(&store, uri("/b.php"), "<?php\nfunction b() {}".to_string());
714        assert_eq!(store.other_indexes(&uri("/a.php")).len(), 1);
715    }
716
717    #[test]
718    fn other_docs_excludes_current_uri() {
719        let store = DocumentStore::new();
720        let ua = uri("/a.php");
721        let ub = uri("/b.php");
722        open(&store, ua.clone(), "<?php\nfunction a() {}".to_string());
723        open(&store, ub.clone(), "<?php\nfunction b() {}".to_string());
724        let open_urls = vec![ua.clone(), ub];
725        assert_eq!(store.other_docs(&ua, &open_urls).len(), 1);
726    }
727
728    #[test]
729    fn evict_token_cache_removes_entry() {
730        let store = DocumentStore::new();
731        let u = uri("/a.php");
732        open(&store, u.clone(), "<?php".to_string());
733        store.store_token_cache(&u, "id1".to_string(), vec![]);
734        assert!(store.get_token_cache(&u, "id1").is_some());
735        store.evict_token_cache(&u);
736        assert!(store.get_token_cache(&u, "id1").is_none());
737    }
738
739    #[test]
740    fn index_populates_file_index_with_symbols() {
741        let store = DocumentStore::new();
742        store.index(uri("/a.php"), "<?php\nfunction hello() {}");
743        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
744        assert_eq!(idx.functions.len(), 1);
745        assert_eq!(idx.functions[0].name, "hello");
746    }
747
748    #[test]
749    fn open_populates_file_index_with_symbols() {
750        let store = DocumentStore::new();
751        open(&store, uri("/a.php"), "<?php\nclass Foo {}".to_string());
752        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
753        assert_eq!(idx.classes.len(), 1);
754        assert_eq!(idx.classes[0].name, "Foo");
755    }
756
757    // ── Mirror invariants ────────────────────────────────────────────────
758    //
759    // Every mutation path that changes file text must keep the salsa layer
760    // consistent. These tests walk a set-edit-reopen cycle and assert that
761    // the salsa-derived `FileIndex` reflects the latest text at each step.
762
763    fn names_of(idx: &FileIndex) -> Vec<String> {
764        let mut out: Vec<String> = idx.classes.iter().map(|c| c.name.clone()).collect();
765        out.extend(idx.functions.iter().map(|f| f.name.clone()));
766        out.sort();
767        out
768    }
769
770    fn salsa_index_names(store: &DocumentStore, url: &Url) -> Vec<String> {
771        let sf = store.source_file(url).expect("mirror recorded SourceFile");
772        store.with_host(|host| {
773            let arc = crate::db::index::file_index(host.db(), sf);
774            names_of(arc.get())
775        })
776    }
777
778    #[test]
779    fn mirror_tracks_repeated_edits() {
780        let store = DocumentStore::new();
781        let u = uri("/mirror.php");
782
783        open(&store, u.clone(), "<?php\nclass A {}".to_string());
784        assert_eq!(salsa_index_names(&store, &u), vec!["A".to_string()]);
785
786        open(
787            &store,
788            u.clone(),
789            "<?php\nclass A {}\nclass B {}".to_string(),
790        );
791        assert_eq!(
792            salsa_index_names(&store, &u),
793            vec!["A".to_string(), "B".to_string()]
794        );
795
796        open(&store, u.clone(), "<?php\nfunction greet() {}".to_string());
797        assert_eq!(salsa_index_names(&store, &u), vec!["greet".to_string()]);
798    }
799
800    #[test]
801    fn mirror_tracks_index_and_index_from_doc() {
802        let store = DocumentStore::new();
803
804        // Background `index(url, text)` path.
805        let u1 = uri("/bg1.php");
806        store.index(u1.clone(), "<?php\nclass Bg1 {}");
807        assert_eq!(salsa_index_names(&store, &u1), vec!["Bg1".to_string()]);
808
809        // `index_from_doc(url, &doc, diags)` path (workspace-scan Phase 2).
810        let u2 = uri("/bg2.php");
811        let (doc, diags) =
812            crate::diagnostics::parse_document("<?php\nclass Bg2 {}\nfunction f() {}");
813        store.index_from_doc(u2.clone(), &doc, diags);
814        assert_eq!(
815            salsa_index_names(&store, &u2),
816            vec!["Bg2".to_string(), "f".to_string()]
817        );
818    }
819
820    /// G3: confirms the `parsed_cache` actually hits — two consecutive
821    /// `get_doc_salsa` calls on unchanged text return the same `Arc`
822    /// (pointer equality), and an edit forces a miss that produces a
823    /// different `Arc`.
824    /// parsed_cache must stay bounded — inserting more than
825    /// `PARSED_CACHE_CAP` unique URLs must not cause unbounded growth.
826    /// Eviction is probabilistic, so we only assert the bound, not which
827    /// Phase K2 end-to-end: seed a cached slice through `DocumentStore`,
828    /// confirm the workspace codebase sees the cached fact, then edit the
829    /// text and confirm the cache is cleared (codebase now reflects the
830    /// re-parsed text). Exercises `seed_cached_slice` + `mirror_text`'s
831    /// `set_cached_slice(None)` invalidation together.
832    #[test]
833    fn seed_cached_slice_then_edit_invalidates() {
834        let store = DocumentStore::new();
835        let u = uri("/seed_e2e.php");
836
837        // Mirror the initial text — classes: "Real".
838        store.mirror_text(&u, "<?php\nclass Real {}");
839
840        // Build a cached slice claiming classes: "Seeded", for the same URI.
841        let seeded = {
842            let src = "<?php\nclass Seeded {}";
843            let source_map = php_rs_parser::source_map::SourceMap::new(src);
844            let (doc, _) = crate::diagnostics::parse_document(src);
845            let collector = mir_analyzer::collector::DefinitionCollector::new_for_slice(
846                Arc::<str>::from(u.as_str()),
847                src,
848                &source_map,
849            );
850            let (s, _) = collector.collect_slice(doc.program());
851            Arc::new(s)
852        };
853        assert!(store.seed_cached_slice(&u, seeded));
854
855        // Codebase should contain the seeded class, not the real one.
856        let cb = store.get_codebase_salsa();
857        assert!(cb.type_exists("Seeded"));
858        assert!(!cb.type_exists("Real"));
859
860        // Edit: mirror_text flips the text and also clears cached_slice.
861        store.mirror_text(&u, "<?php\nclass Edited {}");
862        let cb = store.get_codebase_salsa();
863        assert!(
864            cb.type_exists("Edited"),
865            "after edit, codebase must reflect fresh parse"
866        );
867        assert!(
868            !cb.type_exists("Seeded"),
869            "mirror_text must clear cached_slice so stale data is gone"
870        );
871    }
872
873    /// Seeding for a URL that was never mirrored is a no-op (returns `false`)
874    /// — avoids silently allocating SourceFiles outside `mirror_text`'s control.
875    #[test]
876    fn seed_cached_slice_noops_for_unknown_uri() {
877        let store = DocumentStore::new();
878        let u = uri("/never_mirrored.php");
879        let slice = Arc::new(mir_codebase::storage::StubSlice::default());
880        assert!(!store.seed_cached_slice(&u, slice));
881    }
882
883    /// entries survive.
884    #[test]
885    fn parsed_cache_stays_bounded_under_many_inserts() {
886        let store = DocumentStore::new();
887        let overflow = PARSED_CACHE_CAP + 100;
888        for i in 0..overflow {
889            let u = uri(&format!("/cap/file{i}.php"));
890            store.index(u.clone(), "<?php\nclass A {}");
891            // Force a parsed_cache insert via get_doc_salsa.
892            let _ = store.get_doc_salsa(&u);
893        }
894        assert!(
895            store.parsed_cache.len() <= PARSED_CACHE_CAP,
896            "parsed_cache grew to {} entries (cap {})",
897            store.parsed_cache.len(),
898            PARSED_CACHE_CAP
899        );
900    }
901
902    #[test]
903    fn get_doc_salsa_cache_hits_across_calls() {
904        let store = DocumentStore::new();
905        let u = uri("/g3_cache.php");
906        open(&store, u.clone(), "<?php\nclass G3 {}".to_string());
907
908        let a = store.get_doc_salsa(&u).unwrap();
909        let b = store.get_doc_salsa(&u).unwrap();
910        assert!(
911            Arc::ptr_eq(&a, &b),
912            "parsed_cache hit should yield the same Arc across calls"
913        );
914
915        open(&store, u.clone(), "<?php\nclass G3b {}".to_string());
916        let c = store.get_doc_salsa(&u).unwrap();
917        assert!(
918            !Arc::ptr_eq(&a, &c),
919            "edit should invalidate the parsed_cache entry"
920        );
921    }
922
923    #[test]
924    fn get_doc_salsa_returns_some_for_mirrored_files() {
925        // Phase E4: `get_doc_salsa` no longer gates on open-state. The
926        // open/closed distinction now lives on `Backend::get_doc`.
927        let store = DocumentStore::new();
928        let u = uri("/e4_doc.php");
929        store.index(u.clone(), "<?php\nclass P {}");
930        assert!(store.get_doc_salsa(&u).is_some());
931    }
932
933    #[test]
934    fn get_salsa_accessors_return_none_for_unknown_uri() {
935        let store = DocumentStore::new();
936        let u = uri("/never-seen.php");
937        assert!(store.get_doc_salsa(&u).is_none());
938        assert!(store.get_index_salsa(&u).is_none());
939        assert!(store.get_method_returns_salsa(&u).is_none());
940    }
941
942    /// Phase E1: concurrent readers and writers must not deadlock, panic, or
943    /// return stale data. Writers briefly bump inputs while readers are
944    /// running on cloned snapshots; any `salsa::Cancelled` raised on the
945    /// reader side must be caught and retried by `snapshot_query`.
946    #[test]
947    fn concurrent_reads_and_writes_do_not_panic() {
948        use std::sync::Arc;
949        use std::thread;
950        use std::time::{Duration, Instant};
951
952        let store = Arc::new(DocumentStore::new());
953        let urls: Vec<Url> = (0..8).map(|i| uri(&format!("/f{i}.php"))).collect();
954        for (i, u) in urls.iter().enumerate() {
955            open(&store, u.clone(), format!("<?php\nclass C{i} {{}}"));
956        }
957
958        let deadline = Instant::now() + Duration::from_millis(400);
959        let mut handles = Vec::new();
960
961        // Writer thread: keep bumping every file's text.
962        {
963            let store = Arc::clone(&store);
964            let urls = urls.clone();
965            handles.push(thread::spawn(move || {
966                let mut rev = 0u32;
967                while Instant::now() < deadline {
968                    for u in &urls {
969                        let text = format!("<?php\nclass C{{}}\n// rev {rev}");
970                        store.mirror_text(u, &text);
971                    }
972                    rev += 1;
973                }
974            }));
975        }
976
977        // Reader threads: hammer the salsa accessors.
978        for _ in 0..4 {
979            let store = Arc::clone(&store);
980            let urls = urls.clone();
981            handles.push(thread::spawn(move || {
982                while Instant::now() < deadline {
983                    for u in &urls {
984                        let _ = store.get_doc_salsa(u);
985                        let _ = store.get_index_salsa(u);
986                    }
987                    let _ = store.get_codebase_salsa();
988                    let _ = store.get_symbol_refs_salsa("C0");
989                }
990            }));
991        }
992
993        for h in handles {
994            h.join().expect("no panic under concurrent read/write");
995        }
996    }
997
998    /// Phase L: warm-up must not error and must pre-populate the `file_refs`
999    /// memo. We can't cheaply observe salsa memo state from outside, so we
1000    /// instead call `warm_reference_index` and then verify that a real
1001    /// reference lookup returns the expected result — the warm-up running
1002    /// without panic across a realistic two-file workspace is the load-bearing
1003    /// guarantee.
1004    #[test]
1005    fn warm_reference_index_does_not_panic_and_keeps_lookups_correct() {
1006        let store = DocumentStore::new();
1007        open(
1008            &store,
1009            uri("/wa.php"),
1010            "<?php\nfunction a() { b(); }".to_string(),
1011        );
1012        open(
1013            &store,
1014            uri("/wb.php"),
1015            "<?php\nfunction b() {}\na();".to_string(),
1016        );
1017        store.warm_reference_index();
1018        let refs_to_a = store.get_symbol_refs_salsa("a");
1019        assert!(
1020            refs_to_a.iter().any(|(uri, _, _)| uri.contains("wb.php")),
1021            "reference to a() from /wb.php should be discoverable after warm-up, got {refs_to_a:?}"
1022        );
1023    }
1024}