// php_lsp/document_store.rs
1use std::sync::atomic::{AtomicU32, Ordering};
2use std::sync::{Arc, Mutex, RwLock};
3
4use dashmap::DashMap;
5use salsa::Setter;
6use tower_lsp::lsp_types::{Diagnostic, SemanticToken, Url};
7
8use crate::ast::ParsedDoc;
9use crate::autoload::Psr4Map;
10use crate::db::analysis::AnalysisHost;
11use crate::db::input::{FileId, SourceFile, Workspace};
12use crate::file_index::FileIndex;
13
/// Upper bound on `parsed_cache` entries. Matched to the `lru = 2048` on
/// `parsed_doc` in `src/db/parse.rs` so the secondary Arc retention can't
/// pin more ASTs alive than salsa's memo already bounds. Exceeding this
/// triggers probabilistic eviction (see [`DocumentStore::insert_parsed_cache`]).
const PARSED_CACHE_CAP: usize = 2048;
19
pub struct DocumentStore {
    /// Cached semantic tokens per document: (result_id, tokens).
    /// Used to compute incremental deltas for `textDocument/semanticTokens/full/delta`.
    /// Populated by `store_token_cache`; dropped by `evict_token_cache` and `remove`.
    token_cache: DashMap<Url, (String, Vec<SemanticToken>)>,

    // ── Salsa-input storage ────────────────────────────────────────────────
    // Phase E4: `DocumentStore` is now a pure salsa-input wrapper. Open-file
    // state (live text, version token, parse-diagnostics cache) lives on
    // `Backend` in its `open_files` map; the set of files tracked by salsa
    // is exactly `source_files.keys()`.
    /// Mutex — held briefly to clone the database for reads and to mutate
    /// it for writes. Per-thread salsa state (`zalsa_local`) is `!Sync`,
    /// which rules out `RwLock<AnalysisHost>`. Readers instead snapshot the
    /// db (cheap — storage is `Arc<Zalsa>`) and run queries on the clone
    /// with the lock released, giving real read/read parallelism. Writers
    /// during an in-flight read bump the shared revision; the reader raises
    /// `salsa::Cancelled` on its next query call and `snapshot_query` below
    /// retries with a fresh snapshot.
    host: Mutex<AnalysisHost>,
    /// `Url -> SourceFile` lookup. The `SourceFile` is a salsa-id handle; the
    /// underlying input lives in `host.db` for the lifetime of the database.
    source_files: DashMap<Url, SourceFile>,
    /// G2: lock-free mirror of each `SourceFile`'s last-set text. Lets
    /// `mirror_text` dedup repeated no-op updates (common during workspace
    /// scan and `did_open` for already-indexed files) without taking
    /// `host.lock()`. Updated inside the mutex whenever the salsa input is
    /// set, so it is always consistent with the salsa revision for the
    /// purposes of byte-equality comparison.
    text_cache: DashMap<Url, Arc<str>>,
    /// G3: cross-revision read-through cache for `parsed_doc`. Keyed on
    /// `Url`, stored value is `(text_arc, Arc<ParsedDoc>)` — the text Arc
    /// captured at parse time. On read, compare against `text_cache[uri]`
    /// via `Arc::ptr_eq`; a match guarantees the cached ParsedDoc matches
    /// the current salsa revision's text input, so the query can return
    /// without snapshotting the db or invoking salsa at all. A miss
    /// (different pointer, stale or absent entry) falls through to
    /// `snapshot_query`. Self-evicts on text change — no writer-side
    /// invalidation is required, which avoids the TOCTOU window where a
    /// concurrent reader could re-insert a stale entry after a writer's
    /// eviction.
    ///
    /// Size-bounded at [`PARSED_CACHE_CAP`] — see `insert_parsed_cache`.
    /// Without this bound, every workspace file read-through would pin
    /// its bumpalo arena alive regardless of salsa's `lru = 2048` on the
    /// `parsed_doc` memo.
    parsed_cache: DashMap<Url, (Arc<str>, Arc<ParsedDoc>)>,
    /// Monotonic allocator for `FileId`s (one per ever-seen URL).
    /// Fetched with `Ordering::Relaxed` — ids only need uniqueness, not any
    /// ordering relative to other memory operations.
    next_file_id: AtomicU32,
    /// Workspace salsa input. Tracks the full set of `SourceFile`s that
    /// participate in whole-program queries (`codebase`, `file_refs`).
    /// Re-synced from `source_files` on demand by `sync_workspace_files`.
    workspace: Workspace,
    /// Shared PSR-4 namespace-to-path map. Shared with `Backend` via `Arc`
    /// so updates from `initialized` (when composer.json is loaded) are
    /// visible here without any additional wiring.
    psr4: Arc<RwLock<Psr4Map>>,
}
77
78impl Default for DocumentStore {
79    fn default() -> Self {
80        Self::new()
81    }
82}
83
84impl DocumentStore {
85    pub fn new() -> Self {
86        let host = AnalysisHost::new();
87        let workspace = Workspace::new(
88            host.db(),
89            Arc::<[SourceFile]>::from(Vec::new()),
90            mir_analyzer::PhpVersion::LATEST,
91        );
92        DocumentStore {
93            token_cache: DashMap::new(),
94            host: Mutex::new(host),
95            source_files: DashMap::new(),
96            text_cache: DashMap::new(),
97            parsed_cache: DashMap::new(),
98            next_file_id: AtomicU32::new(0),
99            workspace,
100            psr4: Arc::new(RwLock::new(Psr4Map::empty())),
101        }
102    }
103
104    /// Return the `Arc<RwLock<Psr4Map>>` so callers can share it.
105    /// `Backend` clones this arc at construction time so writes to the lock
106    /// (e.g. loading composer.json on `initialized`) are immediately visible
107    /// to `lazy_load_psr4_imports` without extra plumbing.
108    pub fn psr4_arc(&self) -> Arc<RwLock<Psr4Map>> {
109        Arc::clone(&self.psr4)
110    }
111
    /// Mirror a file's current text into the salsa layer. Creates the
    /// `SourceFile` input on first sight, otherwise updates `text` on the
    /// existing input (bumping the salsa revision so downstream queries
    /// invalidate). Returns the `SourceFile` handle for this `uri`.
    ///
    /// B4a: called from every text-changing mutation site. Reads still come
    /// from the legacy `map` — this mirror is not yet observed by production
    /// code paths.
    pub fn mirror_text(&self, uri: &Url, text: &str) -> SourceFile {
        // G2 fast path: compare against the lock-free text cache. When the
        // new text byte-matches what we already mirrored, skip the host
        // mutex entirely. Common during workspace scan + `did_open` for
        // unchanged files, where most threads would otherwise serialise on
        // `host.lock()` just to confirm a no-op. Cache is only populated
        // after the matching `source_files` entry, so a cache hit implies
        // the handle exists.
        if let Some(cached) = self.text_cache.get(uri)
            && **cached == *text
            && let Some(sf) = self.source_files.get(uri)
        {
            return *sf;
        }

        let text_arc: Arc<str> = Arc::from(text);
        if let Some(existing) = self.source_files.get(uri) {
            let sf = *existing;
            // Copy the handle out and release the DashMap guard before
            // taking `host.lock()` — NOTE(review): presumably to avoid
            // holding a shard lock across the mutex; the rest of this file
            // follows the same ordering discipline.
            drop(existing);
            // Slow path: another writer may have raced us; re-check inside
            // the mutex. Salsa's `set_text` unconditionally bumps the
            // revision, so every spurious setter invalidates every
            // downstream query.
            let mut host = self.host.lock().unwrap();
            let current: Arc<str> = sf.text(host.db());
            if *current == *text_arc {
                drop(host);
                self.text_cache.insert(uri.clone(), current);
                return sf;
            }
            sf.set_text(host.db_mut()).to(text_arc.clone());
            // Phase K2: any text change invalidates a previously-seeded
            // cached slice. Clearing it forces the fresh-parse branch of
            // `file_definitions` on the next query, which is correct —
            // the cached slice no longer matches the new text.
            sf.set_cached_slice(host.db_mut()).to(None);
            drop(host);
            self.text_cache.insert(uri.clone(), text_arc);
            sf
        } else {
            // First sight: allocate a FileId (Relaxed is enough — ids only
            // need uniqueness) and create the salsa input under the mutex.
            let id = FileId(self.next_file_id.fetch_add(1, Ordering::Relaxed));
            let uri_arc: Arc<str> = Arc::from(uri.as_str());
            let sf = {
                let host = self.host.lock().unwrap();
                SourceFile::new(host.db(), id, uri_arc, text_arc.clone(), None)
            };
            // Publish `source_files` before `text_cache` so the fast path's
            // "cache hit implies handle exists" invariant holds.
            self.source_files.insert(uri.clone(), sf);
            self.text_cache.insert(uri.clone(), text_arc);
            sf
        }
    }
171
172    /// Return the salsa `SourceFile` handle for a URL, if one exists.
173    pub fn source_file(&self, uri: &Url) -> Option<SourceFile> {
174        self.source_files.get(uri).map(|e| *e)
175    }
176
177    /// Phase K2: pre-seed a `StubSlice` loaded from the on-disk cache
178    /// onto the `SourceFile` input for `uri`. The next `file_definitions`
179    /// call for that file returns the cached slice directly, skipping
180    /// parse + `DefinitionCollector`.
181    ///
182    /// Must be called **before** any `file_definitions(db, sf)` call for
183    /// this file — otherwise salsa has already memoized the fresh-parse
184    /// result and setting `cached_slice` now would only bump the revision
185    /// without actually using the cache. In practice the workspace-scan
186    /// path seeds immediately after `mirror_text` and before any query
187    /// runs.
188    ///
189    /// Returns `false` when `uri` was not mirrored (caller should mirror
190    /// first); returns `true` on success.
191    pub fn seed_cached_slice(
192        &self,
193        uri: &Url,
194        slice: Arc<mir_codebase::storage::StubSlice>,
195    ) -> bool {
196        let Some(sf) = self.source_files.get(uri).map(|e| *e) else {
197            return false;
198        };
199        let mut host = self.host.lock().unwrap();
200        sf.set_cached_slice(host.db_mut()).to(Some(slice));
201        true
202    }
203
204    /// Run `f` with a borrow of the `AnalysisHost`. Used by tests and by the
205    /// upcoming `*_salsa` accessors to query the salsa layer.
206    pub fn with_host<R>(&self, f: impl FnOnce(&AnalysisHost) -> R) -> R {
207        let host = self.host.lock().unwrap();
208        f(&host)
209    }
210
211    /// Phase E1: take a brief lock, clone the salsa database, release the
212    /// lock. Queries then run on the cloned `RootDatabase` without blocking
213    /// writers or other readers. Salsa's `Storage<Self>` is reference-counted
214    /// (`Arc<Zalsa>`), so the clone is cheap — it shares memoized data and
215    /// the cancellation flag with the host's db.
216    fn snapshot_db(&self) -> crate::db::analysis::RootDatabase {
217        let host = self.host.lock().unwrap();
218        host.db().clone()
219    }
220
    /// Run a query on a fresh snapshot, catching `salsa::Cancelled` (raised
    /// when a concurrent writer advances the revision) and retrying with a
    /// new snapshot. Writers hold the mutex only long enough to bump input
    /// values, so a handful of retries is more than enough in practice; we
    /// cap at 8 to avoid pathological livelock under sustained write pressure.
    fn snapshot_query<R>(&self, f: impl Fn(&crate::db::analysis::RootDatabase) -> R + Clone) -> R {
        use std::panic::AssertUnwindSafe;
        for _ in 0..8 {
            let db = self.snapshot_db();
            // Clone `f` so each attempt can move its own copy into the
            // catch closure; the original survives for the fallback below.
            let f = f.clone();
            // AssertUnwindSafe: on a cancellation unwind the snapshot and
            // the closure copy are discarded wholesale — nothing observed
            // afterwards could carry a broken invariant out of the unwind.
            match salsa::Cancelled::catch(AssertUnwindSafe(move || f(&db))) {
                Ok(r) => return r,
                Err(_) => continue,
            }
        }
        // Last-resort attempt: take the mutex for the whole query so no
        // writer can race us. Much slower, but guaranteed to make progress.
        let host = self.host.lock().unwrap();
        f(host.db())
    }
241
242    /// Evict the semantic-tokens cache for `uri`. Called by Backend when a
243    /// file is closed; diff-based tokens computed against the old revision
244    /// are no longer meaningful.
245    pub fn evict_token_cache(&self, uri: &Url) {
246        self.token_cache.remove(uri);
247    }
248
249    /// Register a file in the salsa layer without marking it open.
250    ///
251    /// Salsa's `parsed_doc` query parses lazily on first read; diagnostics
252    /// are populated by `did_open` when the editor actually opens the file.
253    pub fn index(&self, uri: Url, text: &str) {
254        self.mirror_text(&uri, text);
255    }
256
257    /// Index a file using an already-parsed `ParsedDoc`, avoiding a second parse.
258    ///
259    /// Prefer this over [`index`] when the caller already has a `ParsedDoc` (e.g.
260    /// after running `DefinitionCollector` during workspace scan).
261    ///
262    /// `_diagnostics` is accepted for call-site compatibility; parse
263    /// diagnostics for background-indexed files are never consulted
264    /// (callers gate on `get_doc_salsa` returning `Some`).
265    pub fn index_from_doc(&self, uri: Url, doc: &ParsedDoc, _diagnostics: Vec<Diagnostic>) {
266        self.mirror_text(&uri, doc.source());
267    }
268
    /// Forget `uri` entirely: token cache, salsa handle mapping, and both
    /// read-through caches.
    pub fn remove(&self, uri: &Url) {
        self.token_cache.remove(uri);
        // Also drop the Url→SourceFile mapping so the file stops contributing
        // to the workspace codebase query. Salsa inputs themselves remain
        // alive (salsa doesn't expose input removal in 0.26), but they're
        // orphaned — no query keys them anymore, and re-opening the file
        // allocates a fresh SourceFile with a new FileId. The ~40 bytes per
        // orphan is acceptable; revisit if workspace-churn profiling hurts.
        self.source_files.remove(uri);
        self.text_cache.remove(uri);
        self.parsed_cache.remove(uri);
    }
281
282    // ── B4b salsa-backed accessors ─────────────────────────────────────────
283    //
284    // These are additive and not yet called from production code. They go
285    // through the salsa layer — reads run the memoized `parsed_doc` /
286    // `file_index` / `method_returns` queries, parsing only on first access
287    // per revision. B4c will migrate feature modules to call these instead of
288    // the legacy `get_doc` / `get_index`.
289
290    /// Salsa-backed parsed document.
291    ///
292    /// Salsa-backed parsed document for any mirrored file (open or
293    /// background-indexed). Returns `None` only when the file is not known
294    /// to the store. Callers that want "only if open" should gate on
295    /// `Backend::open_files` at the call site (see `Backend::get_doc`).
296    pub fn get_doc_salsa(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
297        self.get_parsed_cached(uri)
298    }
299
300    /// Salsa-backed compact symbol index.
301    pub fn get_index_salsa(&self, uri: &Url) -> Option<Arc<FileIndex>> {
302        let sf = self.source_file(uri)?;
303        Some(self.snapshot_query(move |db| crate::db::index::file_index(db, sf).0.clone()))
304    }
305
    /// G3: shared implementation for `get_doc_salsa`.
    /// Tries the `parsed_cache` (lock-free) first; validates via
    /// `Arc::ptr_eq` against the G2 `text_cache` so a concurrent writer
    /// that has already committed a new text input cannot be masked by a
    /// stale cache entry. On miss, captures the text Arc and ParsedDoc
    /// together inside a single `snapshot_query`, then publishes both.
    fn get_parsed_cached(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
        // Pointer equality (not byte equality) is the validity test: a
        // matching Arc pointer means the cached parse was produced from the
        // very allocation currently mirrored in `text_cache`.
        if let Some(current_text) = self.text_cache.get(uri)
            && let Some(entry) = self.parsed_cache.get(uri)
            && Arc::ptr_eq(&*current_text, &entry.0)
        {
            return Some(entry.1.clone());
        }

        let sf = self.source_file(uri)?;
        // Fetch text + parse in one snapshot so both come from the same
        // revision — publishing a mismatched pair would defeat the ptr_eq
        // validation above.
        let (text, doc) = self.snapshot_query(move |db| {
            let text = sf.text(db);
            let doc = crate::db::parse::parsed_doc(db, sf).0.clone();
            (text, doc)
        });
        self.insert_parsed_cache(uri.clone(), text, doc.clone());
        Some(doc)
    }
329
330    /// Publish a fresh `ParsedDoc` into `parsed_cache`, shedding roughly
331    /// half of the cache first if it has grown past [`PARSED_CACHE_CAP`].
332    ///
333    /// Eviction is probabilistic (DashMap iteration order is arbitrary),
334    /// not LRU. That's fine — salsa's own `parsed_doc` memo uses
335    /// `lru = 2048` on hotness-aware storage, so a cache-miss here is
336    /// cheap: the next read goes through `snapshot_query` and
337    /// `parsed_doc`, which still short-circuits on the salsa memo.
338    /// What we're bounding here is the *secondary* Arc retention that
339    /// would otherwise pin every workspace file's bumpalo arena alive
340    /// regardless of salsa's eviction decisions.
341    fn insert_parsed_cache(&self, uri: Url, text: Arc<str>, doc: Arc<ParsedDoc>) {
342        if self.parsed_cache.len() >= PARSED_CACHE_CAP {
343            let drop_target = self.parsed_cache.len() / 2;
344            let mut dropped = 0usize;
345            self.parsed_cache.retain(|_, _| {
346                if dropped < drop_target {
347                    dropped += 1;
348                    false
349                } else {
350                    true
351                }
352            });
353        }
354        self.parsed_cache.insert(uri, (text, doc));
355    }
356
357    /// Refresh `workspace.files` to mirror the current `source_files` set.
358    ///
359    /// Called by `get_codebase_salsa`. Skips the setter when the file list
360    /// hasn't changed — salsa's `set_field` unconditionally bumps revision,
361    /// which would invalidate every downstream query (codebase, file_refs).
362    /// Dedup is essential for memoization across LSP requests.
363    pub fn sync_workspace_files(&self) {
364        let mut files: Vec<SourceFile> = self.source_files.iter().map(|e| *e.value()).collect();
365        files.sort_by_key(|sf| self.with_host(|host| sf.id(host.db()).0));
366        let mut host = self.host.lock().unwrap();
367        let current = self.workspace.files(host.db());
368        if current.len() == files.len() && current.iter().zip(files.iter()).all(|(a, b)| a == b) {
369            return;
370        }
371        let arc: Arc<[SourceFile]> = Arc::from(files);
372        self.workspace.set_files(host.db_mut()).to(arc);
373    }
374
375    /// Update the PHP version tracked by the workspace. Salsa will invalidate
376    /// all `semantic_issues` queries so diagnostics are re-evaluated.
377    /// Skips the setter when the version hasn't changed to avoid spurious
378    /// query invalidation.
379    pub fn set_php_version(&self, version: mir_analyzer::PhpVersion) {
380        let mut host = self.host.lock().unwrap();
381        if self.workspace.php_version(host.db()) == version {
382            return;
383        }
384        self.workspace.set_php_version(host.db_mut()).to(version);
385    }
386
387    /// Salsa-backed finalized Codebase. Aggregates every known file's
388    /// `StubSlice` via `codebase_from_parts`, memoized by salsa.
389    ///
390    /// Phase C step 3: this runs in parallel with Backend's imperative
391    /// `Arc<Codebase>`. Comparison tests validate parity; readers migrate in
392    /// a follow-up.
393    pub fn get_codebase_salsa(&self) -> Arc<mir_codebase::Codebase> {
394        self.sync_workspace_files();
395        let ws = self.workspace;
396        self.snapshot_query(move |db| crate::db::codebase::codebase(db, ws).0.clone())
397    }
398
399    /// Salsa-backed reference lookup — drop-in replacement for
400    /// `Codebase::get_reference_locations`. First call per `key` runs
401    /// `file_refs` over every workspace file; subsequent calls hit the
402    /// `symbol_refs` memo.
403    pub fn get_symbol_refs_salsa(&self, key: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
404        self.sync_workspace_files();
405        let ws = self.workspace;
406        let key = key.to_string();
407        self.snapshot_query(move |db| {
408            warm_file_refs_parallel(db, ws);
409            crate::db::refs::symbol_refs(db, ws, key.clone())
410                .0
411                .as_ref()
412                .clone()
413        })
414    }
415
416    /// Phase J: salsa-memoized aggregate workspace index.
417    ///
418    /// Returns the shared `Arc<WorkspaceIndexData>` with flat
419    /// `(Url, Arc<FileIndex>)` list plus pre-built `classes_by_name` and
420    /// `subtypes_of` reverse maps. Used by workspace_symbols,
421    /// prepare_type_hierarchy, supertypes_of, subtypes_of, and
422    /// find_implementations so they don't each rebuild the aggregate per
423    /// request. Invalidates automatically when any file's `file_index`
424    /// changes.
425    pub fn get_workspace_index_salsa(&self) -> Arc<crate::db::workspace_index::WorkspaceIndexData> {
426        self.sync_workspace_files();
427        let ws = self.workspace;
428        self.snapshot_query(move |db| {
429            crate::db::workspace_index::workspace_index(db, ws)
430                .0
431                .clone()
432        })
433    }
434
435    /// Phase L: force `file_refs` to run for every workspace file so that
436    /// subsequent `textDocument/references` / `prepare_rename` / call-hierarchy
437    /// lookups hit the memo instead of paying first-call latency.
438    ///
439    /// Uses parallel warming (`warm_file_refs_parallel`) so all `file_refs`
440    /// complete concurrently; `symbol_refs` then only aggregates memos.
441    pub fn warm_reference_index(&self) {
442        self.sync_workspace_files();
443        let ws = self.workspace;
444        let _ = self.snapshot_query(move |db| {
445            warm_file_refs_parallel(db, ws);
446            crate::db::refs::symbol_refs(db, ws, String::from("__phplsp_warmup__"))
447                .0
448                .clone()
449        });
450    }
451
452    /// Phase K2b: run `file_definitions` for `uri` and return the
453    /// resulting `StubSlice`. Used by the workspace-scan write path to
454    /// persist slices to disk after a cache miss.
455    pub fn slice_for(&self, uri: &Url) -> Option<Arc<mir_codebase::storage::StubSlice>> {
456        let sf = self.source_file(uri)?;
457        Some(
458            self.snapshot_query(move |db| {
459                crate::db::definitions::file_definitions(db, sf).0.clone()
460            }),
461        )
462    }
463
464    /// Salsa-backed per-file method-return-type map.
465    pub fn get_method_returns_salsa(&self, uri: &Url) -> Option<Arc<crate::ast::MethodReturnsMap>> {
466        let sf = self.source_file(uri)?;
467        Some(
468            self.snapshot_query(move |db| {
469                crate::db::method_returns::method_returns(db, sf).0.clone()
470            }),
471        )
472    }
473
474    /// Cache the semantic tokens computed for a delta response.
475    /// `result_id` is an opaque string (a hash of the token data) returned to the client.
476    pub fn store_token_cache(&self, uri: &Url, result_id: String, tokens: Vec<SemanticToken>) {
477        self.token_cache.insert(uri.clone(), (result_id, tokens));
478    }
479
480    /// Return the cached tokens if `result_id` matches the stored one.
481    pub fn get_token_cache(&self, uri: &Url, result_id: &str) -> Option<Vec<SemanticToken>> {
482        self.token_cache
483            .get(uri)
484            .filter(|e| e.0.as_str() == result_id)
485            .map(|e| e.1.clone())
486    }
487
488    /// Before running semantic analysis for `uri`, resolve every `use`-imported
489    /// class through the PSR-4 map and mirror any that are not yet registered.
490    /// This prevents spurious `UndefinedClass` diagnostics when the background
491    /// workspace scan has not yet reached a dependency file.
492    fn lazy_load_psr4_imports(&self, uri: &Url) {
493        let doc = match self.get_doc_salsa(uri) {
494            Some(d) => d,
495            None => return,
496        };
497        let imports = crate::references::collect_file_imports(&doc);
498        if imports.is_empty() {
499            return;
500        }
501        let psr4 = self.psr4.read().unwrap();
502        let paths: Vec<std::path::PathBuf> = imports
503            .values()
504            .filter_map(|fqcn| psr4.resolve(fqcn))
505            .collect();
506        drop(psr4);
507
508        for path in paths {
509            let Ok(dep_url) = Url::from_file_path(&path) else {
510                continue;
511            };
512            if self.source_files.contains_key(&dep_url) {
513                continue;
514            }
515            if let Ok(text) = std::fs::read_to_string(&path) {
516                self.mirror_text(&dep_url, &text);
517            }
518        }
519    }
520
521    /// Phase I: salsa-memoized raw semantic issues for a file. Callers apply
522    /// their own `DiagnosticsConfig` filter via
523    /// [`crate::semantic_diagnostics::issues_to_diagnostics`] — keeping the
524    /// filter outside the query preserves memoization across config toggles.
525    pub fn get_semantic_issues_salsa(&self, uri: &Url) -> Option<Arc<[mir_issues::Issue]>> {
526        let sf = self.source_file(uri)?;
527        self.lazy_load_psr4_imports(uri);
528        self.sync_workspace_files();
529        let ws = self.workspace;
530        Some(
531            self.snapshot_query(move |db| {
532                crate::db::semantic::semantic_issues(db, ws, sf).0.clone()
533            }),
534        )
535    }
536
537    /// Returns `(uri, doc)` for files currently open in the editor.
538    ///
539    /// Resolve `open_urls` (from `Backend::open_urls()`) to parsed docs.
540    /// Files not mirrored in the salsa layer are filtered out silently.
541    pub fn docs_for(&self, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
542        open_urls
543            .iter()
544            .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
545            .collect()
546    }
547
548    /// `(primary, doc)` first, then every other open file's parsed doc.
549    /// The `open_urls` slice should include `uri` — this helper filters it out.
550    pub fn doc_with_others(
551        &self,
552        uri: &Url,
553        doc: Arc<ParsedDoc>,
554        open_urls: &[Url],
555    ) -> Vec<(Url, Arc<ParsedDoc>)> {
556        let mut result = vec![(uri.clone(), doc)];
557        result.extend(self.other_docs(uri, open_urls));
558        result
559    }
560
561    /// Parsed docs for every entry in `open_urls` except `uri`.
562    pub fn other_docs(&self, uri: &Url, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
563        open_urls
564            .iter()
565            .filter(|u| *u != uri)
566            .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
567            .collect()
568    }
569
570    /// Batched salsa fetch for every entry in `open_urls` except `uri`:
571    /// returns each `(uri, ParsedDoc, MethodReturnsMap)` triple in a single
572    /// `snapshot_query` so cancellation retries don't run N times.
573    pub fn other_docs_with_returns(
574        &self,
575        uri: &Url,
576        open_urls: &[Url],
577    ) -> Vec<(Url, Arc<ParsedDoc>, Arc<crate::ast::MethodReturnsMap>)> {
578        let source_files: Vec<(Url, crate::db::input::SourceFile)> = open_urls
579            .iter()
580            .filter(|u| *u != uri)
581            .filter_map(|u| self.source_file(u).map(|sf| (u.clone(), sf)))
582            .collect();
583        if source_files.is_empty() {
584            return Vec::new();
585        }
586        self.snapshot_query(move |db| {
587            source_files
588                .iter()
589                .map(|(u, sf)| {
590                    let doc = crate::db::parse::parsed_doc(db, *sf).0.clone();
591                    let mr = crate::db::method_returns::method_returns(db, *sf).0.clone();
592                    (u.clone(), doc, mr)
593                })
594                .collect()
595        })
596    }
597
598    /// Compact symbol index for every mirrored file.
599    pub fn all_indexes(&self) -> Vec<(Url, Arc<FileIndex>)> {
600        let urls: Vec<Url> = self.source_files.iter().map(|e| e.key().clone()).collect();
601        urls.into_iter()
602            .filter_map(|u| self.get_index_salsa(&u).map(|idx| (u, idx)))
603            .collect()
604    }
605
606    /// Same as `all_indexes` but excludes `uri`.
607    pub fn other_indexes(&self, uri: &Url) -> Vec<(Url, Arc<FileIndex>)> {
608        let urls: Vec<Url> = self
609            .source_files
610            .iter()
611            .filter(|e| e.key() != uri)
612            .map(|e| e.key().clone())
613            .collect();
614        urls.into_iter()
615            .filter_map(|u| self.get_index_salsa(&u).map(|idx| (u, idx)))
616            .collect()
617    }
618
619    /// Parsed documents for every mirrored file (open or background-indexed).
620    /// Suitable for full-scan operations: find-references, rename,
621    /// call_hierarchy, code_lens.
622    pub fn all_docs_for_scan(&self) -> Vec<(Url, Arc<ParsedDoc>)> {
623        let urls: Vec<Url> = self.source_files.iter().map(|e| e.key().clone()).collect();
624        urls.into_iter()
625            .filter_map(|u| self.get_doc_salsa(&u).map(|d| (u, d)))
626            .collect()
627    }
628}
629
/// Run `file_refs` for every workspace file in parallel.
///
/// `db` clones are cheap (they share the same `Arc<Zalsa>` memo store), so
/// results computed on any clone are immediately visible to all others at the
/// same revision.  After this returns, the sequential loop inside `symbol_refs`
/// only does cheap memo lookups instead of running `StatementsAnalyzer` on
/// every file one-by-one.
///
/// Per-task `salsa::Cancelled` is caught and swallowed.  If the revision was
/// bumped, the main thread's next salsa call inside `symbol_refs` will raise
/// `Cancelled` too and `snapshot_query` retries the whole operation from
/// scratch.  If the revision was not bumped, any file whose task was cancelled
/// before completion simply has no memo entry and `symbol_refs`'s sequential
/// loop recomputes it.
fn warm_file_refs_parallel(
    db: &crate::db::analysis::RootDatabase,
    ws: crate::db::input::Workspace,
) {
    let files: Vec<_> = ws.files(db).iter().copied().collect();
    // Pre-clone one snapshot per file before entering the scope.
    // RootDatabase: Send (ZalsaLocal owns its RefCell; Arc<Zalsa> is Sync),
    // but RootDatabase: !Sync, so we must avoid sharing &RootDatabase across
    // threads.  Collecting owned clones first and moving each into its task
    // requires only Send, not Sync.
    let snaps: Vec<crate::db::analysis::RootDatabase> = files.iter().map(|_| db.clone()).collect();
    rayon::scope(move |s| {
        for (sf, snap) in files.into_iter().zip(snaps) {
            s.spawn(move |_| {
                // AssertUnwindSafe: a cancelled task's snapshot is dropped
                // entirely; no state outlives the unwind.
                let _ = salsa::Cancelled::catch(std::panic::AssertUnwindSafe(|| {
                    crate::db::refs::file_refs(&snap, ws, sf);
                }));
            });
        }
    });
}
665
#[cfg(test)]
mod tests {
    use super::*;

    /// Test-fixture helper: build a `file://` URL from an absolute path.
    fn uri(path: &str) -> Url {
        Url::parse(&format!("file://{path}")).unwrap()
    }

    /// Phase E4: open-file state lives on `Backend`, not `DocumentStore`.
    /// Tests that need to simulate "file is open" just mirror the text into
    /// the salsa input — the open/closed distinction is enforced by the
    /// caller (Backend) in production.
    fn open(store: &DocumentStore, u: Url, text: String) {
        store.mirror_text(&u, &text);
    }

    #[test]
    fn salsa_codebase_matches_imperative_codebase() {
        // Parity check for Phase C step 3: the salsa-built codebase should
        // contain exactly the same class/interface/function FQNs as one
        // built imperatively via DefinitionCollector against a fresh
        // mir_codebase::Codebase.
        let store = DocumentStore::new();
        let sources = [
            (
                "/a.php",
                "<?php\nnamespace A;\nclass Foo {}\ninterface IX {}",
            ),
            (
                "/b.php",
                "<?php\nnamespace B;\nfunction bar(): int { return 1; }",
            ),
            ("/c.php", "<?php\nnamespace C;\nenum Color { case Red; }"),
        ];
        for (p, src) in &sources {
            open(&store, uri(p), src.to_string());
        }

        let salsa_cb = store.get_codebase_salsa();

        // Imperative baseline: collect definitions file-by-file, then freeze.
        let imperative_cb = mir_codebase::Codebase::new();
        for (p, src) in &sources {
            let (doc, _) = crate::diagnostics::parse_document(src);
            let file: Arc<str> = Arc::from(uri(p).as_str());
            let map = php_rs_parser::source_map::SourceMap::new(src);
            let c =
                mir_analyzer::collector::DefinitionCollector::new(&imperative_cb, file, src, &map);
            let _ = c.collect(doc.program());
        }
        imperative_cb.finalize();

        for fqn in ["A\\Foo", "A\\IX", "C\\Color"] {
            assert_eq!(
                salsa_cb.type_exists(fqn),
                imperative_cb.type_exists(fqn),
                "parity mismatch on type {fqn}"
            );
            assert!(salsa_cb.type_exists(fqn), "{fqn} missing from salsa cb");
        }
        assert_eq!(
            salsa_cb.function_exists("B\\bar"),
            imperative_cb.function_exists("B\\bar"),
        );
        assert!(salsa_cb.function_exists("B\\bar"));
    }

    #[test]
    fn index_registers_file_in_salsa() {
        let store = DocumentStore::new();
        store.index(uri("/lib.php"), "<?php\nfunction lib_fn() {}");
        let idx = store.get_index_salsa(&uri("/lib.php")).unwrap();
        assert_eq!(idx.functions.len(), 1);
        assert_eq!(idx.functions[0].name, "lib_fn");
    }

    #[test]
    fn remove_drops_salsa_input() {
        let store = DocumentStore::new();
        store.index(uri("/lib.php"), "<?php");
        store.remove(&uri("/lib.php"));
        assert!(store.get_index_salsa(&uri("/lib.php")).is_none());
    }

    #[test]
    fn all_indexes_includes_every_mirrored_file() {
        // Both the "open" (mirror_text) and background `index` paths must
        // land in the same salsa-backed index set.
        let store = DocumentStore::new();
        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
        store.index(uri("/b.php"), "<?php\nfunction b() {}");
        assert_eq!(store.all_indexes().len(), 2);
    }

    #[test]
    fn other_indexes_excludes_current_uri() {
        let store = DocumentStore::new();
        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
        open(&store, uri("/b.php"), "<?php\nfunction b() {}".to_string());
        assert_eq!(store.other_indexes(&uri("/a.php")).len(), 1);
    }

    #[test]
    fn other_docs_excludes_current_uri() {
        let store = DocumentStore::new();
        let ua = uri("/a.php");
        let ub = uri("/b.php");
        open(&store, ua.clone(), "<?php\nfunction a() {}".to_string());
        open(&store, ub.clone(), "<?php\nfunction b() {}".to_string());
        let open_urls = vec![ua.clone(), ub];
        assert_eq!(store.other_docs(&ua, &open_urls).len(), 1);
    }

    #[test]
    fn evict_token_cache_removes_entry() {
        let store = DocumentStore::new();
        let u = uri("/a.php");
        open(&store, u.clone(), "<?php".to_string());
        store.store_token_cache(&u, "id1".to_string(), vec![]);
        assert!(store.get_token_cache(&u, "id1").is_some());
        store.evict_token_cache(&u);
        assert!(store.get_token_cache(&u, "id1").is_none());
    }

    #[test]
    fn index_populates_file_index_with_symbols() {
        let store = DocumentStore::new();
        store.index(uri("/a.php"), "<?php\nfunction hello() {}");
        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
        assert_eq!(idx.functions.len(), 1);
        assert_eq!(idx.functions[0].name, "hello");
    }

    #[test]
    fn open_populates_file_index_with_symbols() {
        let store = DocumentStore::new();
        open(&store, uri("/a.php"), "<?php\nclass Foo {}".to_string());
        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
        assert_eq!(idx.classes.len(), 1);
        assert_eq!(idx.classes[0].name, "Foo");
    }

    // ── Mirror invariants ────────────────────────────────────────────────
    //
    // Every mutation path that changes file text must keep the salsa layer
    // consistent. These tests walk a set-edit-reopen cycle and assert that
    // the salsa-derived `FileIndex` reflects the latest text at each step.

    /// Flatten an index into a sorted list of class + function names, so
    /// mirror assertions are order-independent.
    fn names_of(idx: &FileIndex) -> Vec<String> {
        let mut out: Vec<String> = idx.classes.iter().map(|c| c.name.clone()).collect();
        out.extend(idx.functions.iter().map(|f| f.name.clone()));
        out.sort();
        out
    }

    /// Read the salsa-derived `FileIndex` for `url` straight off the host
    /// (bypassing any store-side caching) and return its symbol names.
    fn salsa_index_names(store: &DocumentStore, url: &Url) -> Vec<String> {
        let sf = store.source_file(url).expect("mirror recorded SourceFile");
        store.with_host(|host| {
            let arc = crate::db::index::file_index(host.db(), sf);
            names_of(arc.get())
        })
    }

    #[test]
    fn mirror_tracks_repeated_edits() {
        let store = DocumentStore::new();
        let u = uri("/mirror.php");

        open(&store, u.clone(), "<?php\nclass A {}".to_string());
        assert_eq!(salsa_index_names(&store, &u), vec!["A".to_string()]);

        open(
            &store,
            u.clone(),
            "<?php\nclass A {}\nclass B {}".to_string(),
        );
        assert_eq!(
            salsa_index_names(&store, &u),
            vec!["A".to_string(), "B".to_string()]
        );

        open(&store, u.clone(), "<?php\nfunction greet() {}".to_string());
        assert_eq!(salsa_index_names(&store, &u), vec!["greet".to_string()]);
    }

    #[test]
    fn mirror_tracks_index_and_index_from_doc() {
        let store = DocumentStore::new();

        // Background `index(url, text)` path.
        let u1 = uri("/bg1.php");
        store.index(u1.clone(), "<?php\nclass Bg1 {}");
        assert_eq!(salsa_index_names(&store, &u1), vec!["Bg1".to_string()]);

        // `index_from_doc(url, &doc, diags)` path (workspace-scan Phase 2).
        let u2 = uri("/bg2.php");
        let (doc, diags) =
            crate::diagnostics::parse_document("<?php\nclass Bg2 {}\nfunction f() {}");
        store.index_from_doc(u2.clone(), &doc, diags);
        assert_eq!(
            salsa_index_names(&store, &u2),
            vec!["Bg2".to_string(), "f".to_string()]
        );
    }

    /// Phase K2 end-to-end: seed a cached slice through `DocumentStore`,
    /// confirm the workspace codebase sees the cached fact, then edit the
    /// text and confirm the cache is cleared (codebase now reflects the
    /// re-parsed text). Exercises `seed_cached_slice` + `mirror_text`'s
    /// `set_cached_slice(None)` invalidation together.
    #[test]
    fn seed_cached_slice_then_edit_invalidates() {
        let store = DocumentStore::new();
        let u = uri("/seed_test.php");

        // Mirror the initial text — classes: "Real".
        store.mirror_text(&u, "<?php\nclass Real {}");

        // Build a cached slice claiming classes: "Seeded", for the same URI.
        let seeded = {
            let src = "<?php\nclass Seeded {}";
            let source_map = php_rs_parser::source_map::SourceMap::new(src);
            let (doc, _) = crate::diagnostics::parse_document(src);
            let collector = mir_analyzer::collector::DefinitionCollector::new_for_slice(
                Arc::<str>::from(u.as_str()),
                src,
                &source_map,
            );
            let (s, _) = collector.collect_slice(doc.program());
            Arc::new(s)
        };
        assert!(store.seed_cached_slice(&u, seeded));

        // Codebase should contain the seeded class, not the real one.
        let cb = store.get_codebase_salsa();
        assert!(cb.type_exists("Seeded"));
        assert!(!cb.type_exists("Real"));

        // Edit: mirror_text flips the text and also clears cached_slice.
        store.mirror_text(&u, "<?php\nclass Edited {}");
        let cb = store.get_codebase_salsa();
        assert!(
            cb.type_exists("Edited"),
            "after edit, codebase must reflect fresh parse"
        );
        assert!(
            !cb.type_exists("Seeded"),
            "mirror_text must clear cached_slice so stale data is gone"
        );
    }

    /// Seeding for a URL that was never mirrored is a no-op (returns `false`)
    /// — avoids silently allocating SourceFiles outside `mirror_text`'s control.
    #[test]
    fn seed_cached_slice_noops_for_unknown_uri() {
        let store = DocumentStore::new();
        let u = uri("/never_mirrored.php");
        let slice = Arc::new(mir_codebase::storage::StubSlice::default());
        assert!(!store.seed_cached_slice(&u, slice));
    }

    /// parsed_cache must stay bounded — inserting more than
    /// `PARSED_CACHE_CAP` unique URLs must not cause unbounded growth.
    /// Eviction is probabilistic, so we only assert the bound, not which
    /// entries survive.
    #[test]
    fn parsed_cache_stays_bounded_under_many_inserts() {
        let store = DocumentStore::new();
        let overflow = PARSED_CACHE_CAP + 100;
        for i in 0..overflow {
            let u = uri(&format!("/cap/file{i}.php"));
            store.index(u.clone(), "<?php\nclass A {}");
            // Force a parsed_cache insert via get_doc_salsa.
            let _ = store.get_doc_salsa(&u);
        }
        assert!(
            store.parsed_cache.len() <= PARSED_CACHE_CAP,
            "parsed_cache grew to {} entries (cap {})",
            store.parsed_cache.len(),
            PARSED_CACHE_CAP
        );
    }

    /// G3: confirms the `parsed_cache` actually hits — two consecutive
    /// `get_doc_salsa` calls on unchanged text return the same `Arc`
    /// (pointer equality), and an edit forces a miss that produces a
    /// different `Arc`.
    #[test]
    fn get_doc_salsa_cache_hits_across_calls() {
        let store = DocumentStore::new();
        let u = uri("/g3_cache.php");
        open(&store, u.clone(), "<?php\nclass G3 {}".to_string());

        let a = store.get_doc_salsa(&u).unwrap();
        let b = store.get_doc_salsa(&u).unwrap();
        assert!(
            Arc::ptr_eq(&a, &b),
            "parsed_cache hit should yield the same Arc across calls"
        );

        open(&store, u.clone(), "<?php\nclass G3b {}".to_string());
        let c = store.get_doc_salsa(&u).unwrap();
        assert!(
            !Arc::ptr_eq(&a, &c),
            "edit should invalidate the parsed_cache entry"
        );
    }

    #[test]
    fn get_doc_salsa_returns_some_for_mirrored_files() {
        // Phase E4: `get_doc_salsa` no longer gates on open-state. The
        // open/closed distinction now lives on `Backend::get_doc`.
        let store = DocumentStore::new();
        let u = uri("/e4_doc.php");
        store.index(u.clone(), "<?php\nclass P {}");
        assert!(store.get_doc_salsa(&u).is_some());
    }

    #[test]
    fn get_salsa_accessors_return_none_for_unknown_uri() {
        let store = DocumentStore::new();
        let u = uri("/never-seen.php");
        assert!(store.get_doc_salsa(&u).is_none());
        assert!(store.get_index_salsa(&u).is_none());
        assert!(store.get_method_returns_salsa(&u).is_none());
    }

    /// Phase E1: concurrent readers and writers must not deadlock, panic, or
    /// return stale data. Writers briefly bump inputs while readers are
    /// running on cloned snapshots; any `salsa::Cancelled` raised on the
    /// reader side must be caught and retried by `snapshot_query`.
    #[test]
    fn concurrent_reads_and_writes_do_not_panic() {
        use std::sync::Arc;
        use std::thread;
        use std::time::{Duration, Instant};

        let store = Arc::new(DocumentStore::new());
        let urls: Vec<Url> = (0..8).map(|i| uri(&format!("/f{i}.php"))).collect();
        for (i, u) in urls.iter().enumerate() {
            open(&store, u.clone(), format!("<?php\nclass C{i} {{}}"));
        }

        let deadline = Instant::now() + Duration::from_millis(400);
        let mut handles = Vec::new();

        // Writer thread: keep bumping every file's text.
        {
            let store = Arc::clone(&store);
            let urls = urls.clone();
            handles.push(thread::spawn(move || {
                let mut rev = 0u32;
                while Instant::now() < deadline {
                    for u in &urls {
                        // `{{}}` escapes to a literal `{}`, so every write
                        // produces `class C{}`; only the `// rev` comment
                        // varies — the point is text churn, not names.
                        let text = format!("<?php\nclass C{{}}\n// rev {rev}");
                        store.mirror_text(u, &text);
                    }
                    rev += 1;
                }
            }));
        }

        // Reader threads: hammer the salsa accessors. The test only asserts
        // "no panic/deadlock"; individual lookups may legitimately miss.
        for _ in 0..4 {
            let store = Arc::clone(&store);
            let urls = urls.clone();
            handles.push(thread::spawn(move || {
                while Instant::now() < deadline {
                    for u in &urls {
                        let _ = store.get_doc_salsa(u);
                        let _ = store.get_index_salsa(u);
                    }
                    let _ = store.get_codebase_salsa();
                    let _ = store.get_symbol_refs_salsa("C0");
                }
            }));
        }

        for h in handles {
            h.join().expect("no panic under concurrent read/write");
        }
    }

    /// Phase L: warm-up must not error and must pre-populate the `file_refs`
    /// memo. We can't cheaply observe salsa memo state from outside, so we
    /// instead call `warm_reference_index` and then verify that a real
    /// reference lookup returns the expected result — the warm-up running
    /// without panic across a realistic two-file workspace is the load-bearing
    /// guarantee.
    #[test]
    fn warm_reference_index_does_not_panic_and_keeps_lookups_correct() {
        let store = DocumentStore::new();
        open(
            &store,
            uri("/wa.php"),
            "<?php\nfunction a() { b(); }".to_string(),
        );
        open(
            &store,
            uri("/wb.php"),
            "<?php\nfunction b() {}\na();".to_string(),
        );
        store.warm_reference_index();
        let refs_to_a = store.get_symbol_refs_salsa("a");
        assert!(
            refs_to_a
                .iter()
                .any(|(uri, _, _, _)| uri.contains("wb.php")),
            "reference to a() from /wb.php should be discoverable after warm-up, got {refs_to_a:?}"
        );
    }

    /// PSR-4 lazy-loading: `get_semantic_issues_salsa` must not emit
    /// `UndefinedClass` for a class that is PSR-4-resolvable on disk, even
    /// when the dependency file is not yet in `source_files`.
    #[test]
    fn psr4_lazy_load_suppresses_undefined_class() {
        let tmp = tempfile::tempdir().unwrap();

        // Write Entity.php to disk (not mirrored into the store).
        std::fs::create_dir_all(tmp.path().join("src/Model")).unwrap();
        std::fs::write(
            tmp.path().join("src/Model/Entity.php"),
            "<?php\nnamespace App\\Model;\nclass Entity {}\n",
        )
        .unwrap();

        // Write composer.json so Psr4Map::load can build the map.
        std::fs::write(
            tmp.path().join("composer.json"),
            r#"{"autoload":{"psr-4":{"App\\":"src/"}}}"#,
        )
        .unwrap();

        let store = DocumentStore::new();

        // Inject a PSR-4 map pointing at the tmp dir.
        *store.psr4.write().unwrap() = crate::autoload::Psr4Map::load(tmp.path());

        // Mirror the consuming file (Entity not yet in source_files).
        // Uses Entity as a parameter type hint — the analyzer resolves these
        // through use statements, so this exercises the full PSR-4 lazy-load path.
        let handler_url = Url::from_file_path(tmp.path().join("src/Service/Handler.php")).unwrap();
        store.mirror_text(
            &handler_url,
            "<?php\nnamespace App\\Service;\nuse App\\Model\\Entity;\nfunction handle(Entity $e): Entity { return $e; }\n",
        );

        let issues = store.get_semantic_issues_salsa(&handler_url).unwrap();
        let undef: Vec<_> = issues
            .iter()
            .filter(|i| matches!(i.kind, mir_issues::IssueKind::UndefinedClass { .. }))
            .collect();
        assert!(
            undef.is_empty(),
            "PSR-4 lazy-loading must prevent UndefinedClass for App\\Model\\Entity; got: {undef:?}"
        );
    }
}