Skip to main content

php_lsp/document/
document_store.rs

1use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
2use std::sync::{Arc, Mutex, OnceLock};
3
4use arc_swap::ArcSwap;
5
6use dashmap::{DashMap, DashSet};
7use salsa::Setter;
8use tower_lsp::lsp_types::{SemanticToken, Url};
9
10use crate::db::analysis::AnalysisHost;
11use crate::db::input::{FileText, Workspace, find_source_file};
12use crate::document::ast::ParsedDoc;
13use crate::index::file_index::FileIndex;
14use crate::lang::autoload::Psr4Map;
15
/// Upper bound on `parsed_cache` entries. Matched to the `lru = 2048` on
/// `parsed_doc` in `src/db/parse.rs` so the secondary Arc retention can't
/// pin more ASTs alive than salsa's memo already bounds.
///
/// When the cache has reached this size at insert time, roughly half of the
/// entries are dropped in arbitrary (DashMap iteration) order before the new
/// entry is published — see [`DocumentStore::insert_parsed_cache`].
const PARSED_CACHE_CAP: usize = 2048;
21
/// Salsa-input wrapper plus the lock-free read-through caches layered on top
/// of it. All fields use interior mutability, so every method takes `&self`.
pub struct DocumentStore {
    /// Cached semantic tokens per document: (result_id, tokens).
    /// Used to compute incremental deltas for `textDocument/semanticTokens/full/delta`.
    /// Tokens are stored in an `Arc` so the delta-path lookup can hand the
    /// previous snapshot back without cloning the inner Vec.
    token_cache: DashMap<Url, (String, Arc<Vec<SemanticToken>>)>,

    // ── Salsa-input storage ────────────────────────────────────────────────
    // Phase E4: `DocumentStore` is now a pure salsa-input wrapper. Open-file
    // state (live text, version token, parse-diagnostics cache) lives on
    // `Backend` in its `open_files` map; the set of active files tracked by
    // salsa is `file_texts.keys()` minus `deleted_uris`.
    /// Mutex — held briefly to clone the database for reads and to mutate
    /// it for writes. Per-thread salsa state (`zalsa_local`) is `!Sync`,
    /// which rules out `RwLock<AnalysisHost>`. Readers instead snapshot the
    /// db (cheap — storage is `Arc<Zalsa>`) and run queries on the clone
    /// with the lock released, giving real read/read parallelism. Writers
    /// during an in-flight read bump the shared revision; the reader raises
    /// `salsa::Cancelled` on its next query call and `snapshot_query` below
    /// retries with a fresh snapshot.
    host: Mutex<AnalysisHost>,
    /// `Url -> FileText` lookup. One immortal `FileText` salsa input per unique
    /// URI ever seen. Text edits mutate the existing handle; delete/reopen cycles
    /// reuse it rather than allocating a new input each time.
    file_texts: DashMap<Url, FileText>,
    /// URIs that have been removed. Re-opening a deleted URI un-deletes it here
    /// and reuses the existing `FileText` handle.
    deleted_uris: DashSet<Url>,
    /// G2: lock-free mirror of each `FileText`'s last-set text. Lets
    /// `mirror_text` dedup repeated no-op updates (common during workspace
    /// scan and `did_open` for already-indexed files) without taking
    /// `host.lock()`. Written by `mirror_text_arc` right after the salsa
    /// input is set (or confirmed unchanged), so it tracks the salsa text
    /// for the purposes of byte-equality comparison.
    text_cache: DashMap<Url, Arc<str>>,
    /// G3: cross-revision read-through cache for `parsed_doc`. Keyed on
    /// `Url`, stored value is `(text_arc, Arc<ParsedDoc>)` — the text Arc
    /// captured at parse time. On read, compare against `text_cache[uri]`
    /// via `Arc::ptr_eq`; a match guarantees the cached ParsedDoc matches
    /// the current salsa revision's text input, so the query can return
    /// without snapshotting the db or invoking salsa at all. A miss
    /// (different pointer, stale or absent entry) falls through to
    /// `snapshot_query`. Self-evicts on text change — no writer-side
    /// invalidation is required, which avoids the TOCTOU window where a
    /// concurrent reader could re-insert a stale entry after a writer's
    /// eviction.
    ///
    /// Size-bounded at [`PARSED_CACHE_CAP`] — see `insert_parsed_cache`.
    /// Without this bound, every workspace file read-through would pin
    /// its bumpalo arena alive regardless of salsa's `lru = 2048` on the
    /// `parsed_doc` memo.
    parsed_cache: DashMap<Url, (Arc<str>, Arc<ParsedDoc>)>,
    /// Cross-request read-through cache for a file's mir body analysis. Keyed
    /// on `Url`; the stored value is `(source_arc, decl_ver, Arc<FileAnalysis>)`,
    /// where `source_arc` is the source Arc captured at analysis time and
    /// `decl_ver` is the value of `decl_version` at that moment.
    ///
    /// A cache hit requires both that `source_arc` is pointer-equal
    /// (`Arc::ptr_eq`) to the current `doc.source_arc()` AND that `decl_ver`
    /// equals the current `decl_version` counter. The pointer check makes the
    /// entry self-evict on edit (same discipline as `parsed_cache`); the
    /// version check ensures that a body-only edit to file A doesn't silently
    /// serve a stale analysis for file B whose cached entry predates a
    /// declaration change in an unrelated file C.
    ///
    /// `FileAnalysis` carries BOTH the issues consumed by diagnostics and the
    /// per-expression `ResolvedSymbol`s consumed by position features (hover,
    /// type-definition, completion, inlay hints). Retaining it means mir's
    /// `FileAnalyzer::analyze` runs once per content revision instead of being
    /// re-run (for diagnostics) and then re-derived in a weaker form (for
    /// position queries). Bounded by the set of analyzed files (open files plus
    /// their open dependents); explicitly evicted in [`DocumentStore::remove`].
    analysis_cache: DashMap<Url, (Arc<str>, u64, Arc<mir_analyzer::FileAnalysis>)>,
    /// Monotonically increasing counter bumped whenever any file's `FileIndex`
    /// (declaration-level info) changes. Cache entries that embed an older
    /// version are considered stale and are recomputed on the next request.
    decl_version: AtomicU64,
    /// Last-seen `FileIndex` per URI. Used to decide whether a re-analysis
    /// produced a declaration-level change (→ bump `decl_version`) or was a
    /// body-only edit (→ leave `decl_version` unchanged so other files' cached
    /// analyses remain valid).
    decl_fingerprints: DashMap<Url, Arc<FileIndex>>,
    /// Cross-request cache for the whole-doc completion [`crate::types::type_map::TypeMap`]
    /// (`TypeMap::from_doc_with_meta`). Unlike `analysis_cache`, validity is
    /// purely per-file (the map reads only this doc plus PHPStorm meta), so the
    /// entry needs no cross-file invalidation: it is fresh when its captured
    /// source `Arc` is pointer-equal to the doc's current `source_arc()` and
    /// the meta pointer is unchanged, self-evicting on any content/meta edit.
    type_map_cache: DashMap<Url, (Arc<str>, usize, Arc<crate::types::type_map::TypeMap>)>,
    /// Set to `true` when the set of tracked files changes (add or remove).
    /// `sync_workspace_files` skips the collect/sort/compare path when this
    /// is `false`, avoiding a mutex acquisition on every LSP request.
    workspace_files_dirty: AtomicBool,
    /// Workspace salsa input. Tracks the full set of `FileText` inputs that
    /// participate in whole-program queries (`codebase`, `file_refs`).
    /// Re-synced from `file_texts` (minus `deleted_uris`) on demand by
    /// `sync_workspace_files`.
    workspace: Workspace,
    /// Shared PSR-4 namespace-to-path map. Shared with `Backend` via `Arc`
    /// so updates from `initialized` (when composer.json is loaded) are
    /// visible here without any additional wiring. `ArcSwap` makes reads
    /// lock-free — a poisoned guard can no longer crash a request handler.
    psr4: Arc<ArcSwap<Psr4Map>>,
    /// mir-analyzer's `AnalysisSession` — owns the workspace MirDb, runs
    /// Pass-2 analysis, and lazy-loads dependencies via PSR-4. Built lazily
    /// on first use; rebuilt when PHP version changes. The tuple records the
    /// PHP version the cached session was built for.
    analysis_session: Mutex<Option<(mir_analyzer::PhpVersion, Arc<mir_analyzer::AnalysisSession>)>>,
    /// Cache directory shared with the workspace file-index cache. When set,
    /// new `AnalysisSession`s are built with `with_cache_dir` so that stub
    /// parsing results survive server restarts.
    session_cache_dir: OnceLock<std::path::PathBuf>,
    /// URIs of autoload.files entries from composer.json. These define global
    /// helper functions (e.g. tap, class_uses_recursive in Laravel) that are
    /// not discoverable by namespace walk. Pre-ingested into the AnalysisSession
    /// before each file analysis so mir doesn't emit false UndefinedFunction.
    autoload_uris: std::sync::RwLock<Vec<Url>>,
    /// On-demand `FileIndex` store for vendor files loaded lazily via PSR-4
    /// navigation. Vendor is excluded from the eager workspace scan, so files
    /// ingested by `psr4_method_goto` are not in the salsa workspace_index;
    /// this map fills that gap for hierarchy traversal. Populated by
    /// `cache_vendor_index`; reads via `get_vendor_index`.
    vendor_index_cache: DashMap<Url, Arc<FileIndex>>,
}
144
145impl Default for DocumentStore {
146    fn default() -> Self {
147        Self::new()
148    }
149}
150
151impl DocumentStore {
152    pub fn new() -> Self {
153        let host = AnalysisHost::new();
154        let workspace = Workspace::new(
155            host.db(),
156            Arc::<[(Arc<str>, FileText)]>::from(Vec::new()),
157            mir_analyzer::PhpVersion::LATEST,
158        );
159        DocumentStore {
160            token_cache: DashMap::new(),
161            host: Mutex::new(host),
162            file_texts: DashMap::new(),
163            deleted_uris: DashSet::new(),
164            text_cache: DashMap::new(),
165            parsed_cache: DashMap::new(),
166            analysis_cache: DashMap::new(),
167            decl_version: AtomicU64::new(0),
168            decl_fingerprints: DashMap::new(),
169            type_map_cache: DashMap::new(),
170            workspace_files_dirty: AtomicBool::new(true),
171            workspace,
172            psr4: Arc::new(ArcSwap::from_pointee(Psr4Map::empty())),
173            analysis_session: Mutex::new(None),
174            session_cache_dir: OnceLock::new(),
175            autoload_uris: std::sync::RwLock::new(Vec::new()),
176            vendor_index_cache: DashMap::new(),
177        }
178    }
179
180    /// Set the directory used to persist stub-parse and analysis results across
181    /// server restarts.  Must be called before the first `analysis_session` use;
182    /// subsequent calls are silently ignored (`OnceLock` semantics).
183    pub fn set_session_cache_dir(&self, dir: std::path::PathBuf) {
184        let _ = self.session_cache_dir.set(dir);
185    }
186
187    /// Register URIs discovered from composer.json `autoload.files` entries.
188    /// These PHP files define global helper functions (e.g. `tap()` in Laravel)
189    /// that are not class-resolvable via PSR-4. Clears `analysis_cache` so the
190    /// next per-file analysis pre-ingests them into the AnalysisSession before
191    /// running mir's FileAnalyzer.
192    pub fn set_autoload_uris(&self, uris: Vec<Url>) {
193        *self.autoload_uris.write().unwrap() = uris;
194        self.analysis_cache.clear();
195    }
196
197    /// Get or build the `AnalysisSession` for the given PHP version. Rebuilds
198    /// when the version changes (e.g. user flipped config). The session owns
199    /// its own salsa db and AnalysisCache; lazy-loads vendor files via the
200    /// shared PSR-4 map.
201    pub fn analysis_session(
202        &self,
203        php_version: mir_analyzer::PhpVersion,
204    ) -> Arc<mir_analyzer::AnalysisSession> {
205        let mut guard = self.analysis_session.lock().unwrap();
206        if let Some((cached_ver, session)) = guard.as_ref()
207            && *cached_ver == php_version
208        {
209            return Arc::clone(session);
210        }
211        // Build a fresh session. Hand it the shared PSR-4 map so it can
212        // lazy-resolve `UndefinedClass` candidates without us having to mirror
213        // every vendor file upfront.
214        let resolver: Arc<dyn mir_analyzer::ClassResolver> = self.psr4.load_full();
215        let mut builder =
216            mir_analyzer::AnalysisSession::new(php_version).with_class_resolver(resolver);
217        if let Some(dir) = self.session_cache_dir.get() {
218            builder = builder.with_cache_dir(dir);
219        }
220        let session = Arc::new(builder);
221        session.ensure_all_stubs();
222        *guard = Some((php_version, Arc::clone(&session)));
223        session
224    }
225
226    /// Current PHP version tracked by the workspace input.
227    pub fn workspace_php_version(&self) -> mir_analyzer::PhpVersion {
228        self.with_host(|h| self.workspace.php_version(h.db()))
229    }
230
231    /// Return the `Arc<ArcSwap<Psr4Map>>` so callers can share it.
232    /// `Backend` clones this arc at construction time so writes
233    /// (e.g. loading composer.json on `initialized`) are immediately visible
234    /// to PSR-4 resolution during analysis without extra plumbing.
235    pub fn psr4_arc(&self) -> Arc<ArcSwap<Psr4Map>> {
236        Arc::clone(&self.psr4)
237    }
238
239    /// Mirror a file's current text into the salsa layer. Creates the
240    /// `FileText` input on first sight, otherwise updates `text` on the
241    /// existing input (bumping the salsa revision so downstream queries
242    /// invalidate).
243    pub fn mirror_text(&self, uri: &Url, text: &str) {
244        // G2 fast path: compare against the lock-free text cache. When the
245        // new text byte-matches what we already mirrored, skip the host
246        // mutex entirely. Common during workspace scan + `did_open` for
247        // unchanged files, where most threads would otherwise serialise on
248        // `host.lock()` just to confirm a no-op.
249        if let Some(cached) = self.text_cache.get(uri)
250            && **cached == *text
251            && !self.deleted_uris.contains(uri)
252            && self.file_texts.contains_key(uri)
253        {
254            return;
255        }
256        self.mirror_text_arc(uri, Arc::from(text))
257    }
258
259    /// Like [`mirror_text`] but takes an already-allocated `Arc<str>`.
260    ///
261    /// Callers that already hold an `Arc<str>` (e.g. `ingest_from_doc` reusing
262    /// `ParsedDoc::source_arc()`) use this to avoid a second allocation and to
263    /// ensure `text_cache` and `parsed_cache` hold the same Arc pointer —
264    /// enabling `Arc::ptr_eq` validation in `get_parsed_cached`.
265    pub fn mirror_text_arc(&self, uri: &Url, text_arc: Arc<str>) {
266        if let Some(ft) = self.file_texts.get(uri).map(|e| *e) {
267            self.deleted_uris.remove(uri);
268            // Slow path: re-check inside the mutex. Salsa's `set_text`
269            // unconditionally bumps the revision, so every spurious setter
270            // invalidates every downstream query.
271            let mut host = self.host.lock().unwrap();
272            let current: Arc<str> = ft.text(host.db());
273            if *current == *text_arc {
274                drop(host);
275                self.text_cache.insert(uri.clone(), current);
276                return;
277            }
278            ft.set_text(host.db_mut()).to(text_arc.clone());
279            // Phase K2: any text change invalidates a previously-seeded
280            // cached index. Only bump the revision when a cached index is
281            // actually present — an unconditional set would cause two
282            // revision bumps per edit (one for text, one for cached_index),
283            // which needlessly cancels in-flight `file_index` queries on
284            // every keystroke.
285            if ft.cached_index(host.db()).is_some() {
286                ft.set_cached_index(host.db_mut()).to(None);
287            }
288            drop(host);
289            self.text_cache.insert(uri.clone(), text_arc);
290            // Evict only this file's analysis. Declaration-level changes (which
291            // invalidate other files' cached analyses) are detected lazily in
292            // `cached_analysis` by comparing the new `FileIndex` against the
293            // stored fingerprint; if changed, `decl_version` is bumped and other
294            // files' cache entries (which carry the old version) become stale.
295            // Body-only edits leave `decl_version` unchanged so sibling files
296            // are served from cache without re-analysis.
297            self.analysis_cache.remove(uri);
298        } else {
299            let is_vendor = uri.as_str().contains("/vendor/");
300            let ft = {
301                let mut host = self.host.lock().unwrap();
302                let ft = FileText::new(host.db(), text_arc.clone(), None);
303                if is_vendor {
304                    // Vendor files never change in a session — mark their text
305                    // as HIGH durability so salsa skips re-validating
306                    // parsed_doc/file_index for them on every user edit.
307                    ft.set_text(host.db_mut())
308                        .with_durability(salsa::Durability::HIGH)
309                        .to(Arc::clone(&text_arc));
310                }
311                ft
312            };
313            self.file_texts.insert(uri.clone(), ft);
314            self.text_cache.insert(uri.clone(), text_arc);
315            self.workspace_files_dirty.store(true, Ordering::Release);
316            // A newly-ingested file may resolve previously-unresolved references
317            // in other files. Cross-file invalidation happens lazily: the first
318            // `cached_analysis` call for this file sees no fingerprint (old_fp =
319            // None), treats it as a declaration change, and bumps `decl_version`,
320            // making every other file's cache entry stale at that point.
321            // No eager clear needed — other files' entries are still valid until
322            // this file's declarations are first observed.
323        }
324    }
325
326    /// Return the `FileText` handle for a URL, if active (not deleted).
327    #[cfg(test)]
328    pub fn source_file(&self, uri: &Url) -> Option<FileText> {
329        if self.deleted_uris.contains(uri) {
330            return None;
331        }
332        self.file_texts.get(uri).map(|e| *e)
333    }
334
335    /// Phase K2: pre-seed a `FileIndex` loaded from the on-disk cache onto
336    /// the `FileText` input for `uri`. The next `file_index` call for that
337    /// file returns the cached index directly, skipping parse + extract.
338    ///
339    /// Must be called **before** any `file_index(db, sf)` call for this file —
340    /// otherwise salsa has already memoized the fresh-parse result and setting
341    /// `cached_index` now would only bump the revision without using the cache.
342    /// In practice the workspace-scan path seeds immediately after `mirror_text`
343    /// and before any query runs.
344    ///
345    /// Returns `false` when `uri` was not mirrored (caller should mirror
346    /// first); returns `true` on success.
347    pub fn seed_cached_index(&self, uri: &Url, index: Arc<FileIndex>) -> bool {
348        let Some(ft) = self.file_texts.get(uri).map(|e| *e) else {
349            return false;
350        };
351        let mut host = self.host.lock().unwrap();
352        ft.set_cached_index(host.db_mut()).to(Some(index));
353        true
354    }
355
356    /// Run `f` with a borrow of the `AnalysisHost`. Used by tests and by the
357    /// upcoming `*_salsa` accessors to query the salsa layer.
358    pub fn with_host<R>(&self, f: impl FnOnce(&AnalysisHost) -> R) -> R {
359        let host = self.host.lock().unwrap();
360        f(&host)
361    }
362
363    /// Phase E1: take a brief lock, clone the salsa database, release the
364    /// lock. Queries then run on the cloned `RootDatabase` without blocking
365    /// writers or other readers. Salsa's `Storage<Self>` is reference-counted
366    /// (`Arc<Zalsa>`), so the clone is cheap — it shares memoized data and
367    /// the cancellation flag with the host's db.
368    fn snapshot_db(&self) -> crate::db::analysis::RootDatabase {
369        let host = self.host.lock().unwrap();
370        host.db().clone()
371    }
372
373    /// Run a query on a fresh snapshot, catching `salsa::Cancelled` (raised
374    /// when a concurrent writer advances the revision) and retrying with a
375    /// new snapshot. Writers hold the mutex only long enough to bump input
376    /// values, so a handful of retries is more than enough in practice; we
377    /// cap at 8 to avoid pathological livelock under sustained write pressure.
378    fn snapshot_query<R>(&self, f: impl Fn(&crate::db::analysis::RootDatabase) -> R + Clone) -> R {
379        use std::panic::AssertUnwindSafe;
380        for _ in 0..8 {
381            let db = self.snapshot_db();
382            let f = f.clone();
383            match salsa::Cancelled::catch(AssertUnwindSafe(move || f(&db))) {
384                Ok(r) => return r,
385                Err(_) => continue,
386            }
387        }
388        // Last-resort attempt: take the mutex for the whole query so no
389        // writer can race us. Much slower, but guaranteed to make progress.
390        let host = self.host.lock().unwrap();
391        f(host.db())
392    }
393
394    /// Evict the semantic-tokens cache for `uri`. Called by Backend when a
395    /// file is closed; diff-based tokens computed against the old revision
396    /// are no longer meaningful.
397    pub fn evict_token_cache(&self, uri: &Url) {
398        self.token_cache.remove(uri);
399    }
400
    /// Number of `FileText` handles registered in `file_texts`. `remove`
    /// only records a URI in `deleted_uris` and keeps its handle, so removed
    /// URIs are still counted here.
    ///
    /// Test-only.
    #[cfg(test)]
    pub fn source_files_len(&self) -> usize {
        self.file_texts.len()
    }

    /// Return the `FileIndex` for `uri` by running `file_index` on a salsa
    /// snapshot.  Returns `None` when `uri` has not been mirrored, has been
    /// removed, or is not found in the workspace by `find_source_file`.
    ///
    /// Test-only — production code uses the salsa query directly via
    /// `snapshot_query`.
    #[cfg(test)]
    pub fn snapshot_query_file_index(
        &self,
        uri: &Url,
    ) -> Option<crate::index::file_index::FileIndex> {
        if self.deleted_uris.contains(uri) {
            return None;
        }
        if !self.file_texts.contains_key(uri) {
            return None;
        }
        // Make sure `Workspace::files` reflects the current file set before
        // looking the file up through the workspace.
        self.sync_workspace_files();
        let uri_str: Arc<str> = Arc::from(uri.as_str());
        let ws = self.workspace;
        self.snapshot_query(move |db| {
            let sf = find_source_file(db, ws, &uri_str)?;
            Some(crate::db::index::file_index(db, sf).get().clone())
        })
    }
430
431    /// Register a file in the salsa layer without marking it open.
432    ///
433    /// Salsa's `parsed_doc` query parses lazily on first read; diagnostics
434    /// are populated by `did_open` when the editor actually opens the file.
435    pub fn ingest(&self, uri: Url, text: &str) {
436        self.mirror_text(&uri, text);
437    }
438
439    /// Index a file using an already-parsed `ParsedDoc`, avoiding a second parse.
440    ///
441    /// Prefer this over [`ingest`] when the caller already has a `ParsedDoc` (e.g.
442    /// after running `DefinitionCollector` during workspace scan). Reuses the
443    /// `Arc<str>` already owned by `doc` so that `text_cache` and `SourceFile::text`
444    /// share the same pointer — enabling the `Arc::ptr_eq` fast path in
445    /// `get_parsed_cached` on the first subsequent salsa query, without an extra
446    /// `Arc::from(source)` allocation.
447    pub fn ingest_from_doc(&self, uri: Url, doc: &ParsedDoc) {
448        self.mirror_text_arc(&uri, doc.source_arc());
449    }
450
451    pub fn remove(&self, uri: &Url) {
452        self.token_cache.remove(uri);
453        // Mark the URI as deleted but keep the `source_files` entry so the
454        // salsa `SourceFile` handle remains alive. Re-opening the file reuses
455        // the same handle instead of calling `SourceFile::new()` again, which
456        // would create a new orphaned salsa input on every delete-reopen cycle.
457        self.deleted_uris.insert(uri.clone());
458        self.workspace_files_dirty.store(true, Ordering::Release);
459        // Sync workspace files so the deleted file is removed from the salsa
460        // `Workspace::files` list and won't appear in workspace symbols etc.
461        self.sync_workspace_files();
462        self.text_cache.remove(uri);
463        self.parsed_cache.remove(uri);
464        self.analysis_cache.remove(uri);
465        self.decl_fingerprints.remove(uri);
466        self.type_map_cache.remove(uri);
467        // Also evict the file from the `AnalysisSession`'s internal state so
468        // workspace symbol queries don't keep returning the deleted file's
469        // declarations. Cheap when the session hasn't ingested this file.
470        let guard = self.analysis_session.lock().unwrap();
471        if let Some((_, session)) = guard.as_ref() {
472            session.invalidate_file(uri.as_str());
473        }
474    }
475
476    // ── Salsa-backed accessors ─────────────────────────────────────────────
477    //
478    // Reads run the memoized `parsed_doc` / `file_index` queries, parsing
479    // only on first access per revision. These are the production accessors
480    // used by every handler.
481
    /// Salsa-backed parsed document for any mirrored file (open or
    /// background-indexed).
    ///
    /// Thin wrapper over `get_parsed_cached`, so a read is served from the
    /// lock-free `parsed_cache` when the cached entry's text is still
    /// current. Returns `None` only when the file is not known to the store.
    /// Callers that want "only if open" should gate on `Backend::open_files`
    /// at the call site (see `Backend::get_doc`).
    pub fn get_doc_salsa(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
        self.get_parsed_cached(uri)
    }
491
492    /// Salsa-backed compact symbol index.
493    pub fn get_index_salsa(&self, uri: &Url) -> Option<Arc<FileIndex>> {
494        if self.deleted_uris.contains(uri) {
495            return None;
496        }
497        if !self.file_texts.contains_key(uri) {
498            return None;
499        }
500        self.sync_workspace_files();
501        let uri_str: Arc<str> = Arc::from(uri.as_str());
502        let ws = self.workspace;
503        self.snapshot_query(move |db| {
504            let sf = find_source_file(db, ws, &uri_str)?;
505            Some(crate::db::index::file_index(db, sf).0.clone())
506        })
507    }
508
509    /// Salsa-backed pre-computed symbol map (name → Vec<SymbolEntry>).
510    /// Memoized per revision: stable files serve from cache in O(1).
511    pub fn get_symbol_map_salsa(
512        &self,
513        uri: &Url,
514    ) -> Option<Arc<crate::types::symbol_map::SymbolMap>> {
515        if self.deleted_uris.contains(uri) {
516            return None;
517        }
518        if !self.file_texts.contains_key(uri) {
519            return None;
520        }
521        self.sync_workspace_files();
522        let uri_str: Arc<str> = Arc::from(uri.as_str());
523        let ws = self.workspace;
524        self.snapshot_query(move |db| {
525            let sf = find_source_file(db, ws, &uri_str)?;
526            Some(crate::db::symbol_map::symbol_map(db, sf).0.clone())
527        })
528    }
529
530    /// Pre-computed symbol maps for every entry in `open_urls` except `uri`.
531    pub fn other_symbol_maps(
532        &self,
533        uri: &Url,
534        open_urls: &[Url],
535    ) -> Vec<(Url, Arc<crate::types::symbol_map::SymbolMap>)> {
536        open_urls
537            .iter()
538            .filter(|u| *u != uri)
539            .filter_map(|u| self.get_symbol_map_salsa(u).map(|m| (u.clone(), m)))
540            .collect()
541    }
542
543    /// G3: shared implementation for `get_doc_salsa`.
544    /// Tries the `parsed_cache` (lock-free) first; validates via
545    /// `Arc::ptr_eq` against the G2 `text_cache` so a concurrent writer
546    /// that has already committed a new text input cannot be masked by a
547    /// stale cache entry. On miss, captures the text Arc and ParsedDoc
548    /// together inside a single `snapshot_query`, then publishes both.
549    fn get_parsed_cached(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
550        if let Some(current_text) = self.text_cache.get(uri)
551            && let Some(entry) = self.parsed_cache.get(uri)
552            && Arc::ptr_eq(&*current_text, &entry.0)
553        {
554            return Some(entry.1.clone());
555        }
556
557        if self.deleted_uris.contains(uri) {
558            return None;
559        }
560        if !self.file_texts.contains_key(uri) {
561            return None;
562        }
563        self.sync_workspace_files();
564        let uri_str: Arc<str> = Arc::from(uri.as_str());
565        let ws = self.workspace;
566        let (text, doc) = self.snapshot_query(move |db| {
567            let sf = find_source_file(db, ws, &uri_str)?;
568            let text = sf.text_input(db).text(db);
569            let doc = crate::db::parse::parsed_doc(db, sf).0.clone();
570            Some((text, doc))
571        })?;
572        self.insert_parsed_cache(uri.clone(), text, doc.clone());
573        Some(doc)
574    }
575
576    /// Publish a fresh `ParsedDoc` into `parsed_cache`, shedding roughly
577    /// half of the cache first if it has grown past [`PARSED_CACHE_CAP`].
578    ///
579    /// Eviction is probabilistic (DashMap iteration order is arbitrary),
580    /// not LRU. That's fine — salsa's own `parsed_doc` memo uses
581    /// `lru = 2048` on hotness-aware storage, so a cache-miss here is
582    /// cheap: the next read goes through `snapshot_query` and
583    /// `parsed_doc`, which still short-circuits on the salsa memo.
584    /// What we're bounding here is the *secondary* Arc retention that
585    /// would otherwise pin every workspace file's bumpalo arena alive
586    /// regardless of salsa's eviction decisions.
587    fn insert_parsed_cache(&self, uri: Url, text: Arc<str>, doc: Arc<ParsedDoc>) {
588        if self.parsed_cache.len() >= PARSED_CACHE_CAP {
589            let drop_target = self.parsed_cache.len() / 2;
590            let mut dropped = 0usize;
591            self.parsed_cache.retain(|_, _| {
592                if dropped < drop_target {
593                    dropped += 1;
594                    false
595                } else {
596                    true
597                }
598            });
599        }
600        self.parsed_cache.insert(uri, (text, doc));
601    }
602
603    /// Refresh `workspace.files` to mirror the current active file set.
604    ///
605    /// Skips all work when `workspace_files_dirty` is `false` (the common
606    /// case after the workspace scan completes — file-set changes are rare).
607    pub fn sync_workspace_files(&self) {
608        // Atomically clear the flag.  If it was already false the file set
609        // hasn't changed since the last sync; nothing to do.
610        if !self.workspace_files_dirty.swap(false, Ordering::AcqRel) {
611            return;
612        }
613
614        // Collect active (non-deleted) files without holding the host lock.
615        let mut files: Vec<(Arc<str>, FileText)> = self
616            .file_texts
617            .iter()
618            .filter(|e| !self.deleted_uris.contains(e.key()))
619            .map(|e| (Arc::<str>::from(e.key().as_str()), *e.value()))
620            .collect();
621        // Sort by URI string for stable ordering.
622        files.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
623
624        let mut host = self.host.lock().unwrap();
625        let current = self.workspace.files(host.db());
626        if current.len() == files.len()
627            && current
628                .iter()
629                .zip(files.iter())
630                .all(|(a, b)| a.0 == b.0 && a.1 == b.1)
631        {
632            return;
633        }
634        self.workspace.set_files(host.db_mut()).to(Arc::from(files));
635    }
636
637    /// Mark the workspace file set as dirty so the next `sync_workspace_files`
638    /// call re-runs the collect/sort/compare path.  Exposed for benchmarks that
639    /// need to measure the dirty-path cost in isolation.
640    pub fn mark_workspace_files_dirty(&self) {
641        self.workspace_files_dirty.store(true, Ordering::Release);
642    }
643
644    /// Update the PHP version tracked by the workspace. Salsa will invalidate
645    /// all `semantic_issues` queries so diagnostics are re-evaluated.
646    /// Skips the setter when the version hasn't changed to avoid spurious
647    /// query invalidation.
648    pub fn set_php_version(&self, version: mir_analyzer::PhpVersion) {
649        let mut host = self.host.lock().unwrap();
650        if self.workspace.php_version(host.db()) == version {
651            return;
652        }
653        self.workspace.set_php_version(host.db_mut()).to(version);
654        // The analysis_cache validates against source content only, so stale
655        // FileAnalysis results from the old PHP version would survive unchanged
656        // files. Clear it so the next request re-runs with the new version.
657        drop(host);
658        self.analysis_cache.clear();
659    }
660
661    /// Session-backed workspace reference lookup. Returns `(file, line, col)`
662    /// locations for every occurrence of `symbol` in the files that the
663    /// `AnalysisSession` has ingested so far. The session's reference index
664    /// is built incrementally during `ingest_file`, so refs for files the
665    /// session hasn't seen yet (background-indexed but never opened) won't
666    /// appear here — those are covered by the AST-walker fallback in the
667    /// references handler.
668    ///
669    /// Returns LSP-style 0-based line/column.
670    pub fn session_references_to(
671        &self,
672        symbol: &mir_analyzer::Name,
673    ) -> Vec<(Arc<str>, u32, u32, u32)> {
674        let php_version = self.workspace_php_version();
675        let session = self.analysis_session(php_version);
676        session
677            .references_to(symbol)
678            .into_iter()
679            .map(|(file, range)| {
680                // mir uses 1-based lines; 0-based columns (since mir 0.42.0).
681                let line = range.start.line.saturating_sub(1);
682                let col_start = range.start.column;
683                let col_end = range.end.column;
684                (file, line, col_start, col_end)
685            })
686            .collect()
687    }
688
689    /// Phase J: salsa-memoized aggregate workspace index.
690    ///
691    /// Returns the shared `Arc<WorkspaceIndexData>` with flat
692    /// `(Url, Arc<FileIndex>)` list plus pre-built `classes_by_name` and
693    /// `subtypes_of` reverse maps. Used by workspace_symbols,
694    /// prepare_type_hierarchy, supertypes_of, subtypes_of, and
695    /// find_implementations so they don't each rebuild the aggregate per
696    /// request. Invalidates automatically when any file's `file_index`
697    /// changes.
698    pub fn get_workspace_index_salsa(&self) -> Arc<crate::db::workspace_index::WorkspaceIndexData> {
699        self.sync_workspace_files();
700        let ws = self.workspace;
701        self.snapshot_query(move |db| {
702            crate::db::workspace_index::workspace_index(db, ws)
703                .0
704                .clone()
705        })
706    }
707
    /// No-op after the mir 0.22 migration. The session manages its own
    /// warm-up via `ingest_file` / `analyze_dependents_of`, so there is
    /// nothing for us to pre-warm here.
    ///
    /// NOTE(review): presumably kept only so existing call sites keep
    /// compiling — confirm before removing.
    pub fn warm_reference_index(&self) {}
712
713    /// Return the raw source text for `uri` if it has been mirrored into the
714    /// salsa workspace. Used by the references handler to pre-filter session
715    /// results by checking whether a file mentions the owning class name.
716    pub fn source_text(&self, uri: &Url) -> Option<Arc<str>> {
717        self.text_cache.get(uri).map(|e| Arc::clone(&e))
718    }
719
720    /// Run Pass 1 + Pass 2 analysis on every mirrored workspace file so that
721    /// type-aware queries (e.g. `session.references_to`) see the full workspace.
722    ///
723    /// Reference locations are only recorded during Pass 2 (`FileAnalyzer::analyze`).
724    /// `ingest_file` alone (Pass 1) is not sufficient. Only needed for cross-file
725    /// queries like `textDocument/references` that rely on the reference index.
726    /// The session's internal cache makes re-analysis of unchanged files cheap.
727    pub fn ensure_all_files_ingested(&self) {
728        let php_version = self.workspace_php_version();
729        let session = self.analysis_session(php_version);
730        let urls: Vec<Url> = self
731            .file_texts
732            .iter()
733            .filter(|e| !self.deleted_uris.contains(e.key()))
734            .map(|e| e.key().clone())
735            .collect();
736        for uri in &urls {
737            let Some(doc) = self.get_doc_salsa(uri) else {
738                continue;
739            };
740            let file: Arc<str> = Arc::from(uri.as_str());
741            session.ingest_file(file.clone(), doc.source_arc());
742            let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
743            let owned_program = php_ast::owned::to_owned_program(doc.program());
744            let analyzer = mir_analyzer::FileAnalyzer::new(&session);
745            analyzer.analyze(file, doc.source(), &owned_program, &source_map);
746        }
747    }
748
749    /// Cache the semantic tokens computed for a delta response.
750    /// `result_id` is an opaque string (a hash of the token data) returned to the client.
751    pub fn store_token_cache(&self, uri: &Url, result_id: String, tokens: Arc<Vec<SemanticToken>>) {
752        self.token_cache.insert(uri.clone(), (result_id, tokens));
753    }
754
755    /// Return the cached tokens if `result_id` matches the stored one.
756    pub fn get_token_cache(&self, uri: &Url, result_id: &str) -> Option<Arc<Vec<SemanticToken>>> {
757        self.token_cache
758            .get(uri)
759            .filter(|e| e.0.as_str() == result_id)
760            .map(|e| Arc::clone(&e.1))
761    }
762
763    /// Raw semantic issues for a file, computed via mir's session-based
764    /// `FileAnalyzer`. The session lazy-loads dependencies via PSR-4 so the
765    /// LSP no longer needs to mirror vendor up-front. Callers apply their
766    /// own `DiagnosticsConfig` filter via
767    /// [`crate::semantic_diagnostics::issues_to_diagnostics`].
768    #[tracing::instrument(skip_all)]
769    pub fn get_semantic_issues_salsa(&self, uri: &Url) -> Option<Arc<[mir_issues::Issue]>> {
770        let analysis = self.cached_analysis(uri)?;
771        let file: Arc<str> = Arc::from(uri.as_str());
772        // Workspace-level class issues for this file (circular inheritance,
773        // override violations, abstract-method gaps). These are session-wide
774        // (a dependency edit changes them without changing this file's bytes),
775        // so they are recomputed live rather than cached alongside the
776        // per-file body analysis.
777        let class_issues = {
778            let _s = tracing::debug_span!("session.class_issues_for").entered();
779            self.analysis_session(self.workspace_php_version())
780                .class_issues(std::slice::from_ref(&file))
781        };
782        let combined: Vec<mir_issues::Issue> = analysis
783            .issues
784            .iter()
785            .cloned()
786            .chain(class_issues)
787            .filter(|i| !i.suppressed)
788            .collect();
789        Some(Arc::from(combined))
790    }
791
792    /// Run (or reuse) mir's per-file body analysis, retaining the full
793    /// [`mir_analyzer::FileAnalysis`] — issues **and** resolved symbols — across
794    /// requests. Diagnostics read `.issues`; position features call
795    /// `.symbol_at(offset)` for the resolved type at a cursor.
796    ///
797    /// Cache hit when the entry's captured source `Arc` is pointer-equal to the
798    /// file's current `doc.source_arc()`. A miss recomputes and overwrites, so
799    /// the entry self-evicts on any content edit.
800    /// Build (or reuse) the whole-doc completion [`crate::types::type_map::TypeMap`]
801    /// for `uri`. Cache hit when the entry's captured source `Arc` is
802    /// pointer-equal to `doc.source_arc()` and the PHPStorm-meta pointer is
803    /// unchanged (meta lives behind `ArcSwap`, so its address is stable until
804    /// `.phpstorm.meta.php` is reloaded). A miss rebuilds and overwrites, so
805    /// the entry self-evicts on any content edit.
806    pub fn cached_type_map(
807        &self,
808        uri: &Url,
809        doc: &crate::document::ast::ParsedDoc,
810        meta: Option<&crate::lang::phpstorm_meta::PhpStormMeta>,
811    ) -> Arc<crate::types::type_map::TypeMap> {
812        let source = doc.source_arc();
813        let meta_key = meta.map_or(0usize, |m| std::ptr::from_ref(m) as usize);
814        if let Some(entry) = self.type_map_cache.get(uri)
815            && Arc::ptr_eq(&entry.0, &source)
816            && entry.1 == meta_key
817        {
818            return Arc::clone(&entry.2);
819        }
820        let map = Arc::new(crate::types::type_map::TypeMap::from_doc_with_meta(
821            doc, meta,
822        ));
823        self.type_map_cache
824            .insert(uri.clone(), (source, meta_key, Arc::clone(&map)));
825        map
826    }
827
828    /// Cache-hit-only variant of [`Self::cached_analysis`]: returns the cached
829    /// analysis when the entry is current for the file's text, never computes.
830    /// Lets async handlers take the warm path synchronously and reserve
831    /// `spawn_blocking` for the cold path (mir Pass 1 + Pass 2 can take
832    /// hundreds of ms on large files).
833    pub fn cached_analysis_if_fresh(&self, uri: &Url) -> Option<Arc<mir_analyzer::FileAnalysis>> {
834        let doc = self.get_doc_salsa(uri)?;
835        let source = doc.source_arc();
836        let entry = self.analysis_cache.get(uri)?;
837        let cur_ver = self.decl_version.load(Ordering::Acquire);
838        (Arc::ptr_eq(&entry.0, &source) && entry.1 == cur_ver).then(|| Arc::clone(&entry.2))
839    }
840
    /// Run (or reuse) mir's per-file body analysis for `uri`, keeping the full
    /// [`mir_analyzer::FileAnalysis`] in `analysis_cache` across requests.
    ///
    /// Returns `None` when no parsed doc is available for `uri`.
    ///
    /// Cache hit when the entry's captured source `Arc` is pointer-equal to
    /// the file's current `doc.source_arc()` **and** the entry's recorded
    /// declaration version equals the current `decl_version`. On a miss the
    /// file is ingested into the session, the autoload helper files and every
    /// referenced class are pre-loaded, Pass 2 runs, and the cache entry is
    /// overwritten.
    ///
    /// After analysis the file's `FileIndex` is compared with the stored
    /// fingerprint; when it differs (or none was stored yet) `decl_version`
    /// is bumped, which makes other files' cache entries stale.
    #[tracing::instrument(skip_all)]
    pub fn cached_analysis(&self, uri: &Url) -> Option<Arc<mir_analyzer::FileAnalysis>> {
        // Need the parsed doc both for the analyzer and as the cache key.
        let doc = self.get_doc_salsa(uri)?;
        let source = doc.source_arc();

        // Fast path: same source Arc and same declaration version.
        let cur_ver = self.decl_version.load(Ordering::Acquire);
        if let Some(entry) = self.analysis_cache.get(uri)
            && Arc::ptr_eq(&entry.0, &source)
            && entry.1 == cur_ver
        {
            return Some(Arc::clone(&entry.2));
        }

        let php_version = self.with_host(|h| self.workspace.php_version(h.db()));
        let session = self.analysis_session(php_version);
        let file: Arc<str> = Arc::from(uri.as_str());
        {
            let _s = tracing::debug_span!("session.ingest_file").entered();
            session.ingest_file(file.clone(), source.clone());
        }
        // Pre-ingest autoload.files helpers (e.g. tap(), class_uses_recursive()
        // in Laravel) so mir sees their function definitions before analyzing
        // the current file. ingest_file is idempotent — already-ingested files
        // are skipped cheaply by the session's internal content cache.
        {
            // Clone the URI list so the read lock is not held while ingesting.
            let autoload_uris = self.autoload_uris.read().unwrap().clone();
            for auri in &autoload_uris {
                if let Some(atext) = self.text_cache.get(auri).map(|t| Arc::clone(&*t)) {
                    let afile: Arc<str> = Arc::from(auri.as_str());
                    session.ingest_file(afile, atext);
                }
            }
        }
        // Pre-load every imported class via PSR-4 so Pass-2 doesn't emit
        // spurious `UndefinedClass` for classes that ARE on disk but haven't
        // been ingested yet. The session's resolver was supplied at
        // construction time.
        {
            let _s = tracing::debug_span!("session.lazy_load_imports").entered();
            // Pre-load every class-typed reference resolved via the file's
            // namespace + `use` imports. This covers `use` imports, FQN refs
            // (`new \App\Foo`), and bare same-namespace refs (`new Foo` from
            // inside `namespace App;`) in a single sweep — mir won't auto-
            // resolve via the ClassResolver, so anything not lazy-loaded here
            // produces a spurious `UndefinedClass`.
            let fqns = crate::references::collect_referenced_class_fqns(&doc);
            for fqcn in &fqns {
                // Load failures are deliberately ignored (best effort).
                let _ = session.load_class(fqcn);
            }
        }
        let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
        let owned_program = php_ast::owned::to_owned_program(doc.program());
        let analysis = {
            let _s = tracing::debug_span!("FileAnalyzer::analyze").entered();
            let analyzer = mir_analyzer::FileAnalyzer::new(&session);
            Arc::new(analyzer.analyze(file.clone(), doc.source(), &owned_program, &source_map))
        };
        // Compare the new FileIndex against the stored fingerprint. If
        // declarations changed (or this is the first analysis), bump
        // `decl_version` so other files' cache entries become stale. Body-only
        // edits leave the counter unchanged, allowing sibling files to be
        // served from cache on the next request.
        let new_index = self.get_index_salsa(uri);
        let old_fp = self.decl_fingerprints.get(uri).map(|e| Arc::clone(&*e));
        let decl_changed = match (&old_fp, &new_index) {
            (Some(old), Some(new)) => **old != **new,
            (None, Some(_)) => true,
            // No new index available: nothing to compare against.
            _ => false,
        };
        if decl_changed {
            if let Some(idx) = new_index {
                self.decl_fingerprints.insert(uri.clone(), idx);
            }
            self.decl_version.fetch_add(1, Ordering::Release);
        }
        // Record the entry under the version observed after any bump above.
        let ver = self.decl_version.load(Ordering::Acquire);
        self.analysis_cache
            .insert(uri.clone(), (source, ver, Arc::clone(&analysis)));
        Some(analysis)
    }
922
923    /// Returns `(uri, doc)` for files currently open in the editor.
924    ///
925    /// Resolve `open_urls` (from `Backend::open_urls()`) to parsed docs.
926    /// Files not mirrored in the salsa layer are filtered out silently.
927    pub fn docs_for(&self, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
928        open_urls
929            .iter()
930            .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
931            .collect()
932    }
933
934    /// `(primary, doc)` first, then every other open file's parsed doc.
935    /// The `open_urls` slice should include `uri` — this helper filters it out.
936    pub fn doc_with_others(
937        &self,
938        uri: &Url,
939        doc: Arc<ParsedDoc>,
940        open_urls: &[Url],
941    ) -> Vec<(Url, Arc<ParsedDoc>)> {
942        let mut result = vec![(uri.clone(), doc)];
943        result.extend(self.other_docs(uri, open_urls));
944        result
945    }
946
947    /// Parsed docs for every entry in `open_urls` except `uri`.
948    pub fn other_docs(&self, uri: &Url, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
949        open_urls
950            .iter()
951            .filter(|u| *u != uri)
952            .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
953            .collect()
954    }
955
956    /// Compact symbol index for every mirrored file.
957    pub fn all_indexes(&self) -> Vec<(Url, Arc<FileIndex>)> {
958        self.get_workspace_index_salsa().files.clone()
959    }
960
961    /// Store a lazily-loaded vendor `FileIndex` in the session cache.
962    /// Only call this for files that are not part of the normal workspace scan
963    /// (i.e. vendor files loaded on-demand by PSR-4 navigation).
964    pub fn cache_vendor_index(&self, uri: Url, index: Arc<FileIndex>) {
965        self.vendor_index_cache.insert(uri, index);
966    }
967
968    /// Retrieve a previously cached vendor `FileIndex`.
969    pub fn get_vendor_index(&self, uri: &Url) -> Option<Arc<FileIndex>> {
970        self.vendor_index_cache.get(uri).map(|e| Arc::clone(&*e))
971    }
972
973    /// Same as `all_indexes` but excludes `uri`.
974    pub fn other_indexes(&self, uri: &Url) -> Vec<(Url, Arc<FileIndex>)> {
975        self.get_workspace_index_salsa()
976            .files
977            .iter()
978            .filter(|(u, _)| u != uri)
979            .cloned()
980            .collect()
981    }
982
983    /// Parsed documents for every mirrored file (open or background-indexed).
984    /// Suitable for full-scan operations: find-references, rename,
985    /// call_hierarchy, code_lens.
986    pub fn all_docs_for_scan(&self) -> Vec<(Url, Arc<ParsedDoc>)> {
987        let urls: Vec<Url> = self
988            .file_texts
989            .iter()
990            .filter(|e| !self.deleted_uris.contains(e.key()))
991            .map(|e| e.key().clone())
992            .collect();
993        urls.into_iter()
994            .filter_map(|u| self.get_doc_salsa(&u).map(|d| (u, d)))
995            .collect()
996    }
997
998    /// Parsed documents limited to files whose raw source text contains `word`.
999    ///
1000    /// Prefilters via [`Self::text_cache`] (a cheap substring scan on the raw
1001    /// `Arc<str>` already in memory) before calling [`Self::get_doc_salsa`],
1002    /// which triggers a salsa parse for files not yet in the AST cache.  This
1003    /// means only candidate files are ever parsed — the key win over
1004    /// [`all_docs_for_scan`] for find-references, which otherwise parses the
1005    /// entire workspace before the memchr gate in `find_references_inner` fires.
1006    ///
1007    /// Files whose text is not yet in `text_cache` are included conservatively
1008    /// (safe superset — never produces false negatives).
1009    pub fn candidate_docs_for(&self, word: &str) -> Vec<(Url, Arc<ParsedDoc>)> {
1010        let candidate_urls: Vec<Url> = self
1011            .file_texts
1012            .iter()
1013            .filter(|e| !self.deleted_uris.contains(e.key()))
1014            .filter(|e| {
1015                self.text_cache
1016                    .get(e.key())
1017                    .map(|src| src.contains(word))
1018                    .unwrap_or(true)
1019            })
1020            .map(|e| e.key().clone())
1021            .collect();
1022        candidate_urls
1023            .into_iter()
1024            .filter_map(|u| self.get_doc_salsa(&u).map(|d| (u, d)))
1025            .collect()
1026    }
1027
1028    /// URLs of files whose raw source text contains `word`. No parsing.
1029    ///
1030    /// Used to scope [`ensure_files_ingested`] for method references: only
1031    /// files that mention the method name by text need mir Pass 2 analysis.
1032    pub fn candidate_urls_mentioning(&self, word: &str) -> Vec<Url> {
1033        self.file_texts
1034            .iter()
1035            .filter(|e| !self.deleted_uris.contains(e.key()))
1036            .filter(|e| {
1037                self.text_cache
1038                    .get(e.key())
1039                    .map(|src| src.contains(word))
1040                    .unwrap_or(true)
1041            })
1042            .map(|e| e.key().clone())
1043            .collect()
1044    }
1045
1046    /// Run Pass 1 + Pass 2 analysis on the given files only.
1047    ///
1048    /// Scoped alternative to [`ensure_all_files_ingested`] used by
1049    /// `textDocument/references` for method symbols: only files that textually
1050    /// mention the method name need to be analyzed, cutting the Pass-2 cost
1051    /// from O(workspace) to O(candidates).
1052    ///
1053    /// Uses `BatchFileAnalyzer` so Pass 2 runs in parallel across rayon threads,
1054    /// cutting wall time from O(N × per-file) to O(N/cores × per-file).
1055    pub fn ensure_files_ingested(&self, urls: &[Url]) {
1056        let php_version = self.workspace_php_version();
1057        let session = self.analysis_session(php_version);
1058
1059        // Pass 1: ingest all files (sequential — session serialises writes internally).
1060        let parsed_files: Vec<mir_analyzer::ParsedFile> = urls
1061            .iter()
1062            .filter_map(|uri| {
1063                let doc = self.get_doc_salsa(uri)?;
1064                let file: Arc<str> = Arc::from(uri.as_str());
1065                session.ingest_file(file.clone(), doc.source_arc());
1066                let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
1067                let owned_program = php_ast::owned::to_owned_program(doc.program());
1068                Some(mir_analyzer::ParsedFile::new(
1069                    file,
1070                    doc.source_arc(),
1071                    owned_program,
1072                    source_map,
1073                ))
1074            })
1075            .collect();
1076
1077        // Pass 2: analyze in parallel via rayon — each worker gets its own db clone.
1078        let batch = mir_analyzer::BatchFileAnalyzer::new(&session);
1079        batch.analyze_batch(parsed_files);
1080    }
1081}
1082
1083// `warm_file_refs_parallel` removed: the analyzer-side reference index is
1084// now owned by `AnalysisSession` and warmed by `ingest_file`. This salsa-side
1085// helper has no counterpart in the new architecture.
1086
1087#[cfg(test)]
1088mod tests {
1089    use super::*;
1090
1091    fn uri(path: &str) -> Url {
1092        Url::parse(&format!("file://{path}")).unwrap()
1093    }
1094
1095    /// Phase E4: open-file state lives on `Backend`, not `DocumentStore`.
1096    /// Tests that need to simulate "file is open" just mirror the text into
1097    /// the salsa input — the open/closed distinction is enforced by the
1098    /// caller (Backend) in production.
1099    fn open(store: &DocumentStore, u: Url, text: String) {
1100        store.mirror_text(&u, &text);
1101    }
1102
1103    // Removed `salsa_codebase_aggregates_all_files`: the salsa-side codebase
1104    // aggregation was deleted with the mir 0.22 migration. Equivalent
1105    // behaviour is now covered by mir-analyzer's own session tests.
1106
1107    #[test]
1108    fn index_registers_file_in_salsa() {
1109        let store = DocumentStore::new();
1110        store.ingest(uri("/lib.php"), "<?php\nfunction lib_fn() {}");
1111        let idx = store.get_index_salsa(&uri("/lib.php")).unwrap();
1112        assert_eq!(idx.functions.len(), 1);
1113        assert_eq!(idx.functions[0].name, "lib_fn".into());
1114    }
1115
1116    #[test]
1117    fn remove_hides_file_from_index() {
1118        let store = DocumentStore::new();
1119        let u = uri("/lib.php");
1120        store.ingest(u.clone(), "<?php");
1121        store.remove(&u);
1122        assert!(store.get_index_salsa(&u).is_none());
1123    }
1124
1125    #[test]
1126    fn remove_and_reopen_reuses_source_file_handle() {
1127        let store = DocumentStore::new();
1128        let u = uri("/lib.php");
1129        store.ingest(u.clone(), "<?php");
1130        let ft_before = store.source_file(&u).unwrap();
1131        store.remove(&u);
1132        assert!(
1133            store.source_file(&u).is_none(),
1134            "deleted file should be hidden"
1135        );
1136        store.mirror_text(&u, "<?php");
1137        let ft_after = store.source_file(&u).unwrap();
1138        assert!(
1139            ft_before == ft_after,
1140            "reopen must reuse the same FileText handle"
1141        );
1142    }
1143
1144    #[test]
1145    fn delete_reopen_churn_does_not_amplify_salsa_inputs() {
1146        let store = DocumentStore::new();
1147        let uris: Vec<Url> = (0..20).map(|i| uri(&format!("/churn/f{i}.php"))).collect();
1148        for u in &uris {
1149            store.ingest(u.clone(), "<?php class A {}");
1150        }
1151        let count_before = store.source_files_len();
1152        for _ in 0..10 {
1153            for u in &uris {
1154                store.remove(u);
1155            }
1156            for u in &uris {
1157                store.ingest(u.clone(), "<?php class A {}");
1158            }
1159        }
1160        assert_eq!(
1161            store.source_files_len(),
1162            count_before,
1163            "delete-reopen cycles must not create new salsa inputs (L1-B regression guard)"
1164        );
1165    }
1166
1167    #[test]
1168    fn all_indexes_includes_every_mirrored_file() {
1169        let store = DocumentStore::new();
1170        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
1171        store.ingest(uri("/b.php"), "<?php\nfunction b() {}");
1172        assert_eq!(store.all_indexes().len(), 2);
1173    }
1174
1175    #[test]
1176    fn other_indexes_excludes_current_uri() {
1177        let store = DocumentStore::new();
1178        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
1179        open(&store, uri("/b.php"), "<?php\nfunction b() {}".to_string());
1180        assert_eq!(store.other_indexes(&uri("/a.php")).len(), 1);
1181    }
1182
1183    #[test]
1184    fn other_docs_excludes_current_uri() {
1185        let store = DocumentStore::new();
1186        let ua = uri("/a.php");
1187        let ub = uri("/b.php");
1188        open(&store, ua.clone(), "<?php\nfunction a() {}".to_string());
1189        open(&store, ub.clone(), "<?php\nfunction b() {}".to_string());
1190        let open_urls = vec![ua.clone(), ub];
1191        assert_eq!(store.other_docs(&ua, &open_urls).len(), 1);
1192    }
1193
1194    #[test]
1195    fn evict_token_cache_removes_entry() {
1196        let store = DocumentStore::new();
1197        let u = uri("/a.php");
1198        open(&store, u.clone(), "<?php".to_string());
1199        store.store_token_cache(&u, "id1".to_string(), Arc::new(vec![]));
1200        assert!(store.get_token_cache(&u, "id1").is_some());
1201        store.evict_token_cache(&u);
1202        assert!(store.get_token_cache(&u, "id1").is_none());
1203    }
1204
1205    #[test]
1206    fn index_populates_file_index_with_symbols() {
1207        let store = DocumentStore::new();
1208        store.ingest(uri("/a.php"), "<?php\nfunction hello() {}");
1209        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
1210        assert_eq!(idx.functions.len(), 1);
1211        assert_eq!(idx.functions[0].name, "hello".into());
1212    }
1213
1214    #[test]
1215    fn open_populates_file_index_with_symbols() {
1216        let store = DocumentStore::new();
1217        open(&store, uri("/a.php"), "<?php\nclass Foo {}".to_string());
1218        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
1219        assert_eq!(idx.classes.len(), 1);
1220        assert_eq!(idx.classes[0].name, "Foo".into());
1221    }
1222
1223    // ── Mirror invariants ────────────────────────────────────────────────
1224    //
1225    // Every mutation path that changes file text must keep the salsa layer
1226    // consistent. These tests walk a set-edit-reopen cycle and assert that
1227    // the salsa-derived `FileIndex` reflects the latest text at each step.
1228
1229    fn names_of(idx: &FileIndex) -> Vec<String> {
1230        let mut out: Vec<String> = idx.classes.iter().map(|c| c.name.to_string()).collect();
1231        out.extend(idx.functions.iter().map(|f| f.name.to_string()));
1232        out.sort();
1233        out
1234    }
1235
1236    fn salsa_index_names(store: &DocumentStore, url: &Url) -> Vec<String> {
1237        store
1238            .snapshot_query_file_index(url)
1239            .map(|idx| names_of(&idx))
1240            .unwrap_or_default()
1241    }
1242
1243    #[test]
1244    fn mirror_tracks_repeated_edits() {
1245        let store = DocumentStore::new();
1246        let u = uri("/mirror.php");
1247
1248        open(&store, u.clone(), "<?php\nclass A {}".to_string());
1249        assert_eq!(salsa_index_names(&store, &u), vec!["A".to_string()]);
1250
1251        open(
1252            &store,
1253            u.clone(),
1254            "<?php\nclass A {}\nclass B {}".to_string(),
1255        );
1256        assert_eq!(
1257            salsa_index_names(&store, &u),
1258            vec!["A".to_string(), "B".to_string()]
1259        );
1260
1261        open(&store, u.clone(), "<?php\nfunction greet() {}".to_string());
1262        assert_eq!(salsa_index_names(&store, &u), vec!["greet".to_string()]);
1263    }
1264
1265    #[test]
1266    fn mirror_tracks_ingest_and_ingest_from_doc() {
1267        let store = DocumentStore::new();
1268
1269        // Background `index(url, text)` path.
1270        let u1 = uri("/bg1.php");
1271        store.ingest(u1.clone(), "<?php\nclass Bg1 {}");
1272        assert_eq!(salsa_index_names(&store, &u1), vec!["Bg1".to_string()]);
1273
1274        // `ingest_from_doc(url, &doc)` path (workspace-scan Phase 2).
1275        let u2 = uri("/bg2.php");
1276        let doc = crate::analysis::diagnostics::parse_document_no_diags(
1277            "<?php\nclass Bg2 {}\nfunction f() {}",
1278        );
1279        store.ingest_from_doc(u2.clone(), &doc);
1280        assert_eq!(
1281            salsa_index_names(&store, &u2),
1282            vec!["Bg2".to_string(), "f".to_string()]
1283        );
1284    }
1285
1286    /// G3: confirms the `parsed_cache` actually hits — two consecutive
1287    /// `get_doc_salsa` calls on unchanged text return the same `Arc`
1288    /// (pointer equality), and an edit forces a miss that produces a
1289    /// different `Arc`.
1290    /// parsed_cache must stay bounded — inserting more than
1291    /// `PARSED_CACHE_CAP` unique URLs must not cause unbounded growth.
1292    /// Eviction is probabilistic, so we only assert the bound, not which
1293    /// Seeding a cached index for a URL that was never mirrored is a no-op
1294    /// (returns `false`) — avoids silently allocating SourceFiles outside
1295    /// `mirror_text`'s control.
1296    #[test]
1297    fn seed_cached_index_noops_for_unknown_uri() {
1298        let store = DocumentStore::new();
1299        let u = uri("/never_mirrored.php");
1300        let index = Arc::new(crate::index::file_index::FileIndex::default());
1301        assert!(!store.seed_cached_index(&u, index));
1302    }
1303
1304    /// entries survive.
1305    #[test]
1306    fn parsed_cache_stays_bounded_under_many_inserts() {
1307        let store = DocumentStore::new();
1308        let overflow = PARSED_CACHE_CAP + 100;
1309        for i in 0..overflow {
1310            let u = uri(&format!("/cap/file{i}.php"));
1311            store.ingest(u.clone(), "<?php\nclass A {}");
1312            // Force a parsed_cache insert via get_doc_salsa.
1313            let _ = store.get_doc_salsa(&u);
1314        }
1315        assert!(
1316            store.parsed_cache.len() <= PARSED_CACHE_CAP,
1317            "parsed_cache grew to {} entries (cap {})",
1318            store.parsed_cache.len(),
1319            PARSED_CACHE_CAP
1320        );
1321    }
1322
1323    #[test]
1324    fn get_doc_salsa_cache_hits_across_calls() {
1325        let store = DocumentStore::new();
1326        let u = uri("/g3_cache.php");
1327        open(&store, u.clone(), "<?php\nclass G3 {}".to_string());
1328
1329        let a = store.get_doc_salsa(&u).unwrap();
1330        let b = store.get_doc_salsa(&u).unwrap();
1331        assert!(
1332            Arc::ptr_eq(&a, &b),
1333            "parsed_cache hit should yield the same Arc across calls"
1334        );
1335
1336        open(&store, u.clone(), "<?php\nclass G3b {}".to_string());
1337        let c = store.get_doc_salsa(&u).unwrap();
1338        assert!(
1339            !Arc::ptr_eq(&a, &c),
1340            "edit should invalidate the parsed_cache entry"
1341        );
1342    }
1343
1344    #[test]
1345    fn get_doc_salsa_returns_some_for_mirrored_files() {
1346        // Phase E4: `get_doc_salsa` no longer gates on open-state. The
1347        // open/closed distinction now lives on `Backend::get_doc`.
1348        let store = DocumentStore::new();
1349        let u = uri("/e4_doc.php");
1350        store.ingest(u.clone(), "<?php\nclass P {}");
1351        assert!(store.get_doc_salsa(&u).is_some());
1352    }
1353
1354    #[test]
1355    fn get_salsa_accessors_return_none_for_unknown_uri() {
1356        let store = DocumentStore::new();
1357        let u = uri("/never-seen.php");
1358        assert!(store.get_doc_salsa(&u).is_none());
1359        assert!(store.get_index_salsa(&u).is_none());
1360    }
1361
1362    /// Phase E1: concurrent readers and writers must not deadlock, panic, or
1363    /// return stale data. Writers briefly bump inputs while readers are
1364    /// running on cloned snapshots; any `salsa::Cancelled` raised on the
1365    /// reader side must be caught and retried by `snapshot_query`.
1366    ///
1367    /// The salsa surface (`get_doc_salsa`, `get_index_salsa`) is protected by
1368    /// `snapshot_query`'s last-resort host-lock fallback.
1369    #[test]
1370    fn concurrent_reads_and_writes_do_not_panic() {
1371        use std::sync::Arc;
1372        use std::thread;
1373        use std::time::{Duration, Instant};
1374
1375        let store = Arc::new(DocumentStore::new());
1376        let urls: Vec<Url> = (0..8).map(|i| uri(&format!("/f{i}.php"))).collect();
1377        for (i, u) in urls.iter().enumerate() {
1378            open(&store, u.clone(), format!("<?php\nclass C{i} {{}}"));
1379        }
1380
1381        let deadline = Instant::now() + Duration::from_millis(400);
1382        let mut handles = Vec::new();
1383
1384        // Writer thread: keep bumping every file's text.
1385        {
1386            let store = Arc::clone(&store);
1387            let urls = urls.clone();
1388            handles.push(thread::spawn(move || {
1389                let mut rev = 0u32;
1390                while Instant::now() < deadline {
1391                    for u in &urls {
1392                        let text = format!("<?php\nclass C{{}}\n// rev {rev}");
1393                        store.mirror_text(u, &text);
1394                    }
1395                    rev += 1;
1396                }
1397            }));
1398        }
1399
1400        // Reader threads: hammer the salsa accessors.
1401        for _ in 0..4 {
1402            let store = Arc::clone(&store);
1403            let urls = urls.clone();
1404            handles.push(thread::spawn(move || {
1405                while Instant::now() < deadline {
1406                    for u in &urls {
1407                        let _ = store.get_doc_salsa(u);
1408                        let _ = store.get_index_salsa(u);
1409                    }
1410                    // Post mir 0.22: codebase + refs live in the session,
1411                    // not salsa. Concurrent-read smoke is limited to the
1412                    // remaining salsa surface (parsed_doc, file_index).
1413                }
1414            }));
1415        }
1416
1417        for h in handles {
1418            h.join().expect("no panic under concurrent read/write");
1419        }
1420    }
1421
1422    /// PSR-4 lazy-loading: `get_semantic_issues_salsa` must not emit
1423    /// `UndefinedClass` for a class that is PSR-4-resolvable on disk, even
1424    /// when the dependency file is not yet in `source_files`.
1425    #[test]
1426    fn psr4_lazy_load_suppresses_undefined_class() {
1427        let tmp = tempfile::tempdir().unwrap();
1428
1429        // Write Entity.php to disk (not mirrored into the store).
1430        std::fs::create_dir_all(tmp.path().join("src/Model")).unwrap();
1431        std::fs::write(
1432            tmp.path().join("src/Model/Entity.php"),
1433            "<?php\nnamespace App\\Model;\nclass Entity {}\n",
1434        )
1435        .unwrap();
1436
1437        // Write composer.json so Psr4Map::load can build the map.
1438        std::fs::write(
1439            tmp.path().join("composer.json"),
1440            r#"{"autoload":{"psr-4":{"App\\":"src/"}}}"#,
1441        )
1442        .unwrap();
1443
1444        let store = DocumentStore::new();
1445
1446        // Inject a PSR-4 map pointing at the tmp dir.
1447        store
1448            .psr4
1449            .store(Arc::new(crate::lang::autoload::Psr4Map::load(tmp.path())));
1450
1451        // Mirror the consuming file (Entity not yet in source_files).
1452        // Uses Entity as a parameter type hint — the analyzer resolves these
1453        // through use statements, so this exercises the full PSR-4 lazy-load path.
1454        let handler_url = Url::from_file_path(tmp.path().join("src/Service/Handler.php")).unwrap();
1455        store.mirror_text(
1456            &handler_url,
1457            "<?php\nnamespace App\\Service;\nuse App\\Model\\Entity;\nfunction handle(Entity $e): Entity { return $e; }\n",
1458        );
1459
1460        let issues = store.get_semantic_issues_salsa(&handler_url).unwrap();
1461        let undef: Vec<_> = issues
1462            .iter()
1463            .filter(|i| matches!(i.kind, mir_issues::IssueKind::UndefinedClass { .. }))
1464            .collect();
1465        assert!(
1466            undef.is_empty(),
1467            "PSR-4 lazy-loading must prevent UndefinedClass for App\\Model\\Entity; got: {undef:?}"
1468        );
1469    }
1470
1471    /// Issue #191 regression: workspace-wide scans (find-references, rename,
1472    /// call-hierarchy) must not re-parse closed/indexed files on repeated
1473    /// invocations. Once a file's `ParsedDoc` has been produced, subsequent
1474    /// `all_docs_for_scan()` calls must hit the cache and return the same
1475    /// `Arc<ParsedDoc>` (pointer equality), proving no re-parse occurred.
1476    ///
1477    /// The cache layers protecting this are:
1478    ///   1. `parsed_cache` (cap [`PARSED_CACHE_CAP`]) — read-through, validated
1479    ///      via `Arc::ptr_eq` on the text Arc.
1480    ///   2. salsa `parsed_doc` memo (`lru = 2048`) — second line of defense
1481    ///      when `parsed_cache` evicts.
1482    ///
1483    /// Together they keep every workspace-scan op O(N) memo lookups, never
1484    /// O(N) parses, for any workspace whose file count fits the cap.
1485    #[test]
1486    fn all_docs_for_scan_does_not_reparse_indexed_files() {
1487        let store = DocumentStore::new();
1488        const N: usize = 50;
1489        for i in 0..N {
1490            let u = uri(&format!("/scan/file{i}.php"));
1491            store.ingest(u, &format!("<?php\nclass C{i} {{}}\nfunction f{i}() {{}}"));
1492        }
1493
1494        let first: Vec<_> = store.all_docs_for_scan();
1495        let second: Vec<_> = store.all_docs_for_scan();
1496        assert_eq!(first.len(), N);
1497        assert_eq!(second.len(), N);
1498
1499        let by_url_first: std::collections::HashMap<Url, Arc<ParsedDoc>> =
1500            first.into_iter().collect();
1501        for (u, doc2) in second {
1502            let doc1 = by_url_first
1503                .get(&u)
1504                .expect("second scan returned a URL the first didn't");
1505            assert!(
1506                Arc::ptr_eq(doc1, &doc2),
1507                "{u} re-parsed across all_docs_for_scan calls — \
1508                 cache (parsed_cache + salsa parsed_doc memo) failed to hit"
1509            );
1510        }
1511
1512        // Editing one file's text must invalidate just that file's entry,
1513        // not the rest. This locks in self-eviction via Arc::ptr_eq on text.
1514        let edited_url = uri("/scan/file0.php");
1515        let pre_edit = store.get_doc_salsa(&edited_url).unwrap();
1516        store.ingest(edited_url.clone(), "<?php\nclass C0Edited {}");
1517        let post_edit = store.get_doc_salsa(&edited_url).unwrap();
1518        assert!(
1519            !Arc::ptr_eq(&pre_edit, &post_edit),
1520            "edited file must produce a fresh ParsedDoc"
1521        );
1522        for i in 1..N {
1523            let u = uri(&format!("/scan/file{i}.php"));
1524            let original = by_url_first.get(&u).unwrap();
1525            let after = store.get_doc_salsa(&u).unwrap();
1526            assert!(
1527                Arc::ptr_eq(original, &after),
1528                "{u} should not have re-parsed because of an unrelated edit"
1529            );
1530        }
1531    }
1532
1533    /// Incremental analysis cache: a body-only edit to file A (no declaration
1534    /// changes) must not bump `decl_version`, so file B's cached analysis
1535    /// survives. A declaration edit MUST bump the version so B's entry goes
1536    /// stale.
1537    #[test]
1538    fn body_only_edit_does_not_invalidate_sibling_analysis_cache() {
1539        let store = DocumentStore::new();
1540        let ua = uri("/ic_a.php");
1541        let ub = uri("/ic_b.php");
1542
1543        // Analyze both files to establish their fingerprints.
1544        open(
1545            &store,
1546            ua.clone(),
1547            "<?php\nfunction a() { return 1; }".to_string(),
1548        );
1549        open(
1550            &store,
1551            ub.clone(),
1552            "<?php\nfunction b() { return 2; }".to_string(),
1553        );
1554        let _ = store.cached_analysis(&ua).unwrap();
1555        let analysis_b_first = store.cached_analysis(&ub).unwrap();
1556        let ver_after_warm = store.decl_version.load(Ordering::Acquire);
1557
1558        // Body-only edit to A: same function name, different body → FileIndex unchanged.
1559        store.mirror_text(&ua, "<?php\nfunction a() { return 999; }");
1560        let _ = store.cached_analysis(&ua);
1561        let ver_after_body_edit = store.decl_version.load(Ordering::Acquire);
1562        assert_eq!(
1563            ver_after_warm, ver_after_body_edit,
1564            "body-only edit must not bump decl_version"
1565        );
1566
1567        // B's cached entry should still be valid (ptr-eq source AND same version).
1568        let analysis_b_second = store.cached_analysis_if_fresh(&ub);
1569        assert!(
1570            analysis_b_second.is_some(),
1571            "B's analysis should hit cache after body-only edit to A"
1572        );
1573        assert!(
1574            Arc::ptr_eq(&analysis_b_first, &analysis_b_second.unwrap()),
1575            "B's analysis should be the identical Arc (no re-analysis)"
1576        );
1577
1578        // Declaration edit to A: rename the function → FileIndex changes.
1579        store.mirror_text(&ua, "<?php\nfunction a_renamed() { return 999; }");
1580        let _ = store.cached_analysis(&ua);
1581        let ver_after_decl_edit = store.decl_version.load(Ordering::Acquire);
1582        assert!(
1583            ver_after_decl_edit > ver_after_body_edit,
1584            "declaration edit must bump decl_version (was {ver_after_body_edit}, now {ver_after_decl_edit})"
1585        );
1586
1587        // B's entry is now stale — cached_analysis_if_fresh must return None.
1588        let analysis_b_stale = store.cached_analysis_if_fresh(&ub);
1589        assert!(
1590            analysis_b_stale.is_none(),
1591            "B's analysis should be stale after A's declaration changed"
1592        );
1593    }
1594}