Skip to main content

php_lsp/
document_store.rs

1use std::sync::atomic::{AtomicBool, Ordering};
2use std::sync::{Arc, Mutex, OnceLock};
3
4use arc_swap::ArcSwap;
5
6use dashmap::{DashMap, DashSet};
7use salsa::Setter;
8use tower_lsp::lsp_types::{SemanticToken, Url};
9
10use crate::ast::ParsedDoc;
11use crate::autoload::Psr4Map;
12use crate::db::analysis::AnalysisHost;
13use crate::db::input::{FileText, Workspace, find_source_file};
14use crate::file_index::FileIndex;
15
/// Maximum number of entries `parsed_cache` may hold before an insert sheds
/// part of it: 2048, chosen to match the `lru = 2048` configured on
/// `parsed_doc` in `src/db/parse.rs`, so this secondary `Arc` retention never
/// pins more ASTs alive than salsa's own memo bound allows.
///
/// When the cache has reached this size, the next insert first drops roughly
/// half of the entries in arbitrary order (see
/// [`DocumentStore::insert_parsed_cache`]).
const PARSED_CACHE_CAP: usize = 1 << 11; // 2048
21
/// Salsa-input wrapper plus a set of per-document, cross-request caches.
///
/// Every field is either internally synchronised (`DashMap`, `DashSet`,
/// `Mutex`, `AtomicBool`, `ArcSwap`, `OnceLock`) or, in the case of
/// `workspace`, a salsa handle that is only read and written through `host`.
pub struct DocumentStore {
    /// Cached semantic tokens per document: (result_id, tokens).
    /// Used to compute incremental deltas for `textDocument/semanticTokens/full/delta`.
    /// Tokens are stored in an `Arc` so the delta-path lookup can hand the
    /// previous snapshot back without cloning the inner Vec.
    token_cache: DashMap<Url, (String, Arc<Vec<SemanticToken>>)>,

    // ── Salsa-input storage ────────────────────────────────────────────────
    // Phase E4: `DocumentStore` is now a pure salsa-input wrapper. Open-file
    // state (live text, version token, parse-diagnostics cache) lives on
    // `Backend` in its `open_files` map; the set of *active* files tracked
    // here is the key set of `file_texts` minus `deleted_uris`.
    /// Mutex — held briefly to clone the database for reads and to mutate
    /// it for writes. Per-thread salsa state (`zalsa_local`) is `!Sync`,
    /// which rules out `RwLock<AnalysisHost>`. Readers instead snapshot the
    /// db (cheap — storage is `Arc<Zalsa>`) and run queries on the clone
    /// with the lock released, giving real read/read parallelism. Writers
    /// during an in-flight read bump the shared revision; the reader raises
    /// `salsa::Cancelled` on its next query call and `snapshot_query` below
    /// retries with a fresh snapshot.
    host: Mutex<AnalysisHost>,
    /// `Url -> FileText` lookup. One immortal `FileText` salsa input per unique
    /// URI ever seen. Text edits mutate the existing handle; delete/reopen cycles
    /// reuse it rather than allocating a new input each time.
    file_texts: DashMap<Url, FileText>,
    /// URIs that have been removed. Re-opening a deleted URI un-deletes it here
    /// and reuses the existing `FileText` handle.
    deleted_uris: DashSet<Url>,
    /// G2: lock-free mirror of each `FileText`'s last-set text. Lets
    /// `mirror_text` dedup repeated no-op updates (common during workspace
    /// scan and `did_open` for already-indexed files) without taking
    /// `host.lock()`. Written by `mirror_text_arc` whenever the salsa input
    /// is created, set, or confirmed unchanged, and removed by `remove`.
    ///
    /// NOTE(review): an earlier version of this comment said the mirror is
    /// updated *inside* the host mutex; `mirror_text_arc` currently inserts
    /// right after releasing the lock. Confirm whether two concurrent
    /// writers to the same URI are possible before relying on strict
    /// consistency with the salsa revision.
    text_cache: DashMap<Url, Arc<str>>,
    /// G3: cross-revision read-through cache for `parsed_doc`. Keyed on
    /// `Url`, stored value is `(text_arc, Arc<ParsedDoc>)` — the text Arc
    /// captured at parse time. On read, compare against `text_cache[uri]`
    /// via `Arc::ptr_eq`; a match guarantees the cached ParsedDoc matches
    /// the current salsa revision's text input, so the query can return
    /// without snapshotting the db or invoking salsa at all. A miss
    /// (different pointer, stale or absent entry) falls through to
    /// `snapshot_query`. Self-evicts on text change — no writer-side
    /// invalidation is required, which avoids the TOCTOU window where a
    /// concurrent reader could re-insert a stale entry after a writer's
    /// eviction.
    ///
    /// Size-bounded at [`PARSED_CACHE_CAP`] — see `insert_parsed_cache`.
    /// Without this bound, every workspace file read-through would pin
    /// its bumpalo arena alive regardless of salsa's `lru = 2048` on the
    /// `parsed_doc` memo.
    parsed_cache: DashMap<Url, (Arc<str>, Arc<ParsedDoc>)>,
    /// Cross-request read-through cache for a file's mir body analysis. Keyed
    /// on `Url`, stored value is `(source_arc, Arc<FileAnalysis>)` — the source
    /// Arc captured at analysis time. On read, compare against the current
    /// `doc.source_arc()` via `Arc::ptr_eq`; a match means the cached analysis
    /// matches the live content. A miss recomputes and overwrites, so the cache
    /// self-evicts on edit (same discipline as `parsed_cache`).
    ///
    /// `FileAnalysis` carries BOTH the issues consumed by diagnostics and the
    /// per-expression `ResolvedSymbol`s consumed by position features (hover,
    /// type-definition, completion, inlay hints). Retaining it means mir's
    /// `FileAnalyzer::analyze` runs once per content revision instead of being
    /// re-run (for diagnostics) and then re-derived in a weaker form (for
    /// position queries). Bounded by the set of analyzed files (open files plus
    /// their open dependents); explicitly evicted in [`DocumentStore::remove`],
    /// and cleared wholesale by `mirror_text_arc` and `set_php_version`
    /// because its per-file validation cannot see cross-file changes.
    analysis_cache: DashMap<Url, (Arc<str>, Arc<mir_analyzer::FileAnalysis>)>,
    /// Cross-request cache for the whole-doc completion [`crate::type_map::TypeMap`]
    /// (`TypeMap::from_doc_with_meta`). Unlike `analysis_cache`, validity is
    /// purely per-file (the map reads only this doc plus PHPStorm meta), so the
    /// entry needs no cross-file invalidation: it is fresh when its captured
    /// source `Arc` is pointer-equal to the doc's current `source_arc()` and
    /// the meta pointer is unchanged, self-evicting on any content/meta edit.
    /// The `usize` in the tuple is presumably that meta pointer — TODO confirm
    /// against the code that populates this cache.
    type_map_cache: DashMap<Url, (Arc<str>, usize, Arc<crate::type_map::TypeMap>)>,
    /// Set to `true` when the set of tracked files changes (add or remove).
    /// `sync_workspace_files` skips the collect/sort/compare path when this
    /// is `false`, avoiding a mutex acquisition on every LSP request.
    workspace_files_dirty: AtomicBool,
    /// Workspace salsa input. Tracks the full set of active files that
    /// participate in whole-program queries (`codebase`, `file_refs`).
    /// Re-synced from `file_texts` (minus `deleted_uris`) on demand by
    /// `sync_workspace_files`.
    workspace: Workspace,
    /// Shared PSR-4 namespace-to-path map. Shared with `Backend` via `Arc`
    /// so updates from `initialized` (when composer.json is loaded) are
    /// visible here without any additional wiring. `ArcSwap` makes reads
    /// lock-free — a poisoned guard can no longer crash a request handler.
    psr4: Arc<ArcSwap<Psr4Map>>,
    /// mir-analyzer's `AnalysisSession` — owns the workspace MirDb, runs
    /// Pass-2 analysis, and lazy-loads dependencies via PSR-4. Built lazily
    /// on first use; rebuilt when PHP version changes. The stored
    /// `PhpVersion` is the version the cached session was built for.
    analysis_session: Mutex<Option<(mir_analyzer::PhpVersion, Arc<mir_analyzer::AnalysisSession>)>>,
    /// Cache directory shared with the workspace file-index cache. When set,
    /// new `AnalysisSession`s are built with `with_cache_dir` so that stub
    /// parsing results survive server restarts. Write-once (`OnceLock`).
    session_cache_dir: OnceLock<std::path::PathBuf>,
}
118
119impl Default for DocumentStore {
120    fn default() -> Self {
121        Self::new()
122    }
123}
124
125impl DocumentStore {
126    pub fn new() -> Self {
127        let host = AnalysisHost::new();
128        let workspace = Workspace::new(
129            host.db(),
130            Arc::<[(Arc<str>, FileText)]>::from(Vec::new()),
131            mir_analyzer::PhpVersion::LATEST,
132        );
133        DocumentStore {
134            token_cache: DashMap::new(),
135            host: Mutex::new(host),
136            file_texts: DashMap::new(),
137            deleted_uris: DashSet::new(),
138            text_cache: DashMap::new(),
139            parsed_cache: DashMap::new(),
140            analysis_cache: DashMap::new(),
141            type_map_cache: DashMap::new(),
142            workspace_files_dirty: AtomicBool::new(true),
143            workspace,
144            psr4: Arc::new(ArcSwap::from_pointee(Psr4Map::empty())),
145            analysis_session: Mutex::new(None),
146            session_cache_dir: OnceLock::new(),
147        }
148    }
149
150    /// Set the directory used to persist stub-parse and analysis results across
151    /// server restarts.  Must be called before the first `analysis_session` use;
152    /// subsequent calls are silently ignored (`OnceLock` semantics).
153    pub fn set_session_cache_dir(&self, dir: std::path::PathBuf) {
154        let _ = self.session_cache_dir.set(dir);
155    }
156
157    /// Get or build the `AnalysisSession` for the given PHP version. Rebuilds
158    /// when the version changes (e.g. user flipped config). The session owns
159    /// its own salsa db and AnalysisCache; lazy-loads vendor files via the
160    /// shared PSR-4 map.
161    pub fn analysis_session(
162        &self,
163        php_version: mir_analyzer::PhpVersion,
164    ) -> Arc<mir_analyzer::AnalysisSession> {
165        let mut guard = self.analysis_session.lock().unwrap();
166        if let Some((cached_ver, session)) = guard.as_ref()
167            && *cached_ver == php_version
168        {
169            return Arc::clone(session);
170        }
171        // Build a fresh session. Hand it the shared PSR-4 map so it can
172        // lazy-resolve `UndefinedClass` candidates without us having to mirror
173        // every vendor file upfront.
174        let resolver: Arc<dyn mir_analyzer::ClassResolver> = self.psr4.load_full();
175        let mut builder =
176            mir_analyzer::AnalysisSession::new(php_version).with_class_resolver(resolver);
177        if let Some(dir) = self.session_cache_dir.get() {
178            builder = builder.with_cache_dir(dir);
179        }
180        let session = Arc::new(builder);
181        session.ensure_all_stubs();
182        *guard = Some((php_version, Arc::clone(&session)));
183        session
184    }
185
186    /// Current PHP version tracked by the workspace input.
187    pub fn workspace_php_version(&self) -> mir_analyzer::PhpVersion {
188        self.with_host(|h| self.workspace.php_version(h.db()))
189    }
190
191    /// Return the `Arc<ArcSwap<Psr4Map>>` so callers can share it.
192    /// `Backend` clones this arc at construction time so writes
193    /// (e.g. loading composer.json on `initialized`) are immediately visible
194    /// to `lazy_load_psr4_imports` without extra plumbing.
195    pub fn psr4_arc(&self) -> Arc<ArcSwap<Psr4Map>> {
196        Arc::clone(&self.psr4)
197    }
198
199    /// Mirror a file's current text into the salsa layer. Creates the
200    /// `FileText` input on first sight, otherwise updates `text` on the
201    /// existing input (bumping the salsa revision so downstream queries
202    /// invalidate).
203    pub fn mirror_text(&self, uri: &Url, text: &str) {
204        // G2 fast path: compare against the lock-free text cache. When the
205        // new text byte-matches what we already mirrored, skip the host
206        // mutex entirely. Common during workspace scan + `did_open` for
207        // unchanged files, where most threads would otherwise serialise on
208        // `host.lock()` just to confirm a no-op.
209        if let Some(cached) = self.text_cache.get(uri)
210            && **cached == *text
211            && !self.deleted_uris.contains(uri)
212            && self.file_texts.contains_key(uri)
213        {
214            return;
215        }
216        self.mirror_text_arc(uri, Arc::from(text))
217    }
218
219    /// Like [`mirror_text`] but takes an already-allocated `Arc<str>`.
220    ///
221    /// Callers that already hold an `Arc<str>` (e.g. `index_from_doc` reusing
222    /// `ParsedDoc::source_arc()`) use this to avoid a second allocation and to
223    /// ensure `text_cache` and `parsed_cache` hold the same Arc pointer —
224    /// enabling `Arc::ptr_eq` validation in `get_parsed_cached`.
225    pub fn mirror_text_arc(&self, uri: &Url, text_arc: Arc<str>) {
226        if let Some(ft) = self.file_texts.get(uri).map(|e| *e) {
227            self.deleted_uris.remove(uri);
228            // Slow path: re-check inside the mutex. Salsa's `set_text`
229            // unconditionally bumps the revision, so every spurious setter
230            // invalidates every downstream query.
231            let mut host = self.host.lock().unwrap();
232            let current: Arc<str> = ft.text(host.db());
233            if *current == *text_arc {
234                drop(host);
235                self.text_cache.insert(uri.clone(), current);
236                return;
237            }
238            ft.set_text(host.db_mut()).to(text_arc.clone());
239            // Phase K2: any text change invalidates a previously-seeded
240            // cached index. Only bump the revision when a cached index is
241            // actually present — an unconditional set would cause two
242            // revision bumps per edit (one for text, one for cached_index),
243            // which needlessly cancels in-flight `file_index` queries on
244            // every keystroke.
245            if ft.cached_index(host.db()).is_some() {
246                ft.set_cached_index(host.db_mut()).to(None);
247            }
248            drop(host);
249            self.text_cache.insert(uri.clone(), text_arc);
250            // A content change to ANY file can invalidate cross-file analysis
251            // (mir resolves types/issues against other files). `cached_analysis`
252            // is validated only on a file's own `source_arc`, so a dependency
253            // edit wouldn't otherwise refresh an unchanged dependent's cached
254            // entry — drop the whole cache. Bounded by open files; recompute is
255            // ~6ms warm. Matches the salsa revision bump `set_text` just made.
256            self.analysis_cache.clear();
257        } else {
258            let is_vendor = uri.as_str().contains("/vendor/");
259            let ft = {
260                let mut host = self.host.lock().unwrap();
261                let ft = FileText::new(host.db(), text_arc.clone(), None);
262                if is_vendor {
263                    // Vendor files never change in a session — mark their text
264                    // as HIGH durability so salsa skips re-validating
265                    // parsed_doc/file_index for them on every user edit.
266                    ft.set_text(host.db_mut())
267                        .with_durability(salsa::Durability::HIGH)
268                        .to(Arc::clone(&text_arc));
269                }
270                ft
271            };
272            self.file_texts.insert(uri.clone(), ft);
273            self.text_cache.insert(uri.clone(), text_arc);
274            self.workspace_files_dirty.store(true, Ordering::Release);
275            // A newly-ingested file may resolve references that were previously
276            // unresolved in already-analyzed files; invalidate cross-file caches.
277            self.analysis_cache.clear();
278        }
279    }
280
281    /// Return the `FileText` handle for a URL, if active (not deleted).
282    #[cfg(test)]
283    pub fn source_file(&self, uri: &Url) -> Option<FileText> {
284        if self.deleted_uris.contains(uri) {
285            return None;
286        }
287        self.file_texts.get(uri).map(|e| *e)
288    }
289
290    /// Phase K2: pre-seed a `FileIndex` loaded from the on-disk cache onto
291    /// the `FileText` input for `uri`. The next `file_index` call for that
292    /// file returns the cached index directly, skipping parse + extract.
293    ///
294    /// Must be called **before** any `file_index(db, sf)` call for this file —
295    /// otherwise salsa has already memoized the fresh-parse result and setting
296    /// `cached_index` now would only bump the revision without using the cache.
297    /// In practice the workspace-scan path seeds immediately after `mirror_text`
298    /// and before any query runs.
299    ///
300    /// Returns `false` when `uri` was not mirrored (caller should mirror
301    /// first); returns `true` on success.
302    pub fn seed_cached_index(&self, uri: &Url, index: Arc<FileIndex>) -> bool {
303        let Some(ft) = self.file_texts.get(uri).map(|e| *e) else {
304            return false;
305        };
306        let mut host = self.host.lock().unwrap();
307        ft.set_cached_index(host.db_mut()).to(Some(index));
308        true
309    }
310
311    /// Run `f` with a borrow of the `AnalysisHost`. Used by tests and by the
312    /// upcoming `*_salsa` accessors to query the salsa layer.
313    pub fn with_host<R>(&self, f: impl FnOnce(&AnalysisHost) -> R) -> R {
314        let host = self.host.lock().unwrap();
315        f(&host)
316    }
317
318    /// Phase E1: take a brief lock, clone the salsa database, release the
319    /// lock. Queries then run on the cloned `RootDatabase` without blocking
320    /// writers or other readers. Salsa's `Storage<Self>` is reference-counted
321    /// (`Arc<Zalsa>`), so the clone is cheap — it shares memoized data and
322    /// the cancellation flag with the host's db.
323    fn snapshot_db(&self) -> crate::db::analysis::RootDatabase {
324        let host = self.host.lock().unwrap();
325        host.db().clone()
326    }
327
328    /// Run a query on a fresh snapshot, catching `salsa::Cancelled` (raised
329    /// when a concurrent writer advances the revision) and retrying with a
330    /// new snapshot. Writers hold the mutex only long enough to bump input
331    /// values, so a handful of retries is more than enough in practice; we
332    /// cap at 8 to avoid pathological livelock under sustained write pressure.
333    fn snapshot_query<R>(&self, f: impl Fn(&crate::db::analysis::RootDatabase) -> R + Clone) -> R {
334        use std::panic::AssertUnwindSafe;
335        for _ in 0..8 {
336            let db = self.snapshot_db();
337            let f = f.clone();
338            match salsa::Cancelled::catch(AssertUnwindSafe(move || f(&db))) {
339                Ok(r) => return r,
340                Err(_) => continue,
341            }
342        }
343        // Last-resort attempt: take the mutex for the whole query so no
344        // writer can race us. Much slower, but guaranteed to make progress.
345        let host = self.host.lock().unwrap();
346        f(host.db())
347    }
348
349    /// Evict the semantic-tokens cache for `uri`. Called by Backend when a
350    /// file is closed; diff-based tokens computed against the old revision
351    /// are no longer meaningful.
352    pub fn evict_token_cache(&self, uri: &Url) {
353        self.token_cache.remove(uri);
354    }
355
356    /// Return the `FileIndex` for `uri` by running `file_index` on a salsa
357    /// snapshot.  Returns `None` when `uri` has not been mirrored.
358    ///
359    /// Test-only — production code uses the salsa query directly via
360    /// `snapshot_query`.
361    #[cfg(test)]
362    pub fn source_files_len(&self) -> usize {
363        self.file_texts.len()
364    }
365
366    #[cfg(test)]
367    pub fn snapshot_query_file_index(&self, uri: &Url) -> Option<crate::file_index::FileIndex> {
368        if self.deleted_uris.contains(uri) {
369            return None;
370        }
371        if !self.file_texts.contains_key(uri) {
372            return None;
373        }
374        self.sync_workspace_files();
375        let uri_str: Arc<str> = Arc::from(uri.as_str());
376        let ws = self.workspace;
377        self.snapshot_query(move |db| {
378            let sf = find_source_file(db, ws, &uri_str)?;
379            Some(crate::db::index::file_index(db, sf).get().clone())
380        })
381    }
382
383    /// Register a file in the salsa layer without marking it open.
384    ///
385    /// Salsa's `parsed_doc` query parses lazily on first read; diagnostics
386    /// are populated by `did_open` when the editor actually opens the file.
387    pub fn index(&self, uri: Url, text: &str) {
388        self.mirror_text(&uri, text);
389    }
390
391    /// Index a file using an already-parsed `ParsedDoc`, avoiding a second parse.
392    ///
393    /// Prefer this over [`index`] when the caller already has a `ParsedDoc` (e.g.
394    /// after running `DefinitionCollector` during workspace scan). Reuses the
395    /// `Arc<str>` already owned by `doc` so that `text_cache` and `SourceFile::text`
396    /// share the same pointer — enabling the `Arc::ptr_eq` fast path in
397    /// `get_parsed_cached` on the first subsequent salsa query, without an extra
398    /// `Arc::from(source)` allocation.
399    pub fn index_from_doc(&self, uri: Url, doc: &ParsedDoc) {
400        self.mirror_text_arc(&uri, doc.source_arc());
401    }
402
403    pub fn remove(&self, uri: &Url) {
404        self.token_cache.remove(uri);
405        // Mark the URI as deleted but keep the `source_files` entry so the
406        // salsa `SourceFile` handle remains alive. Re-opening the file reuses
407        // the same handle instead of calling `SourceFile::new()` again, which
408        // would create a new orphaned salsa input on every delete-reopen cycle.
409        self.deleted_uris.insert(uri.clone());
410        self.workspace_files_dirty.store(true, Ordering::Release);
411        // Sync workspace files so the deleted file is removed from the salsa
412        // `Workspace::files` list and won't appear in workspace symbols etc.
413        self.sync_workspace_files();
414        self.text_cache.remove(uri);
415        self.parsed_cache.remove(uri);
416        self.analysis_cache.remove(uri);
417        self.type_map_cache.remove(uri);
418        // Also evict the file from the `AnalysisSession`'s internal state so
419        // workspace symbol queries don't keep returning the deleted file's
420        // declarations. Cheap when the session hasn't ingested this file.
421        let guard = self.analysis_session.lock().unwrap();
422        if let Some((_, session)) = guard.as_ref() {
423            session.invalidate_file(uri.as_str());
424        }
425    }
426
427    // ── Salsa-backed accessors ─────────────────────────────────────────────
428    //
429    // Reads run the memoized `parsed_doc` / `file_index` queries, parsing
430    // only on first access per revision. These are the production accessors
431    // used by every handler.
432
433    /// Salsa-backed parsed document.
434    ///
435    /// Salsa-backed parsed document for any mirrored file (open or
436    /// background-indexed). Returns `None` only when the file is not known
437    /// to the store. Callers that want "only if open" should gate on
438    /// `Backend::open_files` at the call site (see `Backend::get_doc`).
439    pub fn get_doc_salsa(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
440        self.get_parsed_cached(uri)
441    }
442
443    /// Salsa-backed compact symbol index.
444    pub fn get_index_salsa(&self, uri: &Url) -> Option<Arc<FileIndex>> {
445        if self.deleted_uris.contains(uri) {
446            return None;
447        }
448        if !self.file_texts.contains_key(uri) {
449            return None;
450        }
451        self.sync_workspace_files();
452        let uri_str: Arc<str> = Arc::from(uri.as_str());
453        let ws = self.workspace;
454        self.snapshot_query(move |db| {
455            let sf = find_source_file(db, ws, &uri_str)?;
456            Some(crate::db::index::file_index(db, sf).0.clone())
457        })
458    }
459
460    /// Salsa-backed pre-computed symbol map (name → Vec<SymbolEntry>).
461    /// Memoized per revision: stable files serve from cache in O(1).
462    pub fn get_symbol_map_salsa(&self, uri: &Url) -> Option<Arc<crate::symbol_map::SymbolMap>> {
463        if self.deleted_uris.contains(uri) {
464            return None;
465        }
466        if !self.file_texts.contains_key(uri) {
467            return None;
468        }
469        self.sync_workspace_files();
470        let uri_str: Arc<str> = Arc::from(uri.as_str());
471        let ws = self.workspace;
472        self.snapshot_query(move |db| {
473            let sf = find_source_file(db, ws, &uri_str)?;
474            Some(crate::db::symbol_map::symbol_map(db, sf).0.clone())
475        })
476    }
477
478    /// Pre-computed symbol maps for every entry in `open_urls` except `uri`.
479    pub fn other_symbol_maps(
480        &self,
481        uri: &Url,
482        open_urls: &[Url],
483    ) -> Vec<(Url, Arc<crate::symbol_map::SymbolMap>)> {
484        open_urls
485            .iter()
486            .filter(|u| *u != uri)
487            .filter_map(|u| self.get_symbol_map_salsa(u).map(|m| (u.clone(), m)))
488            .collect()
489    }
490
491    /// G3: shared implementation for `get_doc_salsa`.
492    /// Tries the `parsed_cache` (lock-free) first; validates via
493    /// `Arc::ptr_eq` against the G2 `text_cache` so a concurrent writer
494    /// that has already committed a new text input cannot be masked by a
495    /// stale cache entry. On miss, captures the text Arc and ParsedDoc
496    /// together inside a single `snapshot_query`, then publishes both.
497    fn get_parsed_cached(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
498        if let Some(current_text) = self.text_cache.get(uri)
499            && let Some(entry) = self.parsed_cache.get(uri)
500            && Arc::ptr_eq(&*current_text, &entry.0)
501        {
502            return Some(entry.1.clone());
503        }
504
505        if self.deleted_uris.contains(uri) {
506            return None;
507        }
508        if !self.file_texts.contains_key(uri) {
509            return None;
510        }
511        self.sync_workspace_files();
512        let uri_str: Arc<str> = Arc::from(uri.as_str());
513        let ws = self.workspace;
514        let (text, doc) = self.snapshot_query(move |db| {
515            let sf = find_source_file(db, ws, &uri_str)?;
516            let text = sf.text_input(db).text(db);
517            let doc = crate::db::parse::parsed_doc(db, sf).0.clone();
518            Some((text, doc))
519        })?;
520        self.insert_parsed_cache(uri.clone(), text, doc.clone());
521        Some(doc)
522    }
523
524    /// Publish a fresh `ParsedDoc` into `parsed_cache`, shedding roughly
525    /// half of the cache first if it has grown past [`PARSED_CACHE_CAP`].
526    ///
527    /// Eviction is probabilistic (DashMap iteration order is arbitrary),
528    /// not LRU. That's fine — salsa's own `parsed_doc` memo uses
529    /// `lru = 2048` on hotness-aware storage, so a cache-miss here is
530    /// cheap: the next read goes through `snapshot_query` and
531    /// `parsed_doc`, which still short-circuits on the salsa memo.
532    /// What we're bounding here is the *secondary* Arc retention that
533    /// would otherwise pin every workspace file's bumpalo arena alive
534    /// regardless of salsa's eviction decisions.
535    fn insert_parsed_cache(&self, uri: Url, text: Arc<str>, doc: Arc<ParsedDoc>) {
536        if self.parsed_cache.len() >= PARSED_CACHE_CAP {
537            let drop_target = self.parsed_cache.len() / 2;
538            let mut dropped = 0usize;
539            self.parsed_cache.retain(|_, _| {
540                if dropped < drop_target {
541                    dropped += 1;
542                    false
543                } else {
544                    true
545                }
546            });
547        }
548        self.parsed_cache.insert(uri, (text, doc));
549    }
550
551    /// Refresh `workspace.files` to mirror the current active file set.
552    ///
553    /// Skips all work when `workspace_files_dirty` is `false` (the common
554    /// case after the workspace scan completes — file-set changes are rare).
555    pub fn sync_workspace_files(&self) {
556        // Atomically clear the flag.  If it was already false the file set
557        // hasn't changed since the last sync; nothing to do.
558        if !self.workspace_files_dirty.swap(false, Ordering::AcqRel) {
559            return;
560        }
561
562        // Collect active (non-deleted) files without holding the host lock.
563        let mut files: Vec<(Arc<str>, FileText)> = self
564            .file_texts
565            .iter()
566            .filter(|e| !self.deleted_uris.contains(e.key()))
567            .map(|e| (Arc::<str>::from(e.key().as_str()), *e.value()))
568            .collect();
569        // Sort by URI string for stable ordering.
570        files.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
571
572        let mut host = self.host.lock().unwrap();
573        let current = self.workspace.files(host.db());
574        if current.len() == files.len()
575            && current
576                .iter()
577                .zip(files.iter())
578                .all(|(a, b)| a.0 == b.0 && a.1 == b.1)
579        {
580            return;
581        }
582        self.workspace.set_files(host.db_mut()).to(Arc::from(files));
583    }
584
    /// Mark the workspace file set as dirty so the next `sync_workspace_files`
    /// call re-runs the collect/sort/compare path instead of returning early.
    ///
    /// Exposed for benchmarks that need to measure the dirty-path cost in
    /// isolation; the store sets the flag itself when files are added or
    /// removed.
    pub fn mark_workspace_files_dirty(&self) {
        // `Release` store, matching the `AcqRel` swap that consumes the flag
        // in `sync_workspace_files`.
        self.workspace_files_dirty.store(true, Ordering::Release);
    }
591
592    /// Update the PHP version tracked by the workspace. Salsa will invalidate
593    /// all `semantic_issues` queries so diagnostics are re-evaluated.
594    /// Skips the setter when the version hasn't changed to avoid spurious
595    /// query invalidation.
596    pub fn set_php_version(&self, version: mir_analyzer::PhpVersion) {
597        let mut host = self.host.lock().unwrap();
598        if self.workspace.php_version(host.db()) == version {
599            return;
600        }
601        self.workspace.set_php_version(host.db_mut()).to(version);
602        // The analysis_cache validates against source content only, so stale
603        // FileAnalysis results from the old PHP version would survive unchanged
604        // files. Clear it so the next request re-runs with the new version.
605        drop(host);
606        self.analysis_cache.clear();
607    }
608
609    /// Session-backed workspace reference lookup. Returns `(file, line, col)`
610    /// locations for every occurrence of `symbol` in the files that the
611    /// `AnalysisSession` has ingested so far. The session's reference index
612    /// is built incrementally during `ingest_file`, so refs for files the
613    /// session hasn't seen yet (background-indexed but never opened) won't
614    /// appear here — those are covered by the AST-walker fallback in the
615    /// references handler.
616    ///
617    /// Returns LSP-style 0-based line/column.
618    pub fn session_references_to(
619        &self,
620        symbol: &mir_analyzer::Name,
621    ) -> Vec<(Arc<str>, u32, u32, u32)> {
622        let php_version = self.workspace_php_version();
623        let session = self.analysis_session(php_version);
624        session
625            .references_to(symbol)
626            .into_iter()
627            .map(|(file, range)| {
628                // mir 0.30+ uses 1-based lines and 1-based columns; LSP uses 0-based.
629                let line = range.start.line.saturating_sub(1);
630                let col_start = range.start.column.saturating_sub(1);
631                let col_end = range.end.column.saturating_sub(1);
632                (file, line, col_start, col_end)
633            })
634            .collect()
635    }
636
637    /// Phase J: salsa-memoized aggregate workspace index.
638    ///
639    /// Returns the shared `Arc<WorkspaceIndexData>` with flat
640    /// `(Url, Arc<FileIndex>)` list plus pre-built `classes_by_name` and
641    /// `subtypes_of` reverse maps. Used by workspace_symbols,
642    /// prepare_type_hierarchy, supertypes_of, subtypes_of, and
643    /// find_implementations so they don't each rebuild the aggregate per
644    /// request. Invalidates automatically when any file's `file_index`
645    /// changes.
646    pub fn get_workspace_index_salsa(&self) -> Arc<crate::db::workspace_index::WorkspaceIndexData> {
647        self.sync_workspace_files();
648        let ws = self.workspace;
649        self.snapshot_query(move |db| {
650            crate::db::workspace_index::workspace_index(db, ws)
651                .0
652                .clone()
653        })
654    }
655
656    /// No-op after mir 0.22 migration. The session manages its own warm-up
657    /// via `ingest_file` / `analyze_dependents_of`; there's nothing for us
658    /// to pre-warm here.
659    pub fn warm_reference_index(&self) {}
660
661    /// Return the raw source text for `uri` if it has been mirrored into the
662    /// salsa workspace. Used by the references handler to pre-filter session
663    /// results by checking whether a file mentions the owning class name.
664    pub fn source_text(&self, uri: &Url) -> Option<Arc<str>> {
665        self.text_cache.get(uri).map(|e| Arc::clone(&e))
666    }
667
668    /// Run Pass 1 + Pass 2 analysis on every mirrored workspace file so that
669    /// type-aware queries (e.g. `session.references_to`) see the full workspace.
670    ///
671    /// Reference locations are only recorded during Pass 2 (`FileAnalyzer::analyze`).
672    /// `ingest_file` alone (Pass 1) is not sufficient. Only needed for cross-file
673    /// queries like `textDocument/references` that rely on the reference index.
674    /// The session's internal cache makes re-analysis of unchanged files cheap.
675    pub fn ensure_all_files_ingested(&self) {
676        let php_version = self.workspace_php_version();
677        let session = self.analysis_session(php_version);
678        let urls: Vec<Url> = self
679            .file_texts
680            .iter()
681            .filter(|e| !self.deleted_uris.contains(e.key()))
682            .map(|e| e.key().clone())
683            .collect();
684        for uri in &urls {
685            let Some(doc) = self.get_doc_salsa(uri) else {
686                continue;
687            };
688            let file: Arc<str> = Arc::from(uri.as_str());
689            session.ingest_file(file.clone(), doc.source_arc());
690            let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
691            let owned_program = php_ast::owned::to_owned_program(doc.program());
692            let analyzer = mir_analyzer::FileAnalyzer::new(&session);
693            analyzer.analyze(file, doc.source(), &owned_program, &source_map);
694        }
695    }
696
697    /// Cache the semantic tokens computed for a delta response.
698    /// `result_id` is an opaque string (a hash of the token data) returned to the client.
699    pub fn store_token_cache(&self, uri: &Url, result_id: String, tokens: Arc<Vec<SemanticToken>>) {
700        self.token_cache.insert(uri.clone(), (result_id, tokens));
701    }
702
703    /// Return the cached tokens if `result_id` matches the stored one.
704    pub fn get_token_cache(&self, uri: &Url, result_id: &str) -> Option<Arc<Vec<SemanticToken>>> {
705        self.token_cache
706            .get(uri)
707            .filter(|e| e.0.as_str() == result_id)
708            .map(|e| Arc::clone(&e.1))
709    }
710
711    /// Before running semantic analysis for `uri`, resolve every `use`-imported
712    /// class through the PSR-4 map and mirror any that are not yet registered.
713    /// This prevents spurious `UndefinedClass` diagnostics when the background
714    /// workspace scan has not yet reached a dependency file.
715    fn lazy_load_psr4_imports(&self, uri: &Url) {
716        let doc = match self.get_doc_salsa(uri) {
717            Some(d) => d,
718            None => return,
719        };
720        let fqns = crate::references::collect_referenced_class_fqns(&doc);
721        if fqns.is_empty() {
722            return;
723        }
724        let psr4 = self.psr4.load();
725        let paths: Vec<std::path::PathBuf> =
726            fqns.iter().filter_map(|fqcn| psr4.resolve(fqcn)).collect();
727        drop(psr4);
728
729        for path in paths {
730            let Ok(dep_url) = Url::from_file_path(&path) else {
731                continue;
732            };
733            if self.file_texts.contains_key(&dep_url) && !self.deleted_uris.contains(&dep_url) {
734                continue;
735            }
736            if let Ok(text) = std::fs::read_to_string(&path) {
737                self.mirror_text(&dep_url, &text);
738            }
739        }
740    }
741
742    /// Raw semantic issues for a file, computed via mir's session-based
743    /// `FileAnalyzer`. The session lazy-loads dependencies via PSR-4 so the
744    /// LSP no longer needs to mirror vendor up-front. Callers apply their
745    /// own `DiagnosticsConfig` filter via
746    /// [`crate::semantic_diagnostics::issues_to_diagnostics`].
747    #[tracing::instrument(skip_all)]
748    pub fn get_semantic_issues_salsa(&self, uri: &Url) -> Option<Arc<[mir_issues::Issue]>> {
749        let analysis = self.cached_analysis(uri)?;
750        let file: Arc<str> = Arc::from(uri.as_str());
751        // Workspace-level class issues for this file (circular inheritance,
752        // override violations, abstract-method gaps). These are session-wide
753        // (a dependency edit changes them without changing this file's bytes),
754        // so they are recomputed live rather than cached alongside the
755        // per-file body analysis.
756        let class_issues = {
757            let _s = tracing::debug_span!("session.class_issues_for").entered();
758            self.analysis_session(self.workspace_php_version())
759                .class_issues(std::slice::from_ref(&file))
760        };
761        let combined: Vec<mir_issues::Issue> = analysis
762            .issues
763            .iter()
764            .cloned()
765            .chain(class_issues)
766            .filter(|i| !i.suppressed)
767            .collect();
768        Some(Arc::from(combined))
769    }
770
771    /// Run (or reuse) mir's per-file body analysis, retaining the full
772    /// [`mir_analyzer::FileAnalysis`] — issues **and** resolved symbols — across
773    /// requests. Diagnostics read `.issues`; position features call
774    /// `.symbol_at(offset)` for the resolved type at a cursor.
775    ///
776    /// Cache hit when the entry's captured source `Arc` is pointer-equal to the
777    /// file's current `doc.source_arc()`. A miss recomputes and overwrites, so
778    /// the entry self-evicts on any content edit.
779    /// Build (or reuse) the whole-doc completion [`crate::type_map::TypeMap`]
780    /// for `uri`. Cache hit when the entry's captured source `Arc` is
781    /// pointer-equal to `doc.source_arc()` and the PHPStorm-meta pointer is
782    /// unchanged (meta lives behind `ArcSwap`, so its address is stable until
783    /// `.phpstorm.meta.php` is reloaded). A miss rebuilds and overwrites, so
784    /// the entry self-evicts on any content edit.
785    pub fn cached_type_map(
786        &self,
787        uri: &Url,
788        doc: &crate::ast::ParsedDoc,
789        meta: Option<&crate::phpstorm_meta::PhpStormMeta>,
790    ) -> Arc<crate::type_map::TypeMap> {
791        let source = doc.source_arc();
792        let meta_key = meta.map_or(0usize, |m| std::ptr::from_ref(m) as usize);
793        if let Some(entry) = self.type_map_cache.get(uri)
794            && Arc::ptr_eq(&entry.0, &source)
795            && entry.1 == meta_key
796        {
797            return Arc::clone(&entry.2);
798        }
799        let map = Arc::new(crate::type_map::TypeMap::from_doc_with_meta(doc, meta));
800        self.type_map_cache
801            .insert(uri.clone(), (source, meta_key, Arc::clone(&map)));
802        map
803    }
804
805    /// Cache-hit-only variant of [`Self::cached_analysis`]: returns the cached
806    /// analysis when the entry is current for the file's text, never computes.
807    /// Lets async handlers take the warm path synchronously and reserve
808    /// `spawn_blocking` for the cold path (mir Pass 1 + Pass 2 can take
809    /// hundreds of ms on large files).
810    pub fn cached_analysis_if_fresh(&self, uri: &Url) -> Option<Arc<mir_analyzer::FileAnalysis>> {
811        let doc = self.get_doc_salsa(uri)?;
812        let source = doc.source_arc();
813        let entry = self.analysis_cache.get(uri)?;
814        Arc::ptr_eq(&entry.0, &source).then(|| Arc::clone(&entry.1))
815    }
816
817    #[tracing::instrument(skip_all)]
818    pub fn cached_analysis(&self, uri: &Url) -> Option<Arc<mir_analyzer::FileAnalysis>> {
819        // Need the parsed doc both for the analyzer and as the cache key.
820        let doc = self.get_doc_salsa(uri)?;
821        let source = doc.source_arc();
822
823        if let Some(entry) = self.analysis_cache.get(uri)
824            && Arc::ptr_eq(&entry.0, &source)
825        {
826            return Some(Arc::clone(&entry.1));
827        }
828
829        let php_version = self.with_host(|h| self.workspace.php_version(h.db()));
830        let session = self.analysis_session(php_version);
831        let file: Arc<str> = Arc::from(uri.as_str());
832        {
833            let _s = tracing::debug_span!("session.ingest_file").entered();
834            session.ingest_file(file.clone(), source.clone());
835        }
836        // Pre-load every imported class via PSR-4 so Pass-2 doesn't emit
837        // spurious `UndefinedClass` for classes that ARE on disk but haven't
838        // been ingested yet. The session's resolver was supplied at
839        // construction time.
840        {
841            let _s = tracing::debug_span!("session.lazy_load_imports").entered();
842            // Pre-load every class-typed reference resolved via the file's
843            // namespace + `use` imports. This covers `use` imports, FQN refs
844            // (`new \App\Foo`), and bare same-namespace refs (`new Foo` from
845            // inside `namespace App;`) in a single sweep — mir won't auto-
846            // resolve via the ClassResolver, so anything not lazy-loaded here
847            // produces a spurious `UndefinedClass`.
848            let fqns = crate::references::collect_referenced_class_fqns(&doc);
849            for fqcn in &fqns {
850                let _ = session.load_class(fqcn);
851            }
852        }
853        let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
854        let owned_program = php_ast::owned::to_owned_program(doc.program());
855        let analysis = {
856            let _s = tracing::debug_span!("FileAnalyzer::analyze").entered();
857            let analyzer = mir_analyzer::FileAnalyzer::new(&session);
858            Arc::new(analyzer.analyze(file.clone(), doc.source(), &owned_program, &source_map))
859        };
860        self.analysis_cache
861            .insert(uri.clone(), (source, Arc::clone(&analysis)));
862        Some(analysis)
863    }
864
865    /// Returns `(uri, doc)` for files currently open in the editor.
866    ///
867    /// Resolve `open_urls` (from `Backend::open_urls()`) to parsed docs.
868    /// Files not mirrored in the salsa layer are filtered out silently.
869    pub fn docs_for(&self, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
870        open_urls
871            .iter()
872            .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
873            .collect()
874    }
875
876    /// `(primary, doc)` first, then every other open file's parsed doc.
877    /// The `open_urls` slice should include `uri` — this helper filters it out.
878    pub fn doc_with_others(
879        &self,
880        uri: &Url,
881        doc: Arc<ParsedDoc>,
882        open_urls: &[Url],
883    ) -> Vec<(Url, Arc<ParsedDoc>)> {
884        let mut result = vec![(uri.clone(), doc)];
885        result.extend(self.other_docs(uri, open_urls));
886        result
887    }
888
889    /// Parsed docs for every entry in `open_urls` except `uri`.
890    pub fn other_docs(&self, uri: &Url, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
891        open_urls
892            .iter()
893            .filter(|u| *u != uri)
894            .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
895            .collect()
896    }
897
898    /// Compact symbol index for every mirrored file.
899    pub fn all_indexes(&self) -> Vec<(Url, Arc<FileIndex>)> {
900        self.get_workspace_index_salsa().files.clone()
901    }
902
903    /// Same as `all_indexes` but excludes `uri`.
904    pub fn other_indexes(&self, uri: &Url) -> Vec<(Url, Arc<FileIndex>)> {
905        self.get_workspace_index_salsa()
906            .files
907            .iter()
908            .filter(|(u, _)| u != uri)
909            .cloned()
910            .collect()
911    }
912
913    /// Parsed documents for every mirrored file (open or background-indexed).
914    /// Suitable for full-scan operations: find-references, rename,
915    /// call_hierarchy, code_lens.
916    pub fn all_docs_for_scan(&self) -> Vec<(Url, Arc<ParsedDoc>)> {
917        let urls: Vec<Url> = self
918            .file_texts
919            .iter()
920            .filter(|e| !self.deleted_uris.contains(e.key()))
921            .map(|e| e.key().clone())
922            .collect();
923        urls.into_iter()
924            .filter_map(|u| self.get_doc_salsa(&u).map(|d| (u, d)))
925            .collect()
926    }
927}
928
// Historical note: `warm_file_refs_parallel` (removed).
//
// The helper used to run `file_refs` for every workspace file in parallel.
//
// `db` clones are cheap (they share the same `Arc<Zalsa>` memo store), so
// results computed on any clone were immediately visible to all others at the
// same revision. After it returned, the sequential loop inside `symbol_refs`
// only did cheap memo lookups instead of running `StatementsAnalyzer` on
// every file one-by-one.
//
// Per-task `salsa::Cancelled` was caught and swallowed. If the revision had
// been bumped, the main thread's next salsa call inside `symbol_refs` raised
// `Cancelled` too and `snapshot_query` retried the whole operation from
// scratch. If the revision had not been bumped, any file whose task was
// cancelled before completion simply had no memo entry and `symbol_refs`'s
// sequential loop recomputed it.
//
// It was removed because the analyzer-side reference index is now owned by
// `AnalysisSession` and warmed by `ingest_file`; the salsa-side helper has no
// counterpart in the new architecture. (These are plain `//` comments on
// purpose: as `///` they would attach to the `tests` module below.)
946
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `file://` URL from an absolute `path`. Panics if the result
    /// does not parse, which is acceptable in tests.
    fn uri(path: &str) -> Url {
        Url::parse(&format!("file://{path}")).unwrap()
    }

    /// Phase E4: open-file state lives on `Backend`, not `DocumentStore`.
    /// Tests that need to simulate "file is open" just mirror the text into
    /// the salsa input — the open/closed distinction is enforced by the
    /// caller (Backend) in production.
    fn open(store: &DocumentStore, u: Url, text: String) {
        store.mirror_text(&u, &text);
    }

    // Removed `salsa_codebase_aggregates_all_files`: the salsa-side codebase
    // aggregation was deleted with the mir 0.22 migration. Equivalent
    // behaviour is now covered by mir-analyzer's own session tests.

    /// `index` makes the file's symbols visible through `get_index_salsa`.
    #[test]
    fn index_registers_file_in_salsa() {
        let store = DocumentStore::new();
        store.index(uri("/lib.php"), "<?php\nfunction lib_fn() {}");
        let idx = store.get_index_salsa(&uri("/lib.php")).unwrap();
        assert_eq!(idx.functions.len(), 1);
        assert_eq!(idx.functions[0].name, "lib_fn".into());
    }

    /// After `remove`, the file's index is no longer returned.
    #[test]
    fn remove_hides_file_from_index() {
        let store = DocumentStore::new();
        let u = uri("/lib.php");
        store.index(u.clone(), "<?php");
        store.remove(&u);
        assert!(store.get_index_salsa(&u).is_none());
    }

    /// A removed file is hidden from `source_file`, and mirroring it again
    /// hands back the same handle rather than a new one.
    #[test]
    fn remove_and_reopen_reuses_source_file_handle() {
        let store = DocumentStore::new();
        let u = uri("/lib.php");
        store.index(u.clone(), "<?php");
        let ft_before = store.source_file(&u).unwrap();
        store.remove(&u);
        assert!(
            store.source_file(&u).is_none(),
            "deleted file should be hidden"
        );
        store.mirror_text(&u, "<?php");
        let ft_after = store.source_file(&u).unwrap();
        assert!(
            ft_before == ft_after,
            "reopen must reuse the same FileText handle"
        );
    }

    /// Repeated remove/index cycles over the same URLs must leave
    /// `source_files_len()` unchanged.
    #[test]
    fn delete_reopen_churn_does_not_amplify_salsa_inputs() {
        let store = DocumentStore::new();
        let uris: Vec<Url> = (0..20).map(|i| uri(&format!("/churn/f{i}.php"))).collect();
        for u in &uris {
            store.index(u.clone(), "<?php class A {}");
        }
        let count_before = store.source_files_len();
        for _ in 0..10 {
            for u in &uris {
                store.remove(u);
            }
            for u in &uris {
                store.index(u.clone(), "<?php class A {}");
            }
        }
        assert_eq!(
            store.source_files_len(),
            count_before,
            "delete-reopen cycles must not create new salsa inputs (L1-B regression guard)"
        );
    }

    /// `all_indexes` covers files added via both `mirror_text` (through
    /// `open`) and `index`.
    #[test]
    fn all_indexes_includes_every_mirrored_file() {
        let store = DocumentStore::new();
        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
        store.index(uri("/b.php"), "<?php\nfunction b() {}");
        assert_eq!(store.all_indexes().len(), 2);
    }

    /// `other_indexes` leaves out the URI passed to it.
    #[test]
    fn other_indexes_excludes_current_uri() {
        let store = DocumentStore::new();
        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
        open(&store, uri("/b.php"), "<?php\nfunction b() {}".to_string());
        assert_eq!(store.other_indexes(&uri("/a.php")).len(), 1);
    }

    /// `other_docs` leaves out the URI passed to it even when that URI is in
    /// the `open_urls` slice.
    #[test]
    fn other_docs_excludes_current_uri() {
        let store = DocumentStore::new();
        let ua = uri("/a.php");
        let ub = uri("/b.php");
        open(&store, ua.clone(), "<?php\nfunction a() {}".to_string());
        open(&store, ub.clone(), "<?php\nfunction b() {}".to_string());
        let open_urls = vec![ua.clone(), ub];
        assert_eq!(store.other_docs(&ua, &open_urls).len(), 1);
    }

    /// A stored token-cache entry is retrievable by its result id until
    /// `evict_token_cache` removes it.
    #[test]
    fn evict_token_cache_removes_entry() {
        let store = DocumentStore::new();
        let u = uri("/a.php");
        open(&store, u.clone(), "<?php".to_string());
        store.store_token_cache(&u, "id1".to_string(), Arc::new(vec![]));
        assert!(store.get_token_cache(&u, "id1").is_some());
        store.evict_token_cache(&u);
        assert!(store.get_token_cache(&u, "id1").is_none());
    }

    /// The `index` path yields a `FileIndex` containing the file's function.
    #[test]
    fn index_populates_file_index_with_symbols() {
        let store = DocumentStore::new();
        store.index(uri("/a.php"), "<?php\nfunction hello() {}");
        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
        assert_eq!(idx.functions.len(), 1);
        assert_eq!(idx.functions[0].name, "hello".into());
    }

    /// The `mirror_text` path (via `open`) yields a `FileIndex` containing
    /// the file's class.
    #[test]
    fn open_populates_file_index_with_symbols() {
        let store = DocumentStore::new();
        open(&store, uri("/a.php"), "<?php\nclass Foo {}".to_string());
        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
        assert_eq!(idx.classes.len(), 1);
        assert_eq!(idx.classes[0].name, "Foo".into());
    }

    // ── Mirror invariants ────────────────────────────────────────────────
    //
    // Every mutation path that changes file text must keep the salsa layer
    // consistent. These tests walk a set-edit-reopen cycle and assert that
    // the salsa-derived `FileIndex` reflects the latest text at each step.

    /// Sorted list of the class names followed-in-sort-order by the function
    /// names found in `idx`.
    fn names_of(idx: &FileIndex) -> Vec<String> {
        let mut out: Vec<String> = idx.classes.iter().map(|c| c.name.to_string()).collect();
        out.extend(idx.functions.iter().map(|f| f.name.to_string()));
        out.sort();
        out
    }

    /// Sorted symbol names from the salsa-derived index of `url`; empty when
    /// the store has no index for it.
    fn salsa_index_names(store: &DocumentStore, url: &Url) -> Vec<String> {
        store
            .snapshot_query_file_index(url)
            .map(|idx| names_of(&idx))
            .unwrap_or_default()
    }

    /// Each successive `mirror_text` of the same URL is reflected in the
    /// salsa-derived index.
    #[test]
    fn mirror_tracks_repeated_edits() {
        let store = DocumentStore::new();
        let u = uri("/mirror.php");

        open(&store, u.clone(), "<?php\nclass A {}".to_string());
        assert_eq!(salsa_index_names(&store, &u), vec!["A".to_string()]);

        open(
            &store,
            u.clone(),
            "<?php\nclass A {}\nclass B {}".to_string(),
        );
        assert_eq!(
            salsa_index_names(&store, &u),
            vec!["A".to_string(), "B".to_string()]
        );

        open(&store, u.clone(), "<?php\nfunction greet() {}".to_string());
        assert_eq!(salsa_index_names(&store, &u), vec!["greet".to_string()]);
    }

    /// Both background-indexing entry points (`index` and `index_from_doc`)
    /// feed the salsa-derived index.
    #[test]
    fn mirror_tracks_index_and_index_from_doc() {
        let store = DocumentStore::new();

        // Background `index(url, text)` path.
        let u1 = uri("/bg1.php");
        store.index(u1.clone(), "<?php\nclass Bg1 {}");
        assert_eq!(salsa_index_names(&store, &u1), vec!["Bg1".to_string()]);

        // `index_from_doc(url, &doc)` path (workspace-scan Phase 2).
        let u2 = uri("/bg2.php");
        let doc = crate::analysis::diagnostics::parse_document_no_diags(
            "<?php\nclass Bg2 {}\nfunction f() {}",
        );
        store.index_from_doc(u2.clone(), &doc);
        assert_eq!(
            salsa_index_names(&store, &u2),
            vec!["Bg2".to_string(), "f".to_string()]
        );
    }

    /// Seeding a cached index for a URL that was never mirrored is a no-op
    /// (returns `false`) — avoids silently allocating SourceFiles outside
    /// `mirror_text`'s control.
    #[test]
    fn seed_cached_index_noops_for_unknown_uri() {
        let store = DocumentStore::new();
        let u = uri("/never_mirrored.php");
        let index = Arc::new(crate::file_index::FileIndex::default());
        assert!(!store.seed_cached_index(&u, index));
    }

    /// parsed_cache must stay bounded — inserting more than
    /// `PARSED_CACHE_CAP` unique URLs must not cause unbounded growth.
    /// Eviction is probabilistic, so we only assert the bound, not which
    /// entries survive.
    #[test]
    fn parsed_cache_stays_bounded_under_many_inserts() {
        let store = DocumentStore::new();
        let overflow = PARSED_CACHE_CAP + 100;
        for i in 0..overflow {
            let u = uri(&format!("/cap/file{i}.php"));
            store.index(u.clone(), "<?php\nclass A {}");
            // Force a parsed_cache insert via get_doc_salsa.
            let _ = store.get_doc_salsa(&u);
        }
        assert!(
            store.parsed_cache.len() <= PARSED_CACHE_CAP,
            "parsed_cache grew to {} entries (cap {})",
            store.parsed_cache.len(),
            PARSED_CACHE_CAP
        );
    }

    /// G3: confirms the `parsed_cache` actually hits — two consecutive
    /// `get_doc_salsa` calls on unchanged text return the same `Arc`
    /// (pointer equality), and an edit forces a miss that produces a
    /// different `Arc`.
    #[test]
    fn get_doc_salsa_cache_hits_across_calls() {
        let store = DocumentStore::new();
        let u = uri("/g3_cache.php");
        open(&store, u.clone(), "<?php\nclass G3 {}".to_string());

        let a = store.get_doc_salsa(&u).unwrap();
        let b = store.get_doc_salsa(&u).unwrap();
        assert!(
            Arc::ptr_eq(&a, &b),
            "parsed_cache hit should yield the same Arc across calls"
        );

        open(&store, u.clone(), "<?php\nclass G3b {}".to_string());
        let c = store.get_doc_salsa(&u).unwrap();
        assert!(
            !Arc::ptr_eq(&a, &c),
            "edit should invalidate the parsed_cache entry"
        );
    }

    /// A background-indexed (never "opened") file still yields a parsed doc.
    #[test]
    fn get_doc_salsa_returns_some_for_mirrored_files() {
        // Phase E4: `get_doc_salsa` no longer gates on open-state. The
        // open/closed distinction now lives on `Backend::get_doc`.
        let store = DocumentStore::new();
        let u = uri("/e4_doc.php");
        store.index(u.clone(), "<?php\nclass P {}");
        assert!(store.get_doc_salsa(&u).is_some());
    }

    /// Both salsa accessors return `None` for a URI the store has never seen.
    #[test]
    fn get_salsa_accessors_return_none_for_unknown_uri() {
        let store = DocumentStore::new();
        let u = uri("/never-seen.php");
        assert!(store.get_doc_salsa(&u).is_none());
        assert!(store.get_index_salsa(&u).is_none());
    }

    /// Phase E1: concurrent readers and writers must not deadlock, panic, or
    /// return stale data. Writers briefly bump inputs while readers are
    /// running on cloned snapshots; any `salsa::Cancelled` raised on the
    /// reader side must be caught and retried by `snapshot_query`.
    ///
    /// The salsa surface (`get_doc_salsa`, `get_index_salsa`) is protected by
    /// `snapshot_query`'s last-resort host-lock fallback.
    ///
    /// Note: the assertions here only check that no thread panics; the
    /// results of the reads are discarded.
    #[test]
    fn concurrent_reads_and_writes_do_not_panic() {
        use std::sync::Arc;
        use std::thread;
        use std::time::{Duration, Instant};

        let store = Arc::new(DocumentStore::new());
        let urls: Vec<Url> = (0..8).map(|i| uri(&format!("/f{i}.php"))).collect();
        for (i, u) in urls.iter().enumerate() {
            open(&store, u.clone(), format!("<?php\nclass C{i} {{}}"));
        }

        // All threads stop once this shared deadline passes.
        let deadline = Instant::now() + Duration::from_millis(400);
        let mut handles = Vec::new();

        // Writer thread: keep bumping every file's text.
        {
            let store = Arc::clone(&store);
            let urls = urls.clone();
            handles.push(thread::spawn(move || {
                let mut rev = 0u32;
                while Instant::now() < deadline {
                    for u in &urls {
                        let text = format!("<?php\nclass C{{}}\n// rev {rev}");
                        store.mirror_text(u, &text);
                    }
                    rev += 1;
                }
            }));
        }

        // Reader threads: hammer the salsa accessors.
        for _ in 0..4 {
            let store = Arc::clone(&store);
            let urls = urls.clone();
            handles.push(thread::spawn(move || {
                while Instant::now() < deadline {
                    for u in &urls {
                        let _ = store.get_doc_salsa(u);
                        let _ = store.get_index_salsa(u);
                    }
                    // Post mir 0.22: codebase + refs live in the session,
                    // not salsa. Concurrent-read smoke is limited to the
                    // remaining salsa surface (parsed_doc, file_index).
                }
            }));
        }

        // A panic in any thread surfaces here as a failed join.
        for h in handles {
            h.join().expect("no panic under concurrent read/write");
        }
    }

    /// PSR-4 lazy-loading: `get_semantic_issues_salsa` must not emit
    /// `UndefinedClass` for a class that is PSR-4-resolvable on disk, even
    /// when the dependency file is not yet in `source_files`.
    #[test]
    fn psr4_lazy_load_suppresses_undefined_class() {
        let tmp = tempfile::tempdir().unwrap();

        // Write Entity.php to disk (not mirrored into the store).
        std::fs::create_dir_all(tmp.path().join("src/Model")).unwrap();
        std::fs::write(
            tmp.path().join("src/Model/Entity.php"),
            "<?php\nnamespace App\\Model;\nclass Entity {}\n",
        )
        .unwrap();

        // Write composer.json so Psr4Map::load can build the map.
        std::fs::write(
            tmp.path().join("composer.json"),
            r#"{"autoload":{"psr-4":{"App\\":"src/"}}}"#,
        )
        .unwrap();

        let store = DocumentStore::new();

        // Inject a PSR-4 map pointing at the tmp dir.
        store
            .psr4
            .store(Arc::new(crate::autoload::Psr4Map::load(tmp.path())));

        // Mirror the consuming file (Entity not yet in source_files).
        // Uses Entity as a parameter type hint — the analyzer resolves these
        // through use statements, so this exercises the full PSR-4 lazy-load path.
        let handler_url = Url::from_file_path(tmp.path().join("src/Service/Handler.php")).unwrap();
        store.mirror_text(
            &handler_url,
            "<?php\nnamespace App\\Service;\nuse App\\Model\\Entity;\nfunction handle(Entity $e): Entity { return $e; }\n",
        );

        let issues = store.get_semantic_issues_salsa(&handler_url).unwrap();
        let undef: Vec<_> = issues
            .iter()
            .filter(|i| matches!(i.kind, mir_issues::IssueKind::UndefinedClass { .. }))
            .collect();
        assert!(
            undef.is_empty(),
            "PSR-4 lazy-loading must prevent UndefinedClass for App\\Model\\Entity; got: {undef:?}"
        );
    }

    /// Issue #191 regression: workspace-wide scans (find-references, rename,
    /// call-hierarchy) must not re-parse closed/indexed files on repeated
    /// invocations. Once a file's `ParsedDoc` has been produced, subsequent
    /// `all_docs_for_scan()` calls must hit the cache and return the same
    /// `Arc<ParsedDoc>` (pointer equality), proving no re-parse occurred.
    ///
    /// The cache layers protecting this are:
    ///   1. `parsed_cache` (cap [`PARSED_CACHE_CAP`]) — read-through, validated
    ///      via `Arc::ptr_eq` on the text Arc.
    ///   2. salsa `parsed_doc` memo (`lru = 2048`) — second line of defense
    ///      when `parsed_cache` evicts.
    ///
    /// Together they keep every workspace-scan op O(N) memo lookups, never
    /// O(N) parses, for any workspace whose file count fits the cap.
    #[test]
    fn all_docs_for_scan_does_not_reparse_indexed_files() {
        let store = DocumentStore::new();
        const N: usize = 50;
        for i in 0..N {
            let u = uri(&format!("/scan/file{i}.php"));
            store.index(u, &format!("<?php\nclass C{i} {{}}\nfunction f{i}() {{}}"));
        }

        let first: Vec<_> = store.all_docs_for_scan();
        let second: Vec<_> = store.all_docs_for_scan();
        assert_eq!(first.len(), N);
        assert_eq!(second.len(), N);

        // Key the first scan by URL so the comparison does not depend on the
        // order in which the two scans return their entries.
        let by_url_first: std::collections::HashMap<Url, Arc<ParsedDoc>> =
            first.into_iter().collect();
        for (u, doc2) in second {
            let doc1 = by_url_first
                .get(&u)
                .expect("second scan returned a URL the first didn't");
            assert!(
                Arc::ptr_eq(doc1, &doc2),
                "{u} re-parsed across all_docs_for_scan calls — \
                 cache (parsed_cache + salsa parsed_doc memo) failed to hit"
            );
        }

        // Editing one file's text must invalidate just that file's entry,
        // not the rest. This locks in self-eviction via Arc::ptr_eq on text.
        let edited_url = uri("/scan/file0.php");
        let pre_edit = store.get_doc_salsa(&edited_url).unwrap();
        store.index(edited_url.clone(), "<?php\nclass C0Edited {}");
        let post_edit = store.get_doc_salsa(&edited_url).unwrap();
        assert!(
            !Arc::ptr_eq(&pre_edit, &post_edit),
            "edited file must produce a fresh ParsedDoc"
        );
        for i in 1..N {
            let u = uri(&format!("/scan/file{i}.php"));
            let original = by_url_first.get(&u).unwrap();
            let after = store.get_doc_salsa(&u).unwrap();
            assert!(
                Arc::ptr_eq(original, &after),
                "{u} should not have re-parsed because of an unrelated edit"
            );
        }
    }
}