Skip to main content

php_lsp/
document_store.rs

1use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
2use std::sync::{Arc, Mutex, OnceLock};
3
4use arc_swap::ArcSwap;
5
6use dashmap::DashMap;
7use salsa::Setter;
8use tower_lsp::lsp_types::{SemanticToken, Url};
9
10use crate::ast::ParsedDoc;
11use crate::autoload::Psr4Map;
12use crate::db::analysis::AnalysisHost;
13use crate::db::input::{FileId, SourceFile, Workspace};
14use crate::file_index::FileIndex;
15
/// Maximum number of entries kept in `parsed_cache`.
///
/// Chosen to equal the `lru = 2048` configured on `parsed_doc` in
/// `src/db/parse.rs`, so this secondary `Arc` retention never keeps more
/// ASTs alive than salsa's own memo does. Once the cache holds this many
/// entries, the next insert first sheds roughly half of them (see
/// [`DocumentStore::insert_parsed_cache`]).
const PARSED_CACHE_CAP: usize = 2_048;
21
22pub struct DocumentStore {
23    /// Cached semantic tokens per document: (result_id, tokens).
24    /// Used to compute incremental deltas for `textDocument/semanticTokens/full/delta`.
25    /// Tokens are stored in an `Arc` so the delta-path lookup can hand the
26    /// previous snapshot back without cloning the inner Vec.
27    token_cache: DashMap<Url, (String, Arc<Vec<SemanticToken>>)>,
28
29    // ── Salsa-input storage ────────────────────────────────────────────────
30    // Phase E4: `DocumentStore` is now a pure salsa-input wrapper. Open-file
31    // state (live text, version token, parse-diagnostics cache) lives on
32    // `Backend` in its `open_files` map; the set of files tracked by salsa
33    // is exactly `source_files.keys()`.
34    /// Mutex — held briefly to clone the database for reads and to mutate
35    /// it for writes. Per-thread salsa state (`zalsa_local`) is `!Sync`,
36    /// which rules out `RwLock<AnalysisHost>`. Readers instead snapshot the
37    /// db (cheap — storage is `Arc<Zalsa>`) and run queries on the clone
38    /// with the lock released, giving real read/read parallelism. Writers
39    /// during an in-flight read bump the shared revision; the reader raises
40    /// `salsa::Cancelled` on its next query call and `snapshot_query` below
41    /// retries with a fresh snapshot.
42    host: Mutex<AnalysisHost>,
43    /// `Url -> SourceFile` lookup. The `SourceFile` is a salsa-id handle; the
44    /// underlying input lives in `host.db` for the lifetime of the database.
45    source_files: DashMap<Url, SourceFile>,
46    /// G2: lock-free mirror of each `SourceFile`'s last-set text. Lets
47    /// `mirror_text` dedup repeated no-op updates (common during workspace
48    /// scan and `did_open` for already-indexed files) without taking
49    /// `host.lock()`. Updated inside the mutex whenever the salsa input is
50    /// set, so it is always consistent with the salsa revision for the
51    /// purposes of byte-equality comparison.
52    text_cache: DashMap<Url, Arc<str>>,
53    /// G3: cross-revision read-through cache for `parsed_doc`. Keyed on
54    /// `Url`, stored value is `(text_arc, Arc<ParsedDoc>)` — the text Arc
55    /// captured at parse time. On read, compare against `text_cache[uri]`
56    /// via `Arc::ptr_eq`; a match guarantees the cached ParsedDoc matches
57    /// the current salsa revision's text input, so the query can return
58    /// without snapshotting the db or invoking salsa at all. A miss
59    /// (different pointer, stale or absent entry) falls through to
60    /// `snapshot_query`. Self-evicts on text change — no writer-side
61    /// invalidation is required, which avoids the TOCTOU window where a
62    /// concurrent reader could re-insert a stale entry after a writer's
63    /// eviction.
64    ///
65    /// Size-bounded at [`PARSED_CACHE_CAP`] — see `insert_parsed_cache`.
66    /// Without this bound, every workspace file read-through would pin
67    /// its bumpalo arena alive regardless of salsa's `lru = 2048` on the
68    /// `parsed_doc` memo.
69    parsed_cache: DashMap<Url, (Arc<str>, Arc<ParsedDoc>)>,
70    /// Cross-request read-through cache for a file's mir body analysis. Keyed
71    /// on `Url`, stored value is `(source_arc, Arc<FileAnalysis>)` — the source
72    /// Arc captured at analysis time. On read, compare against the current
73    /// `doc.source_arc()` via `Arc::ptr_eq`; a match means the cached analysis
74    /// matches the live content. A miss recomputes and overwrites, so the cache
75    /// self-evicts on edit (same discipline as `parsed_cache`).
76    ///
77    /// `FileAnalysis` carries BOTH the issues consumed by diagnostics and the
78    /// per-expression `ResolvedSymbol`s consumed by position features (hover,
79    /// type-definition, completion, inlay hints). Retaining it means mir's
80    /// `FileAnalyzer::analyze` runs once per content revision instead of being
81    /// re-run (for diagnostics) and then re-derived in a weaker form (for
82    /// position queries). Bounded by the set of analyzed files (open files plus
83    /// their open dependents); explicitly evicted in [`DocumentStore::remove`].
84    analysis_cache: DashMap<Url, (Arc<str>, Arc<mir_analyzer::FileAnalysis>)>,
85    /// Monotonic allocator for `FileId`s (one per ever-seen URL).
86    next_file_id: AtomicU32,
87    /// Set to `true` when the set of tracked files changes (add or remove).
88    /// `sync_workspace_files` skips the collect/sort/compare path when this
89    /// is `false`, avoiding a mutex acquisition on every LSP request.
90    workspace_files_dirty: AtomicBool,
91    /// Workspace salsa input. Tracks the full set of `SourceFile`s that
92    /// participate in whole-program queries (`codebase`, `file_refs`).
93    /// Re-synced from `source_files` on demand by `sync_workspace_files`.
94    workspace: Workspace,
95    /// Shared PSR-4 namespace-to-path map. Shared with `Backend` via `Arc`
96    /// so updates from `initialized` (when composer.json is loaded) are
97    /// visible here without any additional wiring. `ArcSwap` makes reads
98    /// lock-free — a poisoned guard can no longer crash a request handler.
99    psr4: Arc<ArcSwap<Psr4Map>>,
100    /// mir-analyzer's `AnalysisSession` — owns the workspace MirDb, runs
101    /// Pass-2 analysis, and lazy-loads dependencies via PSR-4. Built lazily
102    /// on first use; rebuilt when PHP version changes.
103    analysis_session: Mutex<Option<(mir_analyzer::PhpVersion, Arc<mir_analyzer::AnalysisSession>)>>,
104    /// Cache directory shared with the workspace file-index cache. When set,
105    /// new `AnalysisSession`s are built with `with_cache_dir` so that stub
106    /// parsing results survive server restarts.
107    session_cache_dir: OnceLock<std::path::PathBuf>,
108}
109
110impl Default for DocumentStore {
111    fn default() -> Self {
112        Self::new()
113    }
114}
115
116impl DocumentStore {
117    pub fn new() -> Self {
118        let host = AnalysisHost::new();
119        let workspace = Workspace::new(
120            host.db(),
121            Arc::<[SourceFile]>::from(Vec::new()),
122            mir_analyzer::PhpVersion::LATEST,
123        );
124        DocumentStore {
125            token_cache: DashMap::new(),
126            host: Mutex::new(host),
127            source_files: DashMap::new(),
128            text_cache: DashMap::new(),
129            parsed_cache: DashMap::new(),
130            analysis_cache: DashMap::new(),
131            next_file_id: AtomicU32::new(0),
132            workspace_files_dirty: AtomicBool::new(true),
133            workspace,
134            psr4: Arc::new(ArcSwap::from_pointee(Psr4Map::empty())),
135            analysis_session: Mutex::new(None),
136            session_cache_dir: OnceLock::new(),
137        }
138    }
139
140    /// Set the directory used to persist stub-parse and analysis results across
141    /// server restarts.  Must be called before the first `analysis_session` use;
142    /// subsequent calls are silently ignored (`OnceLock` semantics).
143    pub fn set_session_cache_dir(&self, dir: std::path::PathBuf) {
144        let _ = self.session_cache_dir.set(dir);
145    }
146
147    /// Get or build the `AnalysisSession` for the given PHP version. Rebuilds
148    /// when the version changes (e.g. user flipped config). The session owns
149    /// its own salsa db and AnalysisCache; lazy-loads vendor files via the
150    /// shared PSR-4 map.
151    pub fn analysis_session(
152        &self,
153        php_version: mir_analyzer::PhpVersion,
154    ) -> Arc<mir_analyzer::AnalysisSession> {
155        let mut guard = self.analysis_session.lock().unwrap();
156        if let Some((cached_ver, session)) = guard.as_ref()
157            && *cached_ver == php_version
158        {
159            return Arc::clone(session);
160        }
161        // Build a fresh session. Hand it the shared PSR-4 map so it can
162        // lazy-resolve `UndefinedClass` candidates without us having to mirror
163        // every vendor file upfront.
164        let resolver: Arc<dyn mir_analyzer::ClassResolver> = self.psr4.load_full();
165        let mut builder =
166            mir_analyzer::AnalysisSession::new(php_version).with_class_resolver(resolver);
167        if let Some(dir) = self.session_cache_dir.get() {
168            builder = builder.with_cache_dir(dir);
169        }
170        let session = Arc::new(builder);
171        session.ensure_all_stubs();
172        *guard = Some((php_version, Arc::clone(&session)));
173        session
174    }
175
176    /// Current PHP version tracked by the workspace input.
177    pub fn workspace_php_version(&self) -> mir_analyzer::PhpVersion {
178        self.with_host(|h| self.workspace.php_version(h.db()))
179    }
180
181    /// Return the `Arc<ArcSwap<Psr4Map>>` so callers can share it.
182    /// `Backend` clones this arc at construction time so writes
183    /// (e.g. loading composer.json on `initialized`) are immediately visible
184    /// to `lazy_load_psr4_imports` without extra plumbing.
185    pub fn psr4_arc(&self) -> Arc<ArcSwap<Psr4Map>> {
186        Arc::clone(&self.psr4)
187    }
188
189    /// Mirror a file's current text into the salsa layer. Creates the
190    /// `SourceFile` input on first sight, otherwise updates `text` on the
191    /// existing input (bumping the salsa revision so downstream queries
192    /// invalidate). Returns the `SourceFile` handle for this `uri`.
193    ///
194    /// B4a: called from every text-changing mutation site. Reads still come
195    /// from the legacy `map` — this mirror is not yet observed by production
196    /// code paths.
197    pub fn mirror_text(&self, uri: &Url, text: &str) -> SourceFile {
198        // G2 fast path: compare against the lock-free text cache. When the
199        // new text byte-matches what we already mirrored, skip the host
200        // mutex entirely. Common during workspace scan + `did_open` for
201        // unchanged files, where most threads would otherwise serialise on
202        // `host.lock()` just to confirm a no-op. Cache is only populated
203        // after the matching `source_files` entry, so a cache hit implies
204        // the handle exists.
205        if let Some(cached) = self.text_cache.get(uri)
206            && **cached == *text
207            && let Some(sf) = self.source_files.get(uri)
208        {
209            return *sf;
210        }
211        self.mirror_text_arc(uri, Arc::from(text))
212    }
213
214    /// Like [`mirror_text`] but takes an already-allocated `Arc<str>`.
215    ///
216    /// Callers that already hold an `Arc<str>` (e.g. `index_from_doc` reusing
217    /// `ParsedDoc::source_arc()`) use this to avoid a second allocation and to
218    /// ensure `text_cache` and `parsed_cache` hold the same Arc pointer —
219    /// enabling `Arc::ptr_eq` validation in `get_parsed_cached`.
220    pub fn mirror_text_arc(&self, uri: &Url, text_arc: Arc<str>) -> SourceFile {
221        if let Some(existing) = self.source_files.get(uri) {
222            let sf = *existing;
223            drop(existing);
224            // Slow path: another writer may have raced us; re-check inside
225            // the mutex. Salsa's `set_text` unconditionally bumps the
226            // revision, so every spurious setter invalidates every
227            // downstream query.
228            let mut host = self.host.lock().unwrap();
229            let current: Arc<str> = sf.text(host.db());
230            if *current == *text_arc {
231                drop(host);
232                self.text_cache.insert(uri.clone(), current);
233                return sf;
234            }
235            sf.set_text(host.db_mut()).to(text_arc.clone());
236            // Phase K2: any text change invalidates a previously-seeded
237            // cached index. Only bump the revision when a cached index is
238            // actually present — an unconditional set would cause two
239            // revision bumps per edit (one for text, one for cached_index),
240            // which needlessly cancels in-flight `file_index` queries on
241            // every keystroke.
242            if sf.cached_index(host.db()).is_some() {
243                sf.set_cached_index(host.db_mut()).to(None);
244            }
245            drop(host);
246            self.text_cache.insert(uri.clone(), text_arc);
247            // A content change to ANY file can invalidate cross-file analysis
248            // (mir resolves types/issues against other files). `cached_analysis`
249            // is validated only on a file's own `source_arc`, so a dependency
250            // edit wouldn't otherwise refresh an unchanged dependent's cached
251            // entry — drop the whole cache. Bounded by open files; recompute is
252            // ~6ms warm. Matches the salsa revision bump `set_text` just made.
253            self.analysis_cache.clear();
254            sf
255        } else {
256            let id = FileId(self.next_file_id.fetch_add(1, Ordering::Relaxed));
257            let uri_arc: Arc<str> = Arc::from(uri.as_str());
258            let is_vendor = uri.as_str().contains("/vendor/");
259            let sf = {
260                let mut host = self.host.lock().unwrap();
261                let sf = SourceFile::new(host.db(), id, uri_arc, text_arc.clone(), None);
262                if is_vendor {
263                    // Vendor files never change in a session — mark their text
264                    // as HIGH durability so salsa skips re-validating
265                    // parsed_doc/file_index for them on every user edit.
266                    sf.set_text(host.db_mut())
267                        .with_durability(salsa::Durability::HIGH)
268                        .to(Arc::clone(&text_arc));
269                }
270                sf
271            };
272            self.source_files.insert(uri.clone(), sf);
273            self.text_cache.insert(uri.clone(), text_arc);
274            self.workspace_files_dirty.store(true, Ordering::Release);
275            // A newly-ingested file may resolve references that were previously
276            // unresolved in already-analyzed files; invalidate cross-file caches.
277            self.analysis_cache.clear();
278            sf
279        }
280    }
281
282    /// Return the salsa `SourceFile` handle for a URL, if one exists.
283    pub fn source_file(&self, uri: &Url) -> Option<SourceFile> {
284        self.source_files.get(uri).map(|e| *e)
285    }
286
287    /// Phase K2: pre-seed a `FileIndex` loaded from the on-disk cache onto
288    /// the `SourceFile` input for `uri`. The next `file_index` call for that
289    /// file returns the cached index directly, skipping parse + extract.
290    ///
291    /// Must be called **before** any `file_index(db, sf)` call for this file —
292    /// otherwise salsa has already memoized the fresh-parse result and setting
293    /// `cached_index` now would only bump the revision without using the cache.
294    /// In practice the workspace-scan path seeds immediately after `mirror_text`
295    /// and before any query runs.
296    ///
297    /// Returns `false` when `uri` was not mirrored (caller should mirror
298    /// first); returns `true` on success.
299    pub fn seed_cached_index(&self, uri: &Url, index: Arc<FileIndex>) -> bool {
300        let Some(sf) = self.source_files.get(uri).map(|e| *e) else {
301            return false;
302        };
303        let mut host = self.host.lock().unwrap();
304        sf.set_cached_index(host.db_mut()).to(Some(index));
305        true
306    }
307
308    /// Run `f` with a borrow of the `AnalysisHost`. Used by tests and by the
309    /// upcoming `*_salsa` accessors to query the salsa layer.
310    pub fn with_host<R>(&self, f: impl FnOnce(&AnalysisHost) -> R) -> R {
311        let host = self.host.lock().unwrap();
312        f(&host)
313    }
314
315    /// Phase E1: take a brief lock, clone the salsa database, release the
316    /// lock. Queries then run on the cloned `RootDatabase` without blocking
317    /// writers or other readers. Salsa's `Storage<Self>` is reference-counted
318    /// (`Arc<Zalsa>`), so the clone is cheap — it shares memoized data and
319    /// the cancellation flag with the host's db.
320    fn snapshot_db(&self) -> crate::db::analysis::RootDatabase {
321        let host = self.host.lock().unwrap();
322        host.db().clone()
323    }
324
325    /// Run a query on a fresh snapshot, catching `salsa::Cancelled` (raised
326    /// when a concurrent writer advances the revision) and retrying with a
327    /// new snapshot. Writers hold the mutex only long enough to bump input
328    /// values, so a handful of retries is more than enough in practice; we
329    /// cap at 8 to avoid pathological livelock under sustained write pressure.
330    fn snapshot_query<R>(&self, f: impl Fn(&crate::db::analysis::RootDatabase) -> R + Clone) -> R {
331        use std::panic::AssertUnwindSafe;
332        for _ in 0..8 {
333            let db = self.snapshot_db();
334            let f = f.clone();
335            match salsa::Cancelled::catch(AssertUnwindSafe(move || f(&db))) {
336                Ok(r) => return r,
337                Err(_) => continue,
338            }
339        }
340        // Last-resort attempt: take the mutex for the whole query so no
341        // writer can race us. Much slower, but guaranteed to make progress.
342        let host = self.host.lock().unwrap();
343        f(host.db())
344    }
345
346    /// Evict the semantic-tokens cache for `uri`. Called by Backend when a
347    /// file is closed; diff-based tokens computed against the old revision
348    /// are no longer meaningful.
349    pub fn evict_token_cache(&self, uri: &Url) {
350        self.token_cache.remove(uri);
351    }
352
353    /// Return the `FileIndex` for `uri` by running `file_index` on a salsa
354    /// snapshot.  Returns `None` when `uri` has not been mirrored.
355    ///
356    /// Test-only — production code uses the salsa query directly via
357    /// `snapshot_query`.
358    #[cfg(test)]
359    pub fn snapshot_query_file_index(&self, uri: &Url) -> Option<crate::file_index::FileIndex> {
360        let sf = self.source_files.get(uri).map(|e| *e)?;
361        let idx = self.snapshot_query(|db| crate::db::index::file_index(db, sf));
362        Some(idx.get().clone())
363    }
364
365    /// Register a file in the salsa layer without marking it open.
366    ///
367    /// Salsa's `parsed_doc` query parses lazily on first read; diagnostics
368    /// are populated by `did_open` when the editor actually opens the file.
369    pub fn index(&self, uri: Url, text: &str) {
370        self.mirror_text(&uri, text);
371    }
372
373    /// Index a file using an already-parsed `ParsedDoc`, avoiding a second parse.
374    ///
375    /// Prefer this over [`index`] when the caller already has a `ParsedDoc` (e.g.
376    /// after running `DefinitionCollector` during workspace scan). Reuses the
377    /// `Arc<str>` already owned by `doc` so that `text_cache` and `SourceFile::text`
378    /// share the same pointer — enabling the `Arc::ptr_eq` fast path in
379    /// `get_parsed_cached` on the first subsequent salsa query, without an extra
380    /// `Arc::from(source)` allocation.
381    pub fn index_from_doc(&self, uri: Url, doc: &ParsedDoc) {
382        self.mirror_text_arc(&uri, doc.source_arc());
383    }
384
385    pub fn remove(&self, uri: &Url) {
386        self.token_cache.remove(uri);
387        // Also drop the Url→SourceFile mapping so the file stops contributing
388        // to the workspace codebase query. Salsa inputs themselves remain
389        // alive (salsa doesn't expose input removal in 0.26), but they're
390        // orphaned — no query keys them anymore, and re-opening the file
391        // allocates a fresh SourceFile with a new FileId. The ~40 bytes per
392        // orphan is acceptable; revisit if workspace-churn profiling hurts.
393        self.source_files.remove(uri);
394        self.workspace_files_dirty.store(true, Ordering::Release);
395        // Sync workspace files so the deleted file is removed from the salsa
396        // `Workspace::files` list and won't appear in workspace symbols etc.
397        self.sync_workspace_files();
398        self.text_cache.remove(uri);
399        self.parsed_cache.remove(uri);
400        self.analysis_cache.remove(uri);
401        // Also evict the file from the `AnalysisSession`'s internal state so
402        // workspace symbol queries don't keep returning the deleted file's
403        // declarations. Cheap when the session hasn't ingested this file.
404        let guard = self.analysis_session.lock().unwrap();
405        if let Some((_, session)) = guard.as_ref() {
406            session.invalidate_file(uri.as_str());
407        }
408    }
409
410    // ── B4b salsa-backed accessors ─────────────────────────────────────────
411    //
412    // These are additive and not yet called from production code. They go
413    // through the salsa layer — reads run the memoized `parsed_doc` /
414    // `file_index` / `method_returns` queries, parsing only on first access
415    // per revision. B4c will migrate feature modules to call these instead of
416    // the legacy `get_doc` / `get_index`.
417
418    /// Salsa-backed parsed document.
419    ///
420    /// Salsa-backed parsed document for any mirrored file (open or
421    /// background-indexed). Returns `None` only when the file is not known
422    /// to the store. Callers that want "only if open" should gate on
423    /// `Backend::open_files` at the call site (see `Backend::get_doc`).
424    pub fn get_doc_salsa(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
425        self.get_parsed_cached(uri)
426    }
427
428    /// Salsa-backed compact symbol index.
429    pub fn get_index_salsa(&self, uri: &Url) -> Option<Arc<FileIndex>> {
430        let sf = self.source_file(uri)?;
431        Some(self.snapshot_query(move |db| crate::db::index::file_index(db, sf).0.clone()))
432    }
433
434    /// Salsa-backed pre-computed symbol map (name → Vec<SymbolEntry>).
435    /// Memoized per revision: stable files serve from cache in O(1).
436    pub fn get_symbol_map_salsa(&self, uri: &Url) -> Option<Arc<crate::symbol_map::SymbolMap>> {
437        let sf = self.source_file(uri)?;
438        Some(self.snapshot_query(move |db| crate::db::symbol_map::symbol_map(db, sf).0.clone()))
439    }
440
441    /// Pre-computed symbol maps for every entry in `open_urls` except `uri`.
442    pub fn other_symbol_maps(
443        &self,
444        uri: &Url,
445        open_urls: &[Url],
446    ) -> Vec<(Url, Arc<crate::symbol_map::SymbolMap>)> {
447        open_urls
448            .iter()
449            .filter(|u| *u != uri)
450            .filter_map(|u| self.get_symbol_map_salsa(u).map(|m| (u.clone(), m)))
451            .collect()
452    }
453
454    /// G3: shared implementation for `get_doc_salsa`.
455    /// Tries the `parsed_cache` (lock-free) first; validates via
456    /// `Arc::ptr_eq` against the G2 `text_cache` so a concurrent writer
457    /// that has already committed a new text input cannot be masked by a
458    /// stale cache entry. On miss, captures the text Arc and ParsedDoc
459    /// together inside a single `snapshot_query`, then publishes both.
460    fn get_parsed_cached(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
461        if let Some(current_text) = self.text_cache.get(uri)
462            && let Some(entry) = self.parsed_cache.get(uri)
463            && Arc::ptr_eq(&*current_text, &entry.0)
464        {
465            return Some(entry.1.clone());
466        }
467
468        let sf = self.source_file(uri)?;
469        let (text, doc) = self.snapshot_query(move |db| {
470            let text = sf.text(db);
471            let doc = crate::db::parse::parsed_doc(db, sf).0.clone();
472            (text, doc)
473        });
474        self.insert_parsed_cache(uri.clone(), text, doc.clone());
475        Some(doc)
476    }
477
478    /// Publish a fresh `ParsedDoc` into `parsed_cache`, shedding roughly
479    /// half of the cache first if it has grown past [`PARSED_CACHE_CAP`].
480    ///
481    /// Eviction is probabilistic (DashMap iteration order is arbitrary),
482    /// not LRU. That's fine — salsa's own `parsed_doc` memo uses
483    /// `lru = 2048` on hotness-aware storage, so a cache-miss here is
484    /// cheap: the next read goes through `snapshot_query` and
485    /// `parsed_doc`, which still short-circuits on the salsa memo.
486    /// What we're bounding here is the *secondary* Arc retention that
487    /// would otherwise pin every workspace file's bumpalo arena alive
488    /// regardless of salsa's eviction decisions.
489    fn insert_parsed_cache(&self, uri: Url, text: Arc<str>, doc: Arc<ParsedDoc>) {
490        if self.parsed_cache.len() >= PARSED_CACHE_CAP {
491            let drop_target = self.parsed_cache.len() / 2;
492            let mut dropped = 0usize;
493            self.parsed_cache.retain(|_, _| {
494                if dropped < drop_target {
495                    dropped += 1;
496                    false
497                } else {
498                    true
499                }
500            });
501        }
502        self.parsed_cache.insert(uri, (text, doc));
503    }
504
505    /// Refresh `workspace.files` to mirror the current `source_files` set.
506    ///
507    /// Skips all work when `workspace_files_dirty` is `false` (the common
508    /// case after the workspace scan completes — file-set changes are rare).
509    /// When the flag is set, collects file IDs under a single lock to avoid
510    /// the O(N log N) lock/unlock cycles that `sort_by_key` + `with_host`
511    /// previously caused, then sorts and compares without holding the lock.
512    pub fn sync_workspace_files(&self) {
513        // Atomically clear the flag.  If it was already false the file set
514        // hasn't changed since the last sync; nothing to do.
515        if !self.workspace_files_dirty.swap(false, Ordering::AcqRel) {
516            return;
517        }
518
519        // One lock to read all file IDs — O(N) acquisitions become one.
520        let mut files: Vec<(u32, SourceFile)> = {
521            let host = self.host.lock().unwrap();
522            self.source_files
523                .iter()
524                .map(|e| (e.value().id(host.db()).0, *e.value()))
525                .collect()
526        };
527        // Sort without holding the lock.
528        files.sort_unstable_by_key(|(id, _)| *id);
529        let sorted: Vec<SourceFile> = files.into_iter().map(|(_, sf)| sf).collect();
530
531        let mut host = self.host.lock().unwrap();
532        let current = self.workspace.files(host.db());
533        if current.len() == sorted.len() && current.iter().zip(sorted.iter()).all(|(a, b)| a == b) {
534            return;
535        }
536        self.workspace
537            .set_files(host.db_mut())
538            .to(Arc::from(sorted));
539    }
540
541    /// Mark the workspace file set as dirty so the next `sync_workspace_files`
542    /// call re-runs the collect/sort/compare path.  Exposed for benchmarks that
543    /// need to measure the dirty-path cost in isolation.
544    pub fn mark_workspace_files_dirty(&self) {
545        self.workspace_files_dirty.store(true, Ordering::Release);
546    }
547
548    /// Update the PHP version tracked by the workspace. Salsa will invalidate
549    /// all `semantic_issues` queries so diagnostics are re-evaluated.
550    /// Skips the setter when the version hasn't changed to avoid spurious
551    /// query invalidation.
552    pub fn set_php_version(&self, version: mir_analyzer::PhpVersion) {
553        let mut host = self.host.lock().unwrap();
554        if self.workspace.php_version(host.db()) == version {
555            return;
556        }
557        self.workspace.set_php_version(host.db_mut()).to(version);
558    }
559
560    /// Stub kept for the legacy `RefLookup` closure shape consumed by
561    /// `find_references_codebase_with_target`. Always returns empty; the
562    /// AST walker handles all reference scanning. Session-backed refs go
563    /// through [`Self::session_references_to`] instead.
564    pub fn get_symbol_refs_salsa(&self, _key: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
565        Vec::new()
566    }
567
568    /// Session-backed workspace reference lookup. Returns `(file, line, col)`
569    /// locations for every occurrence of `symbol` in the files that the
570    /// `AnalysisSession` has ingested so far. The session's reference index
571    /// is built incrementally during `ingest_file`, so refs for files the
572    /// session hasn't seen yet (background-indexed but never opened) won't
573    /// appear here — those are covered by the AST-walker fallback in the
574    /// references handler.
575    ///
576    /// Returns LSP-style 0-based line/column.
577    pub fn session_references_to(
578        &self,
579        symbol: &mir_analyzer::Name,
580    ) -> Vec<(Arc<str>, u32, u32, u32)> {
581        let php_version = self.workspace_php_version();
582        let session = self.analysis_session(php_version);
583        session
584            .references_to(symbol)
585            .into_iter()
586            .map(|(file, range)| {
587                // mir 0.30+ uses 1-based lines and 1-based columns; LSP uses 0-based.
588                let line = range.start.line.saturating_sub(1);
589                let col_start = range.start.column.saturating_sub(1);
590                let col_end = range.end.column.saturating_sub(1);
591                (file, line, col_start, col_end)
592            })
593            .collect()
594    }
595
596    /// Phase J: salsa-memoized aggregate workspace index.
597    ///
598    /// Returns the shared `Arc<WorkspaceIndexData>` with flat
599    /// `(Url, Arc<FileIndex>)` list plus pre-built `classes_by_name` and
600    /// `subtypes_of` reverse maps. Used by workspace_symbols,
601    /// prepare_type_hierarchy, supertypes_of, subtypes_of, and
602    /// find_implementations so they don't each rebuild the aggregate per
603    /// request. Invalidates automatically when any file's `file_index`
604    /// changes.
605    pub fn get_workspace_index_salsa(&self) -> Arc<crate::db::workspace_index::WorkspaceIndexData> {
606        self.sync_workspace_files();
607        let ws = self.workspace;
608        self.snapshot_query(move |db| {
609            crate::db::workspace_index::workspace_index(db, ws)
610                .0
611                .clone()
612        })
613    }
614
615    /// No-op after mir 0.22 migration. The session manages its own warm-up
616    /// via `ingest_file` / `analyze_dependents_of`; there's nothing for us
617    /// to pre-warm here.
618    pub fn warm_reference_index(&self) {}
619
620    /// Return the raw source text for `uri` if it has been mirrored into the
621    /// salsa workspace. Used by the references handler to pre-filter session
622    /// results by checking whether a file mentions the owning class name.
623    pub fn source_text(&self, uri: &Url) -> Option<Arc<str>> {
624        self.text_cache.get(uri).map(|e| Arc::clone(&e))
625    }
626
627    /// Run Pass 1 + Pass 2 analysis on every mirrored workspace file so that
628    /// type-aware queries (e.g. `session.references_to`) see the full workspace.
629    ///
630    /// Reference locations are only recorded during Pass 2 (`FileAnalyzer::analyze`).
631    /// `ingest_file` alone (Pass 1) is not sufficient. Only needed for cross-file
632    /// queries like `textDocument/references` that rely on the reference index.
633    /// The session's internal cache makes re-analysis of unchanged files cheap.
634    pub fn ensure_all_files_ingested(&self) {
635        let php_version = self.workspace_php_version();
636        let session = self.analysis_session(php_version);
637        let urls: Vec<Url> = self.source_files.iter().map(|e| e.key().clone()).collect();
638        for uri in &urls {
639            let Some(doc) = self.get_doc_salsa(uri) else {
640                continue;
641            };
642            let file: Arc<str> = Arc::from(uri.as_str());
643            session.ingest_file(file.clone(), doc.source_arc());
644            let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
645            let owned_program = php_ast::owned::to_owned_program(doc.program());
646            let analyzer = mir_analyzer::FileAnalyzer::new(&session);
647            analyzer.analyze(file, doc.source(), &owned_program, &source_map);
648        }
649    }
650
651    /// Cache the semantic tokens computed for a delta response.
652    /// `result_id` is an opaque string (a hash of the token data) returned to the client.
653    pub fn store_token_cache(&self, uri: &Url, result_id: String, tokens: Arc<Vec<SemanticToken>>) {
654        self.token_cache.insert(uri.clone(), (result_id, tokens));
655    }
656
657    /// Return the cached tokens if `result_id` matches the stored one.
658    pub fn get_token_cache(&self, uri: &Url, result_id: &str) -> Option<Arc<Vec<SemanticToken>>> {
659        self.token_cache
660            .get(uri)
661            .filter(|e| e.0.as_str() == result_id)
662            .map(|e| Arc::clone(&e.1))
663    }
664
665    /// Before running semantic analysis for `uri`, resolve every `use`-imported
666    /// class through the PSR-4 map and mirror any that are not yet registered.
667    /// This prevents spurious `UndefinedClass` diagnostics when the background
668    /// workspace scan has not yet reached a dependency file.
669    fn lazy_load_psr4_imports(&self, uri: &Url) {
670        let doc = match self.get_doc_salsa(uri) {
671            Some(d) => d,
672            None => return,
673        };
674        let fqns = crate::references::collect_referenced_class_fqns(&doc);
675        if fqns.is_empty() {
676            return;
677        }
678        let psr4 = self.psr4.load();
679        let paths: Vec<std::path::PathBuf> =
680            fqns.iter().filter_map(|fqcn| psr4.resolve(fqcn)).collect();
681        drop(psr4);
682
683        for path in paths {
684            let Ok(dep_url) = Url::from_file_path(&path) else {
685                continue;
686            };
687            if self.source_files.contains_key(&dep_url) {
688                continue;
689            }
690            if let Ok(text) = std::fs::read_to_string(&path) {
691                self.mirror_text(&dep_url, &text);
692            }
693        }
694    }
695
696    /// Raw semantic issues for a file, computed via mir's session-based
697    /// `FileAnalyzer`. The session lazy-loads dependencies via PSR-4 so the
698    /// LSP no longer needs to mirror vendor up-front. Callers apply their
699    /// own `DiagnosticsConfig` filter via
700    /// [`crate::semantic_diagnostics::issues_to_diagnostics`].
701    #[tracing::instrument(skip_all)]
702    pub fn get_semantic_issues_salsa(&self, uri: &Url) -> Option<Arc<[mir_issues::Issue]>> {
703        let analysis = self.cached_analysis(uri)?;
704        let file: Arc<str> = Arc::from(uri.as_str());
705        // Workspace-level class issues for this file (circular inheritance,
706        // override violations, abstract-method gaps). These are session-wide
707        // (a dependency edit changes them without changing this file's bytes),
708        // so they are recomputed live rather than cached alongside the
709        // per-file body analysis.
710        let class_issues = {
711            let _s = tracing::debug_span!("session.class_issues_for").entered();
712            self.analysis_session(self.workspace_php_version())
713                .class_issues(std::slice::from_ref(&file))
714        };
715        let combined: Vec<mir_issues::Issue> = analysis
716            .issues
717            .iter()
718            .cloned()
719            .chain(class_issues)
720            .filter(|i| !i.suppressed)
721            .collect();
722        Some(Arc::from(combined))
723    }
724
725    /// Run (or reuse) mir's per-file body analysis, retaining the full
726    /// [`mir_analyzer::FileAnalysis`] — issues **and** resolved symbols — across
727    /// requests. Diagnostics read `.issues`; position features call
728    /// `.symbol_at(offset)` for the resolved type at a cursor.
729    ///
730    /// Cache hit when the entry's captured source `Arc` is pointer-equal to the
731    /// file's current `doc.source_arc()`. A miss recomputes and overwrites, so
732    /// the entry self-evicts on any content edit.
733    #[tracing::instrument(skip_all)]
734    pub fn cached_analysis(&self, uri: &Url) -> Option<Arc<mir_analyzer::FileAnalysis>> {
735        // Need the parsed doc both for the analyzer and as the cache key.
736        let doc = self.get_doc_salsa(uri)?;
737        let source = doc.source_arc();
738
739        if let Some(entry) = self.analysis_cache.get(uri)
740            && Arc::ptr_eq(&entry.0, &source)
741        {
742            return Some(Arc::clone(&entry.1));
743        }
744
745        let php_version = self.with_host(|h| self.workspace.php_version(h.db()));
746        let session = self.analysis_session(php_version);
747        let file: Arc<str> = Arc::from(uri.as_str());
748        {
749            let _s = tracing::debug_span!("session.ingest_file").entered();
750            session.ingest_file(file.clone(), source.clone());
751        }
752        // Pre-load every imported class via PSR-4 so Pass-2 doesn't emit
753        // spurious `UndefinedClass` for classes that ARE on disk but haven't
754        // been ingested yet. The session's resolver was supplied at
755        // construction time.
756        {
757            let _s = tracing::debug_span!("session.lazy_load_imports").entered();
758            // Pre-load every class-typed reference resolved via the file's
759            // namespace + `use` imports. This covers `use` imports, FQN refs
760            // (`new \App\Foo`), and bare same-namespace refs (`new Foo` from
761            // inside `namespace App;`) in a single sweep — mir won't auto-
762            // resolve via the ClassResolver, so anything not lazy-loaded here
763            // produces a spurious `UndefinedClass`.
764            let fqns = crate::references::collect_referenced_class_fqns(&doc);
765            for fqcn in &fqns {
766                let _ = session.load_class(fqcn);
767            }
768        }
769        let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
770        let owned_program = php_ast::owned::to_owned_program(doc.program());
771        let analysis = {
772            let _s = tracing::debug_span!("FileAnalyzer::analyze").entered();
773            let analyzer = mir_analyzer::FileAnalyzer::new(&session);
774            Arc::new(analyzer.analyze(file.clone(), doc.source(), &owned_program, &source_map))
775        };
776        self.analysis_cache
777            .insert(uri.clone(), (source, Arc::clone(&analysis)));
778        Some(analysis)
779    }
780
781    /// Returns `(uri, doc)` for files currently open in the editor.
782    ///
783    /// Resolve `open_urls` (from `Backend::open_urls()`) to parsed docs.
784    /// Files not mirrored in the salsa layer are filtered out silently.
785    pub fn docs_for(&self, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
786        open_urls
787            .iter()
788            .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
789            .collect()
790    }
791
792    /// `(primary, doc)` first, then every other open file's parsed doc.
793    /// The `open_urls` slice should include `uri` — this helper filters it out.
794    pub fn doc_with_others(
795        &self,
796        uri: &Url,
797        doc: Arc<ParsedDoc>,
798        open_urls: &[Url],
799    ) -> Vec<(Url, Arc<ParsedDoc>)> {
800        let mut result = vec![(uri.clone(), doc)];
801        result.extend(self.other_docs(uri, open_urls));
802        result
803    }
804
805    /// Parsed docs for every entry in `open_urls` except `uri`.
806    pub fn other_docs(&self, uri: &Url, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
807        open_urls
808            .iter()
809            .filter(|u| *u != uri)
810            .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
811            .collect()
812    }
813
814    /// Compact symbol index for every mirrored file.
815    pub fn all_indexes(&self) -> Vec<(Url, Arc<FileIndex>)> {
816        self.get_workspace_index_salsa().files.clone()
817    }
818
819    /// Same as `all_indexes` but excludes `uri`.
820    pub fn other_indexes(&self, uri: &Url) -> Vec<(Url, Arc<FileIndex>)> {
821        self.get_workspace_index_salsa()
822            .files
823            .iter()
824            .filter(|(u, _)| u != uri)
825            .cloned()
826            .collect()
827    }
828
829    /// Parsed documents for every mirrored file (open or background-indexed).
830    /// Suitable for full-scan operations: find-references, rename,
831    /// call_hierarchy, code_lens.
832    pub fn all_docs_for_scan(&self) -> Vec<(Url, Arc<ParsedDoc>)> {
833        let urls: Vec<Url> = self.source_files.iter().map(|e| e.key().clone()).collect();
834        urls.into_iter()
835            .filter_map(|u| self.get_doc_salsa(&u).map(|d| (u, d)))
836            .collect()
837    }
838}
839
// Historical note: `warm_file_refs_parallel` (removed).
//
// The helper ran `file_refs` for every workspace file in parallel.
//
// `db` clones are cheap (they share the same `Arc<Zalsa>` memo store), so
// results computed on any clone were immediately visible to all others at the
// same revision. After it returned, the sequential loop inside `symbol_refs`
// only did cheap memo lookups instead of running `StatementsAnalyzer` on
// every file one-by-one.
//
// Per-task `salsa::Cancelled` was caught and swallowed. If the revision was
// bumped, the main thread's next salsa call inside `symbol_refs` raised
// `Cancelled` too and `snapshot_query` retried the whole operation from
// scratch. If the revision was not bumped, any file whose task was cancelled
// before completion simply had no memo entry and `symbol_refs`'s sequential
// loop recomputed it.
//
// `warm_file_refs_parallel` was removed because the analyzer-side reference
// index is now owned by `AnalysisSession` and warmed by `ingest_file`. This
// salsa-side helper has no counterpart in the new architecture.
857
858#[cfg(test)]
859mod tests {
860    use super::*;
861
862    fn uri(path: &str) -> Url {
863        Url::parse(&format!("file://{path}")).unwrap()
864    }
865
866    /// Phase E4: open-file state lives on `Backend`, not `DocumentStore`.
867    /// Tests that need to simulate "file is open" just mirror the text into
868    /// the salsa input — the open/closed distinction is enforced by the
869    /// caller (Backend) in production.
870    fn open(store: &DocumentStore, u: Url, text: String) {
871        store.mirror_text(&u, &text);
872    }
873
874    // Removed `salsa_codebase_aggregates_all_files`: the salsa-side codebase
875    // aggregation was deleted with the mir 0.22 migration. Equivalent
876    // behaviour is now covered by mir-analyzer's own session tests.
877
878    #[test]
879    fn index_registers_file_in_salsa() {
880        let store = DocumentStore::new();
881        store.index(uri("/lib.php"), "<?php\nfunction lib_fn() {}");
882        let idx = store.get_index_salsa(&uri("/lib.php")).unwrap();
883        assert_eq!(idx.functions.len(), 1);
884        assert_eq!(idx.functions[0].name, "lib_fn".into());
885    }
886
887    #[test]
888    fn remove_drops_salsa_input() {
889        let store = DocumentStore::new();
890        store.index(uri("/lib.php"), "<?php");
891        store.remove(&uri("/lib.php"));
892        assert!(store.get_index_salsa(&uri("/lib.php")).is_none());
893    }
894
895    #[test]
896    fn all_indexes_includes_every_mirrored_file() {
897        let store = DocumentStore::new();
898        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
899        store.index(uri("/b.php"), "<?php\nfunction b() {}");
900        assert_eq!(store.all_indexes().len(), 2);
901    }
902
903    #[test]
904    fn other_indexes_excludes_current_uri() {
905        let store = DocumentStore::new();
906        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
907        open(&store, uri("/b.php"), "<?php\nfunction b() {}".to_string());
908        assert_eq!(store.other_indexes(&uri("/a.php")).len(), 1);
909    }
910
911    #[test]
912    fn other_docs_excludes_current_uri() {
913        let store = DocumentStore::new();
914        let ua = uri("/a.php");
915        let ub = uri("/b.php");
916        open(&store, ua.clone(), "<?php\nfunction a() {}".to_string());
917        open(&store, ub.clone(), "<?php\nfunction b() {}".to_string());
918        let open_urls = vec![ua.clone(), ub];
919        assert_eq!(store.other_docs(&ua, &open_urls).len(), 1);
920    }
921
922    #[test]
923    fn evict_token_cache_removes_entry() {
924        let store = DocumentStore::new();
925        let u = uri("/a.php");
926        open(&store, u.clone(), "<?php".to_string());
927        store.store_token_cache(&u, "id1".to_string(), Arc::new(vec![]));
928        assert!(store.get_token_cache(&u, "id1").is_some());
929        store.evict_token_cache(&u);
930        assert!(store.get_token_cache(&u, "id1").is_none());
931    }
932
933    #[test]
934    fn index_populates_file_index_with_symbols() {
935        let store = DocumentStore::new();
936        store.index(uri("/a.php"), "<?php\nfunction hello() {}");
937        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
938        assert_eq!(idx.functions.len(), 1);
939        assert_eq!(idx.functions[0].name, "hello".into());
940    }
941
942    #[test]
943    fn open_populates_file_index_with_symbols() {
944        let store = DocumentStore::new();
945        open(&store, uri("/a.php"), "<?php\nclass Foo {}".to_string());
946        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
947        assert_eq!(idx.classes.len(), 1);
948        assert_eq!(idx.classes[0].name, "Foo".into());
949    }
950
951    // ── Mirror invariants ────────────────────────────────────────────────
952    //
953    // Every mutation path that changes file text must keep the salsa layer
954    // consistent. These tests walk a set-edit-reopen cycle and assert that
955    // the salsa-derived `FileIndex` reflects the latest text at each step.
956
957    fn names_of(idx: &FileIndex) -> Vec<String> {
958        let mut out: Vec<String> = idx.classes.iter().map(|c| c.name.to_string()).collect();
959        out.extend(idx.functions.iter().map(|f| f.name.to_string()));
960        out.sort();
961        out
962    }
963
964    fn salsa_index_names(store: &DocumentStore, url: &Url) -> Vec<String> {
965        let sf = store.source_file(url).expect("mirror recorded SourceFile");
966        store.with_host(|host| {
967            let arc = crate::db::index::file_index(host.db(), sf);
968            names_of(arc.get())
969        })
970    }
971
972    #[test]
973    fn mirror_tracks_repeated_edits() {
974        let store = DocumentStore::new();
975        let u = uri("/mirror.php");
976
977        open(&store, u.clone(), "<?php\nclass A {}".to_string());
978        assert_eq!(salsa_index_names(&store, &u), vec!["A".to_string()]);
979
980        open(
981            &store,
982            u.clone(),
983            "<?php\nclass A {}\nclass B {}".to_string(),
984        );
985        assert_eq!(
986            salsa_index_names(&store, &u),
987            vec!["A".to_string(), "B".to_string()]
988        );
989
990        open(&store, u.clone(), "<?php\nfunction greet() {}".to_string());
991        assert_eq!(salsa_index_names(&store, &u), vec!["greet".to_string()]);
992    }
993
994    #[test]
995    fn mirror_tracks_index_and_index_from_doc() {
996        let store = DocumentStore::new();
997
998        // Background `index(url, text)` path.
999        let u1 = uri("/bg1.php");
1000        store.index(u1.clone(), "<?php\nclass Bg1 {}");
1001        assert_eq!(salsa_index_names(&store, &u1), vec!["Bg1".to_string()]);
1002
1003        // `index_from_doc(url, &doc)` path (workspace-scan Phase 2).
1004        let u2 = uri("/bg2.php");
1005        let doc =
1006            crate::diagnostics::parse_document_no_diags("<?php\nclass Bg2 {}\nfunction f() {}");
1007        store.index_from_doc(u2.clone(), &doc);
1008        assert_eq!(
1009            salsa_index_names(&store, &u2),
1010            vec!["Bg2".to_string(), "f".to_string()]
1011        );
1012    }
1013
1014    /// G3: confirms the `parsed_cache` actually hits — two consecutive
1015    /// `get_doc_salsa` calls on unchanged text return the same `Arc`
1016    /// (pointer equality), and an edit forces a miss that produces a
1017    /// different `Arc`.
1018    /// parsed_cache must stay bounded — inserting more than
1019    /// `PARSED_CACHE_CAP` unique URLs must not cause unbounded growth.
1020    /// Eviction is probabilistic, so we only assert the bound, not which
1021    /// Seeding a cached index for a URL that was never mirrored is a no-op
1022    /// (returns `false`) — avoids silently allocating SourceFiles outside
1023    /// `mirror_text`'s control.
1024    #[test]
1025    fn seed_cached_index_noops_for_unknown_uri() {
1026        let store = DocumentStore::new();
1027        let u = uri("/never_mirrored.php");
1028        let index = Arc::new(crate::file_index::FileIndex::default());
1029        assert!(!store.seed_cached_index(&u, index));
1030    }
1031
1032    /// entries survive.
1033    #[test]
1034    fn parsed_cache_stays_bounded_under_many_inserts() {
1035        let store = DocumentStore::new();
1036        let overflow = PARSED_CACHE_CAP + 100;
1037        for i in 0..overflow {
1038            let u = uri(&format!("/cap/file{i}.php"));
1039            store.index(u.clone(), "<?php\nclass A {}");
1040            // Force a parsed_cache insert via get_doc_salsa.
1041            let _ = store.get_doc_salsa(&u);
1042        }
1043        assert!(
1044            store.parsed_cache.len() <= PARSED_CACHE_CAP,
1045            "parsed_cache grew to {} entries (cap {})",
1046            store.parsed_cache.len(),
1047            PARSED_CACHE_CAP
1048        );
1049    }
1050
1051    #[test]
1052    fn get_doc_salsa_cache_hits_across_calls() {
1053        let store = DocumentStore::new();
1054        let u = uri("/g3_cache.php");
1055        open(&store, u.clone(), "<?php\nclass G3 {}".to_string());
1056
1057        let a = store.get_doc_salsa(&u).unwrap();
1058        let b = store.get_doc_salsa(&u).unwrap();
1059        assert!(
1060            Arc::ptr_eq(&a, &b),
1061            "parsed_cache hit should yield the same Arc across calls"
1062        );
1063
1064        open(&store, u.clone(), "<?php\nclass G3b {}".to_string());
1065        let c = store.get_doc_salsa(&u).unwrap();
1066        assert!(
1067            !Arc::ptr_eq(&a, &c),
1068            "edit should invalidate the parsed_cache entry"
1069        );
1070    }
1071
1072    #[test]
1073    fn get_doc_salsa_returns_some_for_mirrored_files() {
1074        // Phase E4: `get_doc_salsa` no longer gates on open-state. The
1075        // open/closed distinction now lives on `Backend::get_doc`.
1076        let store = DocumentStore::new();
1077        let u = uri("/e4_doc.php");
1078        store.index(u.clone(), "<?php\nclass P {}");
1079        assert!(store.get_doc_salsa(&u).is_some());
1080    }
1081
1082    #[test]
1083    fn get_salsa_accessors_return_none_for_unknown_uri() {
1084        let store = DocumentStore::new();
1085        let u = uri("/never-seen.php");
1086        assert!(store.get_doc_salsa(&u).is_none());
1087        assert!(store.get_index_salsa(&u).is_none());
1088    }
1089
1090    /// Phase E1: concurrent readers and writers must not deadlock, panic, or
1091    /// return stale data. Writers briefly bump inputs while readers are
1092    /// running on cloned snapshots; any `salsa::Cancelled` raised on the
1093    /// reader side must be caught and retried by `snapshot_query`.
1094    ///
1095    /// Post mir 0.22: `get_symbol_refs_salsa` is a no-op stub (returns empty
1096    /// vec), so reader threads cannot exhaust the retry cap or panic on that
1097    /// path. The remaining salsa surface (`get_doc_salsa`, `get_index_salsa`)
1098    /// is protected by `snapshot_query`'s last-resort host-lock fallback.
1099    #[test]
1100    fn concurrent_reads_and_writes_do_not_panic() {
1101        use std::sync::Arc;
1102        use std::thread;
1103        use std::time::{Duration, Instant};
1104
1105        let store = Arc::new(DocumentStore::new());
1106        let urls: Vec<Url> = (0..8).map(|i| uri(&format!("/f{i}.php"))).collect();
1107        for (i, u) in urls.iter().enumerate() {
1108            open(&store, u.clone(), format!("<?php\nclass C{i} {{}}"));
1109        }
1110
1111        let deadline = Instant::now() + Duration::from_millis(400);
1112        let mut handles = Vec::new();
1113
1114        // Writer thread: keep bumping every file's text.
1115        {
1116            let store = Arc::clone(&store);
1117            let urls = urls.clone();
1118            handles.push(thread::spawn(move || {
1119                let mut rev = 0u32;
1120                while Instant::now() < deadline {
1121                    for u in &urls {
1122                        let text = format!("<?php\nclass C{{}}\n// rev {rev}");
1123                        store.mirror_text(u, &text);
1124                    }
1125                    rev += 1;
1126                }
1127            }));
1128        }
1129
1130        // Reader threads: hammer the salsa accessors.
1131        for _ in 0..4 {
1132            let store = Arc::clone(&store);
1133            let urls = urls.clone();
1134            handles.push(thread::spawn(move || {
1135                while Instant::now() < deadline {
1136                    for u in &urls {
1137                        let _ = store.get_doc_salsa(u);
1138                        let _ = store.get_index_salsa(u);
1139                    }
1140                    // Post mir 0.22: codebase + refs live in the session,
1141                    // not salsa. Concurrent-read smoke is now limited to the
1142                    // remaining salsa surface (parsed_doc, file_index).
1143                    let _ = store.get_symbol_refs_salsa("C0");
1144                }
1145            }));
1146        }
1147
1148        for h in handles {
1149            h.join().expect("no panic under concurrent read/write");
1150        }
1151    }
1152
1153    /// PSR-4 lazy-loading: `get_semantic_issues_salsa` must not emit
1154    /// `UndefinedClass` for a class that is PSR-4-resolvable on disk, even
1155    /// when the dependency file is not yet in `source_files`.
1156    #[test]
1157    fn psr4_lazy_load_suppresses_undefined_class() {
1158        let tmp = tempfile::tempdir().unwrap();
1159
1160        // Write Entity.php to disk (not mirrored into the store).
1161        std::fs::create_dir_all(tmp.path().join("src/Model")).unwrap();
1162        std::fs::write(
1163            tmp.path().join("src/Model/Entity.php"),
1164            "<?php\nnamespace App\\Model;\nclass Entity {}\n",
1165        )
1166        .unwrap();
1167
1168        // Write composer.json so Psr4Map::load can build the map.
1169        std::fs::write(
1170            tmp.path().join("composer.json"),
1171            r#"{"autoload":{"psr-4":{"App\\":"src/"}}}"#,
1172        )
1173        .unwrap();
1174
1175        let store = DocumentStore::new();
1176
1177        // Inject a PSR-4 map pointing at the tmp dir.
1178        store
1179            .psr4
1180            .store(Arc::new(crate::autoload::Psr4Map::load(tmp.path())));
1181
1182        // Mirror the consuming file (Entity not yet in source_files).
1183        // Uses Entity as a parameter type hint — the analyzer resolves these
1184        // through use statements, so this exercises the full PSR-4 lazy-load path.
1185        let handler_url = Url::from_file_path(tmp.path().join("src/Service/Handler.php")).unwrap();
1186        store.mirror_text(
1187            &handler_url,
1188            "<?php\nnamespace App\\Service;\nuse App\\Model\\Entity;\nfunction handle(Entity $e): Entity { return $e; }\n",
1189        );
1190
1191        let issues = store.get_semantic_issues_salsa(&handler_url).unwrap();
1192        let undef: Vec<_> = issues
1193            .iter()
1194            .filter(|i| matches!(i.kind, mir_issues::IssueKind::UndefinedClass { .. }))
1195            .collect();
1196        assert!(
1197            undef.is_empty(),
1198            "PSR-4 lazy-loading must prevent UndefinedClass for App\\Model\\Entity; got: {undef:?}"
1199        );
1200    }
1201
1202    /// Issue #191 regression: workspace-wide scans (find-references, rename,
1203    /// call-hierarchy) must not re-parse closed/indexed files on repeated
1204    /// invocations. Once a file's `ParsedDoc` has been produced, subsequent
1205    /// `all_docs_for_scan()` calls must hit the cache and return the same
1206    /// `Arc<ParsedDoc>` (pointer equality), proving no re-parse occurred.
1207    ///
1208    /// The cache layers protecting this are:
1209    ///   1. `parsed_cache` (cap [`PARSED_CACHE_CAP`]) — read-through, validated
1210    ///      via `Arc::ptr_eq` on the text Arc.
1211    ///   2. salsa `parsed_doc` memo (`lru = 2048`) — second line of defense
1212    ///      when `parsed_cache` evicts.
1213    ///
1214    /// Together they keep every workspace-scan op O(N) memo lookups, never
1215    /// O(N) parses, for any workspace whose file count fits the cap.
1216    #[test]
1217    fn all_docs_for_scan_does_not_reparse_indexed_files() {
1218        let store = DocumentStore::new();
1219        const N: usize = 50;
1220        for i in 0..N {
1221            let u = uri(&format!("/scan/file{i}.php"));
1222            store.index(u, &format!("<?php\nclass C{i} {{}}\nfunction f{i}() {{}}"));
1223        }
1224
1225        let first: Vec<_> = store.all_docs_for_scan();
1226        let second: Vec<_> = store.all_docs_for_scan();
1227        assert_eq!(first.len(), N);
1228        assert_eq!(second.len(), N);
1229
1230        let by_url_first: std::collections::HashMap<Url, Arc<ParsedDoc>> =
1231            first.into_iter().collect();
1232        for (u, doc2) in second {
1233            let doc1 = by_url_first
1234                .get(&u)
1235                .expect("second scan returned a URL the first didn't");
1236            assert!(
1237                Arc::ptr_eq(doc1, &doc2),
1238                "{u} re-parsed across all_docs_for_scan calls — \
1239                 cache (parsed_cache + salsa parsed_doc memo) failed to hit"
1240            );
1241        }
1242
1243        // Editing one file's text must invalidate just that file's entry,
1244        // not the rest. This locks in self-eviction via Arc::ptr_eq on text.
1245        let edited_url = uri("/scan/file0.php");
1246        let pre_edit = store.get_doc_salsa(&edited_url).unwrap();
1247        store.index(edited_url.clone(), "<?php\nclass C0Edited {}");
1248        let post_edit = store.get_doc_salsa(&edited_url).unwrap();
1249        assert!(
1250            !Arc::ptr_eq(&pre_edit, &post_edit),
1251            "edited file must produce a fresh ParsedDoc"
1252        );
1253        for i in 1..N {
1254            let u = uri(&format!("/scan/file{i}.php"));
1255            let original = by_url_first.get(&u).unwrap();
1256            let after = store.get_doc_salsa(&u).unwrap();
1257            assert!(
1258                Arc::ptr_eq(original, &after),
1259                "{u} should not have re-parsed because of an unrelated edit"
1260            );
1261        }
1262    }
1263}