Skip to main content

php_lsp/
document_store.rs

1use std::sync::atomic::{AtomicU32, Ordering};
2use std::sync::{Arc, Mutex};
3
4use arc_swap::ArcSwap;
5
6use dashmap::DashMap;
7use salsa::Setter;
8use tower_lsp::lsp_types::{SemanticToken, Url};
9
10use crate::ast::ParsedDoc;
11use crate::autoload::Psr4Map;
12use crate::db::analysis::AnalysisHost;
13use crate::db::input::{FileId, SourceFile, Workspace};
14use crate::file_index::FileIndex;
15
/// Upper bound on `parsed_cache` entries. Matched to the `lru = 2048` on
/// `parsed_doc` in `src/db/parse.rs` so the secondary Arc retention can't
/// pin more ASTs alive than salsa's memo already bounds. Once the cache
/// holds this many entries, the next insert first sheds roughly half of
/// them in arbitrary order (see [`DocumentStore::insert_parsed_cache`]).
const PARSED_CACHE_CAP: usize = 2048;
21
/// Store of per-document salsa inputs plus the caches derived from them.
///
/// All mutable state sits behind interior mutability (`DashMap`, `Mutex`,
/// `AtomicU32`, `ArcSwap`), so the fields can be updated through a shared
/// reference.
pub struct DocumentStore {
    /// Cached semantic tokens per document: (result_id, tokens).
    /// Used to compute incremental deltas for `textDocument/semanticTokens/full/delta`.
    /// Tokens are stored in an `Arc` so the delta-path lookup can hand the
    /// previous snapshot back without cloning the inner Vec.
    token_cache: DashMap<Url, (String, Arc<Vec<SemanticToken>>)>,

    // ── Salsa-input storage ────────────────────────────────────────────────
    // Phase E4: `DocumentStore` is now a pure salsa-input wrapper. Open-file
    // state (live text, version token, parse-diagnostics cache) lives on
    // `Backend` in its `open_files` map; the set of files tracked by salsa
    // is exactly `source_files.keys()`.
    /// Mutex — held briefly to clone the database for reads and to mutate
    /// it for writes. Per-thread salsa state (`zalsa_local`) is `!Sync`,
    /// which rules out `RwLock<AnalysisHost>`. Readers instead snapshot the
    /// db (cheap — storage is `Arc<Zalsa>`) and run queries on the clone
    /// with the lock released, giving real read/read parallelism. Writers
    /// during an in-flight read bump the shared revision; the reader raises
    /// `salsa::Cancelled` on its next query call and `snapshot_query` below
    /// retries with a fresh snapshot.
    host: Mutex<AnalysisHost>,
    /// `Url -> SourceFile` lookup. The `SourceFile` is a salsa-id handle; the
    /// underlying input lives in `host.db` for the lifetime of the database.
    source_files: DashMap<Url, SourceFile>,
    /// G2: lock-free mirror of each `SourceFile`'s last-set text. Lets
    /// `mirror_text` dedup repeated no-op updates (common during workspace
    /// scan and `did_open` for already-indexed files) without taking
    /// `host.lock()`. Written by `mirror_text_arc` right after it releases
    /// the host mutex, using the same `Arc<str>` that the salsa input holds,
    /// so the mirror can briefly lag the salsa input but ends up pointing at
    /// the identical allocation.
    text_cache: DashMap<Url, Arc<str>>,
    /// G3: cross-revision read-through cache for `parsed_doc`. Keyed on
    /// `Url`, stored value is `(text_arc, Arc<ParsedDoc>)` — the text Arc
    /// captured at parse time. On read, compare against `text_cache[uri]`
    /// via `Arc::ptr_eq`; a match guarantees the cached ParsedDoc matches
    /// the current salsa revision's text input, so the query can return
    /// without snapshotting the db or invoking salsa at all. A miss
    /// (different pointer, stale or absent entry) falls through to
    /// `snapshot_query`. Self-evicts on text change — no writer-side
    /// invalidation is required, which avoids the TOCTOU window where a
    /// concurrent reader could re-insert a stale entry after a writer's
    /// eviction.
    ///
    /// Size-bounded at [`PARSED_CACHE_CAP`] — see `insert_parsed_cache`.
    /// Without this bound, every workspace file read-through would pin
    /// its bumpalo arena alive regardless of salsa's `lru = 2048` on the
    /// `parsed_doc` memo.
    parsed_cache: DashMap<Url, (Arc<str>, Arc<ParsedDoc>)>,
    /// Monotonic allocator for `FileId`s. `mirror_text_arc` draws one id each
    /// time it creates a new `SourceFile`.
    next_file_id: AtomicU32,
    /// Workspace salsa input. Tracks the full set of `SourceFile`s that
    /// participate in whole-program queries (`codebase`, `file_refs`).
    /// Re-synced from `source_files` on demand by `sync_workspace_files`.
    workspace: Workspace,
    /// Shared PSR-4 namespace-to-path map. Shared with `Backend` via `Arc`
    /// so updates from `initialized` (when composer.json is loaded) are
    /// visible here without any additional wiring. `ArcSwap` makes reads
    /// lock-free — a poisoned guard can no longer crash a request handler.
    psr4: Arc<ArcSwap<Psr4Map>>,
    /// mir-analyzer's `AnalysisSession` — owns the workspace MirDb, runs
    /// Pass-2 analysis, and lazy-loads dependencies via PSR-4. Built lazily
    /// on first use; rebuilt when PHP version changes. The stored
    /// `PhpVersion` is the version the cached session was built for.
    analysis_session: Mutex<Option<(mir_analyzer::PhpVersion, Arc<mir_analyzer::AnalysisSession>)>>,
}
86
87impl Default for DocumentStore {
88    fn default() -> Self {
89        Self::new()
90    }
91}
92
93impl DocumentStore {
94    pub fn new() -> Self {
95        let host = AnalysisHost::new();
96        let workspace = Workspace::new(
97            host.db(),
98            Arc::<[SourceFile]>::from(Vec::new()),
99            mir_analyzer::PhpVersion::LATEST,
100        );
101        DocumentStore {
102            token_cache: DashMap::new(),
103            host: Mutex::new(host),
104            source_files: DashMap::new(),
105            text_cache: DashMap::new(),
106            parsed_cache: DashMap::new(),
107            next_file_id: AtomicU32::new(0),
108            workspace,
109            psr4: Arc::new(ArcSwap::from_pointee(Psr4Map::empty())),
110            analysis_session: Mutex::new(None),
111        }
112    }
113
114    /// Get or build the `AnalysisSession` for the given PHP version. Rebuilds
115    /// when the version changes (e.g. user flipped config). The session owns
116    /// its own salsa db and AnalysisCache; lazy-loads vendor files via the
117    /// shared PSR-4 map.
118    pub fn analysis_session(
119        &self,
120        php_version: mir_analyzer::PhpVersion,
121    ) -> Arc<mir_analyzer::AnalysisSession> {
122        let mut guard = self.analysis_session.lock().unwrap();
123        if let Some((cached_ver, session)) = guard.as_ref()
124            && *cached_ver == php_version
125        {
126            return Arc::clone(session);
127        }
128        // Build a fresh session. Hand it the shared PSR-4 map so it can
129        // lazy-resolve `UndefinedClass` candidates without us having to mirror
130        // every vendor file upfront.
131        let resolver: Arc<dyn mir_analyzer::ClassResolver> = self.psr4.load_full();
132        let session =
133            Arc::new(mir_analyzer::AnalysisSession::new(php_version).with_class_resolver(resolver));
134        session.ensure_all_stubs();
135        *guard = Some((php_version, Arc::clone(&session)));
136        session
137    }
138
139    /// Current PHP version tracked by the workspace input.
140    pub fn workspace_php_version(&self) -> mir_analyzer::PhpVersion {
141        self.with_host(|h| self.workspace.php_version(h.db()))
142    }
143
144    /// Return the `Arc<ArcSwap<Psr4Map>>` so callers can share it.
145    /// `Backend` clones this arc at construction time so writes
146    /// (e.g. loading composer.json on `initialized`) are immediately visible
147    /// to `lazy_load_psr4_imports` without extra plumbing.
148    pub fn psr4_arc(&self) -> Arc<ArcSwap<Psr4Map>> {
149        Arc::clone(&self.psr4)
150    }
151
152    /// Mirror a file's current text into the salsa layer. Creates the
153    /// `SourceFile` input on first sight, otherwise updates `text` on the
154    /// existing input (bumping the salsa revision so downstream queries
155    /// invalidate). Returns the `SourceFile` handle for this `uri`.
156    ///
157    /// B4a: called from every text-changing mutation site. Reads still come
158    /// from the legacy `map` — this mirror is not yet observed by production
159    /// code paths.
160    pub fn mirror_text(&self, uri: &Url, text: &str) -> SourceFile {
161        // G2 fast path: compare against the lock-free text cache. When the
162        // new text byte-matches what we already mirrored, skip the host
163        // mutex entirely. Common during workspace scan + `did_open` for
164        // unchanged files, where most threads would otherwise serialise on
165        // `host.lock()` just to confirm a no-op. Cache is only populated
166        // after the matching `source_files` entry, so a cache hit implies
167        // the handle exists.
168        if let Some(cached) = self.text_cache.get(uri)
169            && **cached == *text
170            && let Some(sf) = self.source_files.get(uri)
171        {
172            return *sf;
173        }
174        self.mirror_text_arc(uri, Arc::from(text))
175    }
176
177    /// Like [`mirror_text`] but takes an already-allocated `Arc<str>`.
178    ///
179    /// Callers that already hold an `Arc<str>` (e.g. `index_from_doc` reusing
180    /// `ParsedDoc::source_arc()`) use this to avoid a second allocation and to
181    /// ensure `text_cache` and `parsed_cache` hold the same Arc pointer —
182    /// enabling `Arc::ptr_eq` validation in `get_parsed_cached`.
183    pub fn mirror_text_arc(&self, uri: &Url, text_arc: Arc<str>) -> SourceFile {
184        if let Some(existing) = self.source_files.get(uri) {
185            let sf = *existing;
186            drop(existing);
187            // Slow path: another writer may have raced us; re-check inside
188            // the mutex. Salsa's `set_text` unconditionally bumps the
189            // revision, so every spurious setter invalidates every
190            // downstream query.
191            let mut host = self.host.lock().unwrap();
192            let current: Arc<str> = sf.text(host.db());
193            if *current == *text_arc {
194                drop(host);
195                self.text_cache.insert(uri.clone(), current);
196                return sf;
197            }
198            sf.set_text(host.db_mut()).to(text_arc.clone());
199            // Phase K2: any text change invalidates a previously-seeded
200            // cached index. Only bump the revision when a cached index is
201            // actually present — an unconditional set would cause two
202            // revision bumps per edit (one for text, one for cached_index),
203            // which needlessly cancels in-flight `file_index` queries on
204            // every keystroke.
205            if sf.cached_index(host.db()).is_some() {
206                sf.set_cached_index(host.db_mut()).to(None);
207            }
208            drop(host);
209            self.text_cache.insert(uri.clone(), text_arc);
210            sf
211        } else {
212            let id = FileId(self.next_file_id.fetch_add(1, Ordering::Relaxed));
213            let uri_arc: Arc<str> = Arc::from(uri.as_str());
214            let sf = {
215                let host = self.host.lock().unwrap();
216                SourceFile::new(host.db(), id, uri_arc, text_arc.clone(), None)
217            };
218            self.source_files.insert(uri.clone(), sf);
219            self.text_cache.insert(uri.clone(), text_arc);
220            sf
221        }
222    }
223
224    /// Return the salsa `SourceFile` handle for a URL, if one exists.
225    pub fn source_file(&self, uri: &Url) -> Option<SourceFile> {
226        self.source_files.get(uri).map(|e| *e)
227    }
228
229    /// Phase K2: pre-seed a `FileIndex` loaded from the on-disk cache onto
230    /// the `SourceFile` input for `uri`. The next `file_index` call for that
231    /// file returns the cached index directly, skipping parse + extract.
232    ///
233    /// Must be called **before** any `file_index(db, sf)` call for this file —
234    /// otherwise salsa has already memoized the fresh-parse result and setting
235    /// `cached_index` now would only bump the revision without using the cache.
236    /// In practice the workspace-scan path seeds immediately after `mirror_text`
237    /// and before any query runs.
238    ///
239    /// Returns `false` when `uri` was not mirrored (caller should mirror
240    /// first); returns `true` on success.
241    pub fn seed_cached_index(&self, uri: &Url, index: Arc<FileIndex>) -> bool {
242        let Some(sf) = self.source_files.get(uri).map(|e| *e) else {
243            return false;
244        };
245        let mut host = self.host.lock().unwrap();
246        sf.set_cached_index(host.db_mut()).to(Some(index));
247        true
248    }
249
250    /// Run `f` with a borrow of the `AnalysisHost`. Used by tests and by the
251    /// upcoming `*_salsa` accessors to query the salsa layer.
252    pub fn with_host<R>(&self, f: impl FnOnce(&AnalysisHost) -> R) -> R {
253        let host = self.host.lock().unwrap();
254        f(&host)
255    }
256
257    /// Phase E1: take a brief lock, clone the salsa database, release the
258    /// lock. Queries then run on the cloned `RootDatabase` without blocking
259    /// writers or other readers. Salsa's `Storage<Self>` is reference-counted
260    /// (`Arc<Zalsa>`), so the clone is cheap — it shares memoized data and
261    /// the cancellation flag with the host's db.
262    fn snapshot_db(&self) -> crate::db::analysis::RootDatabase {
263        let host = self.host.lock().unwrap();
264        host.db().clone()
265    }
266
267    /// Run a query on a fresh snapshot, catching `salsa::Cancelled` (raised
268    /// when a concurrent writer advances the revision) and retrying with a
269    /// new snapshot. Writers hold the mutex only long enough to bump input
270    /// values, so a handful of retries is more than enough in practice; we
271    /// cap at 8 to avoid pathological livelock under sustained write pressure.
272    fn snapshot_query<R>(&self, f: impl Fn(&crate::db::analysis::RootDatabase) -> R + Clone) -> R {
273        use std::panic::AssertUnwindSafe;
274        for _ in 0..8 {
275            let db = self.snapshot_db();
276            let f = f.clone();
277            match salsa::Cancelled::catch(AssertUnwindSafe(move || f(&db))) {
278                Ok(r) => return r,
279                Err(_) => continue,
280            }
281        }
282        // Last-resort attempt: take the mutex for the whole query so no
283        // writer can race us. Much slower, but guaranteed to make progress.
284        let host = self.host.lock().unwrap();
285        f(host.db())
286    }
287
288    /// Evict the semantic-tokens cache for `uri`. Called by Backend when a
289    /// file is closed; diff-based tokens computed against the old revision
290    /// are no longer meaningful.
291    pub fn evict_token_cache(&self, uri: &Url) {
292        self.token_cache.remove(uri);
293    }
294
295    /// Return the `FileIndex` for `uri` by running `file_index` on a salsa
296    /// snapshot.  Returns `None` when `uri` has not been mirrored.
297    ///
298    /// Test-only — production code uses the salsa query directly via
299    /// `snapshot_query`.
300    #[cfg(test)]
301    pub fn snapshot_query_file_index(&self, uri: &Url) -> Option<crate::file_index::FileIndex> {
302        let sf = self.source_files.get(uri).map(|e| *e)?;
303        let idx = self.snapshot_query(|db| crate::db::index::file_index(db, sf));
304        Some(idx.get().clone())
305    }
306
307    /// Register a file in the salsa layer without marking it open.
308    ///
309    /// Salsa's `parsed_doc` query parses lazily on first read; diagnostics
310    /// are populated by `did_open` when the editor actually opens the file.
311    pub fn index(&self, uri: Url, text: &str) {
312        self.mirror_text(&uri, text);
313    }
314
315    /// Index a file using an already-parsed `ParsedDoc`, avoiding a second parse.
316    ///
317    /// Prefer this over [`index`] when the caller already has a `ParsedDoc` (e.g.
318    /// after running `DefinitionCollector` during workspace scan). Reuses the
319    /// `Arc<str>` already owned by `doc` so that `text_cache` and `SourceFile::text`
320    /// share the same pointer — enabling the `Arc::ptr_eq` fast path in
321    /// `get_parsed_cached` on the first subsequent salsa query, without an extra
322    /// `Arc::from(source)` allocation.
323    pub fn index_from_doc(&self, uri: Url, doc: &ParsedDoc) {
324        self.mirror_text_arc(&uri, doc.source_arc());
325    }
326
327    pub fn remove(&self, uri: &Url) {
328        self.token_cache.remove(uri);
329        // Also drop the Url→SourceFile mapping so the file stops contributing
330        // to the workspace codebase query. Salsa inputs themselves remain
331        // alive (salsa doesn't expose input removal in 0.26), but they're
332        // orphaned — no query keys them anymore, and re-opening the file
333        // allocates a fresh SourceFile with a new FileId. The ~40 bytes per
334        // orphan is acceptable; revisit if workspace-churn profiling hurts.
335        self.source_files.remove(uri);
336        // Sync workspace files so the deleted file is removed from the salsa
337        // `Workspace::files` list and won't appear in workspace symbols etc.
338        self.sync_workspace_files();
339        self.text_cache.remove(uri);
340        self.parsed_cache.remove(uri);
341        // Also evict the file from the `AnalysisSession`'s internal state so
342        // workspace symbol queries don't keep returning the deleted file's
343        // declarations. Cheap when the session hasn't ingested this file.
344        let guard = self.analysis_session.lock().unwrap();
345        if let Some((_, session)) = guard.as_ref() {
346            session.invalidate_file(uri.as_str());
347        }
348    }
349
350    // ── B4b salsa-backed accessors ─────────────────────────────────────────
351    //
352    // These are additive and not yet called from production code. They go
353    // through the salsa layer — reads run the memoized `parsed_doc` /
354    // `file_index` / `method_returns` queries, parsing only on first access
355    // per revision. B4c will migrate feature modules to call these instead of
356    // the legacy `get_doc` / `get_index`.
357
358    /// Salsa-backed parsed document.
359    ///
360    /// Salsa-backed parsed document for any mirrored file (open or
361    /// background-indexed). Returns `None` only when the file is not known
362    /// to the store. Callers that want "only if open" should gate on
363    /// `Backend::open_files` at the call site (see `Backend::get_doc`).
364    pub fn get_doc_salsa(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
365        self.get_parsed_cached(uri)
366    }
367
368    /// Salsa-backed compact symbol index.
369    pub fn get_index_salsa(&self, uri: &Url) -> Option<Arc<FileIndex>> {
370        let sf = self.source_file(uri)?;
371        Some(self.snapshot_query(move |db| crate::db::index::file_index(db, sf).0.clone()))
372    }
373
374    /// G3: shared implementation for `get_doc_salsa`.
375    /// Tries the `parsed_cache` (lock-free) first; validates via
376    /// `Arc::ptr_eq` against the G2 `text_cache` so a concurrent writer
377    /// that has already committed a new text input cannot be masked by a
378    /// stale cache entry. On miss, captures the text Arc and ParsedDoc
379    /// together inside a single `snapshot_query`, then publishes both.
380    fn get_parsed_cached(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
381        if let Some(current_text) = self.text_cache.get(uri)
382            && let Some(entry) = self.parsed_cache.get(uri)
383            && Arc::ptr_eq(&*current_text, &entry.0)
384        {
385            return Some(entry.1.clone());
386        }
387
388        let sf = self.source_file(uri)?;
389        let (text, doc) = self.snapshot_query(move |db| {
390            let text = sf.text(db);
391            let doc = crate::db::parse::parsed_doc(db, sf).0.clone();
392            (text, doc)
393        });
394        self.insert_parsed_cache(uri.clone(), text, doc.clone());
395        Some(doc)
396    }
397
398    /// Publish a fresh `ParsedDoc` into `parsed_cache`, shedding roughly
399    /// half of the cache first if it has grown past [`PARSED_CACHE_CAP`].
400    ///
401    /// Eviction is probabilistic (DashMap iteration order is arbitrary),
402    /// not LRU. That's fine — salsa's own `parsed_doc` memo uses
403    /// `lru = 2048` on hotness-aware storage, so a cache-miss here is
404    /// cheap: the next read goes through `snapshot_query` and
405    /// `parsed_doc`, which still short-circuits on the salsa memo.
406    /// What we're bounding here is the *secondary* Arc retention that
407    /// would otherwise pin every workspace file's bumpalo arena alive
408    /// regardless of salsa's eviction decisions.
409    fn insert_parsed_cache(&self, uri: Url, text: Arc<str>, doc: Arc<ParsedDoc>) {
410        if self.parsed_cache.len() >= PARSED_CACHE_CAP {
411            let drop_target = self.parsed_cache.len() / 2;
412            let mut dropped = 0usize;
413            self.parsed_cache.retain(|_, _| {
414                if dropped < drop_target {
415                    dropped += 1;
416                    false
417                } else {
418                    true
419                }
420            });
421        }
422        self.parsed_cache.insert(uri, (text, doc));
423    }
424
425    /// Refresh `workspace.files` to mirror the current `source_files` set.
426    ///
427    /// Called by `get_codebase_salsa`. Skips the setter when the file list
428    /// hasn't changed — salsa's `set_field` unconditionally bumps revision,
429    /// which would invalidate every downstream query (codebase, file_refs).
430    /// Dedup is essential for memoization across LSP requests.
431    pub fn sync_workspace_files(&self) {
432        let mut files: Vec<SourceFile> = self.source_files.iter().map(|e| *e.value()).collect();
433        files.sort_by_key(|sf| self.with_host(|host| sf.id(host.db()).0));
434        let mut host = self.host.lock().unwrap();
435        let current = self.workspace.files(host.db());
436        if current.len() == files.len() && current.iter().zip(files.iter()).all(|(a, b)| a == b) {
437            return;
438        }
439        let arc: Arc<[SourceFile]> = Arc::from(files);
440        self.workspace.set_files(host.db_mut()).to(arc);
441    }
442
443    /// Update the PHP version tracked by the workspace. Salsa will invalidate
444    /// all `semantic_issues` queries so diagnostics are re-evaluated.
445    /// Skips the setter when the version hasn't changed to avoid spurious
446    /// query invalidation.
447    pub fn set_php_version(&self, version: mir_analyzer::PhpVersion) {
448        let mut host = self.host.lock().unwrap();
449        if self.workspace.php_version(host.db()) == version {
450            return;
451        }
452        self.workspace.set_php_version(host.db_mut()).to(version);
453    }
454
455    /// Stub kept for the legacy `RefLookup` closure shape consumed by
456    /// `find_references_codebase_with_target`. Always returns empty; the
457    /// AST walker handles all reference scanning. Session-backed refs go
458    /// through [`Self::session_references_to`] instead.
459    pub fn get_symbol_refs_salsa(&self, _key: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
460        Vec::new()
461    }
462
463    /// Session-backed workspace reference lookup. Returns `(file, line, col)`
464    /// locations for every occurrence of `symbol` in the files that the
465    /// `AnalysisSession` has ingested so far. The session's reference index
466    /// is built incrementally during `ingest_file`, so refs for files the
467    /// session hasn't seen yet (background-indexed but never opened) won't
468    /// appear here — those are covered by the AST-walker fallback in the
469    /// references handler.
470    ///
471    /// Returns LSP-style 0-based line/column.
472    pub fn session_references_to(
473        &self,
474        symbol: &mir_analyzer::Name,
475    ) -> Vec<(Arc<str>, u32, u32, u32)> {
476        let php_version = self.workspace_php_version();
477        let session = self.analysis_session(php_version);
478        session
479            .references_to(symbol)
480            .into_iter()
481            .map(|(file, range)| {
482                // mir 0.30+ uses 1-based lines and 1-based columns; LSP uses 0-based.
483                let line = range.start.line.saturating_sub(1);
484                let col_start = range.start.column.saturating_sub(1);
485                let col_end = range.end.column.saturating_sub(1);
486                (file, line, col_start, col_end)
487            })
488            .collect()
489    }
490
491    /// Phase J: salsa-memoized aggregate workspace index.
492    ///
493    /// Returns the shared `Arc<WorkspaceIndexData>` with flat
494    /// `(Url, Arc<FileIndex>)` list plus pre-built `classes_by_name` and
495    /// `subtypes_of` reverse maps. Used by workspace_symbols,
496    /// prepare_type_hierarchy, supertypes_of, subtypes_of, and
497    /// find_implementations so they don't each rebuild the aggregate per
498    /// request. Invalidates automatically when any file's `file_index`
499    /// changes.
500    pub fn get_workspace_index_salsa(&self) -> Arc<crate::db::workspace_index::WorkspaceIndexData> {
501        self.sync_workspace_files();
502        let ws = self.workspace;
503        self.snapshot_query(move |db| {
504            crate::db::workspace_index::workspace_index(db, ws)
505                .0
506                .clone()
507        })
508    }
509
510    /// No-op after mir 0.22 migration. The session manages its own warm-up
511    /// via `ingest_file` / `analyze_dependents_of`; there's nothing for us
512    /// to pre-warm here.
513    pub fn warm_reference_index(&self) {}
514
515    /// Return the raw source text for `uri` if it has been mirrored into the
516    /// salsa workspace. Used by the references handler to pre-filter session
517    /// results by checking whether a file mentions the owning class name.
518    pub fn source_text(&self, uri: &Url) -> Option<Arc<str>> {
519        let sf = self.source_file(uri)?;
520        Some(self.snapshot_query(move |db| sf.text(db)))
521    }
522
523    /// Run Pass 1 + Pass 2 analysis on every mirrored workspace file so that
524    /// type-aware queries (e.g. `session.references_to`) see the full workspace.
525    ///
526    /// Reference locations are only recorded during Pass 2 (`FileAnalyzer::analyze`).
527    /// `ingest_file` alone (Pass 1) is not sufficient. Only needed for cross-file
528    /// queries like `textDocument/references` that rely on the reference index.
529    /// The session's internal cache makes re-analysis of unchanged files cheap.
530    pub fn ensure_all_files_ingested(&self) {
531        let php_version = self.workspace_php_version();
532        let session = self.analysis_session(php_version);
533        let urls: Vec<Url> = self.source_files.iter().map(|e| e.key().clone()).collect();
534        for uri in &urls {
535            let Some(doc) = self.get_doc_salsa(uri) else {
536                continue;
537            };
538            let file: Arc<str> = Arc::from(uri.as_str());
539            session.ingest_file(file.clone(), doc.source_arc());
540            let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
541            let owned_program = php_ast::owned::to_owned_program(doc.program());
542            let analyzer = mir_analyzer::FileAnalyzer::new(&session);
543            analyzer.analyze(file, doc.source(), &owned_program, &source_map);
544        }
545    }
546
547    /// Salsa-backed per-file method-return-type map.
548    pub fn get_method_returns_salsa(&self, uri: &Url) -> Option<Arc<crate::ast::MethodReturnsMap>> {
549        let sf = self.source_file(uri)?;
550        Some(
551            self.snapshot_query(move |db| {
552                crate::db::method_returns::method_returns(db, sf).0.clone()
553            }),
554        )
555    }
556
557    /// Cache the semantic tokens computed for a delta response.
558    /// `result_id` is an opaque string (a hash of the token data) returned to the client.
559    pub fn store_token_cache(&self, uri: &Url, result_id: String, tokens: Arc<Vec<SemanticToken>>) {
560        self.token_cache.insert(uri.clone(), (result_id, tokens));
561    }
562
563    /// Return the cached tokens if `result_id` matches the stored one.
564    pub fn get_token_cache(&self, uri: &Url, result_id: &str) -> Option<Arc<Vec<SemanticToken>>> {
565        self.token_cache
566            .get(uri)
567            .filter(|e| e.0.as_str() == result_id)
568            .map(|e| Arc::clone(&e.1))
569    }
570
571    /// Before running semantic analysis for `uri`, resolve every `use`-imported
572    /// class through the PSR-4 map and mirror any that are not yet registered.
573    /// This prevents spurious `UndefinedClass` diagnostics when the background
574    /// workspace scan has not yet reached a dependency file.
575    fn lazy_load_psr4_imports(&self, uri: &Url) {
576        let doc = match self.get_doc_salsa(uri) {
577            Some(d) => d,
578            None => return,
579        };
580        let fqns = crate::references::collect_referenced_class_fqns(&doc);
581        if fqns.is_empty() {
582            return;
583        }
584        let psr4 = self.psr4.load();
585        let paths: Vec<std::path::PathBuf> =
586            fqns.iter().filter_map(|fqcn| psr4.resolve(fqcn)).collect();
587        drop(psr4);
588
589        for path in paths {
590            let Ok(dep_url) = Url::from_file_path(&path) else {
591                continue;
592            };
593            if self.source_files.contains_key(&dep_url) {
594                continue;
595            }
596            if let Ok(text) = std::fs::read_to_string(&path) {
597                self.mirror_text(&dep_url, &text);
598            }
599        }
600    }
601
602    /// Raw semantic issues for a file, computed via mir's session-based
603    /// `FileAnalyzer`. The session lazy-loads dependencies via PSR-4 so the
604    /// LSP no longer needs to mirror vendor up-front. Callers apply their
605    /// own `DiagnosticsConfig` filter via
606    /// [`crate::semantic_diagnostics::issues_to_diagnostics`].
607    #[tracing::instrument(skip_all)]
608    pub fn get_semantic_issues_salsa(&self, uri: &Url) -> Option<Arc<[mir_issues::Issue]>> {
609        // Need the parsed doc for the analyzer.
610        let doc = self.get_doc_salsa(uri)?;
611        let php_version = self.with_host(|h| self.workspace.php_version(h.db()));
612        let session = self.analysis_session(php_version);
613
614        let file: Arc<str> = Arc::from(uri.as_str());
615        let source = doc.source_arc();
616        {
617            let _s = tracing::debug_span!("session.ingest_file").entered();
618            session.ingest_file(file.clone(), source);
619        }
620        // Pre-load every imported class via PSR-4 so Pass-2 doesn't emit
621        // spurious `UndefinedClass` for classes that ARE on disk but haven't
622        // been ingested yet. The session's resolver was supplied at
623        // construction time.
624        {
625            let _s = tracing::debug_span!("session.lazy_load_imports").entered();
626            // Pre-load every class-typed reference resolved via the file's
627            // namespace + `use` imports. This covers `use` imports, FQN refs
628            // (`new \App\Foo`), and bare same-namespace refs (`new Foo` from
629            // inside `namespace App;`) in a single sweep — mir won't auto-
630            // resolve via the ClassResolver, so anything not lazy-loaded here
631            // produces a spurious `UndefinedClass`.
632            let fqns = crate::references::collect_referenced_class_fqns(&doc);
633            for fqcn in &fqns {
634                let _ = session.load_class(fqcn);
635            }
636        }
637        let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
638        let owned_program = php_ast::owned::to_owned_program(doc.program());
639        let analysis = {
640            let _s = tracing::debug_span!("FileAnalyzer::analyze").entered();
641            let analyzer = mir_analyzer::FileAnalyzer::new(&session);
642            analyzer.analyze(file.clone(), doc.source(), &owned_program, &source_map)
643        };
644        // Workspace-level class issues for this file (circular inheritance,
645        // override violations, abstract-method gaps).
646        let class_issues = {
647            let _s = tracing::debug_span!("session.class_issues_for").entered();
648            session.class_issues(std::slice::from_ref(&file))
649        };
650        let combined: Vec<mir_issues::Issue> = analysis
651            .issues
652            .into_iter()
653            .chain(class_issues.into_iter())
654            .filter(|i| !i.suppressed)
655            .collect();
656        Some(Arc::from(combined))
657    }
658
659    /// Returns `(uri, doc)` for files currently open in the editor.
660    ///
661    /// Resolve `open_urls` (from `Backend::open_urls()`) to parsed docs.
662    /// Files not mirrored in the salsa layer are filtered out silently.
663    pub fn docs_for(&self, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
664        open_urls
665            .iter()
666            .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
667            .collect()
668    }
669
670    /// `(primary, doc)` first, then every other open file's parsed doc.
671    /// The `open_urls` slice should include `uri` — this helper filters it out.
672    pub fn doc_with_others(
673        &self,
674        uri: &Url,
675        doc: Arc<ParsedDoc>,
676        open_urls: &[Url],
677    ) -> Vec<(Url, Arc<ParsedDoc>)> {
678        let mut result = vec![(uri.clone(), doc)];
679        result.extend(self.other_docs(uri, open_urls));
680        result
681    }
682
683    /// Parsed docs for every entry in `open_urls` except `uri`.
684    pub fn other_docs(&self, uri: &Url, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
685        open_urls
686            .iter()
687            .filter(|u| *u != uri)
688            .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
689            .collect()
690    }
691
692    /// Batched salsa fetch for every entry in `open_urls` except `uri`:
693    /// returns each `(uri, ParsedDoc, MethodReturnsMap)` triple in a single
694    /// `snapshot_query` so cancellation retries don't run N times.
695    pub fn other_docs_with_returns(
696        &self,
697        uri: &Url,
698        open_urls: &[Url],
699    ) -> Vec<(Url, Arc<ParsedDoc>, Arc<crate::ast::MethodReturnsMap>)> {
700        let source_files: Vec<(Url, crate::db::input::SourceFile)> = open_urls
701            .iter()
702            .filter(|u| *u != uri)
703            .filter_map(|u| self.source_file(u).map(|sf| (u.clone(), sf)))
704            .collect();
705        if source_files.is_empty() {
706            return Vec::new();
707        }
708        self.snapshot_query(move |db| {
709            source_files
710                .iter()
711                .map(|(u, sf)| {
712                    let doc = crate::db::parse::parsed_doc(db, *sf).0.clone();
713                    let mr = crate::db::method_returns::method_returns(db, *sf).0.clone();
714                    (u.clone(), doc, mr)
715                })
716                .collect()
717        })
718    }
719
720    /// Compact symbol index for every mirrored file.
721    pub fn all_indexes(&self) -> Vec<(Url, Arc<FileIndex>)> {
722        let urls: Vec<Url> = self.source_files.iter().map(|e| e.key().clone()).collect();
723        urls.into_iter()
724            .filter_map(|u| self.get_index_salsa(&u).map(|idx| (u, idx)))
725            .collect()
726    }
727
728    /// Same as `all_indexes` but excludes `uri`.
729    pub fn other_indexes(&self, uri: &Url) -> Vec<(Url, Arc<FileIndex>)> {
730        let urls: Vec<Url> = self
731            .source_files
732            .iter()
733            .filter(|e| e.key() != uri)
734            .map(|e| e.key().clone())
735            .collect();
736        urls.into_iter()
737            .filter_map(|u| self.get_index_salsa(&u).map(|idx| (u, idx)))
738            .collect()
739    }
740
741    /// Parsed documents for every mirrored file (open or background-indexed).
742    /// Suitable for full-scan operations: find-references, rename,
743    /// call_hierarchy, code_lens.
744    pub fn all_docs_for_scan(&self) -> Vec<(Url, Arc<ParsedDoc>)> {
745        let urls: Vec<Url> = self.source_files.iter().map(|e| e.key().clone()).collect();
746        urls.into_iter()
747            .filter_map(|u| self.get_doc_salsa(&u).map(|d| (u, d)))
748            .collect()
749    }
750}
751
// Historical note: `warm_file_refs_parallel` used to live here and has been
// removed. The analyzer-side reference index is now owned by
// `AnalysisSession` and warmed by `ingest_file`, so this salsa-side helper
// has no counterpart in the new architecture. Its former documentation is
// kept below as plain `//` comments (a `///` block here would attach to the
// next item, the `tests` module):
//
// Ran `file_refs` for every workspace file in parallel.
//
// `db` clones are cheap (they share the same `Arc<Zalsa>` memo store), so
// results computed on any clone were immediately visible to all others at
// the same revision. After it returned, the sequential loop inside
// `symbol_refs` only did cheap memo lookups instead of running
// `StatementsAnalyzer` on every file one-by-one.
//
// Per-task `salsa::Cancelled` was caught and swallowed. If the revision was
// bumped, the main thread's next salsa call inside `symbol_refs` raised
// `Cancelled` too and `snapshot_query` retried the whole operation from
// scratch. If the revision was not bumped, any file whose task was cancelled
// before completion simply had no memo entry and `symbol_refs`'s sequential
// loop recomputed it.
769
770#[cfg(test)]
771mod tests {
772    use super::*;
773
774    fn uri(path: &str) -> Url {
775        Url::parse(&format!("file://{path}")).unwrap()
776    }
777
778    /// Phase E4: open-file state lives on `Backend`, not `DocumentStore`.
779    /// Tests that need to simulate "file is open" just mirror the text into
780    /// the salsa input — the open/closed distinction is enforced by the
781    /// caller (Backend) in production.
782    fn open(store: &DocumentStore, u: Url, text: String) {
783        store.mirror_text(&u, &text);
784    }
785
786    // Removed `salsa_codebase_aggregates_all_files`: the salsa-side codebase
787    // aggregation was deleted with the mir 0.22 migration. Equivalent
788    // behaviour is now covered by mir-analyzer's own session tests.
789
790    #[test]
791    fn index_registers_file_in_salsa() {
792        let store = DocumentStore::new();
793        store.index(uri("/lib.php"), "<?php\nfunction lib_fn() {}");
794        let idx = store.get_index_salsa(&uri("/lib.php")).unwrap();
795        assert_eq!(idx.functions.len(), 1);
796        assert_eq!(idx.functions[0].name, "lib_fn".into());
797    }
798
799    #[test]
800    fn remove_drops_salsa_input() {
801        let store = DocumentStore::new();
802        store.index(uri("/lib.php"), "<?php");
803        store.remove(&uri("/lib.php"));
804        assert!(store.get_index_salsa(&uri("/lib.php")).is_none());
805    }
806
807    #[test]
808    fn all_indexes_includes_every_mirrored_file() {
809        let store = DocumentStore::new();
810        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
811        store.index(uri("/b.php"), "<?php\nfunction b() {}");
812        assert_eq!(store.all_indexes().len(), 2);
813    }
814
815    #[test]
816    fn other_indexes_excludes_current_uri() {
817        let store = DocumentStore::new();
818        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
819        open(&store, uri("/b.php"), "<?php\nfunction b() {}".to_string());
820        assert_eq!(store.other_indexes(&uri("/a.php")).len(), 1);
821    }
822
823    #[test]
824    fn other_docs_excludes_current_uri() {
825        let store = DocumentStore::new();
826        let ua = uri("/a.php");
827        let ub = uri("/b.php");
828        open(&store, ua.clone(), "<?php\nfunction a() {}".to_string());
829        open(&store, ub.clone(), "<?php\nfunction b() {}".to_string());
830        let open_urls = vec![ua.clone(), ub];
831        assert_eq!(store.other_docs(&ua, &open_urls).len(), 1);
832    }
833
834    #[test]
835    fn evict_token_cache_removes_entry() {
836        let store = DocumentStore::new();
837        let u = uri("/a.php");
838        open(&store, u.clone(), "<?php".to_string());
839        store.store_token_cache(&u, "id1".to_string(), Arc::new(vec![]));
840        assert!(store.get_token_cache(&u, "id1").is_some());
841        store.evict_token_cache(&u);
842        assert!(store.get_token_cache(&u, "id1").is_none());
843    }
844
845    #[test]
846    fn index_populates_file_index_with_symbols() {
847        let store = DocumentStore::new();
848        store.index(uri("/a.php"), "<?php\nfunction hello() {}");
849        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
850        assert_eq!(idx.functions.len(), 1);
851        assert_eq!(idx.functions[0].name, "hello".into());
852    }
853
854    #[test]
855    fn open_populates_file_index_with_symbols() {
856        let store = DocumentStore::new();
857        open(&store, uri("/a.php"), "<?php\nclass Foo {}".to_string());
858        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
859        assert_eq!(idx.classes.len(), 1);
860        assert_eq!(idx.classes[0].name, "Foo".into());
861    }
862
863    // ── Mirror invariants ────────────────────────────────────────────────
864    //
865    // Every mutation path that changes file text must keep the salsa layer
866    // consistent. These tests walk a set-edit-reopen cycle and assert that
867    // the salsa-derived `FileIndex` reflects the latest text at each step.
868
869    fn names_of(idx: &FileIndex) -> Vec<String> {
870        let mut out: Vec<String> = idx.classes.iter().map(|c| c.name.to_string()).collect();
871        out.extend(idx.functions.iter().map(|f| f.name.to_string()));
872        out.sort();
873        out
874    }
875
876    fn salsa_index_names(store: &DocumentStore, url: &Url) -> Vec<String> {
877        let sf = store.source_file(url).expect("mirror recorded SourceFile");
878        store.with_host(|host| {
879            let arc = crate::db::index::file_index(host.db(), sf);
880            names_of(arc.get())
881        })
882    }
883
884    #[test]
885    fn mirror_tracks_repeated_edits() {
886        let store = DocumentStore::new();
887        let u = uri("/mirror.php");
888
889        open(&store, u.clone(), "<?php\nclass A {}".to_string());
890        assert_eq!(salsa_index_names(&store, &u), vec!["A".to_string()]);
891
892        open(
893            &store,
894            u.clone(),
895            "<?php\nclass A {}\nclass B {}".to_string(),
896        );
897        assert_eq!(
898            salsa_index_names(&store, &u),
899            vec!["A".to_string(), "B".to_string()]
900        );
901
902        open(&store, u.clone(), "<?php\nfunction greet() {}".to_string());
903        assert_eq!(salsa_index_names(&store, &u), vec!["greet".to_string()]);
904    }
905
906    #[test]
907    fn mirror_tracks_index_and_index_from_doc() {
908        let store = DocumentStore::new();
909
910        // Background `index(url, text)` path.
911        let u1 = uri("/bg1.php");
912        store.index(u1.clone(), "<?php\nclass Bg1 {}");
913        assert_eq!(salsa_index_names(&store, &u1), vec!["Bg1".to_string()]);
914
915        // `index_from_doc(url, &doc)` path (workspace-scan Phase 2).
916        let u2 = uri("/bg2.php");
917        let doc =
918            crate::diagnostics::parse_document_no_diags("<?php\nclass Bg2 {}\nfunction f() {}");
919        store.index_from_doc(u2.clone(), &doc);
920        assert_eq!(
921            salsa_index_names(&store, &u2),
922            vec!["Bg2".to_string(), "f".to_string()]
923        );
924    }
925
926    /// G3: confirms the `parsed_cache` actually hits — two consecutive
927    /// `get_doc_salsa` calls on unchanged text return the same `Arc`
928    /// (pointer equality), and an edit forces a miss that produces a
929    /// different `Arc`.
930    /// parsed_cache must stay bounded — inserting more than
931    /// `PARSED_CACHE_CAP` unique URLs must not cause unbounded growth.
932    /// Eviction is probabilistic, so we only assert the bound, not which
933    /// Seeding a cached index for a URL that was never mirrored is a no-op
934    /// (returns `false`) — avoids silently allocating SourceFiles outside
935    /// `mirror_text`'s control.
936    #[test]
937    fn seed_cached_index_noops_for_unknown_uri() {
938        let store = DocumentStore::new();
939        let u = uri("/never_mirrored.php");
940        let index = Arc::new(crate::file_index::FileIndex::default());
941        assert!(!store.seed_cached_index(&u, index));
942    }
943
944    /// entries survive.
945    #[test]
946    fn parsed_cache_stays_bounded_under_many_inserts() {
947        let store = DocumentStore::new();
948        let overflow = PARSED_CACHE_CAP + 100;
949        for i in 0..overflow {
950            let u = uri(&format!("/cap/file{i}.php"));
951            store.index(u.clone(), "<?php\nclass A {}");
952            // Force a parsed_cache insert via get_doc_salsa.
953            let _ = store.get_doc_salsa(&u);
954        }
955        assert!(
956            store.parsed_cache.len() <= PARSED_CACHE_CAP,
957            "parsed_cache grew to {} entries (cap {})",
958            store.parsed_cache.len(),
959            PARSED_CACHE_CAP
960        );
961    }
962
963    #[test]
964    fn get_doc_salsa_cache_hits_across_calls() {
965        let store = DocumentStore::new();
966        let u = uri("/g3_cache.php");
967        open(&store, u.clone(), "<?php\nclass G3 {}".to_string());
968
969        let a = store.get_doc_salsa(&u).unwrap();
970        let b = store.get_doc_salsa(&u).unwrap();
971        assert!(
972            Arc::ptr_eq(&a, &b),
973            "parsed_cache hit should yield the same Arc across calls"
974        );
975
976        open(&store, u.clone(), "<?php\nclass G3b {}".to_string());
977        let c = store.get_doc_salsa(&u).unwrap();
978        assert!(
979            !Arc::ptr_eq(&a, &c),
980            "edit should invalidate the parsed_cache entry"
981        );
982    }
983
984    #[test]
985    fn get_doc_salsa_returns_some_for_mirrored_files() {
986        // Phase E4: `get_doc_salsa` no longer gates on open-state. The
987        // open/closed distinction now lives on `Backend::get_doc`.
988        let store = DocumentStore::new();
989        let u = uri("/e4_doc.php");
990        store.index(u.clone(), "<?php\nclass P {}");
991        assert!(store.get_doc_salsa(&u).is_some());
992    }
993
994    #[test]
995    fn get_salsa_accessors_return_none_for_unknown_uri() {
996        let store = DocumentStore::new();
997        let u = uri("/never-seen.php");
998        assert!(store.get_doc_salsa(&u).is_none());
999        assert!(store.get_index_salsa(&u).is_none());
1000        assert!(store.get_method_returns_salsa(&u).is_none());
1001    }
1002
1003    /// Phase E1: concurrent readers and writers must not deadlock, panic, or
1004    /// return stale data. Writers briefly bump inputs while readers are
1005    /// running on cloned snapshots; any `salsa::Cancelled` raised on the
1006    /// reader side must be caught and retried by `snapshot_query`.
1007    ///
1008    /// Post mir 0.22: `get_symbol_refs_salsa` is a no-op stub (returns empty
1009    /// vec), so reader threads cannot exhaust the retry cap or panic on that
1010    /// path. The remaining salsa surface (`get_doc_salsa`, `get_index_salsa`)
1011    /// is protected by `snapshot_query`'s last-resort host-lock fallback.
1012    #[test]
1013    fn concurrent_reads_and_writes_do_not_panic() {
1014        use std::sync::Arc;
1015        use std::thread;
1016        use std::time::{Duration, Instant};
1017
1018        let store = Arc::new(DocumentStore::new());
1019        let urls: Vec<Url> = (0..8).map(|i| uri(&format!("/f{i}.php"))).collect();
1020        for (i, u) in urls.iter().enumerate() {
1021            open(&store, u.clone(), format!("<?php\nclass C{i} {{}}"));
1022        }
1023
1024        let deadline = Instant::now() + Duration::from_millis(400);
1025        let mut handles = Vec::new();
1026
1027        // Writer thread: keep bumping every file's text.
1028        {
1029            let store = Arc::clone(&store);
1030            let urls = urls.clone();
1031            handles.push(thread::spawn(move || {
1032                let mut rev = 0u32;
1033                while Instant::now() < deadline {
1034                    for u in &urls {
1035                        let text = format!("<?php\nclass C{{}}\n// rev {rev}");
1036                        store.mirror_text(u, &text);
1037                    }
1038                    rev += 1;
1039                }
1040            }));
1041        }
1042
1043        // Reader threads: hammer the salsa accessors.
1044        for _ in 0..4 {
1045            let store = Arc::clone(&store);
1046            let urls = urls.clone();
1047            handles.push(thread::spawn(move || {
1048                while Instant::now() < deadline {
1049                    for u in &urls {
1050                        let _ = store.get_doc_salsa(u);
1051                        let _ = store.get_index_salsa(u);
1052                    }
1053                    // Post mir 0.22: codebase + refs live in the session,
1054                    // not salsa. Concurrent-read smoke is now limited to the
1055                    // remaining salsa surface (parsed_doc, file_index).
1056                    let _ = store.get_symbol_refs_salsa("C0");
1057                }
1058            }));
1059        }
1060
1061        for h in handles {
1062            h.join().expect("no panic under concurrent read/write");
1063        }
1064    }
1065
1066    /// PSR-4 lazy-loading: `get_semantic_issues_salsa` must not emit
1067    /// `UndefinedClass` for a class that is PSR-4-resolvable on disk, even
1068    /// when the dependency file is not yet in `source_files`.
1069    #[test]
1070    fn psr4_lazy_load_suppresses_undefined_class() {
1071        let tmp = tempfile::tempdir().unwrap();
1072
1073        // Write Entity.php to disk (not mirrored into the store).
1074        std::fs::create_dir_all(tmp.path().join("src/Model")).unwrap();
1075        std::fs::write(
1076            tmp.path().join("src/Model/Entity.php"),
1077            "<?php\nnamespace App\\Model;\nclass Entity {}\n",
1078        )
1079        .unwrap();
1080
1081        // Write composer.json so Psr4Map::load can build the map.
1082        std::fs::write(
1083            tmp.path().join("composer.json"),
1084            r#"{"autoload":{"psr-4":{"App\\":"src/"}}}"#,
1085        )
1086        .unwrap();
1087
1088        let store = DocumentStore::new();
1089
1090        // Inject a PSR-4 map pointing at the tmp dir.
1091        store
1092            .psr4
1093            .store(Arc::new(crate::autoload::Psr4Map::load(tmp.path())));
1094
1095        // Mirror the consuming file (Entity not yet in source_files).
1096        // Uses Entity as a parameter type hint — the analyzer resolves these
1097        // through use statements, so this exercises the full PSR-4 lazy-load path.
1098        let handler_url = Url::from_file_path(tmp.path().join("src/Service/Handler.php")).unwrap();
1099        store.mirror_text(
1100            &handler_url,
1101            "<?php\nnamespace App\\Service;\nuse App\\Model\\Entity;\nfunction handle(Entity $e): Entity { return $e; }\n",
1102        );
1103
1104        let issues = store.get_semantic_issues_salsa(&handler_url).unwrap();
1105        let undef: Vec<_> = issues
1106            .iter()
1107            .filter(|i| matches!(i.kind, mir_issues::IssueKind::UndefinedClass { .. }))
1108            .collect();
1109        assert!(
1110            undef.is_empty(),
1111            "PSR-4 lazy-loading must prevent UndefinedClass for App\\Model\\Entity; got: {undef:?}"
1112        );
1113    }
1114
1115    /// Issue #191 regression: workspace-wide scans (find-references, rename,
1116    /// call-hierarchy) must not re-parse closed/indexed files on repeated
1117    /// invocations. Once a file's `ParsedDoc` has been produced, subsequent
1118    /// `all_docs_for_scan()` calls must hit the cache and return the same
1119    /// `Arc<ParsedDoc>` (pointer equality), proving no re-parse occurred.
1120    ///
1121    /// The cache layers protecting this are:
1122    ///   1. `parsed_cache` (cap [`PARSED_CACHE_CAP`]) — read-through, validated
1123    ///      via `Arc::ptr_eq` on the text Arc.
1124    ///   2. salsa `parsed_doc` memo (`lru = 2048`) — second line of defense
1125    ///      when `parsed_cache` evicts.
1126    ///
1127    /// Together they keep every workspace-scan op O(N) memo lookups, never
1128    /// O(N) parses, for any workspace whose file count fits the cap.
1129    #[test]
1130    fn all_docs_for_scan_does_not_reparse_indexed_files() {
1131        let store = DocumentStore::new();
1132        const N: usize = 50;
1133        for i in 0..N {
1134            let u = uri(&format!("/scan/file{i}.php"));
1135            store.index(u, &format!("<?php\nclass C{i} {{}}\nfunction f{i}() {{}}"));
1136        }
1137
1138        let first: Vec<_> = store.all_docs_for_scan();
1139        let second: Vec<_> = store.all_docs_for_scan();
1140        assert_eq!(first.len(), N);
1141        assert_eq!(second.len(), N);
1142
1143        let by_url_first: std::collections::HashMap<Url, Arc<ParsedDoc>> =
1144            first.into_iter().collect();
1145        for (u, doc2) in second {
1146            let doc1 = by_url_first
1147                .get(&u)
1148                .expect("second scan returned a URL the first didn't");
1149            assert!(
1150                Arc::ptr_eq(doc1, &doc2),
1151                "{u} re-parsed across all_docs_for_scan calls — \
1152                 cache (parsed_cache + salsa parsed_doc memo) failed to hit"
1153            );
1154        }
1155
1156        // Editing one file's text must invalidate just that file's entry,
1157        // not the rest. This locks in self-eviction via Arc::ptr_eq on text.
1158        let edited_url = uri("/scan/file0.php");
1159        let pre_edit = store.get_doc_salsa(&edited_url).unwrap();
1160        store.index(edited_url.clone(), "<?php\nclass C0Edited {}");
1161        let post_edit = store.get_doc_salsa(&edited_url).unwrap();
1162        assert!(
1163            !Arc::ptr_eq(&pre_edit, &post_edit),
1164            "edited file must produce a fresh ParsedDoc"
1165        );
1166        for i in 1..N {
1167            let u = uri(&format!("/scan/file{i}.php"));
1168            let original = by_url_first.get(&u).unwrap();
1169            let after = store.get_doc_salsa(&u).unwrap();
1170            assert!(
1171                Arc::ptr_eq(original, &after),
1172                "{u} should not have re-parsed because of an unrelated edit"
1173            );
1174        }
1175    }
1176}