Skip to main content

php_lsp/document/
document_store.rs

1use std::sync::atomic::{AtomicBool, Ordering};
2use std::sync::{Arc, Mutex, OnceLock};
3
4use arc_swap::ArcSwap;
5
6use dashmap::{DashMap, DashSet};
7use salsa::Setter;
8use tower_lsp::lsp_types::{SemanticToken, Url};
9
10use crate::db::analysis::AnalysisHost;
11use crate::db::input::{FileText, Workspace, find_source_file};
12use crate::document::ast::ParsedDoc;
13use crate::document::cache_registry::CacheRegistry;
14use crate::index::file_index::FileIndex;
15use crate::lang::autoload::Psr4Map;
16
17pub struct DocumentStore {
18    /// Per-file caches with unified eviction logic. See [`CacheRegistry`].
19    caches: CacheRegistry,
20
21    // ── Salsa-input storage ────────────────────────────────────────────────
22    // Phase E4: `DocumentStore` is now a pure salsa-input wrapper. Open-file
23    // state (live text, version token, parse-diagnostics cache) lives on
24    // `Backend` in its `open_files` map; the set of files tracked by salsa
25    // is exactly `source_files.keys()`.
26    /// Mutex — held briefly to clone the database for reads and to mutate
27    /// it for writes. Per-thread salsa state (`zalsa_local`) is `!Sync`,
28    /// which rules out `RwLock<AnalysisHost>`. Readers instead snapshot the
29    /// db (cheap — storage is `Arc<Zalsa>`) and run queries on the clone
30    /// with the lock released, giving real read/read parallelism. Writers
31    /// during an in-flight read bump the shared revision; the reader raises
32    /// `salsa::Cancelled` on its next query call and `snapshot_query` below
33    /// retries with a fresh snapshot.
34    host: Mutex<AnalysisHost>,
35    /// `Url -> FileText` lookup. One immortal `FileText` salsa input per unique
36    /// URI ever seen. Text edits mutate the existing handle; delete/reopen cycles
37    /// reuse it rather than allocating a new input each time.
38    file_texts: DashMap<Url, FileText>,
39    /// URIs that have been removed. Re-opening a deleted URI un-deletes it here
40    /// and reuses the existing `FileText` handle.
41    deleted_uris: DashSet<Url>,
42    /// Set to `true` when the set of tracked files changes (add or remove).
43    /// `sync_workspace_files` skips the collect/sort/compare path when this
44    /// is `false`, avoiding a mutex acquisition on every LSP request.
45    workspace_files_dirty: AtomicBool,
46    /// Workspace salsa input. Tracks the full set of `SourceFile`s that
47    /// participate in whole-program queries (`codebase`, `file_refs`).
48    /// Re-synced from `source_files` on demand by `sync_workspace_files`.
49    workspace: Workspace,
50    /// Shared PSR-4 namespace-to-path map. Shared with `Backend` via `Arc`
51    /// so updates from `initialized` (when composer.json is loaded) are
52    /// visible here without any additional wiring. `ArcSwap` makes reads
53    /// lock-free — a poisoned guard can no longer crash a request handler.
54    psr4: Arc<ArcSwap<Psr4Map>>,
55    /// mir-analyzer's `AnalysisSession` — owns the workspace MirDb, runs
56    /// Pass-2 analysis, and lazy-loads dependencies via PSR-4. Built lazily
57    /// on first use; rebuilt when PHP version changes.
58    analysis_session: Mutex<Option<(mir_analyzer::PhpVersion, Arc<mir_analyzer::AnalysisSession>)>>,
59    /// Cache directory shared with the workspace file-index cache. When set,
60    /// new `AnalysisSession`s are built with `with_cache_dir` so that stub
61    /// parsing results survive server restarts.
62    session_cache_dir: OnceLock<std::path::PathBuf>,
63    /// URIs of autoload.files entries from composer.json. These define global
64    /// helper functions (e.g. tap, class_uses_recursive in Laravel) that are
65    /// not discoverable by namespace walk. Pre-ingested into the AnalysisSession
66    /// before each file analysis so mir doesn't emit false UndefinedFunction.
67    autoload_uris: std::sync::RwLock<Vec<Url>>,
68    /// On-demand `FileIndex` store for vendor files loaded lazily via PSR-4
69    /// navigation. Vendor is excluded from the eager workspace scan, so files
70    /// ingested by `psr4_method_goto` are not in the salsa workspace_index;
71    /// this map fills that gap for hierarchy traversal. Populated by
72    /// `cache_vendor_index`; reads via `get_vendor_index`.
73    vendor_index_cache: DashMap<Url, Arc<FileIndex>>,
74}
75
76impl Default for DocumentStore {
77    fn default() -> Self {
78        Self::new()
79    }
80}
81
82impl DocumentStore {
83    pub fn new() -> Self {
84        let host = AnalysisHost::new();
85        let workspace = Workspace::new(
86            host.db(),
87            Arc::<[(Arc<str>, FileText)]>::from(Vec::new()),
88            mir_analyzer::PhpVersion::LATEST,
89        );
90        DocumentStore {
91            caches: CacheRegistry::new(),
92            host: Mutex::new(host),
93            file_texts: DashMap::new(),
94            deleted_uris: DashSet::new(),
95            workspace_files_dirty: AtomicBool::new(true),
96            workspace,
97            psr4: Arc::new(ArcSwap::from_pointee(Psr4Map::empty())),
98            analysis_session: Mutex::new(None),
99            session_cache_dir: OnceLock::new(),
100            autoload_uris: std::sync::RwLock::new(Vec::new()),
101            vendor_index_cache: DashMap::new(),
102        }
103    }
104
105    /// Set the directory used to persist stub-parse and analysis results across
106    /// server restarts.  Must be called before the first `analysis_session` use;
107    /// subsequent calls are silently ignored (`OnceLock` semantics).
108    pub fn set_session_cache_dir(&self, dir: std::path::PathBuf) {
109        let _ = self.session_cache_dir.set(dir);
110    }
111
112    /// Register URIs discovered from composer.json `autoload.files` entries.
113    /// These PHP files define global helper functions (e.g. `tap()` in Laravel)
114    /// that are not class-resolvable via PSR-4. Clears `analysis_cache` so the
115    /// next per-file analysis pre-ingests them into the AnalysisSession before
116    /// running mir's FileAnalyzer.
117    pub fn set_autoload_uris(&self, uris: Vec<Url>) {
118        *self.autoload_uris.write().unwrap() = uris;
119        self.caches.evict_analysis_all();
120    }
121
122    /// Get or build the `AnalysisSession` for the given PHP version. Rebuilds
123    /// when the version changes (e.g. user flipped config). The session owns
124    /// its own salsa db and AnalysisCache; lazy-loads vendor files via the
125    /// shared PSR-4 map.
126    pub fn analysis_session(
127        &self,
128        php_version: mir_analyzer::PhpVersion,
129    ) -> Arc<mir_analyzer::AnalysisSession> {
130        let mut guard = self.analysis_session.lock().unwrap();
131        if let Some((cached_ver, session)) = guard.as_ref()
132            && *cached_ver == php_version
133        {
134            return Arc::clone(session);
135        }
136        // Build a fresh session. Hand it the shared PSR-4 map so it can
137        // lazy-resolve `UndefinedClass` candidates without us having to mirror
138        // every vendor file upfront.
139        let resolver: Arc<dyn mir_analyzer::ClassResolver> = self.psr4.load_full();
140        let mut builder =
141            mir_analyzer::AnalysisSession::new(php_version).with_class_resolver(resolver);
142        if let Some(dir) = self.session_cache_dir.get() {
143            builder = builder.with_cache_dir(dir);
144        }
145        let session = Arc::new(builder);
146        session.ensure_all_stubs();
147        *guard = Some((php_version, Arc::clone(&session)));
148        session
149    }
150
151    /// Current PHP version tracked by the workspace input.
152    pub fn workspace_php_version(&self) -> mir_analyzer::PhpVersion {
153        self.with_host(|h| self.workspace.php_version(h.db()))
154    }
155
156    /// Return the `Arc<ArcSwap<Psr4Map>>` so callers can share it.
157    /// `Backend` clones this arc at construction time so writes
158    /// (e.g. loading composer.json on `initialized`) are immediately visible
159    /// to PSR-4 resolution during analysis without extra plumbing.
160    pub fn psr4_arc(&self) -> Arc<ArcSwap<Psr4Map>> {
161        Arc::clone(&self.psr4)
162    }
163
164    /// Mirror a file's current text into the salsa layer. Creates the
165    /// `FileText` input on first sight, otherwise updates `text` on the
166    /// existing input (bumping the salsa revision so downstream queries
167    /// invalidate).
168    pub fn mirror_text(&self, uri: &Url, text: &str) {
169        // G2 fast path: compare against the lock-free text cache. When the
170        // new text byte-matches what we already mirrored, skip the host
171        // mutex entirely. Common during workspace scan + `did_open` for
172        // unchanged files, where most threads would otherwise serialise on
173        // `host.lock()` just to confirm a no-op.
174        if let Some(cached) = self.caches.text_cache.get(uri)
175            && **cached == *text
176            && !self.deleted_uris.contains(uri)
177            && self.file_texts.contains_key(uri)
178        {
179            return;
180        }
181        self.mirror_text_arc(uri, Arc::from(text))
182    }
183
184    /// Like [`mirror_text`] but takes an already-allocated `Arc<str>`.
185    ///
186    /// Callers that already hold an `Arc<str>` (e.g. `ingest_from_doc` reusing
187    /// `ParsedDoc::source_arc()`) use this to avoid a second allocation and to
188    /// ensure `text_cache` and `parsed_cache` hold the same Arc pointer —
189    /// enabling `Arc::ptr_eq` validation in `get_parsed_cached`.
190    pub fn mirror_text_arc(&self, uri: &Url, text_arc: Arc<str>) {
191        if let Some(ft) = self.file_texts.get(uri).map(|e| *e) {
192            self.deleted_uris.remove(uri);
193            // Slow path: re-check inside the mutex. Salsa's `set_text`
194            // unconditionally bumps the revision, so every spurious setter
195            // invalidates every downstream query.
196            let mut host = self.host.lock().unwrap();
197            let current: Arc<str> = ft.text(host.db());
198            if *current == *text_arc {
199                drop(host);
200                self.caches.text_cache.insert(uri.clone(), current);
201                return;
202            }
203            ft.set_text(host.db_mut()).to(text_arc.clone());
204            // Phase K2: any text change invalidates a previously-seeded
205            // cached index. Only bump the revision when a cached index is
206            // actually present — an unconditional set would cause two
207            // revision bumps per edit (one for text, one for cached_index),
208            // which needlessly cancels in-flight `file_index` queries on
209            // every keystroke.
210            if ft.cached_index(host.db()).is_some() {
211                ft.set_cached_index(host.db_mut()).to(None);
212            }
213            drop(host);
214            self.caches.text_cache.insert(uri.clone(), text_arc);
215            // Evict only this file's analysis. Declaration-level changes (which
216            // invalidate other files' cached analyses) are detected lazily in
217            // `cached_analysis` by comparing the new `FileIndex` against the
218            // stored fingerprint; if changed, `decl_version` is bumped and other
219            // files' cache entries (which carry the old version) become stale.
220            // Body-only edits leave `decl_version` unchanged so sibling files
221            // are served from cache without re-analysis.
222            self.caches.evict_analysis(uri);
223        } else {
224            let is_vendor = uri.as_str().contains("/vendor/");
225            let ft = {
226                let mut host = self.host.lock().unwrap();
227                let ft = FileText::new(host.db(), text_arc.clone(), None);
228                if is_vendor {
229                    // Vendor files never change in a session — mark their text
230                    // as HIGH durability so salsa skips re-validating
231                    // parsed_doc/file_index for them on every user edit.
232                    ft.set_text(host.db_mut())
233                        .with_durability(salsa::Durability::HIGH)
234                        .to(Arc::clone(&text_arc));
235                }
236                ft
237            };
238            self.file_texts.insert(uri.clone(), ft);
239            self.caches.text_cache.insert(uri.clone(), text_arc);
240            self.workspace_files_dirty.store(true, Ordering::Release);
241            // A newly-ingested file may resolve previously-unresolved references
242            // in other files. Cross-file invalidation happens lazily: the first
243            // `cached_analysis` call for this file sees no fingerprint (old_fp =
244            // None), treats it as a declaration change, and bumps `decl_version`,
245            // making every other file's cache entry stale at that point.
246            // No eager clear needed — other files' entries are still valid until
247            // this file's declarations are first observed.
248        }
249    }
250
251    /// Return the `FileText` handle for a URL, if active (not deleted).
252    #[cfg(test)]
253    pub fn source_file(&self, uri: &Url) -> Option<FileText> {
254        if self.deleted_uris.contains(uri) {
255            return None;
256        }
257        self.file_texts.get(uri).map(|e| *e)
258    }
259
260    /// Phase K2: pre-seed a `FileIndex` loaded from the on-disk cache onto
261    /// the `FileText` input for `uri`. The next `file_index` call for that
262    /// file returns the cached index directly, skipping parse + extract.
263    ///
264    /// Must be called **before** any `file_index(db, sf)` call for this file —
265    /// otherwise salsa has already memoized the fresh-parse result and setting
266    /// `cached_index` now would only bump the revision without using the cache.
267    /// In practice the workspace-scan path seeds immediately after `mirror_text`
268    /// and before any query runs.
269    ///
270    /// Returns `false` when `uri` was not mirrored (caller should mirror
271    /// first); returns `true` on success.
272    pub fn seed_cached_index(&self, uri: &Url, index: Arc<FileIndex>) -> bool {
273        let Some(ft) = self.file_texts.get(uri).map(|e| *e) else {
274            return false;
275        };
276        let mut host = self.host.lock().unwrap();
277        ft.set_cached_index(host.db_mut()).to(Some(index));
278        true
279    }
280
281    /// Run `f` with a borrow of the `AnalysisHost`. Used by tests and by the
282    /// upcoming `*_salsa` accessors to query the salsa layer.
283    pub fn with_host<R>(&self, f: impl FnOnce(&AnalysisHost) -> R) -> R {
284        let host = self.host.lock().unwrap();
285        f(&host)
286    }
287
288    /// Phase E1: take a brief lock, clone the salsa database, release the
289    /// lock. Queries then run on the cloned `RootDatabase` without blocking
290    /// writers or other readers. Salsa's `Storage<Self>` is reference-counted
291    /// (`Arc<Zalsa>`), so the clone is cheap — it shares memoized data and
292    /// the cancellation flag with the host's db.
293    fn snapshot_db(&self) -> crate::db::analysis::RootDatabase {
294        let host = self.host.lock().unwrap();
295        host.db().clone()
296    }
297
298    /// Run a query on a fresh snapshot, catching `salsa::Cancelled` (raised
299    /// when a concurrent writer advances the revision) and retrying with a
300    /// new snapshot. Writers hold the mutex only long enough to bump input
301    /// values, so a handful of retries is more than enough in practice; we
302    /// cap at 8 to avoid pathological livelock under sustained write pressure.
303    fn snapshot_query<R>(&self, f: impl Fn(&crate::db::analysis::RootDatabase) -> R + Clone) -> R {
304        use std::panic::AssertUnwindSafe;
305        for _ in 0..8 {
306            let db = self.snapshot_db();
307            let f = f.clone();
308            match salsa::Cancelled::catch(AssertUnwindSafe(move || f(&db))) {
309                Ok(r) => return r,
310                Err(_) => continue,
311            }
312        }
313        // Last-resort attempt: take the mutex for the whole query so no
314        // writer can race us. Much slower, but guaranteed to make progress.
315        let host = self.host.lock().unwrap();
316        f(host.db())
317    }
318
319    /// Evict the semantic-tokens cache for `uri`. Called by Backend when a
320    /// file is closed; diff-based tokens computed against the old revision
321    /// are no longer meaningful.
322    pub fn evict_token_cache(&self, uri: &Url) {
323        self.caches.evict_tokens(uri);
324    }
325
326    /// Return the `FileIndex` for `uri` by running `file_index` on a salsa
327    /// snapshot.  Returns `None` when `uri` has not been mirrored.
328    ///
329    /// Test-only — production code uses the salsa query directly via
330    /// `snapshot_query`.
331    #[cfg(test)]
332    pub fn source_files_len(&self) -> usize {
333        self.file_texts.len()
334    }
335
336    #[cfg(test)]
337    pub fn snapshot_query_file_index(
338        &self,
339        uri: &Url,
340    ) -> Option<crate::index::file_index::FileIndex> {
341        if self.deleted_uris.contains(uri) {
342            return None;
343        }
344        if !self.file_texts.contains_key(uri) {
345            return None;
346        }
347        self.sync_workspace_files();
348        let uri_str: Arc<str> = Arc::from(uri.as_str());
349        let ws = self.workspace;
350        self.snapshot_query(move |db| {
351            let sf = find_source_file(db, ws, &uri_str)?;
352            Some(crate::db::index::file_index(db, sf).get().clone())
353        })
354    }
355
356    /// Register a file in the salsa layer without marking it open.
357    ///
358    /// Salsa's `parsed_doc` query parses lazily on first read; diagnostics
359    /// are populated by `did_open` when the editor actually opens the file.
360    pub fn ingest(&self, uri: Url, text: &str) {
361        self.mirror_text(&uri, text);
362    }
363
364    /// Index a file using an already-parsed `ParsedDoc`, avoiding a second parse.
365    ///
366    /// Prefer this over [`ingest`] when the caller already has a `ParsedDoc` (e.g.
367    /// after running `DefinitionCollector` during workspace scan). Reuses the
368    /// `Arc<str>` already owned by `doc` so that `text_cache` and `SourceFile::text`
369    /// share the same pointer — enabling the `Arc::ptr_eq` fast path in
370    /// `get_parsed_cached` on the first subsequent salsa query, without an extra
371    /// `Arc::from(source)` allocation.
372    pub fn ingest_from_doc(&self, uri: Url, doc: &ParsedDoc) {
373        self.mirror_text_arc(&uri, doc.source_arc());
374    }
375
376    pub fn remove(&self, uri: &Url) {
377        self.caches.evict(uri);
378        // Mark the URI as deleted but keep the `source_files` entry so the
379        // salsa `SourceFile` handle remains alive. Re-opening the file reuses
380        // the same handle instead of calling `SourceFile::new()` again, which
381        // would create a new orphaned salsa input on every delete-reopen cycle.
382        self.deleted_uris.insert(uri.clone());
383        self.workspace_files_dirty.store(true, Ordering::Release);
384        // Sync workspace files so the deleted file is removed from the salsa
385        // `Workspace::files` list and won't appear in workspace symbols etc.
386        self.sync_workspace_files();
387        // Also evict the file from the `AnalysisSession`'s internal state so
388        // workspace symbol queries don't keep returning the deleted file's
389        // declarations. Cheap when the session hasn't ingested this file.
390        let guard = self.analysis_session.lock().unwrap();
391        if let Some((_, session)) = guard.as_ref() {
392            session.invalidate_file(uri.as_str());
393        }
394    }
395
396    // ── Salsa-backed accessors ─────────────────────────────────────────────
397    //
398    // Reads run the memoized `parsed_doc` / `file_index` queries, parsing
399    // only on first access per revision. These are the production accessors
400    // used by every handler.
401
402    /// Salsa-backed parsed document.
403    ///
404    /// Salsa-backed parsed document for any mirrored file (open or
405    /// background-indexed). Returns `None` only when the file is not known
406    /// to the store. Callers that want "only if open" should gate on
407    /// `Backend::open_files` at the call site (see `Backend::get_doc`).
408    pub fn get_doc_salsa(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
409        self.get_parsed_cached(uri)
410    }
411
412    /// Salsa-backed compact symbol index.
413    pub fn get_index_salsa(&self, uri: &Url) -> Option<Arc<FileIndex>> {
414        if self.deleted_uris.contains(uri) {
415            return None;
416        }
417        if !self.file_texts.contains_key(uri) {
418            return None;
419        }
420        self.sync_workspace_files();
421        let uri_str: Arc<str> = Arc::from(uri.as_str());
422        let ws = self.workspace;
423        self.snapshot_query(move |db| {
424            let sf = find_source_file(db, ws, &uri_str)?;
425            Some(crate::db::index::file_index(db, sf).0.clone())
426        })
427    }
428
429    /// Salsa-backed pre-computed symbol map (name → Vec<SymbolEntry>).
430    /// Memoized per revision: stable files serve from cache in O(1).
431    pub fn get_symbol_map_salsa(
432        &self,
433        uri: &Url,
434    ) -> Option<Arc<crate::types::symbol_map::SymbolMap>> {
435        if self.deleted_uris.contains(uri) {
436            return None;
437        }
438        if !self.file_texts.contains_key(uri) {
439            return None;
440        }
441        self.sync_workspace_files();
442        let uri_str: Arc<str> = Arc::from(uri.as_str());
443        let ws = self.workspace;
444        self.snapshot_query(move |db| {
445            let sf = find_source_file(db, ws, &uri_str)?;
446            Some(crate::db::symbol_map::symbol_map(db, sf).0.clone())
447        })
448    }
449
450    /// Pre-computed symbol maps for every entry in `open_urls` except `uri`.
451    pub fn other_symbol_maps(
452        &self,
453        uri: &Url,
454        open_urls: &[Url],
455    ) -> Vec<(Url, Arc<crate::types::symbol_map::SymbolMap>)> {
456        open_urls
457            .iter()
458            .filter(|u| *u != uri)
459            .filter_map(|u| self.get_symbol_map_salsa(u).map(|m| (u.clone(), m)))
460            .collect()
461    }
462
463    /// G3: shared implementation for `get_doc_salsa`.
464    /// Tries the `parsed_cache` (lock-free) first; validates via
465    /// `Arc::ptr_eq` against the G2 `text_cache` so a concurrent writer
466    /// that has already committed a new text input cannot be masked by a
467    /// stale cache entry. On miss, captures the text Arc and ParsedDoc
468    /// together inside a single `snapshot_query`, then publishes both.
469    fn get_parsed_cached(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
470        if let Some(current_text) = self.caches.text_cache.get(uri)
471            && let Some(entry) = self.caches.parsed_cache.get(uri)
472            && Arc::ptr_eq(&*current_text, &entry.0)
473        {
474            return Some(entry.1.clone());
475        }
476
477        if self.deleted_uris.contains(uri) {
478            return None;
479        }
480        if !self.file_texts.contains_key(uri) {
481            return None;
482        }
483        self.sync_workspace_files();
484        let uri_str: Arc<str> = Arc::from(uri.as_str());
485        let ws = self.workspace;
486        let (text, doc) = self.snapshot_query(move |db| {
487            let sf = find_source_file(db, ws, &uri_str)?;
488            let text = sf.text_input(db).text(db);
489            let doc = crate::db::parse::parsed_doc(db, sf).0.clone();
490            Some((text, doc))
491        })?;
492        self.caches.insert_parsed(uri.clone(), text, doc.clone());
493        Some(doc)
494    }
495
496    /// Refresh `workspace.files` to mirror the current active file set.
497    ///
498    /// Skips all work when `workspace_files_dirty` is `false` (the common
499    /// case after the workspace scan completes — file-set changes are rare).
500    pub fn sync_workspace_files(&self) {
501        // Atomically clear the flag.  If it was already false the file set
502        // hasn't changed since the last sync; nothing to do.
503        if !self.workspace_files_dirty.swap(false, Ordering::AcqRel) {
504            return;
505        }
506
507        // Collect active (non-deleted) files without holding the host lock.
508        let mut files: Vec<(Arc<str>, FileText)> = self
509            .file_texts
510            .iter()
511            .filter(|e| !self.deleted_uris.contains(e.key()))
512            .map(|e| (Arc::<str>::from(e.key().as_str()), *e.value()))
513            .collect();
514        // Sort by URI string for stable ordering.
515        files.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
516
517        let mut host = self.host.lock().unwrap();
518        let current = self.workspace.files(host.db());
519        if current.len() == files.len()
520            && current
521                .iter()
522                .zip(files.iter())
523                .all(|(a, b)| a.0 == b.0 && a.1 == b.1)
524        {
525            return;
526        }
527        self.workspace.set_files(host.db_mut()).to(Arc::from(files));
528    }
529
530    /// Mark the workspace file set as dirty so the next `sync_workspace_files`
531    /// call re-runs the collect/sort/compare path.  Exposed for benchmarks that
532    /// need to measure the dirty-path cost in isolation.
533    pub fn mark_workspace_files_dirty(&self) {
534        self.workspace_files_dirty.store(true, Ordering::Release);
535    }
536
537    /// Update the PHP version tracked by the workspace. Salsa will invalidate
538    /// all `semantic_issues` queries so diagnostics are re-evaluated.
539    /// Skips the setter when the version hasn't changed to avoid spurious
540    /// query invalidation.
541    pub fn set_php_version(&self, version: mir_analyzer::PhpVersion) {
542        let mut host = self.host.lock().unwrap();
543        if self.workspace.php_version(host.db()) == version {
544            return;
545        }
546        self.workspace.set_php_version(host.db_mut()).to(version);
547        // The analysis_cache validates against source content only, so stale
548        // FileAnalysis results from the old PHP version would survive unchanged
549        // files. Clear it so the next request re-runs with the new version.
550        drop(host);
551        self.caches.evict_analysis_all();
552    }
553
554    /// Session-backed workspace reference lookup. Returns `(file, line, col)`
555    /// locations for every occurrence of `symbol` in the files that the
556    /// `AnalysisSession` has ingested so far. The session's reference index
557    /// is built incrementally during `ingest_file`, so refs for files the
558    /// session hasn't seen yet (background-indexed but never opened) won't
559    /// appear here — those are covered by the AST-walker fallback in the
560    /// references handler.
561    ///
562    /// Returns LSP-style 0-based line/column.
563    pub fn session_references_to(
564        &self,
565        symbol: &mir_analyzer::Name,
566    ) -> Vec<(Arc<str>, u32, u32, u32)> {
567        let php_version = self.workspace_php_version();
568        let session = self.analysis_session(php_version);
569        session
570            .references_to(symbol)
571            .into_iter()
572            .map(|(file, range)| {
573                // mir uses 1-based lines; 0-based columns (since mir 0.42.0).
574                let line = range.start.line.saturating_sub(1);
575                let col_start = range.start.column;
576                let col_end = range.end.column;
577                (file, line, col_start, col_end)
578            })
579            .collect()
580    }
581
582    /// Phase J: salsa-memoized aggregate workspace index.
583    ///
584    /// Returns the shared `Arc<WorkspaceIndexData>` with flat
585    /// `(Url, Arc<FileIndex>)` list plus pre-built `classes_by_name` and
586    /// `subtypes_of` reverse maps. Used by workspace_symbols,
587    /// prepare_type_hierarchy, supertypes_of, subtypes_of, and
588    /// find_implementations so they don't each rebuild the aggregate per
589    /// request. Invalidates automatically when any file's `file_index`
590    /// changes.
591    pub fn get_workspace_index_salsa(&self) -> Arc<crate::db::workspace_index::WorkspaceIndexData> {
592        self.sync_workspace_files();
593        let ws = self.workspace;
594        self.snapshot_query(move |db| {
595            crate::db::workspace_index::workspace_index(db, ws)
596                .0
597                .clone()
598        })
599    }
600
601    /// No-op after mir 0.22 migration. The session manages its own warm-up
602    /// via `ingest_file` / `analyze_dependents_of`; there's nothing for us
603    /// to pre-warm here.
604    pub fn warm_reference_index(&self) {}
605
606    /// Return the raw source text for `uri` if it has been mirrored into the
607    /// salsa workspace. Used by the references handler to pre-filter session
608    /// results by checking whether a file mentions the owning class name.
609    pub fn source_text(&self, uri: &Url) -> Option<Arc<str>> {
610        self.caches.text_cache.get(uri).map(|e| Arc::clone(&e))
611    }
612
613    /// Run Pass 1 + Pass 2 analysis on every mirrored workspace file so that
614    /// type-aware queries (e.g. `session.references_to`) see the full workspace.
615    ///
616    /// Reference locations are only recorded during Pass 2 (`FileAnalyzer::analyze`).
617    /// `ingest_file` alone (Pass 1) is not sufficient. Only needed for cross-file
618    /// queries like `textDocument/references` that rely on the reference index.
619    /// The session's internal cache makes re-analysis of unchanged files cheap.
620    pub fn ensure_all_files_ingested(&self) {
621        let php_version = self.workspace_php_version();
622        let session = self.analysis_session(php_version);
623        let urls: Vec<Url> = self
624            .file_texts
625            .iter()
626            .filter(|e| !self.deleted_uris.contains(e.key()))
627            .map(|e| e.key().clone())
628            .collect();
629        for uri in &urls {
630            let Some(doc) = self.get_doc_salsa(uri) else {
631                continue;
632            };
633            let file: Arc<str> = Arc::from(uri.as_str());
634            session.ingest_file(file.clone(), doc.source_arc());
635            let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
636            let owned_program = php_ast::owned::to_owned_program(doc.program());
637            let analyzer = mir_analyzer::FileAnalyzer::new(&session);
638            analyzer.analyze(file, doc.source(), &owned_program, &source_map);
639        }
640    }
641
642    /// Cache the semantic tokens computed for a delta response.
643    /// `result_id` is an opaque string (a hash of the token data) returned to the client.
644    pub fn store_token_cache(&self, uri: &Url, result_id: String, tokens: Arc<Vec<SemanticToken>>) {
645        self.caches.store_token(uri, result_id, tokens);
646    }
647
648    /// Return the cached tokens if `result_id` matches the stored one.
649    pub fn get_token_cache(&self, uri: &Url, result_id: &str) -> Option<Arc<Vec<SemanticToken>>> {
650        self.caches.get_token(uri, result_id)
651    }
652
653    /// Raw semantic issues for a file, computed via mir's session-based
654    /// `FileAnalyzer`. The session lazy-loads dependencies via PSR-4 so the
655    /// LSP no longer needs to mirror vendor up-front. Callers apply their
656    /// own `DiagnosticsConfig` filter via
657    /// [`crate::semantic_diagnostics::issues_to_diagnostics`].
658    #[tracing::instrument(skip_all)]
659    pub fn get_semantic_issues_salsa(&self, uri: &Url) -> Option<Arc<[mir_issues::Issue]>> {
660        let analysis = self.cached_analysis(uri)?;
661        let file: Arc<str> = Arc::from(uri.as_str());
662        // Workspace-level class issues for this file (circular inheritance,
663        // override violations, abstract-method gaps). These are session-wide
664        // (a dependency edit changes them without changing this file's bytes),
665        // so they are recomputed live rather than cached alongside the
666        // per-file body analysis.
667        let class_issues = {
668            let _s = tracing::debug_span!("session.class_issues_for").entered();
669            self.analysis_session(self.workspace_php_version())
670                .class_issues(std::slice::from_ref(&file))
671        };
672        let combined: Vec<mir_issues::Issue> = analysis
673            .issues
674            .iter()
675            .cloned()
676            .chain(class_issues)
677            .filter(|i| !i.suppressed)
678            .collect();
679        Some(Arc::from(combined))
680    }
681
    // NOTE: the following paragraphs describe `cached_analysis` (defined
    // further below), not `cached_type_map`. They are plain comments so that
    // rustdoc does not attach them to the wrong method.
    //
    // Run (or reuse) mir's per-file body analysis, retaining the full
    // `mir_analyzer::FileAnalysis` — issues **and** resolved symbols — across
    // requests. Diagnostics read `.issues`; position features call
    // `.symbol_at(offset)` for the resolved type at a cursor.
    //
    // Cache hit when the entry's captured source `Arc` is pointer-equal to the
    // file's current `doc.source_arc()` and the entry's recorded declaration
    // version equals the current `decl_version`. A miss recomputes and
    // overwrites, so the entry self-evicts on any content edit.
690    /// Build (or reuse) the whole-doc completion [`crate::types::type_map::TypeMap`]
691    /// for `uri`. Cache hit when the entry's captured source `Arc` is
692    /// pointer-equal to `doc.source_arc()` and the PHPStorm-meta pointer is
693    /// unchanged (meta lives behind `ArcSwap`, so its address is stable until
694    /// `.phpstorm.meta.php` is reloaded). A miss rebuilds and overwrites, so
695    /// the entry self-evicts on any content edit.
696    pub fn cached_type_map(
697        &self,
698        uri: &Url,
699        doc: &crate::document::ast::ParsedDoc,
700        meta: Option<&crate::lang::phpstorm_meta::PhpStormMeta>,
701    ) -> Arc<crate::types::type_map::TypeMap> {
702        let source = doc.source_arc();
703        let meta_key = meta.map_or(0usize, |m| std::ptr::from_ref(m) as usize);
704        if let Some(entry) = self.caches.type_map_cache.get(uri)
705            && Arc::ptr_eq(&entry.0, &source)
706            && entry.1 == meta_key
707        {
708            return Arc::clone(&entry.2);
709        }
710        let map = Arc::new(crate::types::type_map::TypeMap::from_doc_with_meta(
711            doc, meta,
712        ));
713        self.caches
714            .type_map_cache
715            .insert(uri.clone(), (source, meta_key, Arc::clone(&map)));
716        map
717    }
718
719    /// Cache-hit-only variant of [`Self::cached_analysis`]: returns the cached
720    /// analysis when the entry is current for the file's text, never computes.
721    /// Lets async handlers take the warm path synchronously and reserve
722    /// `spawn_blocking` for the cold path (mir Pass 1 + Pass 2 can take
723    /// hundreds of ms on large files).
724    pub fn cached_analysis_if_fresh(&self, uri: &Url) -> Option<Arc<mir_analyzer::FileAnalysis>> {
725        let doc = self.get_doc_salsa(uri)?;
726        let source = doc.source_arc();
727        let entry = self.caches.analysis_cache.get(uri)?;
728        let cur_ver = self.caches.decl_version();
729        (Arc::ptr_eq(&entry.0, &source) && entry.1 == cur_ver).then(|| Arc::clone(&entry.2))
730    }
731
    /// Run (or reuse) mir's per-file body analysis for `uri`, keeping the full
    /// [`mir_analyzer::FileAnalysis`] in `analysis_cache` across requests.
    ///
    /// Returns `None` when [`Self::get_doc_salsa`] has no parsed document for
    /// `uri`.
    ///
    /// Cache hit when the entry's captured source `Arc` is pointer-equal to
    /// the file's current `doc.source_arc()` **and** the entry's recorded
    /// declaration version equals the current `decl_version`. On a miss the
    /// file is ingested, analyzed, and the cache entry overwritten.
    #[tracing::instrument(skip_all)]
    pub fn cached_analysis(&self, uri: &Url) -> Option<Arc<mir_analyzer::FileAnalysis>> {
        // Need the parsed doc both for the analyzer and as the cache key.
        let doc = self.get_doc_salsa(uri)?;
        let source = doc.source_arc();

        // Warm path: same text `Arc` and same declaration version.
        let cur_ver = self.caches.decl_version();
        if let Some(entry) = self.caches.analysis_cache.get(uri)
            && Arc::ptr_eq(&entry.0, &source)
            && entry.1 == cur_ver
        {
            return Some(Arc::clone(&entry.2));
        }

        let php_version = self.with_host(|h| self.workspace.php_version(h.db()));
        let session = self.analysis_session(php_version);
        let file: Arc<str> = Arc::from(uri.as_str());
        {
            let _s = tracing::debug_span!("session.ingest_file").entered();
            session.ingest_file(file.clone(), source.clone());
        }
        // Pre-ingest autoload.files helpers (e.g. tap(), class_uses_recursive()
        // in Laravel) so mir sees their function definitions before analyzing
        // the current file. ingest_file is idempotent — already-ingested files
        // are skipped cheaply by the session's internal content cache.
        {
            let autoload_uris = self.autoload_uris.read().unwrap().clone();
            for auri in &autoload_uris {
                // Only helpers whose text is already in `text_cache` are ingested.
                if let Some(atext) = self.caches.text_cache.get(auri).map(|t| Arc::clone(&*t)) {
                    let afile: Arc<str> = Arc::from(auri.as_str());
                    session.ingest_file(afile, atext);
                }
            }
        }
        // Pre-load every class-typed reference via PSR-4 before FileAnalyzer
        // runs. Although mir 0.45.0 added priority_index_for_ast (called inside
        // FileAnalyzer::analyze), it does not resolve bare same-namespace refs
        // (e.g. `extends Base` inside `namespace App;` → App\Base) or
        // use-imported names in `implements` clauses. Without this block, those
        // cases produce spurious UndefinedClass.
        //
        // TODO: upstream — extend mir's collect_class_refs_from_ast to cover
        // same-namespace bare refs and use-imported implements entries so this
        // pre-load can be removed.
        {
            let _s = tracing::debug_span!("session.lazy_load_imports").entered();
            let fqns = crate::references::collect_referenced_class_fqns(&doc);
            for fqcn in &fqns {
                // The result of `load_class` is deliberately ignored.
                let _ = session.load_class(fqcn);
            }
        }
        let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
        let owned_program = php_ast::owned::to_owned_program(doc.program());
        let analysis = {
            let _s = tracing::debug_span!("FileAnalyzer::analyze").entered();
            let analyzer = mir_analyzer::FileAnalyzer::new(&session);
            Arc::new(analyzer.analyze(file.clone(), doc.source(), &owned_program, &source_map))
        };
        // Compare the new FileIndex against the stored fingerprint. If
        // declarations changed (or this is the first analysis), bump
        // `decl_version` so other files' cache entries become stale. Body-only
        // edits leave the counter unchanged, allowing sibling files to be
        // served from cache on the next request.
        let new_index = self.get_index_salsa(uri);
        let old_fp = self
            .caches
            .decl_fingerprints
            .get(uri)
            .map(|e| Arc::clone(&*e));
        let decl_changed = match (&old_fp, &new_index) {
            (Some(old), Some(new)) => **old != **new,
            (None, Some(_)) => true,
            _ => false,
        };
        if decl_changed {
            if let Some(idx) = new_index {
                self.caches.decl_fingerprints.insert(uri.clone(), idx);
            }
            self.caches.bump_decl_version();
        }
        // Re-read the version after the possible bump so this entry is stored
        // under the version that is now current.
        let ver = self.caches.decl_version();
        self.caches
            .analysis_cache
            .insert(uri.clone(), (source, ver, Arc::clone(&analysis)));
        Some(analysis)
    }
818
819    /// Returns `(uri, doc)` for files currently open in the editor.
820    ///
821    /// Resolve `open_urls` (from `Backend::open_urls()`) to parsed docs.
822    /// Files not mirrored in the salsa layer are filtered out silently.
823    pub fn docs_for(&self, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
824        open_urls
825            .iter()
826            .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
827            .collect()
828    }
829
830    /// `(primary, doc)` first, then every other open file's parsed doc.
831    /// The `open_urls` slice should include `uri` — this helper filters it out.
832    pub fn doc_with_others(
833        &self,
834        uri: &Url,
835        doc: Arc<ParsedDoc>,
836        open_urls: &[Url],
837    ) -> Vec<(Url, Arc<ParsedDoc>)> {
838        let mut result = vec![(uri.clone(), doc)];
839        result.extend(self.other_docs(uri, open_urls));
840        result
841    }
842
843    /// Parsed docs for every entry in `open_urls` except `uri`.
844    pub fn other_docs(&self, uri: &Url, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
845        open_urls
846            .iter()
847            .filter(|u| *u != uri)
848            .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
849            .collect()
850    }
851
852    /// Compact symbol index for every mirrored file.
853    pub fn all_indexes(&self) -> Vec<(Url, Arc<FileIndex>)> {
854        self.get_workspace_index_salsa().files.clone()
855    }
856
857    /// Store a lazily-loaded vendor `FileIndex` in the session cache.
858    /// Only call this for files that are not part of the normal workspace scan
859    /// (i.e. vendor files loaded on-demand by PSR-4 navigation).
860    pub fn cache_vendor_index(&self, uri: Url, index: Arc<FileIndex>) {
861        self.vendor_index_cache.insert(uri, index);
862    }
863
864    /// Retrieve a previously cached vendor `FileIndex`.
865    pub fn get_vendor_index(&self, uri: &Url) -> Option<Arc<FileIndex>> {
866        self.vendor_index_cache.get(uri).map(|e| Arc::clone(&*e))
867    }
868
869    /// Same as `all_indexes` but excludes `uri`.
870    pub fn other_indexes(&self, uri: &Url) -> Vec<(Url, Arc<FileIndex>)> {
871        self.get_workspace_index_salsa()
872            .files
873            .iter()
874            .filter(|(u, _)| u != uri)
875            .cloned()
876            .collect()
877    }
878
879    /// Parsed documents for every mirrored file (open or background-indexed).
880    /// Suitable for full-scan operations: find-references, rename,
881    /// call_hierarchy, code_lens.
882    pub fn all_docs_for_scan(&self) -> Vec<(Url, Arc<ParsedDoc>)> {
883        let urls: Vec<Url> = self
884            .file_texts
885            .iter()
886            .filter(|e| !self.deleted_uris.contains(e.key()))
887            .map(|e| e.key().clone())
888            .collect();
889        urls.into_iter()
890            .filter_map(|u| self.get_doc_salsa(&u).map(|d| (u, d)))
891            .collect()
892    }
893
894    /// Parsed documents limited to files whose raw source text contains `word`.
895    ///
896    /// Prefilters via [`Self::text_cache`] (a cheap substring scan on the raw
897    /// `Arc<str>` already in memory) before calling [`Self::get_doc_salsa`],
898    /// which triggers a salsa parse for files not yet in the AST cache.  This
899    /// means only candidate files are ever parsed — the key win over
900    /// [`all_docs_for_scan`] for find-references, which otherwise parses the
901    /// entire workspace before the memchr gate in `find_references_inner` fires.
902    ///
903    /// Files whose text is not yet in `text_cache` are included conservatively
904    /// (safe superset — never produces false negatives).
905    pub fn candidate_docs_for(&self, word: &str) -> Vec<(Url, Arc<ParsedDoc>)> {
906        let candidate_urls: Vec<Url> = self
907            .file_texts
908            .iter()
909            .filter(|e| !self.deleted_uris.contains(e.key()))
910            .filter(|e| {
911                self.caches
912                    .text_cache
913                    .get(e.key())
914                    .map(|src| src.contains(word))
915                    .unwrap_or(true)
916            })
917            .map(|e| e.key().clone())
918            .collect();
919        candidate_urls
920            .into_iter()
921            .filter_map(|u| self.get_doc_salsa(&u).map(|d| (u, d)))
922            .collect()
923    }
924
925    /// URLs of files whose raw source text contains `word`. No parsing.
926    ///
927    /// Used to scope [`ensure_files_ingested`] for method references: only
928    /// files that mention the method name by text need mir Pass 2 analysis.
929    pub fn candidate_urls_mentioning(&self, word: &str) -> Vec<Url> {
930        self.file_texts
931            .iter()
932            .filter(|e| !self.deleted_uris.contains(e.key()))
933            .filter(|e| {
934                self.caches
935                    .text_cache
936                    .get(e.key())
937                    .map(|src| src.contains(word))
938                    .unwrap_or(true)
939            })
940            .map(|e| e.key().clone())
941            .collect()
942    }
943
944    /// Run Pass 1 + Pass 2 analysis on the given files only.
945    ///
946    /// Scoped alternative to [`ensure_all_files_ingested`] used by
947    /// `textDocument/references` for method symbols: only files that textually
948    /// mention the method name need to be analyzed, cutting the Pass-2 cost
949    /// from O(workspace) to O(candidates).
950    ///
951    /// Uses `BatchFileAnalyzer` so Pass 2 runs in parallel across rayon threads,
952    /// cutting wall time from O(N × per-file) to O(N/cores × per-file).
953    pub fn ensure_files_ingested(&self, urls: &[Url]) {
954        let php_version = self.workspace_php_version();
955        let session = self.analysis_session(php_version);
956
957        // Pass 1: ingest all files (sequential — session serialises writes internally).
958        let parsed_files: Vec<mir_analyzer::ParsedFile> = urls
959            .iter()
960            .filter_map(|uri| {
961                let doc = self.get_doc_salsa(uri)?;
962                let file: Arc<str> = Arc::from(uri.as_str());
963                session.ingest_file(file.clone(), doc.source_arc());
964                let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
965                let owned_program = php_ast::owned::to_owned_program(doc.program());
966                Some(mir_analyzer::ParsedFile::new(
967                    file,
968                    doc.source_arc(),
969                    owned_program,
970                    source_map,
971                ))
972            })
973            .collect();
974
975        // Pass 2: analyze in parallel via rayon — each worker gets its own db clone.
976        let batch = mir_analyzer::BatchFileAnalyzer::new(&session);
977        batch.analyze_batch(parsed_files);
978    }
979}
980
981// `warm_file_refs_parallel` removed: the analyzer-side reference index is
982// now owned by `AnalysisSession` and warmed by `ingest_file`. This salsa-side
983// helper has no counterpart in the new architecture.
984
985#[cfg(test)]
986mod tests {
987    use super::*;
988
989    fn uri(path: &str) -> Url {
990        Url::parse(&format!("file://{path}")).unwrap()
991    }
992
993    /// Phase E4: open-file state lives on `Backend`, not `DocumentStore`.
994    /// Tests that need to simulate "file is open" just mirror the text into
995    /// the salsa input — the open/closed distinction is enforced by the
996    /// caller (Backend) in production.
997    fn open(store: &DocumentStore, u: Url, text: String) {
998        store.mirror_text(&u, &text);
999    }
1000
1001    // Removed `salsa_codebase_aggregates_all_files`: the salsa-side codebase
1002    // aggregation was deleted with the mir 0.22 migration. Equivalent
1003    // behaviour is now covered by mir-analyzer's own session tests.
1004
1005    #[test]
1006    fn index_registers_file_in_salsa() {
1007        let store = DocumentStore::new();
1008        store.ingest(uri("/lib.php"), "<?php\nfunction lib_fn() {}");
1009        let idx = store.get_index_salsa(&uri("/lib.php")).unwrap();
1010        assert_eq!(idx.functions.len(), 1);
1011        assert_eq!(idx.functions[0].name, "lib_fn".into());
1012    }
1013
1014    #[test]
1015    fn remove_hides_file_from_index() {
1016        let store = DocumentStore::new();
1017        let u = uri("/lib.php");
1018        store.ingest(u.clone(), "<?php");
1019        store.remove(&u);
1020        assert!(store.get_index_salsa(&u).is_none());
1021    }
1022
1023    #[test]
1024    fn remove_and_reopen_reuses_source_file_handle() {
1025        let store = DocumentStore::new();
1026        let u = uri("/lib.php");
1027        store.ingest(u.clone(), "<?php");
1028        let ft_before = store.source_file(&u).unwrap();
1029        store.remove(&u);
1030        assert!(
1031            store.source_file(&u).is_none(),
1032            "deleted file should be hidden"
1033        );
1034        store.mirror_text(&u, "<?php");
1035        let ft_after = store.source_file(&u).unwrap();
1036        assert!(
1037            ft_before == ft_after,
1038            "reopen must reuse the same FileText handle"
1039        );
1040    }
1041
1042    #[test]
1043    fn delete_reopen_churn_does_not_amplify_salsa_inputs() {
1044        let store = DocumentStore::new();
1045        let uris: Vec<Url> = (0..20).map(|i| uri(&format!("/churn/f{i}.php"))).collect();
1046        for u in &uris {
1047            store.ingest(u.clone(), "<?php class A {}");
1048        }
1049        let count_before = store.source_files_len();
1050        for _ in 0..10 {
1051            for u in &uris {
1052                store.remove(u);
1053            }
1054            for u in &uris {
1055                store.ingest(u.clone(), "<?php class A {}");
1056            }
1057        }
1058        assert_eq!(
1059            store.source_files_len(),
1060            count_before,
1061            "delete-reopen cycles must not create new salsa inputs (L1-B regression guard)"
1062        );
1063    }
1064
1065    #[test]
1066    fn all_indexes_includes_every_mirrored_file() {
1067        let store = DocumentStore::new();
1068        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
1069        store.ingest(uri("/b.php"), "<?php\nfunction b() {}");
1070        assert_eq!(store.all_indexes().len(), 2);
1071    }
1072
1073    #[test]
1074    fn other_indexes_excludes_current_uri() {
1075        let store = DocumentStore::new();
1076        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
1077        open(&store, uri("/b.php"), "<?php\nfunction b() {}".to_string());
1078        assert_eq!(store.other_indexes(&uri("/a.php")).len(), 1);
1079    }
1080
1081    #[test]
1082    fn other_docs_excludes_current_uri() {
1083        let store = DocumentStore::new();
1084        let ua = uri("/a.php");
1085        let ub = uri("/b.php");
1086        open(&store, ua.clone(), "<?php\nfunction a() {}".to_string());
1087        open(&store, ub.clone(), "<?php\nfunction b() {}".to_string());
1088        let open_urls = vec![ua.clone(), ub];
1089        assert_eq!(store.other_docs(&ua, &open_urls).len(), 1);
1090    }
1091
1092    #[test]
1093    fn evict_token_cache_removes_entry() {
1094        let store = DocumentStore::new();
1095        let u = uri("/a.php");
1096        open(&store, u.clone(), "<?php".to_string());
1097        store.store_token_cache(&u, "id1".to_string(), Arc::new(vec![]));
1098        assert!(store.get_token_cache(&u, "id1").is_some());
1099        store.evict_token_cache(&u);
1100        assert!(store.get_token_cache(&u, "id1").is_none());
1101    }
1102
1103    #[test]
1104    fn index_populates_file_index_with_symbols() {
1105        let store = DocumentStore::new();
1106        store.ingest(uri("/a.php"), "<?php\nfunction hello() {}");
1107        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
1108        assert_eq!(idx.functions.len(), 1);
1109        assert_eq!(idx.functions[0].name, "hello".into());
1110    }
1111
1112    #[test]
1113    fn open_populates_file_index_with_symbols() {
1114        let store = DocumentStore::new();
1115        open(&store, uri("/a.php"), "<?php\nclass Foo {}".to_string());
1116        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
1117        assert_eq!(idx.classes.len(), 1);
1118        assert_eq!(idx.classes[0].name, "Foo".into());
1119    }
1120
1121    // ── Mirror invariants ────────────────────────────────────────────────
1122    //
1123    // Every mutation path that changes file text must keep the salsa layer
1124    // consistent. These tests walk a set-edit-reopen cycle and assert that
1125    // the salsa-derived `FileIndex` reflects the latest text at each step.
1126
1127    fn names_of(idx: &FileIndex) -> Vec<String> {
1128        let mut out: Vec<String> = idx.classes.iter().map(|c| c.name.to_string()).collect();
1129        out.extend(idx.functions.iter().map(|f| f.name.to_string()));
1130        out.sort();
1131        out
1132    }
1133
1134    fn salsa_index_names(store: &DocumentStore, url: &Url) -> Vec<String> {
1135        store
1136            .snapshot_query_file_index(url)
1137            .map(|idx| names_of(&idx))
1138            .unwrap_or_default()
1139    }
1140
1141    #[test]
1142    fn mirror_tracks_repeated_edits() {
1143        let store = DocumentStore::new();
1144        let u = uri("/mirror.php");
1145
1146        open(&store, u.clone(), "<?php\nclass A {}".to_string());
1147        assert_eq!(salsa_index_names(&store, &u), vec!["A".to_string()]);
1148
1149        open(
1150            &store,
1151            u.clone(),
1152            "<?php\nclass A {}\nclass B {}".to_string(),
1153        );
1154        assert_eq!(
1155            salsa_index_names(&store, &u),
1156            vec!["A".to_string(), "B".to_string()]
1157        );
1158
1159        open(&store, u.clone(), "<?php\nfunction greet() {}".to_string());
1160        assert_eq!(salsa_index_names(&store, &u), vec!["greet".to_string()]);
1161    }
1162
1163    #[test]
1164    fn mirror_tracks_ingest_and_ingest_from_doc() {
1165        let store = DocumentStore::new();
1166
1167        // Background `index(url, text)` path.
1168        let u1 = uri("/bg1.php");
1169        store.ingest(u1.clone(), "<?php\nclass Bg1 {}");
1170        assert_eq!(salsa_index_names(&store, &u1), vec!["Bg1".to_string()]);
1171
1172        // `ingest_from_doc(url, &doc)` path (workspace-scan Phase 2).
1173        let u2 = uri("/bg2.php");
1174        let doc = crate::analysis::diagnostics::parse_document_no_diags(
1175            "<?php\nclass Bg2 {}\nfunction f() {}",
1176        );
1177        store.ingest_from_doc(u2.clone(), &doc);
1178        assert_eq!(
1179            salsa_index_names(&store, &u2),
1180            vec!["Bg2".to_string(), "f".to_string()]
1181        );
1182    }
1183
1184    /// G3: confirms the `parsed_cache` actually hits — two consecutive
1185    /// `get_doc_salsa` calls on unchanged text return the same `Arc`
1186    /// (pointer equality), and an edit forces a miss that produces a
1187    /// different `Arc`.
1188    /// parsed_cache must stay bounded — inserting more than
1189    /// `PARSED_CACHE_CAP` unique URLs must not cause unbounded growth.
1190    /// Eviction is probabilistic, so we only assert the bound, not which
1191    /// Seeding a cached index for a URL that was never mirrored is a no-op
1192    /// (returns `false`) — avoids silently allocating SourceFiles outside
1193    /// `mirror_text`'s control.
1194    #[test]
1195    fn seed_cached_index_noops_for_unknown_uri() {
1196        let store = DocumentStore::new();
1197        let u = uri("/never_mirrored.php");
1198        let index = Arc::new(crate::index::file_index::FileIndex::default());
1199        assert!(!store.seed_cached_index(&u, index));
1200    }
1201
1202    /// entries survive.
1203    #[test]
1204    fn parsed_cache_stays_bounded_under_many_inserts() {
1205        let store = DocumentStore::new();
1206        use crate::document::cache_registry::PARSED_CACHE_CAP;
1207        let overflow = PARSED_CACHE_CAP + 100;
1208        for i in 0..overflow {
1209            let u = uri(&format!("/cap/file{i}.php"));
1210            store.ingest(u.clone(), "<?php\nclass A {}");
1211            // Force a parsed_cache insert via get_doc_salsa.
1212            let _ = store.get_doc_salsa(&u);
1213        }
1214        assert!(
1215            store.caches.parsed_cache.len() <= PARSED_CACHE_CAP,
1216            "parsed_cache grew to {} entries (cap {})",
1217            store.caches.parsed_cache.len(),
1218            PARSED_CACHE_CAP
1219        );
1220    }
1221
1222    #[test]
1223    fn get_doc_salsa_cache_hits_across_calls() {
1224        let store = DocumentStore::new();
1225        let u = uri("/g3_cache.php");
1226        open(&store, u.clone(), "<?php\nclass G3 {}".to_string());
1227
1228        let a = store.get_doc_salsa(&u).unwrap();
1229        let b = store.get_doc_salsa(&u).unwrap();
1230        assert!(
1231            Arc::ptr_eq(&a, &b),
1232            "parsed_cache hit should yield the same Arc across calls"
1233        );
1234
1235        open(&store, u.clone(), "<?php\nclass G3b {}".to_string());
1236        let c = store.get_doc_salsa(&u).unwrap();
1237        assert!(
1238            !Arc::ptr_eq(&a, &c),
1239            "edit should invalidate the parsed_cache entry"
1240        );
1241    }
1242
1243    #[test]
1244    fn get_doc_salsa_returns_some_for_mirrored_files() {
1245        // Phase E4: `get_doc_salsa` no longer gates on open-state. The
1246        // open/closed distinction now lives on `Backend::get_doc`.
1247        let store = DocumentStore::new();
1248        let u = uri("/e4_doc.php");
1249        store.ingest(u.clone(), "<?php\nclass P {}");
1250        assert!(store.get_doc_salsa(&u).is_some());
1251    }
1252
1253    #[test]
1254    fn get_salsa_accessors_return_none_for_unknown_uri() {
1255        let store = DocumentStore::new();
1256        let u = uri("/never-seen.php");
1257        assert!(store.get_doc_salsa(&u).is_none());
1258        assert!(store.get_index_salsa(&u).is_none());
1259    }
1260
1261    /// Phase E1: concurrent readers and writers must not deadlock, panic, or
1262    /// return stale data. Writers briefly bump inputs while readers are
1263    /// running on cloned snapshots; any `salsa::Cancelled` raised on the
1264    /// reader side must be caught and retried by `snapshot_query`.
1265    ///
1266    /// The salsa surface (`get_doc_salsa`, `get_index_salsa`) is protected by
1267    /// `snapshot_query`'s last-resort host-lock fallback.
1268    #[test]
1269    fn concurrent_reads_and_writes_do_not_panic() {
1270        use std::sync::Arc;
1271        use std::thread;
1272        use std::time::{Duration, Instant};
1273
1274        let store = Arc::new(DocumentStore::new());
1275        let urls: Vec<Url> = (0..8).map(|i| uri(&format!("/f{i}.php"))).collect();
1276        for (i, u) in urls.iter().enumerate() {
1277            open(&store, u.clone(), format!("<?php\nclass C{i} {{}}"));
1278        }
1279
1280        let deadline = Instant::now() + Duration::from_millis(400);
1281        let mut handles = Vec::new();
1282
1283        // Writer thread: keep bumping every file's text.
1284        {
1285            let store = Arc::clone(&store);
1286            let urls = urls.clone();
1287            handles.push(thread::spawn(move || {
1288                let mut rev = 0u32;
1289                while Instant::now() < deadline {
1290                    for u in &urls {
1291                        let text = format!("<?php\nclass C{{}}\n// rev {rev}");
1292                        store.mirror_text(u, &text);
1293                    }
1294                    rev += 1;
1295                }
1296            }));
1297        }
1298
1299        // Reader threads: hammer the salsa accessors.
1300        for _ in 0..4 {
1301            let store = Arc::clone(&store);
1302            let urls = urls.clone();
1303            handles.push(thread::spawn(move || {
1304                while Instant::now() < deadline {
1305                    for u in &urls {
1306                        let _ = store.get_doc_salsa(u);
1307                        let _ = store.get_index_salsa(u);
1308                    }
1309                    // Post mir 0.22: codebase + refs live in the session,
1310                    // not salsa. Concurrent-read smoke is limited to the
1311                    // remaining salsa surface (parsed_doc, file_index).
1312                }
1313            }));
1314        }
1315
1316        for h in handles {
1317            h.join().expect("no panic under concurrent read/write");
1318        }
1319    }
1320
1321    /// PSR-4 lazy-loading: `get_semantic_issues_salsa` must not emit
1322    /// `UndefinedClass` for a class that is PSR-4-resolvable on disk, even
1323    /// when the dependency file is not yet in `source_files`.
1324    #[test]
1325    fn psr4_lazy_load_suppresses_undefined_class() {
1326        let tmp = tempfile::tempdir().unwrap();
1327
1328        // Write Entity.php to disk (not mirrored into the store).
1329        std::fs::create_dir_all(tmp.path().join("src/Model")).unwrap();
1330        std::fs::write(
1331            tmp.path().join("src/Model/Entity.php"),
1332            "<?php\nnamespace App\\Model;\nclass Entity {}\n",
1333        )
1334        .unwrap();
1335
1336        // Write composer.json so Psr4Map::load can build the map.
1337        std::fs::write(
1338            tmp.path().join("composer.json"),
1339            r#"{"autoload":{"psr-4":{"App\\":"src/"}}}"#,
1340        )
1341        .unwrap();
1342
1343        let store = DocumentStore::new();
1344
1345        // Inject a PSR-4 map pointing at the tmp dir.
1346        store
1347            .psr4
1348            .store(Arc::new(crate::lang::autoload::Psr4Map::load(tmp.path())));
1349
1350        // Mirror the consuming file (Entity not yet in source_files).
1351        // Uses Entity as a parameter type hint — the analyzer resolves these
1352        // through use statements, so this exercises the full PSR-4 lazy-load path.
1353        let handler_url = Url::from_file_path(tmp.path().join("src/Service/Handler.php")).unwrap();
1354        store.mirror_text(
1355            &handler_url,
1356            "<?php\nnamespace App\\Service;\nuse App\\Model\\Entity;\nfunction handle(Entity $e): Entity { return $e; }\n",
1357        );
1358
1359        let issues = store.get_semantic_issues_salsa(&handler_url).unwrap();
1360        let undef: Vec<_> = issues
1361            .iter()
1362            .filter(|i| matches!(i.kind, mir_issues::IssueKind::UndefinedClass { .. }))
1363            .collect();
1364        assert!(
1365            undef.is_empty(),
1366            "PSR-4 lazy-loading must prevent UndefinedClass for App\\Model\\Entity; got: {undef:?}"
1367        );
1368    }
1369
/// Regression guard for issue #191.
///
/// Workspace-wide scans (find-references, rename, call-hierarchy) must not
/// re-parse closed/indexed files when they are invoked repeatedly. After a
/// file's `ParsedDoc` exists, every later `all_docs_for_scan()` call has to
/// be served from cache and hand back the very same `Arc<ParsedDoc>`;
/// pointer equality is the proof that no second parse happened.
///
/// Two cache layers are responsible for this:
///   1. `parsed_cache` (cap [`PARSED_CACHE_CAP`]) — a read-through cache
///      validated with `Arc::ptr_eq` on the text Arc.
///   2. The salsa `parsed_doc` memo (`lru = 2048`) — the fallback once
///      `parsed_cache` has evicted an entry.
///
/// With both in place a workspace scan costs O(N) memo lookups rather than
/// O(N) parses, for any workspace whose file count fits the cap.
#[test]
fn all_docs_for_scan_does_not_reparse_indexed_files() {
    const N: usize = 50;
    // Every fixture file lives under the same synthetic directory.
    let scan_uri = |i: usize| uri(&format!("/scan/file{i}.php"));

    let store = DocumentStore::new();
    (0..N).for_each(|i| {
        let source = format!("<?php\nclass C{i} {{}}\nfunction f{i}() {{}}");
        store.ingest(scan_uri(i), &source);
    });

    // Scan twice back to back; both passes must see every ingested file.
    let initial_scan: Vec<_> = store.all_docs_for_scan();
    let repeat_scan: Vec<_> = store.all_docs_for_scan();
    assert_eq!(initial_scan.len(), N);
    assert_eq!(repeat_scan.len(), N);

    // Index the first pass by URL so the second pass can be compared
    // document by document, independent of iteration order.
    let baseline: std::collections::HashMap<Url, Arc<ParsedDoc>> =
        initial_scan.into_iter().collect();
    repeat_scan.into_iter().for_each(|(u, rescanned)| {
        let original = baseline
            .get(&u)
            .expect("second scan returned a URL the first didn't");
        assert!(
            Arc::ptr_eq(original, &rescanned),
            "{u} re-parsed across all_docs_for_scan calls — \
             cache (parsed_cache + salsa parsed_doc memo) failed to hit"
        );
    });

    // Changing one file's text has to invalidate that file's entry only.
    // This pins down self-eviction via Arc::ptr_eq on the text.
    let edited_url = scan_uri(0);
    let pre_edit = store.get_doc_salsa(&edited_url).unwrap();
    store.ingest(edited_url.clone(), "<?php\nclass C0Edited {}");
    let post_edit = store.get_doc_salsa(&edited_url).unwrap();
    assert!(
        !Arc::ptr_eq(&pre_edit, &post_edit),
        "edited file must produce a fresh ParsedDoc"
    );

    // Every untouched file must still resolve to its first-scan document.
    for u in (1..N).map(scan_uri) {
        let original = baseline.get(&u).unwrap();
        let after = store.get_doc_salsa(&u).unwrap();
        assert!(
            Arc::ptr_eq(original, &after),
            "{u} should not have re-parsed because of an unrelated edit"
        );
    }
}
1431
/// Incremental analysis cache behaviour across sibling files.
///
/// Editing only a function body in file A (declarations untouched) must
/// leave `decl_version` alone, so the analysis already cached for file B
/// stays usable. Editing a declaration in A MUST bump the version, which
/// makes B's cached entry stale.
#[test]
fn body_only_edit_does_not_invalidate_sibling_analysis_cache() {
    let store = DocumentStore::new();
    let (ua, ub) = (uri("/ic_a.php"), uri("/ic_b.php"));

    // Warm-up: open and analyze both files so their fingerprints exist.
    open(
        &store,
        ua.clone(),
        String::from("<?php\nfunction a() { return 1; }"),
    );
    open(
        &store,
        ub.clone(),
        String::from("<?php\nfunction b() { return 2; }"),
    );
    let _ = store.cached_analysis(&ua).unwrap();
    let b_baseline = store.cached_analysis(&ub).unwrap();
    let ver_after_warm = store.caches.decl_version();

    // Step 1 — body-only edit to A: the function keeps its name, only the
    // body differs, so the FileIndex is unchanged.
    store.mirror_text(&ua, "<?php\nfunction a() { return 999; }");
    let _ = store.cached_analysis(&ua);
    let ver_after_body_edit = store.caches.decl_version();
    assert_eq!(
        ver_after_warm, ver_after_body_edit,
        "body-only edit must not bump decl_version"
    );

    // B's entry must still be served from cache, and it must be the exact
    // Arc produced during warm-up (ptr-eq source AND same version).
    match store.cached_analysis_if_fresh(&ub) {
        None => panic!("B's analysis should hit cache after body-only edit to A"),
        Some(b_again) => assert!(
            Arc::ptr_eq(&b_baseline, &b_again),
            "B's analysis should be the identical Arc (no re-analysis)"
        ),
    }

    // Step 2 — declaration edit to A: renaming the function changes the
    // FileIndex.
    store.mirror_text(&ua, "<?php\nfunction a_renamed() { return 999; }");
    let _ = store.cached_analysis(&ua);
    let ver_after_decl_edit = store.caches.decl_version();
    assert!(
        ver_after_decl_edit > ver_after_body_edit,
        "declaration edit must bump decl_version (was {ver_after_body_edit}, now {ver_after_decl_edit})"
    );

    // With the version bumped, B's entry is stale and the freshness-checked
    // lookup must come back empty.
    assert!(
        store.cached_analysis_if_fresh(&ub).is_none(),
        "B's analysis should be stale after A's declaration changed"
    );
}
1493}