php_lsp/document_store.rs
1use std::sync::atomic::{AtomicU32, Ordering};
2use std::sync::{Arc, Mutex, RwLock};
3
4use dashmap::DashMap;
5use salsa::Setter;
6use tower_lsp::lsp_types::{SemanticToken, Url};
7
8use crate::ast::ParsedDoc;
9use crate::autoload::Psr4Map;
10use crate::db::analysis::AnalysisHost;
11use crate::db::input::{FileId, SourceFile, Workspace};
12use crate::file_index::FileIndex;
13
/// Upper bound on `parsed_cache` entries. Matched to the `lru = 2048` on
/// `parsed_doc` in `src/db/parse.rs` so this secondary Arc retention can't
/// pin more ASTs alive than salsa's memo already bounds. Exceeding this cap
/// triggers probabilistic eviction of roughly half the entries (see
/// [`DocumentStore::insert_parsed_cache`]).
const PARSED_CACHE_CAP: usize = 2048;
19
pub struct DocumentStore {
    /// Cached semantic tokens per document: (result_id, tokens).
    /// Used to compute incremental deltas for `textDocument/semanticTokens/full/delta`.
    /// Tokens are stored in an `Arc` so the delta-path lookup can hand the
    /// previous snapshot back without cloning the inner Vec. Evicted on
    /// `remove` and explicitly via `evict_token_cache`.
    token_cache: DashMap<Url, (String, Arc<Vec<SemanticToken>>)>,

    // ── Salsa-input storage ────────────────────────────────────────────────
    // Phase E4: `DocumentStore` is now a pure salsa-input wrapper. Open-file
    // state (live text, version token, parse-diagnostics cache) lives on
    // `Backend` in its `open_files` map; the set of files tracked by salsa
    // is exactly `source_files.keys()`.
    /// Mutex — held briefly to clone the database for reads and to mutate
    /// it for writes. Per-thread salsa state (`zalsa_local`) is `!Sync`,
    /// which rules out `RwLock<AnalysisHost>`. Readers instead snapshot the
    /// db (cheap — storage is `Arc<Zalsa>`) and run queries on the clone
    /// with the lock released, giving real read/read parallelism. Writers
    /// during an in-flight read bump the shared revision; the reader raises
    /// `salsa::Cancelled` on its next query call and `snapshot_query` below
    /// retries with a fresh snapshot.
    host: Mutex<AnalysisHost>,
    /// `Url -> SourceFile` lookup. The `SourceFile` is a salsa-id handle; the
    /// underlying input lives in `host.db` for the lifetime of the database
    /// (salsa 0.26 has no input removal — see `remove` for the orphan note).
    source_files: DashMap<Url, SourceFile>,
    /// G2: lock-free mirror of each `SourceFile`'s last-set text. Lets
    /// `mirror_text` dedup repeated no-op updates (common during workspace
    /// scan and `did_open` for already-indexed files) without taking
    /// `host.lock()`. Updated inside the mutex whenever the salsa input is
    /// set, so it is always consistent with the salsa revision for the
    /// purposes of byte-equality comparison.
    text_cache: DashMap<Url, Arc<str>>,
    /// G3: cross-revision read-through cache for `parsed_doc`. Keyed on
    /// `Url`, stored value is `(text_arc, Arc<ParsedDoc>)` — the text Arc
    /// captured at parse time. On read, compare against `text_cache[uri]`
    /// via `Arc::ptr_eq`; a match guarantees the cached ParsedDoc matches
    /// the current salsa revision's text input, so the query can return
    /// without snapshotting the db or invoking salsa at all. A miss
    /// (different pointer, stale or absent entry) falls through to
    /// `snapshot_query`. Self-evicts on text change — no writer-side
    /// invalidation is required, which avoids the TOCTOU window where a
    /// concurrent reader could re-insert a stale entry after a writer's
    /// eviction.
    ///
    /// Size-bounded at [`PARSED_CACHE_CAP`] — see `insert_parsed_cache`.
    /// Without this bound, every workspace file read-through would pin
    /// its bumpalo arena alive regardless of salsa's `lru = 2048` on the
    /// `parsed_doc` memo.
    parsed_cache: DashMap<Url, (Arc<str>, Arc<ParsedDoc>)>,
    /// Monotonic allocator for `FileId`s (one per ever-seen URL). Relaxed
    /// ordering suffices — only uniqueness matters, not ordering.
    next_file_id: AtomicU32,
    /// Workspace salsa input. Tracks the full set of `SourceFile`s that
    /// participate in whole-program queries (`codebase`, `file_refs`).
    /// Re-synced from `source_files` on demand by `sync_workspace_files`.
    workspace: Workspace,
    /// Shared PSR-4 namespace-to-path map. Shared with `Backend` via `Arc`
    /// so updates from `initialized` (when composer.json is loaded) are
    /// visible here without any additional wiring.
    psr4: Arc<RwLock<Psr4Map>>,
    /// mir-analyzer's `AnalysisSession` — owns the workspace MirDb, runs
    /// Pass-2 analysis, and lazy-loads dependencies via PSR-4. Built lazily
    /// on first use; rebuilt when PHP version changes (the cached
    /// `PhpVersion` is the rebuild trigger — see `analysis_session`).
    analysis_session: Mutex<Option<(mir_analyzer::PhpVersion, Arc<mir_analyzer::AnalysisSession>)>>,
}
83
84impl Default for DocumentStore {
85 fn default() -> Self {
86 Self::new()
87 }
88}
89
90impl DocumentStore {
91 pub fn new() -> Self {
92 let host = AnalysisHost::new();
93 let workspace = Workspace::new(
94 host.db(),
95 Arc::<[SourceFile]>::from(Vec::new()),
96 mir_analyzer::PhpVersion::LATEST,
97 );
98 DocumentStore {
99 token_cache: DashMap::new(),
100 host: Mutex::new(host),
101 source_files: DashMap::new(),
102 text_cache: DashMap::new(),
103 parsed_cache: DashMap::new(),
104 next_file_id: AtomicU32::new(0),
105 workspace,
106 psr4: Arc::new(RwLock::new(Psr4Map::empty())),
107 analysis_session: Mutex::new(None),
108 }
109 }
110
111 /// Get or build the `AnalysisSession` for the given PHP version. Rebuilds
112 /// when the version changes (e.g. user flipped config). The session owns
113 /// its own salsa db and AnalysisCache; lazy-loads vendor files via the
114 /// shared PSR-4 map.
115 pub fn analysis_session(
116 &self,
117 php_version: mir_analyzer::PhpVersion,
118 ) -> Arc<mir_analyzer::AnalysisSession> {
119 let mut guard = self.analysis_session.lock().unwrap();
120 if let Some((cached_ver, session)) = guard.as_ref()
121 && *cached_ver == php_version
122 {
123 return Arc::clone(session);
124 }
125 // Build a fresh session. Hand it the shared PSR-4 map so it can
126 // lazy-resolve `UndefinedClass` candidates without us having to mirror
127 // every vendor file upfront.
128 let resolver: Arc<dyn mir_analyzer::ClassResolver> =
129 Arc::new(self.psr4.read().unwrap().clone());
130 let session =
131 Arc::new(mir_analyzer::AnalysisSession::new(php_version).with_class_resolver(resolver));
132 session.ensure_essential_stubs_loaded();
133 *guard = Some((php_version, Arc::clone(&session)));
134 session
135 }
136
137 /// Current PHP version tracked by the workspace input.
138 pub fn workspace_php_version(&self) -> mir_analyzer::PhpVersion {
139 self.with_host(|h| self.workspace.php_version(h.db()))
140 }
141
    /// Return the `Arc<RwLock<Psr4Map>>` so callers can share it.
    ///
    /// `Backend` clones this arc at construction time so writes to the lock
    /// (e.g. loading composer.json on `initialized`) are immediately visible
    /// to `lazy_load_psr4_imports` without extra plumbing.
    pub fn psr4_arc(&self) -> Arc<RwLock<Psr4Map>> {
        Arc::clone(&self.psr4)
    }
149
150 /// Mirror a file's current text into the salsa layer. Creates the
151 /// `SourceFile` input on first sight, otherwise updates `text` on the
152 /// existing input (bumping the salsa revision so downstream queries
153 /// invalidate). Returns the `SourceFile` handle for this `uri`.
154 ///
155 /// B4a: called from every text-changing mutation site. Reads still come
156 /// from the legacy `map` — this mirror is not yet observed by production
157 /// code paths.
158 pub fn mirror_text(&self, uri: &Url, text: &str) -> SourceFile {
159 // G2 fast path: compare against the lock-free text cache. When the
160 // new text byte-matches what we already mirrored, skip the host
161 // mutex entirely. Common during workspace scan + `did_open` for
162 // unchanged files, where most threads would otherwise serialise on
163 // `host.lock()` just to confirm a no-op. Cache is only populated
164 // after the matching `source_files` entry, so a cache hit implies
165 // the handle exists.
166 if let Some(cached) = self.text_cache.get(uri)
167 && **cached == *text
168 && let Some(sf) = self.source_files.get(uri)
169 {
170 return *sf;
171 }
172 self.mirror_text_arc(uri, Arc::from(text))
173 }
174
    /// Like [`mirror_text`] but takes an already-allocated `Arc<str>`.
    ///
    /// Callers that already hold an `Arc<str>` (e.g. `index_from_doc` reusing
    /// `ParsedDoc::source_arc()`) use this to avoid a second allocation and to
    /// ensure `text_cache` and `parsed_cache` hold the same Arc pointer —
    /// enabling `Arc::ptr_eq` validation in `get_parsed_cached`.
    pub fn mirror_text_arc(&self, uri: &Url, text_arc: Arc<str>) -> SourceFile {
        if let Some(existing) = self.source_files.get(uri) {
            let sf = *existing;
            // Drop the DashMap read guard before taking the host mutex so
            // the two locks are never held simultaneously.
            drop(existing);
            // Slow path: another writer may have raced us; re-check inside
            // the mutex. Salsa's `set_text` unconditionally bumps the
            // revision, so every spurious setter invalidates every
            // downstream query.
            let mut host = self.host.lock().unwrap();
            let current: Arc<str> = sf.text(host.db());
            if *current == *text_arc {
                drop(host);
                // Publish the salsa-held Arc (not the caller's equal-but-
                // distinct one) so `Arc::ptr_eq` checks against it succeed.
                self.text_cache.insert(uri.clone(), current);
                return sf;
            }
            sf.set_text(host.db_mut()).to(text_arc.clone());
            // Phase K2: any text change invalidates a previously-seeded
            // cached slice. Clearing it forces the fresh-parse branch of
            // `file_definitions` on the next query, which is correct —
            // the cached slice no longer matches the new text.
            sf.set_cached_slice(host.db_mut()).to(None);
            drop(host);
            self.text_cache.insert(uri.clone(), text_arc);
            sf
        } else {
            // First sight of this URL: allocate a fresh FileId and create
            // the salsa input, then publish handle + text mirrors.
            let id = FileId(self.next_file_id.fetch_add(1, Ordering::Relaxed));
            let uri_arc: Arc<str> = Arc::from(uri.as_str());
            let sf = {
                let host = self.host.lock().unwrap();
                SourceFile::new(host.db(), id, uri_arc, text_arc.clone(), None)
            };
            self.source_files.insert(uri.clone(), sf);
            self.text_cache.insert(uri.clone(), text_arc);
            sf
        }
    }
217
218 /// Return the salsa `SourceFile` handle for a URL, if one exists.
219 pub fn source_file(&self, uri: &Url) -> Option<SourceFile> {
220 self.source_files.get(uri).map(|e| *e)
221 }
222
223 /// Phase K2: pre-seed a `StubSlice` loaded from the on-disk cache
224 /// onto the `SourceFile` input for `uri`. The next `file_definitions`
225 /// call for that file returns the cached slice directly, skipping
226 /// parse + `DefinitionCollector`.
227 ///
228 /// Must be called **before** any `file_definitions(db, sf)` call for
229 /// this file — otherwise salsa has already memoized the fresh-parse
230 /// result and setting `cached_slice` now would only bump the revision
231 /// without actually using the cache. In practice the workspace-scan
232 /// path seeds immediately after `mirror_text` and before any query
233 /// runs.
234 ///
235 /// Returns `false` when `uri` was not mirrored (caller should mirror
236 /// first); returns `true` on success.
237 pub fn seed_cached_slice(
238 &self,
239 uri: &Url,
240 slice: Arc<mir_codebase::storage::StubSlice>,
241 ) -> bool {
242 let Some(sf) = self.source_files.get(uri).map(|e| *e) else {
243 return false;
244 };
245 let mut host = self.host.lock().unwrap();
246 sf.set_cached_slice(host.db_mut()).to(Some(slice));
247 true
248 }
249
250 /// Run `f` with a borrow of the `AnalysisHost`. Used by tests and by the
251 /// upcoming `*_salsa` accessors to query the salsa layer.
252 pub fn with_host<R>(&self, f: impl FnOnce(&AnalysisHost) -> R) -> R {
253 let host = self.host.lock().unwrap();
254 f(&host)
255 }
256
257 /// Phase E1: take a brief lock, clone the salsa database, release the
258 /// lock. Queries then run on the cloned `RootDatabase` without blocking
259 /// writers or other readers. Salsa's `Storage<Self>` is reference-counted
260 /// (`Arc<Zalsa>`), so the clone is cheap — it shares memoized data and
261 /// the cancellation flag with the host's db.
262 fn snapshot_db(&self) -> crate::db::analysis::RootDatabase {
263 let host = self.host.lock().unwrap();
264 host.db().clone()
265 }
266
267 /// Run a query on a fresh snapshot, catching `salsa::Cancelled` (raised
268 /// when a concurrent writer advances the revision) and retrying with a
269 /// new snapshot. Writers hold the mutex only long enough to bump input
270 /// values, so a handful of retries is more than enough in practice; we
271 /// cap at 8 to avoid pathological livelock under sustained write pressure.
272 fn snapshot_query<R>(&self, f: impl Fn(&crate::db::analysis::RootDatabase) -> R + Clone) -> R {
273 use std::panic::AssertUnwindSafe;
274 for _ in 0..8 {
275 let db = self.snapshot_db();
276 let f = f.clone();
277 match salsa::Cancelled::catch(AssertUnwindSafe(move || f(&db))) {
278 Ok(r) => return r,
279 Err(_) => continue,
280 }
281 }
282 // Last-resort attempt: take the mutex for the whole query so no
283 // writer can race us. Much slower, but guaranteed to make progress.
284 let host = self.host.lock().unwrap();
285 f(host.db())
286 }
287
    /// Evict the semantic-tokens cache for `uri`. Called by Backend when a
    /// file is closed; diff-based tokens computed against the old revision
    /// are no longer meaningful. Afterwards `get_token_cache` returns `None`
    /// for `uri` until `store_token_cache` repopulates the entry.
    pub fn evict_token_cache(&self, uri: &Url) {
        self.token_cache.remove(uri);
    }
294
    /// Register a file in the salsa layer without marking it open.
    ///
    /// Delegates to [`Self::mirror_text`], creating or updating the
    /// `SourceFile` input for `uri`. Salsa's `parsed_doc` query parses
    /// lazily on first read; diagnostics are populated by `did_open` when
    /// the editor actually opens the file.
    pub fn index(&self, uri: Url, text: &str) {
        self.mirror_text(&uri, text);
    }
302
    /// Index a file using an already-parsed `ParsedDoc`, avoiding a second parse.
    ///
    /// Prefer this over [`Self::index`] when the caller already has a `ParsedDoc`
    /// (e.g. after running `DefinitionCollector` during workspace scan). Reuses the
    /// `Arc<str>` already owned by `doc` so that `text_cache` and `SourceFile::text`
    /// share the same pointer — enabling the `Arc::ptr_eq` fast path in
    /// `get_parsed_cached` on the first subsequent salsa query, without an extra
    /// `Arc::from(source)` allocation.
    pub fn index_from_doc(&self, uri: Url, doc: &ParsedDoc) {
        self.mirror_text_arc(&uri, doc.source_arc());
    }
314
315 pub fn remove(&self, uri: &Url) {
316 self.token_cache.remove(uri);
317 // Also drop the Url→SourceFile mapping so the file stops contributing
318 // to the workspace codebase query. Salsa inputs themselves remain
319 // alive (salsa doesn't expose input removal in 0.26), but they're
320 // orphaned — no query keys them anymore, and re-opening the file
321 // allocates a fresh SourceFile with a new FileId. The ~40 bytes per
322 // orphan is acceptable; revisit if workspace-churn profiling hurts.
323 self.source_files.remove(uri);
324 self.text_cache.remove(uri);
325 self.parsed_cache.remove(uri);
326 // Also evict the file from the `AnalysisSession`'s internal state so
327 // workspace symbol queries don't keep returning the deleted file's
328 // declarations. Cheap when the session hasn't ingested this file.
329 let guard = self.analysis_session.lock().unwrap();
330 if let Some((_, session)) = guard.as_ref() {
331 session.invalidate_file(uri.as_str());
332 }
333 }
334
335 // ── B4b salsa-backed accessors ─────────────────────────────────────────
336 //
337 // These are additive and not yet called from production code. They go
338 // through the salsa layer — reads run the memoized `parsed_doc` /
339 // `file_index` / `method_returns` queries, parsing only on first access
340 // per revision. B4c will migrate feature modules to call these instead of
341 // the legacy `get_doc` / `get_index`.
342
    /// Salsa-backed parsed document for any mirrored file (open or
    /// background-indexed). Returns `None` only when the file is not known
    /// to the store. Callers that want "only if open" should gate on
    /// `Backend::open_files` at the call site (see `Backend::get_doc`).
    ///
    /// Thin wrapper over [`Self::get_parsed_cached`], which serves from the
    /// G3 read-through cache when possible.
    pub fn get_doc_salsa(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
        self.get_parsed_cached(uri)
    }
352
353 /// Salsa-backed compact symbol index.
354 pub fn get_index_salsa(&self, uri: &Url) -> Option<Arc<FileIndex>> {
355 let sf = self.source_file(uri)?;
356 Some(self.snapshot_query(move |db| crate::db::index::file_index(db, sf).0.clone()))
357 }
358
    /// G3: shared implementation for `get_doc_salsa`.
    /// Tries the `parsed_cache` (lock-free) first; validates via
    /// `Arc::ptr_eq` against the G2 `text_cache` so a concurrent writer
    /// that has already committed a new text input cannot be masked by a
    /// stale cache entry. On miss, captures the text Arc and ParsedDoc
    /// together inside a single `snapshot_query`, then publishes both.
    fn get_parsed_cached(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
        // Fast path: pointer-identity match between the current text mirror
        // and the text captured at parse time proves the cached doc is
        // current — no db snapshot needed.
        if let Some(current_text) = self.text_cache.get(uri)
            && let Some(entry) = self.parsed_cache.get(uri)
            && Arc::ptr_eq(&*current_text, &entry.0)
        {
            return Some(entry.1.clone());
        }

        // Miss: read text + doc atomically from one snapshot so the pair
        // published below is internally consistent.
        let sf = self.source_file(uri)?;
        let (text, doc) = self.snapshot_query(move |db| {
            let text = sf.text(db);
            let doc = crate::db::parse::parsed_doc(db, sf).0.clone();
            (text, doc)
        });
        self.insert_parsed_cache(uri.clone(), text, doc.clone());
        Some(doc)
    }
382
383 /// Publish a fresh `ParsedDoc` into `parsed_cache`, shedding roughly
384 /// half of the cache first if it has grown past [`PARSED_CACHE_CAP`].
385 ///
386 /// Eviction is probabilistic (DashMap iteration order is arbitrary),
387 /// not LRU. That's fine — salsa's own `parsed_doc` memo uses
388 /// `lru = 2048` on hotness-aware storage, so a cache-miss here is
389 /// cheap: the next read goes through `snapshot_query` and
390 /// `parsed_doc`, which still short-circuits on the salsa memo.
391 /// What we're bounding here is the *secondary* Arc retention that
392 /// would otherwise pin every workspace file's bumpalo arena alive
393 /// regardless of salsa's eviction decisions.
394 fn insert_parsed_cache(&self, uri: Url, text: Arc<str>, doc: Arc<ParsedDoc>) {
395 if self.parsed_cache.len() >= PARSED_CACHE_CAP {
396 let drop_target = self.parsed_cache.len() / 2;
397 let mut dropped = 0usize;
398 self.parsed_cache.retain(|_, _| {
399 if dropped < drop_target {
400 dropped += 1;
401 false
402 } else {
403 true
404 }
405 });
406 }
407 self.parsed_cache.insert(uri, (text, doc));
408 }
409
410 /// Refresh `workspace.files` to mirror the current `source_files` set.
411 ///
412 /// Called by `get_codebase_salsa`. Skips the setter when the file list
413 /// hasn't changed — salsa's `set_field` unconditionally bumps revision,
414 /// which would invalidate every downstream query (codebase, file_refs).
415 /// Dedup is essential for memoization across LSP requests.
416 pub fn sync_workspace_files(&self) {
417 let mut files: Vec<SourceFile> = self.source_files.iter().map(|e| *e.value()).collect();
418 files.sort_by_key(|sf| self.with_host(|host| sf.id(host.db()).0));
419 let mut host = self.host.lock().unwrap();
420 let current = self.workspace.files(host.db());
421 if current.len() == files.len() && current.iter().zip(files.iter()).all(|(a, b)| a == b) {
422 return;
423 }
424 let arc: Arc<[SourceFile]> = Arc::from(files);
425 self.workspace.set_files(host.db_mut()).to(arc);
426 }
427
428 /// Update the PHP version tracked by the workspace. Salsa will invalidate
429 /// all `semantic_issues` queries so diagnostics are re-evaluated.
430 /// Skips the setter when the version hasn't changed to avoid spurious
431 /// query invalidation.
432 pub fn set_php_version(&self, version: mir_analyzer::PhpVersion) {
433 let mut host = self.host.lock().unwrap();
434 if self.workspace.php_version(host.db()) == version {
435 return;
436 }
437 self.workspace.set_php_version(host.db_mut()).to(version);
438 }
439
    /// Stub kept for the legacy `RefLookup` closure shape consumed by
    /// `find_references_codebase_with_target`. Always returns empty; the
    /// AST walker handles all reference scanning. Session-backed refs go
    /// through [`Self::session_references_to`] instead.
    ///
    /// The `(file, line, col_start, col_end)`-shaped tuple type is retained
    /// only to keep the closure signature compatible.
    pub fn get_symbol_refs_salsa(&self, _key: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
        Vec::new()
    }
447
448 /// Session-backed workspace reference lookup. Returns `(file, line, col)`
449 /// locations for every occurrence of `symbol` in the files that the
450 /// `AnalysisSession` has ingested so far. The session's reference index
451 /// is built incrementally during `ingest_file`, so refs for files the
452 /// session hasn't seen yet (background-indexed but never opened) won't
453 /// appear here — those are covered by the AST-walker fallback in the
454 /// references handler.
455 ///
456 /// Returns LSP-style 0-based line/column.
457 pub fn session_references_to(
458 &self,
459 symbol: &mir_analyzer::Symbol,
460 ) -> Vec<(Arc<str>, u32, u32, u32)> {
461 let php_version = self.workspace_php_version();
462 let session = self.analysis_session(php_version);
463 session
464 .references_to(symbol)
465 .into_iter()
466 .map(|(file, range)| {
467 // mir uses 1-based lines, 0-based codepoint columns.
468 let line = range.start.line.saturating_sub(1);
469 let col_start = range.start.column;
470 let col_end = range.end.column;
471 (file, line, col_start, col_end)
472 })
473 .collect()
474 }
475
476 /// Phase J: salsa-memoized aggregate workspace index.
477 ///
478 /// Returns the shared `Arc<WorkspaceIndexData>` with flat
479 /// `(Url, Arc<FileIndex>)` list plus pre-built `classes_by_name` and
480 /// `subtypes_of` reverse maps. Used by workspace_symbols,
481 /// prepare_type_hierarchy, supertypes_of, subtypes_of, and
482 /// find_implementations so they don't each rebuild the aggregate per
483 /// request. Invalidates automatically when any file's `file_index`
484 /// changes.
485 pub fn get_workspace_index_salsa(&self) -> Arc<crate::db::workspace_index::WorkspaceIndexData> {
486 self.sync_workspace_files();
487 let ws = self.workspace;
488 self.snapshot_query(move |db| {
489 crate::db::workspace_index::workspace_index(db, ws)
490 .0
491 .clone()
492 })
493 }
494
    /// No-op after the mir 0.22 migration. The session manages its own
    /// warm-up via `ingest_file` / `analyze_dependents_of`; there's nothing
    /// for us to pre-warm here. Kept so existing call sites compile.
    pub fn warm_reference_index(&self) {}
499
500 /// Return the raw source text for `uri` if it has been mirrored into the
501 /// salsa workspace. Used by the references handler to pre-filter session
502 /// results by checking whether a file mentions the owning class name.
503 pub fn source_text(&self, uri: &Url) -> Option<Arc<str>> {
504 let sf = self.source_file(uri)?;
505 Some(self.snapshot_query(move |db| sf.text(db)))
506 }
507
    /// Run Pass 1 + Pass 2 analysis on every mirrored workspace file so that
    /// type-aware queries (e.g. `session.references_to`) see the full workspace.
    ///
    /// Reference locations are only recorded during Pass 2 (`FileAnalyzer::analyze`).
    /// `ingest_file` alone (Pass 1) is not sufficient. Only needed for cross-file
    /// queries like `textDocument/references` that rely on the reference index.
    /// The session's internal cache makes re-analysis of unchanged files cheap.
    pub fn ensure_all_files_ingested(&self) {
        let php_version = self.workspace_php_version();
        let session = self.analysis_session(php_version);
        // Snapshot the URL set up front so no DashMap iteration guard is
        // held while running queries below.
        let urls: Vec<Url> = self.source_files.iter().map(|e| e.key().clone()).collect();
        for uri in &urls {
            // Files whose parse is unavailable are skipped silently.
            let Some(doc) = self.get_doc_salsa(uri) else {
                continue;
            };
            let file: Arc<str> = Arc::from(uri.as_str());
            session.ingest_file(file.clone(), doc.source_arc());
            let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
            let analyzer = mir_analyzer::FileAnalyzer::new(&session);
            analyzer.analyze(file, doc.source(), doc.program(), &source_map);
        }
    }
530
531 /// Salsa-backed per-file method-return-type map.
532 pub fn get_method_returns_salsa(&self, uri: &Url) -> Option<Arc<crate::ast::MethodReturnsMap>> {
533 let sf = self.source_file(uri)?;
534 Some(
535 self.snapshot_query(move |db| {
536 crate::db::method_returns::method_returns(db, sf).0.clone()
537 }),
538 )
539 }
540
    /// Cache the semantic tokens computed for a delta response.
    /// `result_id` is an opaque string (a hash of the token data) returned to
    /// the client; `get_token_cache` only serves the entry back when the
    /// client presents the same id. Replaces any previous entry for `uri`.
    pub fn store_token_cache(&self, uri: &Url, result_id: String, tokens: Arc<Vec<SemanticToken>>) {
        self.token_cache.insert(uri.clone(), (result_id, tokens));
    }
546
547 /// Return the cached tokens if `result_id` matches the stored one.
548 pub fn get_token_cache(&self, uri: &Url, result_id: &str) -> Option<Arc<Vec<SemanticToken>>> {
549 self.token_cache
550 .get(uri)
551 .filter(|e| e.0.as_str() == result_id)
552 .map(|e| Arc::clone(&e.1))
553 }
554
555 /// Before running semantic analysis for `uri`, resolve every `use`-imported
556 /// class through the PSR-4 map and mirror any that are not yet registered.
557 /// This prevents spurious `UndefinedClass` diagnostics when the background
558 /// workspace scan has not yet reached a dependency file.
559 fn lazy_load_psr4_imports(&self, uri: &Url) {
560 let doc = match self.get_doc_salsa(uri) {
561 Some(d) => d,
562 None => return,
563 };
564 let imports = crate::references::collect_file_imports(&doc);
565 if imports.is_empty() {
566 return;
567 }
568 let psr4 = self.psr4.read().unwrap();
569 let paths: Vec<std::path::PathBuf> = imports
570 .values()
571 .filter_map(|fqcn| psr4.resolve(fqcn))
572 .collect();
573 drop(psr4);
574
575 for path in paths {
576 let Ok(dep_url) = Url::from_file_path(&path) else {
577 continue;
578 };
579 if self.source_files.contains_key(&dep_url) {
580 continue;
581 }
582 if let Ok(text) = std::fs::read_to_string(&path) {
583 self.mirror_text(&dep_url, &text);
584 }
585 }
586 }
587
588 /// Raw semantic issues for a file, computed via mir's session-based
589 /// `FileAnalyzer`. The session lazy-loads dependencies via PSR-4 so the
590 /// LSP no longer needs to mirror vendor up-front. Callers apply their
591 /// own `DiagnosticsConfig` filter via
592 /// [`crate::semantic_diagnostics::issues_to_diagnostics`].
593 #[tracing::instrument(skip_all)]
594 pub fn get_semantic_issues_salsa(&self, uri: &Url) -> Option<Arc<[mir_issues::Issue]>> {
595 // Need the parsed doc for the analyzer.
596 let doc = self.get_doc_salsa(uri)?;
597 let php_version = self.with_host(|h| self.workspace.php_version(h.db()));
598 let session = self.analysis_session(php_version);
599
600 let file: Arc<str> = Arc::from(uri.as_str());
601 let source = doc.source_arc();
602 {
603 let _s = tracing::debug_span!("session.ingest_file").entered();
604 session.ingest_file(file.clone(), source);
605 }
606 // Pre-load every imported class via PSR-4 so Pass-2 doesn't emit
607 // spurious `UndefinedClass` for classes that ARE on disk but haven't
608 // been ingested yet. The session's resolver was supplied at
609 // construction time.
610 {
611 let _s = tracing::debug_span!("session.lazy_load_imports").entered();
612 let imports = crate::references::collect_file_imports(&doc);
613 for fqcn in imports.values() {
614 let _ = session.lazy_load_class(fqcn);
615 }
616 // Also pre-load classes referenced via FQN `new \App\Model\Entity()`
617 // which bypass the `use` statement import map.
618 let fqn_refs = crate::references::collect_fqn_new_class_refs(&doc);
619 for fqcn in &fqn_refs {
620 let _ = session.lazy_load_class(fqcn);
621 }
622 }
623 let source_map = php_rs_parser::source_map::SourceMap::new(doc.source());
624 let analysis = {
625 let _s = tracing::debug_span!("FileAnalyzer::analyze").entered();
626 let analyzer = mir_analyzer::FileAnalyzer::new(&session);
627 analyzer.analyze(file.clone(), doc.source(), doc.program(), &source_map)
628 };
629 // Workspace-level class issues for this file (circular inheritance,
630 // override violations, abstract-method gaps).
631 let class_issues = {
632 let _s = tracing::debug_span!("session.class_issues_for").entered();
633 session.class_issues_for(std::slice::from_ref(&file))
634 };
635 let combined: Vec<mir_issues::Issue> = analysis
636 .issues
637 .into_iter()
638 .chain(class_issues.into_iter())
639 .filter(|i| !i.suppressed)
640 .collect();
641 Some(Arc::from(combined))
642 }
643
644 /// Returns `(uri, doc)` for files currently open in the editor.
645 ///
646 /// Resolve `open_urls` (from `Backend::open_urls()`) to parsed docs.
647 /// Files not mirrored in the salsa layer are filtered out silently.
648 pub fn docs_for(&self, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
649 open_urls
650 .iter()
651 .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
652 .collect()
653 }
654
655 /// `(primary, doc)` first, then every other open file's parsed doc.
656 /// The `open_urls` slice should include `uri` — this helper filters it out.
657 pub fn doc_with_others(
658 &self,
659 uri: &Url,
660 doc: Arc<ParsedDoc>,
661 open_urls: &[Url],
662 ) -> Vec<(Url, Arc<ParsedDoc>)> {
663 let mut result = vec![(uri.clone(), doc)];
664 result.extend(self.other_docs(uri, open_urls));
665 result
666 }
667
668 /// Parsed docs for every entry in `open_urls` except `uri`.
669 pub fn other_docs(&self, uri: &Url, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
670 open_urls
671 .iter()
672 .filter(|u| *u != uri)
673 .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
674 .collect()
675 }
676
677 /// Batched salsa fetch for every entry in `open_urls` except `uri`:
678 /// returns each `(uri, ParsedDoc, MethodReturnsMap)` triple in a single
679 /// `snapshot_query` so cancellation retries don't run N times.
680 pub fn other_docs_with_returns(
681 &self,
682 uri: &Url,
683 open_urls: &[Url],
684 ) -> Vec<(Url, Arc<ParsedDoc>, Arc<crate::ast::MethodReturnsMap>)> {
685 let source_files: Vec<(Url, crate::db::input::SourceFile)> = open_urls
686 .iter()
687 .filter(|u| *u != uri)
688 .filter_map(|u| self.source_file(u).map(|sf| (u.clone(), sf)))
689 .collect();
690 if source_files.is_empty() {
691 return Vec::new();
692 }
693 self.snapshot_query(move |db| {
694 source_files
695 .iter()
696 .map(|(u, sf)| {
697 let doc = crate::db::parse::parsed_doc(db, *sf).0.clone();
698 let mr = crate::db::method_returns::method_returns(db, *sf).0.clone();
699 (u.clone(), doc, mr)
700 })
701 .collect()
702 })
703 }
704
705 /// Compact symbol index for every mirrored file.
706 pub fn all_indexes(&self) -> Vec<(Url, Arc<FileIndex>)> {
707 let urls: Vec<Url> = self.source_files.iter().map(|e| e.key().clone()).collect();
708 urls.into_iter()
709 .filter_map(|u| self.get_index_salsa(&u).map(|idx| (u, idx)))
710 .collect()
711 }
712
713 /// Same as `all_indexes` but excludes `uri`.
714 pub fn other_indexes(&self, uri: &Url) -> Vec<(Url, Arc<FileIndex>)> {
715 let urls: Vec<Url> = self
716 .source_files
717 .iter()
718 .filter(|e| e.key() != uri)
719 .map(|e| e.key().clone())
720 .collect();
721 urls.into_iter()
722 .filter_map(|u| self.get_index_salsa(&u).map(|idx| (u, idx)))
723 .collect()
724 }
725
726 /// Parsed documents for every mirrored file (open or background-indexed).
727 /// Suitable for full-scan operations: find-references, rename,
728 /// call_hierarchy, code_lens.
729 pub fn all_docs_for_scan(&self) -> Vec<(Url, Arc<ParsedDoc>)> {
730 let urls: Vec<Url> = self.source_files.iter().map(|e| e.key().clone()).collect();
731 urls.into_iter()
732 .filter_map(|u| self.get_doc_salsa(&u).map(|d| (u, d)))
733 .collect()
734 }
735}
736
// `warm_file_refs_parallel` removed: the analyzer-side reference index is
// now owned by `AnalysisSession` and warmed by `ingest_file`. This salsa-side
// helper has no counterpart in the new architecture. Its old doc is kept
// below for context, demoted to `//` so the dangling `///` comment no longer
// attaches to `mod tests` as that module's doc comment:
//
// Run `file_refs` for every workspace file in parallel.
//
// `db` clones are cheap (they share the same `Arc<Zalsa>` memo store), so
// results computed on any clone are immediately visible to all others at the
// same revision. After this returns, the sequential loop inside `symbol_refs`
// only does cheap memo lookups instead of running `StatementsAnalyzer` on
// every file one-by-one.
//
// Per-task `salsa::Cancelled` is caught and swallowed. If the revision was
// bumped, the main thread's next salsa call inside `symbol_refs` will raise
// `Cancelled` too and `snapshot_query` retries the whole operation from
// scratch. If the revision was not bumped, any file whose task was cancelled
// before completion simply has no memo entry and `symbol_refs`'s sequential
// loop recomputes it.
754
755#[cfg(test)]
756mod tests {
757 use super::*;
758
759 fn uri(path: &str) -> Url {
760 Url::parse(&format!("file://{path}")).unwrap()
761 }
762
763 /// Phase E4: open-file state lives on `Backend`, not `DocumentStore`.
764 /// Tests that need to simulate "file is open" just mirror the text into
765 /// the salsa input — the open/closed distinction is enforced by the
766 /// caller (Backend) in production.
767 fn open(store: &DocumentStore, u: Url, text: String) {
768 store.mirror_text(&u, &text);
769 }
770
771 // Removed `salsa_codebase_aggregates_all_files`: the salsa-side codebase
772 // aggregation was deleted with the mir 0.22 migration. Equivalent
773 // behaviour is now covered by mir-analyzer's own session tests.
774
775 #[test]
776 fn index_registers_file_in_salsa() {
777 let store = DocumentStore::new();
778 store.index(uri("/lib.php"), "<?php\nfunction lib_fn() {}");
779 let idx = store.get_index_salsa(&uri("/lib.php")).unwrap();
780 assert_eq!(idx.functions.len(), 1);
781 assert_eq!(idx.functions[0].name, "lib_fn".into());
782 }
783
784 #[test]
785 fn remove_drops_salsa_input() {
786 let store = DocumentStore::new();
787 store.index(uri("/lib.php"), "<?php");
788 store.remove(&uri("/lib.php"));
789 assert!(store.get_index_salsa(&uri("/lib.php")).is_none());
790 }
791
792 #[test]
793 fn all_indexes_includes_every_mirrored_file() {
794 let store = DocumentStore::new();
795 open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
796 store.index(uri("/b.php"), "<?php\nfunction b() {}");
797 assert_eq!(store.all_indexes().len(), 2);
798 }
799
800 #[test]
801 fn other_indexes_excludes_current_uri() {
802 let store = DocumentStore::new();
803 open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
804 open(&store, uri("/b.php"), "<?php\nfunction b() {}".to_string());
805 assert_eq!(store.other_indexes(&uri("/a.php")).len(), 1);
806 }
807
808 #[test]
809 fn other_docs_excludes_current_uri() {
810 let store = DocumentStore::new();
811 let ua = uri("/a.php");
812 let ub = uri("/b.php");
813 open(&store, ua.clone(), "<?php\nfunction a() {}".to_string());
814 open(&store, ub.clone(), "<?php\nfunction b() {}".to_string());
815 let open_urls = vec![ua.clone(), ub];
816 assert_eq!(store.other_docs(&ua, &open_urls).len(), 1);
817 }
818
819 #[test]
820 fn evict_token_cache_removes_entry() {
821 let store = DocumentStore::new();
822 let u = uri("/a.php");
823 open(&store, u.clone(), "<?php".to_string());
824 store.store_token_cache(&u, "id1".to_string(), Arc::new(vec![]));
825 assert!(store.get_token_cache(&u, "id1").is_some());
826 store.evict_token_cache(&u);
827 assert!(store.get_token_cache(&u, "id1").is_none());
828 }
829
830 #[test]
831 fn index_populates_file_index_with_symbols() {
832 let store = DocumentStore::new();
833 store.index(uri("/a.php"), "<?php\nfunction hello() {}");
834 let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
835 assert_eq!(idx.functions.len(), 1);
836 assert_eq!(idx.functions[0].name, "hello".into());
837 }
838
839 #[test]
840 fn open_populates_file_index_with_symbols() {
841 let store = DocumentStore::new();
842 open(&store, uri("/a.php"), "<?php\nclass Foo {}".to_string());
843 let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
844 assert_eq!(idx.classes.len(), 1);
845 assert_eq!(idx.classes[0].name, "Foo".into());
846 }
847
848 // ── Mirror invariants ────────────────────────────────────────────────
849 //
850 // Every mutation path that changes file text must keep the salsa layer
851 // consistent. These tests walk a set-edit-reopen cycle and assert that
852 // the salsa-derived `FileIndex` reflects the latest text at each step.
853
854 fn names_of(idx: &FileIndex) -> Vec<String> {
855 let mut out: Vec<String> = idx.classes.iter().map(|c| c.name.to_string()).collect();
856 out.extend(idx.functions.iter().map(|f| f.name.to_string()));
857 out.sort();
858 out
859 }
860
861 fn salsa_index_names(store: &DocumentStore, url: &Url) -> Vec<String> {
862 let sf = store.source_file(url).expect("mirror recorded SourceFile");
863 store.with_host(|host| {
864 let arc = crate::db::index::file_index(host.db(), sf);
865 names_of(arc.get())
866 })
867 }
868
869 #[test]
870 fn mirror_tracks_repeated_edits() {
871 let store = DocumentStore::new();
872 let u = uri("/mirror.php");
873
874 open(&store, u.clone(), "<?php\nclass A {}".to_string());
875 assert_eq!(salsa_index_names(&store, &u), vec!["A".to_string()]);
876
877 open(
878 &store,
879 u.clone(),
880 "<?php\nclass A {}\nclass B {}".to_string(),
881 );
882 assert_eq!(
883 salsa_index_names(&store, &u),
884 vec!["A".to_string(), "B".to_string()]
885 );
886
887 open(&store, u.clone(), "<?php\nfunction greet() {}".to_string());
888 assert_eq!(salsa_index_names(&store, &u), vec!["greet".to_string()]);
889 }
890
891 #[test]
892 fn mirror_tracks_index_and_index_from_doc() {
893 let store = DocumentStore::new();
894
895 // Background `index(url, text)` path.
896 let u1 = uri("/bg1.php");
897 store.index(u1.clone(), "<?php\nclass Bg1 {}");
898 assert_eq!(salsa_index_names(&store, &u1), vec!["Bg1".to_string()]);
899
900 // `index_from_doc(url, &doc)` path (workspace-scan Phase 2).
901 let u2 = uri("/bg2.php");
902 let doc =
903 crate::diagnostics::parse_document_no_diags("<?php\nclass Bg2 {}\nfunction f() {}");
904 store.index_from_doc(u2.clone(), &doc);
905 assert_eq!(
906 salsa_index_names(&store, &u2),
907 vec!["Bg2".to_string(), "f".to_string()]
908 );
909 }
910
    // (Misplaced doc, preserved here as plain comments so it no longer
    // attaches as rustdoc to `seed_cached_slice_noops_for_unknown_uri`.)
    //
    // G3 — belongs to `get_doc_salsa_cache_hits_across_calls` below:
    // confirms the `parsed_cache` actually hits — two consecutive
    // `get_doc_salsa` calls on unchanged text return the same `Arc`
    // (pointer equality), and an edit forces a miss that produces a
    // different `Arc`.
    //
    // Bounded-cache — belongs to `parsed_cache_stays_bounded_under_many_inserts`
    // below: parsed_cache must stay bounded — inserting more than
    // `PARSED_CACHE_CAP` unique URLs must not cause unbounded growth.
    // Eviction is probabilistic, so we only assert the bound, not which
    // entries survive.
    //
    // Removed `seed_cached_slice_then_edit_invalidates`: the cached_slice
    // seed path is no longer relevant — mir 0.22's `AnalysisSession` owns
    // the cache lifecycle internally. (Its Phase K2 doc: seed a cached
    // slice through `DocumentStore`, confirm the workspace codebase sees
    // the cached fact, then edit the text and confirm the cache is cleared
    // — exercised `seed_cached_slice` + `mirror_text`'s
    // `set_cached_slice(None)` invalidation together.)
926
927 /// Seeding for a URL that was never mirrored is a no-op (returns `false`)
928 /// — avoids silently allocating SourceFiles outside `mirror_text`'s control.
929 #[test]
930 fn seed_cached_slice_noops_for_unknown_uri() {
931 let store = DocumentStore::new();
932 let u = uri("/never_mirrored.php");
933 let slice = Arc::new(mir_codebase::storage::StubSlice::default());
934 assert!(!store.seed_cached_slice(&u, slice));
935 }
936
937 /// entries survive.
938 #[test]
939 fn parsed_cache_stays_bounded_under_many_inserts() {
940 let store = DocumentStore::new();
941 let overflow = PARSED_CACHE_CAP + 100;
942 for i in 0..overflow {
943 let u = uri(&format!("/cap/file{i}.php"));
944 store.index(u.clone(), "<?php\nclass A {}");
945 // Force a parsed_cache insert via get_doc_salsa.
946 let _ = store.get_doc_salsa(&u);
947 }
948 assert!(
949 store.parsed_cache.len() <= PARSED_CACHE_CAP,
950 "parsed_cache grew to {} entries (cap {})",
951 store.parsed_cache.len(),
952 PARSED_CACHE_CAP
953 );
954 }
955
956 #[test]
957 fn get_doc_salsa_cache_hits_across_calls() {
958 let store = DocumentStore::new();
959 let u = uri("/g3_cache.php");
960 open(&store, u.clone(), "<?php\nclass G3 {}".to_string());
961
962 let a = store.get_doc_salsa(&u).unwrap();
963 let b = store.get_doc_salsa(&u).unwrap();
964 assert!(
965 Arc::ptr_eq(&a, &b),
966 "parsed_cache hit should yield the same Arc across calls"
967 );
968
969 open(&store, u.clone(), "<?php\nclass G3b {}".to_string());
970 let c = store.get_doc_salsa(&u).unwrap();
971 assert!(
972 !Arc::ptr_eq(&a, &c),
973 "edit should invalidate the parsed_cache entry"
974 );
975 }
976
977 #[test]
978 fn get_doc_salsa_returns_some_for_mirrored_files() {
979 // Phase E4: `get_doc_salsa` no longer gates on open-state. The
980 // open/closed distinction now lives on `Backend::get_doc`.
981 let store = DocumentStore::new();
982 let u = uri("/e4_doc.php");
983 store.index(u.clone(), "<?php\nclass P {}");
984 assert!(store.get_doc_salsa(&u).is_some());
985 }
986
987 #[test]
988 fn get_salsa_accessors_return_none_for_unknown_uri() {
989 let store = DocumentStore::new();
990 let u = uri("/never-seen.php");
991 assert!(store.get_doc_salsa(&u).is_none());
992 assert!(store.get_index_salsa(&u).is_none());
993 assert!(store.get_method_returns_salsa(&u).is_none());
994 }
995
    /// Phase E1: concurrent readers and writers must not deadlock, panic, or
    /// return stale data. Writers briefly bump inputs while readers are
    /// running on cloned snapshots; any `salsa::Cancelled` raised on the
    /// reader side must be caught and retried by `snapshot_query`.
    ///
    /// Post mir 0.22: `get_symbol_refs_salsa` is a no-op stub (returns empty
    /// vec), so reader threads cannot exhaust the retry cap or panic on that
    /// path. The remaining salsa surface (`get_doc_salsa`, `get_index_salsa`)
    /// is protected by `snapshot_query`'s last-resort host-lock fallback.
    #[test]
    fn concurrent_reads_and_writes_do_not_panic() {
        use std::sync::Arc;
        use std::thread;
        use std::time::{Duration, Instant};

        let store = Arc::new(DocumentStore::new());
        let urls: Vec<Url> = (0..8).map(|i| uri(&format!("/f{i}.php"))).collect();
        for (i, u) in urls.iter().enumerate() {
            open(&store, u.clone(), format!("<?php\nclass C{i} {{}}"));
        }

        // Shared wall-clock cutoff captured *before* any thread spawns:
        // every worker spins until this instant, bounding the whole test
        // to ~400ms regardless of thread count.
        let deadline = Instant::now() + Duration::from_millis(400);
        let mut handles = Vec::new();

        // Writer thread: keep bumping every file's text.
        {
            let store = Arc::clone(&store);
            let urls = urls.clone();
            handles.push(thread::spawn(move || {
                let mut rev = 0u32;
                while Instant::now() < deadline {
                    for u in &urls {
                        // `{{}}` renders as a literal `{}` body; the trailing
                        // `// rev {rev}` varies per pass so each mirror_text
                        // call writes genuinely different text.
                        let text = format!("<?php\nclass C{{}}\n// rev {rev}");
                        store.mirror_text(u, &text);
                    }
                    rev += 1;
                }
            }));
        }

        // Reader threads: hammer the salsa accessors.
        for _ in 0..4 {
            let store = Arc::clone(&store);
            let urls = urls.clone();
            handles.push(thread::spawn(move || {
                while Instant::now() < deadline {
                    for u in &urls {
                        let _ = store.get_doc_salsa(u);
                        let _ = store.get_index_salsa(u);
                    }
                    // Post mir 0.22: codebase + refs live in the session,
                    // not salsa. Concurrent-read smoke is now limited to the
                    // remaining salsa surface (parsed_doc, file_index).
                    let _ = store.get_symbol_refs_salsa("C0");
                }
            }));
        }

        // Only assertion: every thread reached the deadline without
        // panicking (a panic surfaces here as a join error).
        for h in handles {
            h.join().expect("no panic under concurrent read/write");
        }
    }
1058
1059 /// PSR-4 lazy-loading: `get_semantic_issues_salsa` must not emit
1060 /// `UndefinedClass` for a class that is PSR-4-resolvable on disk, even
1061 /// when the dependency file is not yet in `source_files`.
1062 #[test]
1063 fn psr4_lazy_load_suppresses_undefined_class() {
1064 let tmp = tempfile::tempdir().unwrap();
1065
1066 // Write Entity.php to disk (not mirrored into the store).
1067 std::fs::create_dir_all(tmp.path().join("src/Model")).unwrap();
1068 std::fs::write(
1069 tmp.path().join("src/Model/Entity.php"),
1070 "<?php\nnamespace App\\Model;\nclass Entity {}\n",
1071 )
1072 .unwrap();
1073
1074 // Write composer.json so Psr4Map::load can build the map.
1075 std::fs::write(
1076 tmp.path().join("composer.json"),
1077 r#"{"autoload":{"psr-4":{"App\\":"src/"}}}"#,
1078 )
1079 .unwrap();
1080
1081 let store = DocumentStore::new();
1082
1083 // Inject a PSR-4 map pointing at the tmp dir.
1084 *store.psr4.write().unwrap() = crate::autoload::Psr4Map::load(tmp.path());
1085
1086 // Mirror the consuming file (Entity not yet in source_files).
1087 // Uses Entity as a parameter type hint — the analyzer resolves these
1088 // through use statements, so this exercises the full PSR-4 lazy-load path.
1089 let handler_url = Url::from_file_path(tmp.path().join("src/Service/Handler.php")).unwrap();
1090 store.mirror_text(
1091 &handler_url,
1092 "<?php\nnamespace App\\Service;\nuse App\\Model\\Entity;\nfunction handle(Entity $e): Entity { return $e; }\n",
1093 );
1094
1095 let issues = store.get_semantic_issues_salsa(&handler_url).unwrap();
1096 let undef: Vec<_> = issues
1097 .iter()
1098 .filter(|i| matches!(i.kind, mir_issues::IssueKind::UndefinedClass { .. }))
1099 .collect();
1100 assert!(
1101 undef.is_empty(),
1102 "PSR-4 lazy-loading must prevent UndefinedClass for App\\Model\\Entity; got: {undef:?}"
1103 );
1104 }
1105
1106 /// Issue #191 regression: workspace-wide scans (find-references, rename,
1107 /// call-hierarchy) must not re-parse closed/indexed files on repeated
1108 /// invocations. Once a file's `ParsedDoc` has been produced, subsequent
1109 /// `all_docs_for_scan()` calls must hit the cache and return the same
1110 /// `Arc<ParsedDoc>` (pointer equality), proving no re-parse occurred.
1111 ///
1112 /// The cache layers protecting this are:
1113 /// 1. `parsed_cache` (cap [`PARSED_CACHE_CAP`]) — read-through, validated
1114 /// via `Arc::ptr_eq` on the text Arc.
1115 /// 2. salsa `parsed_doc` memo (`lru = 2048`) — second line of defense
1116 /// when `parsed_cache` evicts.
1117 ///
1118 /// Together they keep every workspace-scan op O(N) memo lookups, never
1119 /// O(N) parses, for any workspace whose file count fits the cap.
1120 #[test]
1121 fn all_docs_for_scan_does_not_reparse_indexed_files() {
1122 let store = DocumentStore::new();
1123 const N: usize = 50;
1124 for i in 0..N {
1125 let u = uri(&format!("/scan/file{i}.php"));
1126 store.index(u, &format!("<?php\nclass C{i} {{}}\nfunction f{i}() {{}}"));
1127 }
1128
1129 let first: Vec<_> = store.all_docs_for_scan();
1130 let second: Vec<_> = store.all_docs_for_scan();
1131 assert_eq!(first.len(), N);
1132 assert_eq!(second.len(), N);
1133
1134 let by_url_first: std::collections::HashMap<Url, Arc<ParsedDoc>> =
1135 first.into_iter().collect();
1136 for (u, doc2) in second {
1137 let doc1 = by_url_first
1138 .get(&u)
1139 .expect("second scan returned a URL the first didn't");
1140 assert!(
1141 Arc::ptr_eq(doc1, &doc2),
1142 "{u} re-parsed across all_docs_for_scan calls — \
1143 cache (parsed_cache + salsa parsed_doc memo) failed to hit"
1144 );
1145 }
1146
1147 // Editing one file's text must invalidate just that file's entry,
1148 // not the rest. This locks in self-eviction via Arc::ptr_eq on text.
1149 let edited_url = uri("/scan/file0.php");
1150 let pre_edit = store.get_doc_salsa(&edited_url).unwrap();
1151 store.index(edited_url.clone(), "<?php\nclass C0Edited {}");
1152 let post_edit = store.get_doc_salsa(&edited_url).unwrap();
1153 assert!(
1154 !Arc::ptr_eq(&pre_edit, &post_edit),
1155 "edited file must produce a fresh ParsedDoc"
1156 );
1157 for i in 1..N {
1158 let u = uri(&format!("/scan/file{i}.php"));
1159 let original = by_url_first.get(&u).unwrap();
1160 let after = store.get_doc_salsa(&u).unwrap();
1161 assert!(
1162 Arc::ptr_eq(original, &after),
1163 "{u} should not have re-parsed because of an unrelated edit"
1164 );
1165 }
1166 }
1167}