// php_lsp/document_store.rs
1use std::sync::atomic::{AtomicU32, Ordering};
2use std::sync::{Arc, Mutex, RwLock};
3
4use dashmap::DashMap;
5use salsa::Setter;
6use tower_lsp::lsp_types::{Diagnostic, SemanticToken, Url};
7
8use crate::ast::ParsedDoc;
9use crate::autoload::Psr4Map;
10use crate::db::analysis::AnalysisHost;
11use crate::db::input::{FileId, SourceFile, Workspace};
12use crate::file_index::FileIndex;
13
/// Upper bound on `parsed_cache` entries. Matched to the `lru = 2048` on
/// `parsed_doc` in `src/db/parse.rs` so the secondary Arc retention can't
/// pin more ASTs alive than salsa's memo already bounds. Exceeding this
/// triggers probabilistic eviction of roughly half the cache (see
/// [`DocumentStore::insert_parsed_cache`]); keep the two constants in sync.
const PARSED_CACHE_CAP: usize = 2048;
19
/// Central per-workspace store bridging the LSP front end and the salsa
/// incremental database: maps `Url`s to salsa `SourceFile` inputs, keeps
/// lock-free read-through caches for text and parsed ASTs, and owns the
/// `Workspace` input used by whole-program queries.
pub struct DocumentStore {
    /// Cached semantic tokens per document: (result_id, tokens).
    /// Used to compute incremental deltas for `textDocument/semanticTokens/full/delta`.
    token_cache: DashMap<Url, (String, Vec<SemanticToken>)>,

    // ── Salsa-input storage ────────────────────────────────────────────────
    // Phase E4: `DocumentStore` is now a pure salsa-input wrapper. Open-file
    // state (live text, version token, parse-diagnostics cache) lives on
    // `Backend` in its `open_files` map; the set of files tracked by salsa
    // is exactly `source_files.keys()`.
    /// Mutex — held briefly to clone the database for reads and to mutate
    /// it for writes. Per-thread salsa state (`zalsa_local`) is `!Sync`,
    /// which rules out `RwLock<AnalysisHost>`. Readers instead snapshot the
    /// db (cheap — storage is `Arc<Zalsa>`) and run queries on the clone
    /// with the lock released, giving real read/read parallelism. Writers
    /// during an in-flight read bump the shared revision; the reader raises
    /// `salsa::Cancelled` on its next query call and `snapshot_query` below
    /// retries with a fresh snapshot.
    host: Mutex<AnalysisHost>,
    /// `Url -> SourceFile` lookup. The `SourceFile` is a salsa-id handle; the
    /// underlying input lives in `host.db` for the lifetime of the database.
    source_files: DashMap<Url, SourceFile>,
    /// G2: lock-free mirror of each `SourceFile`'s last-set text. Lets
    /// `mirror_text` dedup repeated no-op updates (common during workspace
    /// scan and `did_open` for already-indexed files) without taking
    /// `host.lock()`. Updated inside the mutex whenever the salsa input is
    /// set, so it is always consistent with the salsa revision for the
    /// purposes of byte-equality comparison.
    text_cache: DashMap<Url, Arc<str>>,
    /// G3: cross-revision read-through cache for `parsed_doc`. Keyed on
    /// `Url`, stored value is `(text_arc, Arc<ParsedDoc>)` — the text Arc
    /// captured at parse time. On read, compare against `text_cache[uri]`
    /// via `Arc::ptr_eq`; a match guarantees the cached ParsedDoc matches
    /// the current salsa revision's text input, so the query can return
    /// without snapshotting the db or invoking salsa at all. A miss
    /// (different pointer, stale or absent entry) falls through to
    /// `snapshot_query`. Self-evicts on text change — no writer-side
    /// invalidation is required, which avoids the TOCTOU window where a
    /// concurrent reader could re-insert a stale entry after a writer's
    /// eviction.
    ///
    /// Size-bounded at [`PARSED_CACHE_CAP`] — see `insert_parsed_cache`.
    /// Without this bound, every workspace file read-through would pin
    /// its bumpalo arena alive regardless of salsa's `lru = 2048` on the
    /// `parsed_doc` memo.
    parsed_cache: DashMap<Url, (Arc<str>, Arc<ParsedDoc>)>,
    /// Monotonic allocator for `FileId`s (one per ever-seen URL).
    next_file_id: AtomicU32,
    /// Workspace salsa input. Tracks the full set of `SourceFile`s that
    /// participate in whole-program queries (`codebase`, `file_refs`).
    /// Re-synced from `source_files` on demand by `sync_workspace_files`.
    workspace: Workspace,
    /// Shared PSR-4 namespace-to-path map. Shared with `Backend` via `Arc`
    /// so updates from `initialized` (when composer.json is loaded) are
    /// visible here without any additional wiring.
    psr4: Arc<RwLock<Psr4Map>>,
}
77
78impl Default for DocumentStore {
79 fn default() -> Self {
80 Self::new()
81 }
82}
83
84impl DocumentStore {
85 pub fn new() -> Self {
86 let host = AnalysisHost::new();
87 let workspace = Workspace::new(
88 host.db(),
89 Arc::<[SourceFile]>::from(Vec::new()),
90 mir_analyzer::PhpVersion::LATEST,
91 );
92 DocumentStore {
93 token_cache: DashMap::new(),
94 host: Mutex::new(host),
95 source_files: DashMap::new(),
96 text_cache: DashMap::new(),
97 parsed_cache: DashMap::new(),
98 next_file_id: AtomicU32::new(0),
99 workspace,
100 psr4: Arc::new(RwLock::new(Psr4Map::empty())),
101 }
102 }
103
104 /// Return the `Arc<RwLock<Psr4Map>>` so callers can share it.
105 /// `Backend` clones this arc at construction time so writes to the lock
106 /// (e.g. loading composer.json on `initialized`) are immediately visible
107 /// to `lazy_load_psr4_imports` without extra plumbing.
108 pub fn psr4_arc(&self) -> Arc<RwLock<Psr4Map>> {
109 Arc::clone(&self.psr4)
110 }
111
    /// Mirror a file's current text into the salsa layer. Creates the
    /// `SourceFile` input on first sight, otherwise updates `text` on the
    /// existing input (bumping the salsa revision so downstream queries
    /// invalidate). Returns the `SourceFile` handle for this `uri`.
    ///
    /// B4a: called from every text-changing mutation site. Reads still come
    /// from the legacy `map` — this mirror is not yet observed by production
    /// code paths.
    pub fn mirror_text(&self, uri: &Url, text: &str) -> SourceFile {
        // G2 fast path: compare against the lock-free text cache. When the
        // new text byte-matches what we already mirrored, skip the host
        // mutex entirely. Common during workspace scan + `did_open` for
        // unchanged files, where most threads would otherwise serialise on
        // `host.lock()` just to confirm a no-op. Cache is only populated
        // after the matching `source_files` entry, so a cache hit implies
        // the handle exists.
        if let Some(cached) = self.text_cache.get(uri)
            && **cached == *text
            && let Some(sf) = self.source_files.get(uri)
        {
            return *sf;
        }

        let text_arc: Arc<str> = Arc::from(text);
        if let Some(existing) = self.source_files.get(uri) {
            let sf = *existing;
            // Drop the DashMap guard before taking the host mutex so we never
            // hold a shard lock across `host.lock()`.
            drop(existing);
            // Slow path: another writer may have raced us; re-check inside
            // the mutex. Salsa's `set_text` unconditionally bumps the
            // revision, so every spurious setter invalidates every
            // downstream query.
            let mut host = self.host.lock().unwrap();
            let current: Arc<str> = sf.text(host.db());
            if *current == *text_arc {
                drop(host);
                // Refresh the G2 cache with the *db's* Arc so the G3
                // `Arc::ptr_eq` check in `get_parsed_cached` can hit.
                self.text_cache.insert(uri.clone(), current);
                return sf;
            }
            sf.set_text(host.db_mut()).to(text_arc.clone());
            // Phase K2: any text change invalidates a previously-seeded
            // cached slice. Clearing it forces the fresh-parse branch of
            // `file_definitions` on the next query, which is correct —
            // the cached slice no longer matches the new text.
            sf.set_cached_slice(host.db_mut()).to(None);
            drop(host);
            self.text_cache.insert(uri.clone(), text_arc);
            sf
        } else {
            // First sight of this URL: allocate an id and create the input.
            // NOTE(review): two threads mirroring the same brand-new uri can
            // both reach this branch and create two SourceFiles; the later
            // `source_files.insert` wins and the loser is orphaned in the db.
            // Confirm callers serialise first-sight mirroring (workspace scan
            // vs. did_open), or consider the DashMap entry API here.
            let id = FileId(self.next_file_id.fetch_add(1, Ordering::Relaxed));
            let uri_arc: Arc<str> = Arc::from(uri.as_str());
            let sf = {
                let host = self.host.lock().unwrap();
                SourceFile::new(host.db(), id, uri_arc, text_arc.clone(), None)
            };
            self.source_files.insert(uri.clone(), sf);
            self.text_cache.insert(uri.clone(), text_arc);
            sf
        }
    }
171
172 /// Return the salsa `SourceFile` handle for a URL, if one exists.
173 pub fn source_file(&self, uri: &Url) -> Option<SourceFile> {
174 self.source_files.get(uri).map(|e| *e)
175 }
176
177 /// Phase K2: pre-seed a `StubSlice` loaded from the on-disk cache
178 /// onto the `SourceFile` input for `uri`. The next `file_definitions`
179 /// call for that file returns the cached slice directly, skipping
180 /// parse + `DefinitionCollector`.
181 ///
182 /// Must be called **before** any `file_definitions(db, sf)` call for
183 /// this file — otherwise salsa has already memoized the fresh-parse
184 /// result and setting `cached_slice` now would only bump the revision
185 /// without actually using the cache. In practice the workspace-scan
186 /// path seeds immediately after `mirror_text` and before any query
187 /// runs.
188 ///
189 /// Returns `false` when `uri` was not mirrored (caller should mirror
190 /// first); returns `true` on success.
191 pub fn seed_cached_slice(
192 &self,
193 uri: &Url,
194 slice: Arc<mir_codebase::storage::StubSlice>,
195 ) -> bool {
196 let Some(sf) = self.source_files.get(uri).map(|e| *e) else {
197 return false;
198 };
199 let mut host = self.host.lock().unwrap();
200 sf.set_cached_slice(host.db_mut()).to(Some(slice));
201 true
202 }
203
204 /// Run `f` with a borrow of the `AnalysisHost`. Used by tests and by the
205 /// upcoming `*_salsa` accessors to query the salsa layer.
206 pub fn with_host<R>(&self, f: impl FnOnce(&AnalysisHost) -> R) -> R {
207 let host = self.host.lock().unwrap();
208 f(&host)
209 }
210
211 /// Phase E1: take a brief lock, clone the salsa database, release the
212 /// lock. Queries then run on the cloned `RootDatabase` without blocking
213 /// writers or other readers. Salsa's `Storage<Self>` is reference-counted
214 /// (`Arc<Zalsa>`), so the clone is cheap — it shares memoized data and
215 /// the cancellation flag with the host's db.
216 fn snapshot_db(&self) -> crate::db::analysis::RootDatabase {
217 let host = self.host.lock().unwrap();
218 host.db().clone()
219 }
220
221 /// Run a query on a fresh snapshot, catching `salsa::Cancelled` (raised
222 /// when a concurrent writer advances the revision) and retrying with a
223 /// new snapshot. Writers hold the mutex only long enough to bump input
224 /// values, so a handful of retries is more than enough in practice; we
225 /// cap at 8 to avoid pathological livelock under sustained write pressure.
226 fn snapshot_query<R>(&self, f: impl Fn(&crate::db::analysis::RootDatabase) -> R + Clone) -> R {
227 use std::panic::AssertUnwindSafe;
228 for _ in 0..8 {
229 let db = self.snapshot_db();
230 let f = f.clone();
231 match salsa::Cancelled::catch(AssertUnwindSafe(move || f(&db))) {
232 Ok(r) => return r,
233 Err(_) => continue,
234 }
235 }
236 // Last-resort attempt: take the mutex for the whole query so no
237 // writer can race us. Much slower, but guaranteed to make progress.
238 let host = self.host.lock().unwrap();
239 f(host.db())
240 }
241
242 /// Evict the semantic-tokens cache for `uri`. Called by Backend when a
243 /// file is closed; diff-based tokens computed against the old revision
244 /// are no longer meaningful.
245 pub fn evict_token_cache(&self, uri: &Url) {
246 self.token_cache.remove(uri);
247 }
248
249 /// Register a file in the salsa layer without marking it open.
250 ///
251 /// Salsa's `parsed_doc` query parses lazily on first read; diagnostics
252 /// are populated by `did_open` when the editor actually opens the file.
253 pub fn index(&self, uri: Url, text: &str) {
254 self.mirror_text(&uri, text);
255 }
256
257 /// Index a file using an already-parsed `ParsedDoc`, avoiding a second parse.
258 ///
259 /// Prefer this over [`index`] when the caller already has a `ParsedDoc` (e.g.
260 /// after running `DefinitionCollector` during workspace scan).
261 ///
262 /// `_diagnostics` is accepted for call-site compatibility; parse
263 /// diagnostics for background-indexed files are never consulted
264 /// (callers gate on `get_doc_salsa` returning `Some`).
265 pub fn index_from_doc(&self, uri: Url, doc: &ParsedDoc, _diagnostics: Vec<Diagnostic>) {
266 self.mirror_text(&uri, doc.source());
267 }
268
269 pub fn remove(&self, uri: &Url) {
270 self.token_cache.remove(uri);
271 // Also drop the Url→SourceFile mapping so the file stops contributing
272 // to the workspace codebase query. Salsa inputs themselves remain
273 // alive (salsa doesn't expose input removal in 0.26), but they're
274 // orphaned — no query keys them anymore, and re-opening the file
275 // allocates a fresh SourceFile with a new FileId. The ~40 bytes per
276 // orphan is acceptable; revisit if workspace-churn profiling hurts.
277 self.source_files.remove(uri);
278 self.text_cache.remove(uri);
279 self.parsed_cache.remove(uri);
280 }
281
    // ── B4b salsa-backed accessors ─────────────────────────────────────────
    //
    // These are additive and not yet called from production code. They go
    // through the salsa layer — reads run the memoized `parsed_doc` /
    // `file_index` / `method_returns` queries, parsing only on first access
    // per revision. B4c will migrate feature modules to call these instead of
    // the legacy `get_doc` / `get_index`.

    /// Salsa-backed parsed document for any mirrored file (open or
    /// background-indexed). Returns `None` only when the file is not known
    /// to the store. Callers that want "only if open" should gate on
    /// `Backend::open_files` at the call site (see `Backend::get_doc`).
    pub fn get_doc_salsa(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
        self.get_parsed_cached(uri)
    }
299
300 /// Salsa-backed compact symbol index.
301 pub fn get_index_salsa(&self, uri: &Url) -> Option<Arc<FileIndex>> {
302 let sf = self.source_file(uri)?;
303 Some(self.snapshot_query(move |db| crate::db::index::file_index(db, sf).0.clone()))
304 }
305
306 /// G3: shared implementation for `get_doc_salsa`.
307 /// Tries the `parsed_cache` (lock-free) first; validates via
308 /// `Arc::ptr_eq` against the G2 `text_cache` so a concurrent writer
309 /// that has already committed a new text input cannot be masked by a
310 /// stale cache entry. On miss, captures the text Arc and ParsedDoc
311 /// together inside a single `snapshot_query`, then publishes both.
312 fn get_parsed_cached(&self, uri: &Url) -> Option<Arc<ParsedDoc>> {
313 if let Some(current_text) = self.text_cache.get(uri)
314 && let Some(entry) = self.parsed_cache.get(uri)
315 && Arc::ptr_eq(&*current_text, &entry.0)
316 {
317 return Some(entry.1.clone());
318 }
319
320 let sf = self.source_file(uri)?;
321 let (text, doc) = self.snapshot_query(move |db| {
322 let text = sf.text(db);
323 let doc = crate::db::parse::parsed_doc(db, sf).0.clone();
324 (text, doc)
325 });
326 self.insert_parsed_cache(uri.clone(), text, doc.clone());
327 Some(doc)
328 }
329
330 /// Publish a fresh `ParsedDoc` into `parsed_cache`, shedding roughly
331 /// half of the cache first if it has grown past [`PARSED_CACHE_CAP`].
332 ///
333 /// Eviction is probabilistic (DashMap iteration order is arbitrary),
334 /// not LRU. That's fine — salsa's own `parsed_doc` memo uses
335 /// `lru = 2048` on hotness-aware storage, so a cache-miss here is
336 /// cheap: the next read goes through `snapshot_query` and
337 /// `parsed_doc`, which still short-circuits on the salsa memo.
338 /// What we're bounding here is the *secondary* Arc retention that
339 /// would otherwise pin every workspace file's bumpalo arena alive
340 /// regardless of salsa's eviction decisions.
341 fn insert_parsed_cache(&self, uri: Url, text: Arc<str>, doc: Arc<ParsedDoc>) {
342 if self.parsed_cache.len() >= PARSED_CACHE_CAP {
343 let drop_target = self.parsed_cache.len() / 2;
344 let mut dropped = 0usize;
345 self.parsed_cache.retain(|_, _| {
346 if dropped < drop_target {
347 dropped += 1;
348 false
349 } else {
350 true
351 }
352 });
353 }
354 self.parsed_cache.insert(uri, (text, doc));
355 }
356
357 /// Refresh `workspace.files` to mirror the current `source_files` set.
358 ///
359 /// Called by `get_codebase_salsa`. Skips the setter when the file list
360 /// hasn't changed — salsa's `set_field` unconditionally bumps revision,
361 /// which would invalidate every downstream query (codebase, file_refs).
362 /// Dedup is essential for memoization across LSP requests.
363 pub fn sync_workspace_files(&self) {
364 let mut files: Vec<SourceFile> = self.source_files.iter().map(|e| *e.value()).collect();
365 files.sort_by_key(|sf| self.with_host(|host| sf.id(host.db()).0));
366 let mut host = self.host.lock().unwrap();
367 let current = self.workspace.files(host.db());
368 if current.len() == files.len() && current.iter().zip(files.iter()).all(|(a, b)| a == b) {
369 return;
370 }
371 let arc: Arc<[SourceFile]> = Arc::from(files);
372 self.workspace.set_files(host.db_mut()).to(arc);
373 }
374
375 /// Update the PHP version tracked by the workspace. Salsa will invalidate
376 /// all `semantic_issues` queries so diagnostics are re-evaluated.
377 /// Skips the setter when the version hasn't changed to avoid spurious
378 /// query invalidation.
379 pub fn set_php_version(&self, version: mir_analyzer::PhpVersion) {
380 let mut host = self.host.lock().unwrap();
381 if self.workspace.php_version(host.db()) == version {
382 return;
383 }
384 self.workspace.set_php_version(host.db_mut()).to(version);
385 }
386
387 /// Salsa-backed finalized Codebase. Aggregates every known file's
388 /// `StubSlice` via `codebase_from_parts`, memoized by salsa.
389 ///
390 /// Phase C step 3: this runs in parallel with Backend's imperative
391 /// `Arc<Codebase>`. Comparison tests validate parity; readers migrate in
392 /// a follow-up.
393 pub fn get_codebase_salsa(&self) -> Arc<mir_codebase::Codebase> {
394 self.sync_workspace_files();
395 let ws = self.workspace;
396 self.snapshot_query(move |db| crate::db::codebase::codebase(db, ws).0.clone())
397 }
398
399 /// Salsa-backed reference lookup — drop-in replacement for
400 /// `Codebase::get_reference_locations`. First call per `key` runs
401 /// `file_refs` over every workspace file; subsequent calls hit the
402 /// `symbol_refs` memo.
403 pub fn get_symbol_refs_salsa(&self, key: &str) -> Vec<(Arc<str>, u32, u16, u16)> {
404 self.sync_workspace_files();
405 let ws = self.workspace;
406 let key = key.to_string();
407 self.snapshot_query(move |db| {
408 warm_file_refs_parallel(db, ws);
409 crate::db::refs::symbol_refs(db, ws, key.clone())
410 .0
411 .as_ref()
412 .clone()
413 })
414 }
415
416 /// Phase J: salsa-memoized aggregate workspace index.
417 ///
418 /// Returns the shared `Arc<WorkspaceIndexData>` with flat
419 /// `(Url, Arc<FileIndex>)` list plus pre-built `classes_by_name` and
420 /// `subtypes_of` reverse maps. Used by workspace_symbols,
421 /// prepare_type_hierarchy, supertypes_of, subtypes_of, and
422 /// find_implementations so they don't each rebuild the aggregate per
423 /// request. Invalidates automatically when any file's `file_index`
424 /// changes.
425 pub fn get_workspace_index_salsa(&self) -> Arc<crate::db::workspace_index::WorkspaceIndexData> {
426 self.sync_workspace_files();
427 let ws = self.workspace;
428 self.snapshot_query(move |db| {
429 crate::db::workspace_index::workspace_index(db, ws)
430 .0
431 .clone()
432 })
433 }
434
435 /// Phase L: force `file_refs` to run for every workspace file so that
436 /// subsequent `textDocument/references` / `prepare_rename` / call-hierarchy
437 /// lookups hit the memo instead of paying first-call latency.
438 ///
439 /// Uses parallel warming (`warm_file_refs_parallel`) so all `file_refs`
440 /// complete concurrently; `symbol_refs` then only aggregates memos.
441 pub fn warm_reference_index(&self) {
442 self.sync_workspace_files();
443 let ws = self.workspace;
444 let _ = self.snapshot_query(move |db| {
445 warm_file_refs_parallel(db, ws);
446 crate::db::refs::symbol_refs(db, ws, String::from("__phplsp_warmup__"))
447 .0
448 .clone()
449 });
450 }
451
452 /// Phase K2b: run `file_definitions` for `uri` and return the
453 /// resulting `StubSlice`. Used by the workspace-scan write path to
454 /// persist slices to disk after a cache miss.
455 pub fn slice_for(&self, uri: &Url) -> Option<Arc<mir_codebase::storage::StubSlice>> {
456 let sf = self.source_file(uri)?;
457 Some(
458 self.snapshot_query(move |db| {
459 crate::db::definitions::file_definitions(db, sf).0.clone()
460 }),
461 )
462 }
463
464 /// Salsa-backed per-file method-return-type map.
465 pub fn get_method_returns_salsa(&self, uri: &Url) -> Option<Arc<crate::ast::MethodReturnsMap>> {
466 let sf = self.source_file(uri)?;
467 Some(
468 self.snapshot_query(move |db| {
469 crate::db::method_returns::method_returns(db, sf).0.clone()
470 }),
471 )
472 }
473
474 /// Cache the semantic tokens computed for a delta response.
475 /// `result_id` is an opaque string (a hash of the token data) returned to the client.
476 pub fn store_token_cache(&self, uri: &Url, result_id: String, tokens: Vec<SemanticToken>) {
477 self.token_cache.insert(uri.clone(), (result_id, tokens));
478 }
479
480 /// Return the cached tokens if `result_id` matches the stored one.
481 pub fn get_token_cache(&self, uri: &Url, result_id: &str) -> Option<Vec<SemanticToken>> {
482 self.token_cache
483 .get(uri)
484 .filter(|e| e.0.as_str() == result_id)
485 .map(|e| e.1.clone())
486 }
487
488 /// Before running semantic analysis for `uri`, resolve every `use`-imported
489 /// class through the PSR-4 map and mirror any that are not yet registered.
490 /// This prevents spurious `UndefinedClass` diagnostics when the background
491 /// workspace scan has not yet reached a dependency file.
492 fn lazy_load_psr4_imports(&self, uri: &Url) {
493 let doc = match self.get_doc_salsa(uri) {
494 Some(d) => d,
495 None => return,
496 };
497 let imports = crate::references::collect_file_imports(&doc);
498 if imports.is_empty() {
499 return;
500 }
501 let psr4 = self.psr4.read().unwrap();
502 let paths: Vec<std::path::PathBuf> = imports
503 .values()
504 .filter_map(|fqcn| psr4.resolve(fqcn))
505 .collect();
506 drop(psr4);
507
508 for path in paths {
509 let Ok(dep_url) = Url::from_file_path(&path) else {
510 continue;
511 };
512 if self.source_files.contains_key(&dep_url) {
513 continue;
514 }
515 if let Ok(text) = std::fs::read_to_string(&path) {
516 self.mirror_text(&dep_url, &text);
517 }
518 }
519 }
520
521 /// Phase I: salsa-memoized raw semantic issues for a file. Callers apply
522 /// their own `DiagnosticsConfig` filter via
523 /// [`crate::semantic_diagnostics::issues_to_diagnostics`] — keeping the
524 /// filter outside the query preserves memoization across config toggles.
525 pub fn get_semantic_issues_salsa(&self, uri: &Url) -> Option<Arc<[mir_issues::Issue]>> {
526 let sf = self.source_file(uri)?;
527 self.lazy_load_psr4_imports(uri);
528 self.sync_workspace_files();
529 let ws = self.workspace;
530 Some(
531 self.snapshot_query(move |db| {
532 crate::db::semantic::semantic_issues(db, ws, sf).0.clone()
533 }),
534 )
535 }
536
537 /// Returns `(uri, doc)` for files currently open in the editor.
538 ///
539 /// Resolve `open_urls` (from `Backend::open_urls()`) to parsed docs.
540 /// Files not mirrored in the salsa layer are filtered out silently.
541 pub fn docs_for(&self, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
542 open_urls
543 .iter()
544 .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
545 .collect()
546 }
547
548 /// `(primary, doc)` first, then every other open file's parsed doc.
549 /// The `open_urls` slice should include `uri` — this helper filters it out.
550 pub fn doc_with_others(
551 &self,
552 uri: &Url,
553 doc: Arc<ParsedDoc>,
554 open_urls: &[Url],
555 ) -> Vec<(Url, Arc<ParsedDoc>)> {
556 let mut result = vec![(uri.clone(), doc)];
557 result.extend(self.other_docs(uri, open_urls));
558 result
559 }
560
561 /// Parsed docs for every entry in `open_urls` except `uri`.
562 pub fn other_docs(&self, uri: &Url, open_urls: &[Url]) -> Vec<(Url, Arc<ParsedDoc>)> {
563 open_urls
564 .iter()
565 .filter(|u| *u != uri)
566 .filter_map(|u| self.get_doc_salsa(u).map(|d| (u.clone(), d)))
567 .collect()
568 }
569
570 /// Batched salsa fetch for every entry in `open_urls` except `uri`:
571 /// returns each `(uri, ParsedDoc, MethodReturnsMap)` triple in a single
572 /// `snapshot_query` so cancellation retries don't run N times.
573 pub fn other_docs_with_returns(
574 &self,
575 uri: &Url,
576 open_urls: &[Url],
577 ) -> Vec<(Url, Arc<ParsedDoc>, Arc<crate::ast::MethodReturnsMap>)> {
578 let source_files: Vec<(Url, crate::db::input::SourceFile)> = open_urls
579 .iter()
580 .filter(|u| *u != uri)
581 .filter_map(|u| self.source_file(u).map(|sf| (u.clone(), sf)))
582 .collect();
583 if source_files.is_empty() {
584 return Vec::new();
585 }
586 self.snapshot_query(move |db| {
587 source_files
588 .iter()
589 .map(|(u, sf)| {
590 let doc = crate::db::parse::parsed_doc(db, *sf).0.clone();
591 let mr = crate::db::method_returns::method_returns(db, *sf).0.clone();
592 (u.clone(), doc, mr)
593 })
594 .collect()
595 })
596 }
597
598 /// Compact symbol index for every mirrored file.
599 pub fn all_indexes(&self) -> Vec<(Url, Arc<FileIndex>)> {
600 let urls: Vec<Url> = self.source_files.iter().map(|e| e.key().clone()).collect();
601 urls.into_iter()
602 .filter_map(|u| self.get_index_salsa(&u).map(|idx| (u, idx)))
603 .collect()
604 }
605
606 /// Same as `all_indexes` but excludes `uri`.
607 pub fn other_indexes(&self, uri: &Url) -> Vec<(Url, Arc<FileIndex>)> {
608 let urls: Vec<Url> = self
609 .source_files
610 .iter()
611 .filter(|e| e.key() != uri)
612 .map(|e| e.key().clone())
613 .collect();
614 urls.into_iter()
615 .filter_map(|u| self.get_index_salsa(&u).map(|idx| (u, idx)))
616 .collect()
617 }
618
619 /// Parsed documents for every mirrored file (open or background-indexed).
620 /// Suitable for full-scan operations: find-references, rename,
621 /// call_hierarchy, code_lens.
622 pub fn all_docs_for_scan(&self) -> Vec<(Url, Arc<ParsedDoc>)> {
623 let urls: Vec<Url> = self.source_files.iter().map(|e| e.key().clone()).collect();
624 urls.into_iter()
625 .filter_map(|u| self.get_doc_salsa(&u).map(|d| (u, d)))
626 .collect()
627 }
628}
629
630/// Run `file_refs` for every workspace file in parallel.
631///
632/// `db` clones are cheap (they share the same `Arc<Zalsa>` memo store), so
633/// results computed on any clone are immediately visible to all others at the
634/// same revision. After this returns, the sequential loop inside `symbol_refs`
635/// only does cheap memo lookups instead of running `StatementsAnalyzer` on
636/// every file one-by-one.
637///
638/// Per-task `salsa::Cancelled` is caught and swallowed. If the revision was
639/// bumped, the main thread's next salsa call inside `symbol_refs` will raise
640/// `Cancelled` too and `snapshot_query` retries the whole operation from
641/// scratch. If the revision was not bumped, any file whose task was cancelled
642/// before completion simply has no memo entry and `symbol_refs`'s sequential
643/// loop recomputes it.
644fn warm_file_refs_parallel(
645 db: &crate::db::analysis::RootDatabase,
646 ws: crate::db::input::Workspace,
647) {
648 let files: Vec<_> = ws.files(db).iter().copied().collect();
649 // Pre-clone one snapshot per file before entering the scope.
650 // RootDatabase: Send (ZalsaLocal owns its RefCell; Arc<Zalsa> is Sync),
651 // but RootDatabase: !Sync, so we must avoid sharing &RootDatabase across
652 // threads. Collecting owned clones first and moving each into its task
653 // requires only Send, not Sync.
654 let snaps: Vec<crate::db::analysis::RootDatabase> = files.iter().map(|_| db.clone()).collect();
655 rayon::scope(move |s| {
656 for (sf, snap) in files.into_iter().zip(snaps) {
657 s.spawn(move |_| {
658 let _ = salsa::Cancelled::catch(std::panic::AssertUnwindSafe(|| {
659 crate::db::refs::file_refs(&snap, ws, sf);
660 }));
661 });
662 }
663 });
664}
665
666#[cfg(test)]
667mod tests {
668 use super::*;
669
    /// Build a `file://` URL for an absolute test path.
    fn uri(path: &str) -> Url {
        Url::parse(&format!("file://{path}")).unwrap()
    }

    /// Phase E4: open-file state lives on `Backend`, not `DocumentStore`.
    /// Tests that need to simulate "file is open" just mirror the text into
    /// the salsa input — the open/closed distinction is enforced by the
    /// caller (Backend) in production.
    fn open(store: &DocumentStore, u: Url, text: String) {
        store.mirror_text(&u, &text);
    }
681
    #[test]
    fn salsa_codebase_matches_imperative_codebase() {
        // Parity check for Phase C step 3: the salsa-built codebase should
        // contain exactly the same class/interface/function FQNs as one
        // built imperatively via DefinitionCollector against a fresh
        // mir_codebase::Codebase.
        let store = DocumentStore::new();
        let sources = [
            (
                "/a.php",
                "<?php\nnamespace A;\nclass Foo {}\ninterface IX {}",
            ),
            (
                "/b.php",
                "<?php\nnamespace B;\nfunction bar(): int { return 1; }",
            ),
            ("/c.php", "<?php\nnamespace C;\nenum Color { case Red; }"),
        ];
        for (p, src) in &sources {
            open(&store, uri(p), src.to_string());
        }

        // Salsa side: aggregate through the memoized codebase query.
        let salsa_cb = store.get_codebase_salsa();

        // Imperative side: collect definitions file-by-file, then finalize.
        let imperative_cb = mir_codebase::Codebase::new();
        for (p, src) in &sources {
            let (doc, _) = crate::diagnostics::parse_document(src);
            let file: Arc<str> = Arc::from(uri(p).as_str());
            let map = php_rs_parser::source_map::SourceMap::new(src);
            let c =
                mir_analyzer::collector::DefinitionCollector::new(&imperative_cb, file, src, &map);
            let _ = c.collect(doc.program());
        }
        imperative_cb.finalize();

        // Both directions: the two codebases must agree, and the symbols
        // must actually be present (guards against a both-empty false pass).
        for fqn in ["A\\Foo", "A\\IX", "C\\Color"] {
            assert_eq!(
                salsa_cb.type_exists(fqn),
                imperative_cb.type_exists(fqn),
                "parity mismatch on type {fqn}"
            );
            assert!(salsa_cb.type_exists(fqn), "{fqn} missing from salsa cb");
        }
        assert_eq!(
            salsa_cb.function_exists("B\\bar"),
            imperative_cb.function_exists("B\\bar"),
        );
        assert!(salsa_cb.function_exists("B\\bar"));
    }
731
    #[test]
    fn index_registers_file_in_salsa() {
        // Background `index` alone must be enough for `get_index_salsa`.
        let store = DocumentStore::new();
        store.index(uri("/lib.php"), "<?php\nfunction lib_fn() {}");
        let idx = store.get_index_salsa(&uri("/lib.php")).unwrap();
        assert_eq!(idx.functions.len(), 1);
        assert_eq!(idx.functions[0].name, "lib_fn");
    }

    #[test]
    fn remove_drops_salsa_input() {
        // After `remove`, the Url→SourceFile mapping is gone, so the
        // salsa accessor must report the file as unknown.
        let store = DocumentStore::new();
        store.index(uri("/lib.php"), "<?php");
        store.remove(&uri("/lib.php"));
        assert!(store.get_index_salsa(&uri("/lib.php")).is_none());
    }

    #[test]
    fn all_indexes_includes_every_mirrored_file() {
        // Open and background-indexed files both count as "mirrored".
        let store = DocumentStore::new();
        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
        store.index(uri("/b.php"), "<?php\nfunction b() {}");
        assert_eq!(store.all_indexes().len(), 2);
    }

    #[test]
    fn other_indexes_excludes_current_uri() {
        let store = DocumentStore::new();
        open(&store, uri("/a.php"), "<?php\nfunction a() {}".to_string());
        open(&store, uri("/b.php"), "<?php\nfunction b() {}".to_string());
        assert_eq!(store.other_indexes(&uri("/a.php")).len(), 1);
    }

    #[test]
    fn other_docs_excludes_current_uri() {
        let store = DocumentStore::new();
        let ua = uri("/a.php");
        let ub = uri("/b.php");
        open(&store, ua.clone(), "<?php\nfunction a() {}".to_string());
        open(&store, ub.clone(), "<?php\nfunction b() {}".to_string());
        let open_urls = vec![ua.clone(), ub];
        assert_eq!(store.other_docs(&ua, &open_urls).len(), 1);
    }

    #[test]
    fn evict_token_cache_removes_entry() {
        // Round-trip: store under a result_id, confirm the hit, evict,
        // confirm the miss.
        let store = DocumentStore::new();
        let u = uri("/a.php");
        open(&store, u.clone(), "<?php".to_string());
        store.store_token_cache(&u, "id1".to_string(), vec![]);
        assert!(store.get_token_cache(&u, "id1").is_some());
        store.evict_token_cache(&u);
        assert!(store.get_token_cache(&u, "id1").is_none());
    }

    #[test]
    fn index_populates_file_index_with_symbols() {
        let store = DocumentStore::new();
        store.index(uri("/a.php"), "<?php\nfunction hello() {}");
        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
        assert_eq!(idx.functions.len(), 1);
        assert_eq!(idx.functions[0].name, "hello");
    }

    #[test]
    fn open_populates_file_index_with_symbols() {
        let store = DocumentStore::new();
        open(&store, uri("/a.php"), "<?php\nclass Foo {}".to_string());
        let idx = store.get_index_salsa(&uri("/a.php")).unwrap();
        assert_eq!(idx.classes.len(), 1);
        assert_eq!(idx.classes[0].name, "Foo");
    }
804
805 // ── Mirror invariants ────────────────────────────────────────────────
806 //
807 // Every mutation path that changes file text must keep the salsa layer
808 // consistent. These tests walk a set-edit-reopen cycle and assert that
809 // the salsa-derived `FileIndex` reflects the latest text at each step.
810
811 fn names_of(idx: &FileIndex) -> Vec<String> {
812 let mut out: Vec<String> = idx.classes.iter().map(|c| c.name.clone()).collect();
813 out.extend(idx.functions.iter().map(|f| f.name.clone()));
814 out.sort();
815 out
816 }
817
818 fn salsa_index_names(store: &DocumentStore, url: &Url) -> Vec<String> {
819 let sf = store.source_file(url).expect("mirror recorded SourceFile");
820 store.with_host(|host| {
821 let arc = crate::db::index::file_index(host.db(), sf);
822 names_of(arc.get())
823 })
824 }
825
826 #[test]
827 fn mirror_tracks_repeated_edits() {
828 let store = DocumentStore::new();
829 let u = uri("/mirror.php");
830
831 open(&store, u.clone(), "<?php\nclass A {}".to_string());
832 assert_eq!(salsa_index_names(&store, &u), vec!["A".to_string()]);
833
834 open(
835 &store,
836 u.clone(),
837 "<?php\nclass A {}\nclass B {}".to_string(),
838 );
839 assert_eq!(
840 salsa_index_names(&store, &u),
841 vec!["A".to_string(), "B".to_string()]
842 );
843
844 open(&store, u.clone(), "<?php\nfunction greet() {}".to_string());
845 assert_eq!(salsa_index_names(&store, &u), vec!["greet".to_string()]);
846 }
847
848 #[test]
849 fn mirror_tracks_index_and_index_from_doc() {
850 let store = DocumentStore::new();
851
852 // Background `index(url, text)` path.
853 let u1 = uri("/bg1.php");
854 store.index(u1.clone(), "<?php\nclass Bg1 {}");
855 assert_eq!(salsa_index_names(&store, &u1), vec!["Bg1".to_string()]);
856
857 // `index_from_doc(url, &doc, diags)` path (workspace-scan Phase 2).
858 let u2 = uri("/bg2.php");
859 let (doc, diags) =
860 crate::diagnostics::parse_document("<?php\nclass Bg2 {}\nfunction f() {}");
861 store.index_from_doc(u2.clone(), &doc, diags);
862 assert_eq!(
863 salsa_index_names(&store, &u2),
864 vec!["Bg2".to_string(), "f".to_string()]
865 );
866 }
867
868 /// G3: confirms the `parsed_cache` actually hits — two consecutive
869 /// `get_doc_salsa` calls on unchanged text return the same `Arc`
870 /// (pointer equality), and an edit forces a miss that produces a
871 /// different `Arc`.
872 /// parsed_cache must stay bounded — inserting more than
873 /// `PARSED_CACHE_CAP` unique URLs must not cause unbounded growth.
874 /// Eviction is probabilistic, so we only assert the bound, not which
875 /// Phase K2 end-to-end: seed a cached slice through `DocumentStore`,
876 /// confirm the workspace codebase sees the cached fact, then edit the
877 /// text and confirm the cache is cleared (codebase now reflects the
878 /// re-parsed text). Exercises `seed_cached_slice` + `mirror_text`'s
879 /// `set_cached_slice(None)` invalidation together.
880 #[test]
881 fn seed_cached_slice_then_edit_invalidates() {
882 let store = DocumentStore::new();
883 let u = uri("/seed_test.php");
884
885 // Mirror the initial text — classes: "Real".
886 store.mirror_text(&u, "<?php\nclass Real {}");
887
888 // Build a cached slice claiming classes: "Seeded", for the same URI.
889 let seeded = {
890 let src = "<?php\nclass Seeded {}";
891 let source_map = php_rs_parser::source_map::SourceMap::new(src);
892 let (doc, _) = crate::diagnostics::parse_document(src);
893 let collector = mir_analyzer::collector::DefinitionCollector::new_for_slice(
894 Arc::<str>::from(u.as_str()),
895 src,
896 &source_map,
897 );
898 let (s, _) = collector.collect_slice(doc.program());
899 Arc::new(s)
900 };
901 assert!(store.seed_cached_slice(&u, seeded));
902
903 // Codebase should contain the seeded class, not the real one.
904 let cb = store.get_codebase_salsa();
905 assert!(cb.type_exists("Seeded"));
906 assert!(!cb.type_exists("Real"));
907
908 // Edit: mirror_text flips the text and also clears cached_slice.
909 store.mirror_text(&u, "<?php\nclass Edited {}");
910 let cb = store.get_codebase_salsa();
911 assert!(
912 cb.type_exists("Edited"),
913 "after edit, codebase must reflect fresh parse"
914 );
915 assert!(
916 !cb.type_exists("Seeded"),
917 "mirror_text must clear cached_slice so stale data is gone"
918 );
919 }
920
921 /// Seeding for a URL that was never mirrored is a no-op (returns `false`)
922 /// — avoids silently allocating SourceFiles outside `mirror_text`'s control.
923 #[test]
924 fn seed_cached_slice_noops_for_unknown_uri() {
925 let store = DocumentStore::new();
926 let u = uri("/never_mirrored.php");
927 let slice = Arc::new(mir_codebase::storage::StubSlice::default());
928 assert!(!store.seed_cached_slice(&u, slice));
929 }
930
931 /// entries survive.
932 #[test]
933 fn parsed_cache_stays_bounded_under_many_inserts() {
934 let store = DocumentStore::new();
935 let overflow = PARSED_CACHE_CAP + 100;
936 for i in 0..overflow {
937 let u = uri(&format!("/cap/file{i}.php"));
938 store.index(u.clone(), "<?php\nclass A {}");
939 // Force a parsed_cache insert via get_doc_salsa.
940 let _ = store.get_doc_salsa(&u);
941 }
942 assert!(
943 store.parsed_cache.len() <= PARSED_CACHE_CAP,
944 "parsed_cache grew to {} entries (cap {})",
945 store.parsed_cache.len(),
946 PARSED_CACHE_CAP
947 );
948 }
949
950 #[test]
951 fn get_doc_salsa_cache_hits_across_calls() {
952 let store = DocumentStore::new();
953 let u = uri("/g3_cache.php");
954 open(&store, u.clone(), "<?php\nclass G3 {}".to_string());
955
956 let a = store.get_doc_salsa(&u).unwrap();
957 let b = store.get_doc_salsa(&u).unwrap();
958 assert!(
959 Arc::ptr_eq(&a, &b),
960 "parsed_cache hit should yield the same Arc across calls"
961 );
962
963 open(&store, u.clone(), "<?php\nclass G3b {}".to_string());
964 let c = store.get_doc_salsa(&u).unwrap();
965 assert!(
966 !Arc::ptr_eq(&a, &c),
967 "edit should invalidate the parsed_cache entry"
968 );
969 }
970
971 #[test]
972 fn get_doc_salsa_returns_some_for_mirrored_files() {
973 // Phase E4: `get_doc_salsa` no longer gates on open-state. The
974 // open/closed distinction now lives on `Backend::get_doc`.
975 let store = DocumentStore::new();
976 let u = uri("/e4_doc.php");
977 store.index(u.clone(), "<?php\nclass P {}");
978 assert!(store.get_doc_salsa(&u).is_some());
979 }
980
981 #[test]
982 fn get_salsa_accessors_return_none_for_unknown_uri() {
983 let store = DocumentStore::new();
984 let u = uri("/never-seen.php");
985 assert!(store.get_doc_salsa(&u).is_none());
986 assert!(store.get_index_salsa(&u).is_none());
987 assert!(store.get_method_returns_salsa(&u).is_none());
988 }
989
    /// Phase E1: concurrent readers and writers must not deadlock, panic, or
    /// return stale data. Writers briefly bump inputs while readers are
    /// running on cloned snapshots; any `salsa::Cancelled` raised on the
    /// reader side must be caught and retried by `snapshot_query`.
    #[test]
    fn concurrent_reads_and_writes_do_not_panic() {
        use std::sync::Arc;
        use std::thread;
        use std::time::{Duration, Instant};

        // Eight files so writer/reader iterations interleave across inputs.
        let store = Arc::new(DocumentStore::new());
        let urls: Vec<Url> = (0..8).map(|i| uri(&format!("/f{i}.php"))).collect();
        for (i, u) in urls.iter().enumerate() {
            open(&store, u.clone(), format!("<?php\nclass C{i} {{}}"));
        }

        // One shared deadline, captured before any thread spawns, bounds the
        // whole test to ~400 ms of wall time.
        let deadline = Instant::now() + Duration::from_millis(400);
        let mut handles = Vec::new();

        // Writer thread: keep bumping every file's text.
        {
            let store = Arc::clone(&store);
            let urls = urls.clone();
            handles.push(thread::spawn(move || {
                let mut rev = 0u32;
                while Instant::now() < deadline {
                    for u in &urls {
                        // `{{}}` renders as a literal `{}` class body; only
                        // the trailing `// rev N` comment changes per pass.
                        let text = format!("<?php\nclass C{{}}\n// rev {rev}");
                        store.mirror_text(u, &text);
                    }
                    rev += 1;
                }
            }));
        }

        // Reader threads: hammer the salsa accessors.
        for _ in 0..4 {
            let store = Arc::clone(&store);
            let urls = urls.clone();
            handles.push(thread::spawn(move || {
                while Instant::now() < deadline {
                    for u in &urls {
                        // Results are discarded — the assertion is simply
                        // that none of these calls panic or deadlock.
                        let _ = store.get_doc_salsa(u);
                        let _ = store.get_index_salsa(u);
                    }
                    let _ = store.get_codebase_salsa();
                    let _ = store.get_symbol_refs_salsa("C0");
                }
            }));
        }

        // Joining propagates any thread panic as a test failure.
        for h in handles {
            h.join().expect("no panic under concurrent read/write");
        }
    }
1045
1046 /// Phase L: warm-up must not error and must pre-populate the `file_refs`
1047 /// memo. We can't cheaply observe salsa memo state from outside, so we
1048 /// instead call `warm_reference_index` and then verify that a real
1049 /// reference lookup returns the expected result — the warm-up running
1050 /// without panic across a realistic two-file workspace is the load-bearing
1051 /// guarantee.
1052 #[test]
1053 fn warm_reference_index_does_not_panic_and_keeps_lookups_correct() {
1054 let store = DocumentStore::new();
1055 open(
1056 &store,
1057 uri("/wa.php"),
1058 "<?php\nfunction a() { b(); }".to_string(),
1059 );
1060 open(
1061 &store,
1062 uri("/wb.php"),
1063 "<?php\nfunction b() {}\na();".to_string(),
1064 );
1065 store.warm_reference_index();
1066 let refs_to_a = store.get_symbol_refs_salsa("a");
1067 assert!(
1068 refs_to_a
1069 .iter()
1070 .any(|(uri, _, _, _)| uri.contains("wb.php")),
1071 "reference to a() from /wb.php should be discoverable after warm-up, got {refs_to_a:?}"
1072 );
1073 }
1074
1075 /// PSR-4 lazy-loading: `get_semantic_issues_salsa` must not emit
1076 /// `UndefinedClass` for a class that is PSR-4-resolvable on disk, even
1077 /// when the dependency file is not yet in `source_files`.
1078 #[test]
1079 fn psr4_lazy_load_suppresses_undefined_class() {
1080 let tmp = tempfile::tempdir().unwrap();
1081
1082 // Write Entity.php to disk (not mirrored into the store).
1083 std::fs::create_dir_all(tmp.path().join("src/Model")).unwrap();
1084 std::fs::write(
1085 tmp.path().join("src/Model/Entity.php"),
1086 "<?php\nnamespace App\\Model;\nclass Entity {}\n",
1087 )
1088 .unwrap();
1089
1090 // Write composer.json so Psr4Map::load can build the map.
1091 std::fs::write(
1092 tmp.path().join("composer.json"),
1093 r#"{"autoload":{"psr-4":{"App\\":"src/"}}}"#,
1094 )
1095 .unwrap();
1096
1097 let store = DocumentStore::new();
1098
1099 // Inject a PSR-4 map pointing at the tmp dir.
1100 *store.psr4.write().unwrap() = crate::autoload::Psr4Map::load(tmp.path());
1101
1102 // Mirror the consuming file (Entity not yet in source_files).
1103 // Uses Entity as a parameter type hint — the analyzer resolves these
1104 // through use statements, so this exercises the full PSR-4 lazy-load path.
1105 let handler_url = Url::from_file_path(tmp.path().join("src/Service/Handler.php")).unwrap();
1106 store.mirror_text(
1107 &handler_url,
1108 "<?php\nnamespace App\\Service;\nuse App\\Model\\Entity;\nfunction handle(Entity $e): Entity { return $e; }\n",
1109 );
1110
1111 let issues = store.get_semantic_issues_salsa(&handler_url).unwrap();
1112 let undef: Vec<_> = issues
1113 .iter()
1114 .filter(|i| matches!(i.kind, mir_issues::IssueKind::UndefinedClass { .. }))
1115 .collect();
1116 assert!(
1117 undef.is_empty(),
1118 "PSR-4 lazy-loading must prevent UndefinedClass for App\\Model\\Entity; got: {undef:?}"
1119 );
1120 }
1121}