Skip to main content

php_lsp/db/
workspace_index.rs

1//! `workspace_index` salsa query — aggregates every file's `FileIndex` into a
2//! single structure with pre-built reverse maps.
3//!
4//! Before Phase J, cross-file queries (`workspace_symbols`,
5//! `prepare_type_hierarchy`, `supertypes_of`, `subtypes_of`,
6//! `find_implementations`) called `DocumentStore::all_indexes()` on every
7//! request. `all_indexes()` takes the host mutex once per file via
8//! `get_index_salsa` → `snapshot_query`, so a workspace with 1600 files
9//! paid 1600 lock acquisitions per lookup.
10//!
11//! This query runs once per workspace revision and returns:
12//!
13//! - `files`: the flat `(Url, Arc<FileIndex>)` list every handler used to
14//!   rebuild by hand,
15//! - `classes_by_name`: `name → [ClassRef]` for constant-time prepare /
16//!   supertype resolution,
17//! - `subtypes_of`: `name → [ClassRef]` for constant-time subtype /
18//!   implementation lookups.
19//!
20//! All lookups on the aggregate run in memory against an already-materialised
21//! `Arc`; edits invalidate the aggregate through `file_index` dependency
22//! tracking as usual.
23
24use std::collections::HashMap;
25use std::sync::Arc;
26
27use salsa::Database;
28use tower_lsp::lsp_types::Url;
29
30use crate::db::index::file_index;
31use crate::db::input::Workspace;
32use crate::file_index::FileIndex;
33
34/// Back-pointer into `WorkspaceIndexData.files`: `(file_idx, class_idx)` where
35/// `class_idx` indexes into `files[file_idx].1.classes`.
36#[derive(Debug, Clone, Copy)]
37pub struct ClassRef {
38    pub file: u32,
39    pub class: u32,
40}
41
42/// What kind of declaration a [`DeclRef`] points at. Drives the per-kind
43/// matching rules in [`WorkspaceIndexData::find_declaration`] (e.g. a `$foo`
44/// query matches properties and functions/classes named `foo`, but never
45/// methods or constants).
46#[derive(Debug, Clone, Copy, PartialEq, Eq)]
47pub enum DeclKind {
48    Function,
49    Class,
50    Method,
51    Property,
52    Constant,
53    EnumCase,
54}
55
56/// One named declaration, pre-resolved to its file and (zero-width) line.
57/// Stored in encounter order — file order, then within a file: functions
58/// first, then per class: the class itself, methods, properties, constants,
59/// enum cases — which is exactly the precedence the old linear scan in
60/// `find_declaration_in_indexes` had.
61#[derive(Debug, Clone, Copy)]
62pub struct DeclRef {
63    pub file: u32,
64    pub line: u32,
65    pub kind: DeclKind,
66}
67
68/// Aggregated workspace-level index. Constructed once per salsa revision by
69/// `workspace_index` and held behind an `Arc` for cheap cross-request sharing.
70pub struct WorkspaceIndexData {
71    pub files: Vec<(Url, Arc<FileIndex>)>,
72    pub classes_by_name: HashMap<String, Vec<ClassRef>>,
73    /// `parent_or_interface_or_trait_name → [subtype ClassRef]`.
74    /// A class that extends `X` AND implements `Y` contributes separate entries
75    /// under both keys. Keyed by `Arc<str>` so insertions from `ClassDef`'s
76    /// already-interned fields are pointer copies rather than heap allocations.
77    pub subtypes_of: HashMap<Arc<str>, Vec<ClassRef>>,
78    /// `declared_name → [DeclRef]` over every function, class, method,
79    /// property (stored without `$`), class constant, and enum case in the
80    /// workspace. Replaces the O(workspace) linear scan in the go-to-definition
81    /// index fallback with an O(1) lookup.
82    pub decls_by_name: HashMap<String, Vec<DeclRef>>,
83}
84
85type BuildMapsResult = (
86    HashMap<String, Vec<ClassRef>>,
87    HashMap<Arc<str>, Vec<ClassRef>>,
88    HashMap<String, Vec<DeclRef>>,
89);
90
91fn build_maps(files: &[(Url, Arc<FileIndex>)]) -> BuildMapsResult {
92    let mut classes_by_name: HashMap<String, Vec<ClassRef>> = HashMap::new();
93    let mut subtypes_of: HashMap<Arc<str>, Vec<ClassRef>> = HashMap::new();
94    let mut decls_by_name: HashMap<String, Vec<DeclRef>> = HashMap::new();
95    let push_decl = |map: &mut HashMap<String, Vec<DeclRef>>,
96                     name: &str,
97                     file: u32,
98                     line: u32,
99                     kind: DeclKind| {
100        map.entry(name.to_string())
101            .or_default()
102            .push(DeclRef { file, line, kind });
103    };
104    for (file_idx, (_, idx)) in files.iter().enumerate() {
105        let file_idx = file_idx as u32;
106        for f in &idx.functions {
107            push_decl(
108                &mut decls_by_name,
109                &f.name,
110                file_idx,
111                f.start_line,
112                DeclKind::Function,
113            );
114        }
115        for (cls_idx, cls) in idx.classes.iter().enumerate() {
116            let cr = ClassRef {
117                file: file_idx,
118                class: cls_idx as u32,
119            };
120            classes_by_name
121                .entry(cls.name.as_ref().to_string())
122                .or_default()
123                .push(cr);
124            if let Some(parent) = &cls.parent {
125                subtypes_of.entry(Arc::clone(parent)).or_default().push(cr);
126            }
127            for iface in &cls.implements {
128                subtypes_of.entry(Arc::clone(iface)).or_default().push(cr);
129            }
130            for trt in &cls.traits {
131                subtypes_of.entry(Arc::clone(trt)).or_default().push(cr);
132            }
133            push_decl(
134                &mut decls_by_name,
135                &cls.name,
136                file_idx,
137                cls.start_line,
138                DeclKind::Class,
139            );
140            for m in &cls.methods {
141                push_decl(
142                    &mut decls_by_name,
143                    &m.name,
144                    file_idx,
145                    m.start_line,
146                    DeclKind::Method,
147                );
148            }
149            for p in &cls.properties {
150                push_decl(
151                    &mut decls_by_name,
152                    &p.name,
153                    file_idx,
154                    p.start_line,
155                    DeclKind::Property,
156                );
157            }
158            for cc in &cls.constants {
159                push_decl(
160                    &mut decls_by_name,
161                    cc,
162                    file_idx,
163                    cls.start_line,
164                    DeclKind::Constant,
165                );
166            }
167            for case in &cls.cases {
168                push_decl(
169                    &mut decls_by_name,
170                    case,
171                    file_idx,
172                    cls.start_line,
173                    DeclKind::EnumCase,
174                );
175            }
176        }
177    }
178    (classes_by_name, subtypes_of, decls_by_name)
179}
180
181impl WorkspaceIndexData {
182    /// Resolve a `ClassRef` back to its `(uri, class_def)` pair.
183    pub fn at(&self, r: ClassRef) -> Option<(&Url, &crate::file_index::ClassDef)> {
184        let (uri, idx) = self.files.get(r.file as usize)?;
185        let cls = idx.classes.get(r.class as usize)?;
186        Some((uri, cls))
187    }
188
189    /// O(1) replacement for the linear `find_declaration_in_indexes` scan:
190    /// find a declaration by name, optionally excluding one file (the current
191    /// document, which the caller has already searched with accurate AST
192    /// ranges). Matching rules mirror the old scan: a sigil query (`$foo`)
193    /// matches functions, classes, and properties named `foo`; a bare query
194    /// matches every declaration kind. Returns a zero-width line `Location`.
195    pub fn find_declaration(
196        &self,
197        name: &str,
198        exclude: Option<&Url>,
199    ) -> Option<tower_lsp::lsp_types::Location> {
200        let bare = crate::util::strip_variable_sigil(name);
201        let sigil = bare != name;
202        let refs = self.decls_by_name.get(bare)?;
203        for r in refs {
204            if sigil
205                && !matches!(
206                    r.kind,
207                    DeclKind::Function | DeclKind::Class | DeclKind::Property
208                )
209            {
210                continue;
211            }
212            let (uri, _) = self.files.get(r.file as usize)?;
213            if exclude.is_some_and(|e| e == uri) {
214                continue;
215            }
216            return Some(tower_lsp::lsp_types::Location {
217                uri: uri.clone(),
218                range: crate::util::zero_width_range(r.line),
219            });
220        }
221        None
222    }
223
224    /// Constructor that builds the reverse maps from an already-materialised
225    /// `(Url, Arc<FileIndex>)` slice. Exposed so callers that don't want to
226    /// spin up a full `AnalysisHost` (unit tests of
227    /// `find_implementations_from_workspace`, benchmark crates) can exercise
228    /// the aggregate-shaped helpers directly. Production code goes through
229    /// the `workspace_index` salsa query instead.
230    pub fn from_files(files: Vec<(Url, Arc<FileIndex>)>) -> Self {
231        let (classes_by_name, subtypes_of, decls_by_name) = build_maps(&files);
232        Self {
233            files,
234            classes_by_name,
235            subtypes_of,
236            decls_by_name,
237        }
238    }
239}
240
241/// Arc wrapper with the same `Arc::ptr_eq`-based `Update` impl used throughout
242/// `src/db/`. The inner `WorkspaceIndexData` never compares structurally.
243#[derive(Clone)]
244pub struct WorkspaceIndexArc(pub Arc<WorkspaceIndexData>);
245
246impl WorkspaceIndexArc {
247    #[cfg(test)]
248    pub fn get(&self) -> &WorkspaceIndexData {
249        &self.0
250    }
251}
252
253// SAFETY: same contract as other `*Arc` newtypes — ptr_eq is sufficient because
254// every rebuild allocates a fresh `Arc`.
255crate::impl_arc_update!(WorkspaceIndexArc);
256
257/// Build the aggregate workspace index.
258///
259/// Depends on `Workspace::files` and every per-file `file_index` query;
260/// editing any file invalidates this via normal salsa dependency tracking.
261/// The `Arc<FileIndex>` values are the same ones served by `file_index` —
262/// callers that already held one are guaranteed pointer-equality here.
263#[salsa::tracked(no_eq)]
264pub fn workspace_index(db: &dyn Database, ws: Workspace) -> WorkspaceIndexArc {
265    let files_input = crate::db::input::workspace_files(db, ws);
266
267    let mut files: Vec<(Url, Arc<FileIndex>)> = Vec::with_capacity(files_input.len());
268    for sf in files_input.iter() {
269        let uri_arc = sf.uri(db);
270        // Fall back to inserting any well-formed entry — salsa inputs carry
271        // whatever string the caller mirrored; if parsing ever fails (test
272        // harness with a non-URL string) we simply skip that file for
273        // cross-workspace queries rather than panic.
274        let Ok(url) = Url::parse(&uri_arc) else {
275            continue;
276        };
277        let idx = file_index(db, *sf).0.clone();
278        files.push((url, idx));
279    }
280
281    let (classes_by_name, subtypes_of, decls_by_name) = build_maps(&files);
282
283    WorkspaceIndexArc(Arc::new(WorkspaceIndexData {
284        files,
285        classes_by_name,
286        subtypes_of,
287        decls_by_name,
288    }))
289}
290
291#[cfg(test)]
292mod tests {
293    use std::sync::Arc;
294
295    use super::*;
296    use crate::db::analysis::AnalysisHost;
297    use crate::db::input::FileText;
298    use salsa::Setter;
299
300    fn new_file(host: &AnalysisHost, uri: &str, src: &str) -> (Arc<str>, FileText) {
301        let ft = FileText::new(host.db(), Arc::<str>::from(src), None);
302        (Arc::<str>::from(uri), ft)
303    }
304
305    #[test]
306    fn workspace_index_builds_name_and_subtype_maps() {
307        let host = AnalysisHost::new();
308        let f1 = new_file(&host, "file:///a.php", "<?php\nclass Animal {}");
309        let f2 = new_file(
310            &host,
311            "file:///b.php",
312            "<?php\nclass Dog extends Animal {}\nclass Cat extends Animal {}",
313        );
314        let ws = Workspace::new(
315            host.db(),
316            Arc::from([f1, f2]),
317            mir_analyzer::PhpVersion::LATEST,
318        );
319
320        let wi = workspace_index(host.db(), ws);
321        let data = wi.get();
322
323        assert!(data.classes_by_name.contains_key("Animal"));
324        assert!(data.classes_by_name.contains_key("Dog"));
325
326        let subs = data
327            .subtypes_of
328            .get("Animal")
329            .expect("Animal must have subtype entries");
330        assert_eq!(subs.len(), 2, "Dog + Cat extend Animal");
331
332        let names: Vec<_> = subs
333            .iter()
334            .filter_map(|r| data.at(*r).map(|(_, c)| c.name.clone()))
335            .collect();
336        assert!(names.iter().any(|n| n.as_ref() == "Dog"));
337        assert!(names.iter().any(|n| n.as_ref() == "Cat"));
338    }
339
340    #[test]
341    fn workspace_index_memoizes_and_invalidates() {
342        let mut host = AnalysisHost::new();
343        let (uri_arc, ft1) = new_file(&host, "file:///a.php", "<?php\nclass A {}");
344        let ws = Workspace::new(
345            host.db(),
346            Arc::from([(uri_arc, ft1)]),
347            mir_analyzer::PhpVersion::LATEST,
348        );
349
350        let a = workspace_index(host.db(), ws);
351        let b = workspace_index(host.db(), ws);
352        assert!(
353            Arc::ptr_eq(&a.0, &b.0),
354            "unchanged inputs must return the memoized Arc"
355        );
356
357        ft1.set_text(host.db_mut())
358            .to(Arc::<str>::from("<?php\nclass B {}"));
359        let c = workspace_index(host.db(), ws);
360        assert!(!Arc::ptr_eq(&a.0, &c.0), "an edit must produce a fresh Arc");
361        assert!(c.get().classes_by_name.contains_key("B"));
362        assert!(!c.get().classes_by_name.contains_key("A"));
363    }
364
365    #[test]
366    fn workspace_index_collects_interface_and_trait_subtypes() {
367        let host = AnalysisHost::new();
368        let src = concat!(
369            "<?php\n",
370            "interface Greeter {}\n",
371            "trait Shouting {}\n",
372            "class Hi implements Greeter { use Shouting; }\n",
373        );
374        let f = new_file(&host, "file:///m.php", src);
375        let ws = Workspace::new(host.db(), Arc::from([f]), mir_analyzer::PhpVersion::LATEST);
376        let wi = workspace_index(host.db(), ws);
377        let data = wi.get();
378
379        let greeter_subs = data.subtypes_of.get("Greeter").expect("Greeter subs");
380        assert_eq!(greeter_subs.len(), 1);
381        let shouting_subs = data.subtypes_of.get("Shouting").expect("Shouting subs");
382        assert_eq!(shouting_subs.len(), 1);
383    }
384}