Skip to main content

php_lsp/db/
workspace_index.rs

1//! `workspace_index` salsa query — aggregates every file's `FileIndex` into a
2//! single structure with pre-built reverse maps.
3//!
4//! Before Phase J, cross-file queries (`workspace_symbols`,
5//! `prepare_type_hierarchy`, `supertypes_of`, `subtypes_of`,
6//! `find_implementations`) called `DocumentStore::all_indexes()` on every
7//! request. `all_indexes()` takes the host mutex once per file via
8//! `get_index_salsa` → `snapshot_query`, so a workspace with 1600 files
9//! paid 1600 lock acquisitions per lookup.
10//!
11//! This query runs once per workspace revision and returns:
12//!
13//! - `files`: the flat `(Url, Arc<FileIndex>)` list every handler used to
14//!   rebuild by hand,
15//! - `classes_by_name`: `name → [ClassRef]` for constant-time prepare /
16//!   supertype resolution,
17//! - `subtypes_of`: `name → [ClassRef]` for constant-time subtype /
18//!   implementation lookups.
19//!
20//! All lookups on the aggregate run in memory against an already-materialised
21//! `Arc`; edits invalidate the aggregate through `file_index` dependency
22//! tracking as usual.
23//!
24//! Vocabulary note: the `-Ref` types here (`ClassRef`, `DeclRef`) are internal
25//! back-pointers/handles into the index — *not* LSP references. A symbol usage
26//! in code is a "reference" spelled out (see `navigation/references.rs`). See
27//! the crate-root glossary in `lib.rs`.
28
29use std::collections::HashMap;
30use std::sync::Arc;
31
32use salsa::Database;
33use tower_lsp::lsp_types::Url;
34
35use crate::db::index::file_index;
36use crate::db::input::Workspace;
37use crate::index::file_index::FileIndex;
38
39/// Back-pointer into `WorkspaceIndexData.files`: `(file_idx, class_idx)` where
40/// `class_idx` indexes into `files[file_idx].1.classes`.
41#[derive(Debug, Clone, Copy)]
42pub struct ClassRef {
43    pub file: u32,
44    pub class: u32,
45}
46
47/// What kind of declaration a [`DeclRef`] points at. Drives the per-kind
48/// matching rules in [`WorkspaceIndexData::find_declaration`] (e.g. a `$foo`
49/// query matches properties and functions/classes named `foo`, but never
50/// methods or constants).
51#[derive(Debug, Clone, Copy, PartialEq, Eq)]
52pub enum DeclKind {
53    Function,
54    Class,
55    Method,
56    Property,
57    Constant,
58    EnumCase,
59}
60
61/// One named declaration, pre-resolved to its file and (zero-width) line.
62/// Stored in encounter order — file order, then within a file: functions
63/// first, then per class: the class itself, methods, properties, constants,
64/// enum cases — which is exactly the precedence the old linear scan in
65/// `find_declaration_in_indexes` had.
66#[derive(Debug, Clone, Copy)]
67pub struct DeclRef {
68    pub file: u32,
69    pub line: u32,
70    pub kind: DeclKind,
71}
72
73/// Aggregated workspace-level index. Constructed once per salsa revision by
74/// `workspace_index` and held behind an `Arc` for cheap cross-request sharing.
75pub struct WorkspaceIndexData {
76    pub files: Vec<(Url, Arc<FileIndex>)>,
77    pub classes_by_name: HashMap<String, Vec<ClassRef>>,
78    /// `parent_or_interface_or_trait_name → [subtype ClassRef]`.
79    /// A class that extends `X` AND implements `Y` contributes separate entries
80    /// under both keys. Keyed by `Arc<str>` so insertions from `ClassDef`'s
81    /// already-interned fields are pointer copies rather than heap allocations.
82    pub subtypes_of: HashMap<Arc<str>, Vec<ClassRef>>,
83    /// `declared_name → [DeclRef]` over every function, class, method,
84    /// property (stored without `$`), class constant, and enum case in the
85    /// workspace. Replaces the O(workspace) linear scan in the go-to-definition
86    /// index fallback with an O(1) lookup.
87    pub decls_by_name: HashMap<String, Vec<DeclRef>>,
88}
89
90type BuildMapsResult = (
91    HashMap<String, Vec<ClassRef>>,
92    HashMap<Arc<str>, Vec<ClassRef>>,
93    HashMap<String, Vec<DeclRef>>,
94);
95
96fn build_maps(files: &[(Url, Arc<FileIndex>)]) -> BuildMapsResult {
97    let mut classes_by_name: HashMap<String, Vec<ClassRef>> = HashMap::new();
98    let mut subtypes_of: HashMap<Arc<str>, Vec<ClassRef>> = HashMap::new();
99    let mut decls_by_name: HashMap<String, Vec<DeclRef>> = HashMap::new();
100    let push_decl = |map: &mut HashMap<String, Vec<DeclRef>>,
101                     name: &str,
102                     file: u32,
103                     line: u32,
104                     kind: DeclKind| {
105        map.entry(name.to_string())
106            .or_default()
107            .push(DeclRef { file, line, kind });
108    };
109    for (file_idx, (_, idx)) in files.iter().enumerate() {
110        let file_idx = file_idx as u32;
111        for f in &idx.functions {
112            push_decl(
113                &mut decls_by_name,
114                &f.name,
115                file_idx,
116                f.start_line,
117                DeclKind::Function,
118            );
119        }
120        for (cls_idx, cls) in idx.classes.iter().enumerate() {
121            let cr = ClassRef {
122                file: file_idx,
123                class: cls_idx as u32,
124            };
125            classes_by_name
126                .entry(cls.name.as_ref().to_string())
127                .or_default()
128                .push(cr);
129            if let Some(parent) = &cls.parent {
130                subtypes_of.entry(Arc::clone(parent)).or_default().push(cr);
131            }
132            for iface in &cls.implements {
133                subtypes_of.entry(Arc::clone(iface)).or_default().push(cr);
134                // If this implements name is a use-import alias, also index by
135                // the short name of the resolved FQN so cursor-on-interface-name
136                // lookups work regardless of how the implementor named the interface.
137                if let Some((_, fqn)) = idx
138                    .use_imports
139                    .iter()
140                    .find(|(alias, _)| alias.as_ref() == iface.as_ref())
141                {
142                    let short = crate::text::fqn_short_name(fqn);
143                    if short != iface.as_ref() {
144                        subtypes_of.entry(Arc::from(short)).or_default().push(cr);
145                    }
146                }
147            }
148            for trt in &cls.traits {
149                subtypes_of.entry(Arc::clone(trt)).or_default().push(cr);
150            }
151            push_decl(
152                &mut decls_by_name,
153                &cls.name,
154                file_idx,
155                cls.start_line,
156                DeclKind::Class,
157            );
158            for m in &cls.methods {
159                push_decl(
160                    &mut decls_by_name,
161                    &m.name,
162                    file_idx,
163                    m.start_line,
164                    DeclKind::Method,
165                );
166            }
167            for p in &cls.properties {
168                push_decl(
169                    &mut decls_by_name,
170                    &p.name,
171                    file_idx,
172                    p.start_line,
173                    DeclKind::Property,
174                );
175            }
176            for cc in &cls.constants {
177                push_decl(
178                    &mut decls_by_name,
179                    cc,
180                    file_idx,
181                    cls.start_line,
182                    DeclKind::Constant,
183                );
184            }
185            for case in &cls.cases {
186                push_decl(
187                    &mut decls_by_name,
188                    case,
189                    file_idx,
190                    cls.start_line,
191                    DeclKind::EnumCase,
192                );
193            }
194        }
195    }
196    (classes_by_name, subtypes_of, decls_by_name)
197}
198
199impl WorkspaceIndexData {
200    /// Resolve a `ClassRef` back to its `(uri, class_def)` pair.
201    pub fn at(&self, r: ClassRef) -> Option<(&Url, &crate::index::file_index::ClassDef)> {
202        let (uri, idx) = self.files.get(r.file as usize)?;
203        let cls = idx.classes.get(r.class as usize)?;
204        Some((uri, cls))
205    }
206
207    /// O(1) replacement for the linear `find_declaration_in_indexes` scan:
208    /// find a declaration by name, optionally excluding one file (the current
209    /// document, which the caller has already searched with accurate AST
210    /// ranges). Matching rules mirror the old scan: a sigil query (`$foo`)
211    /// matches functions, classes, and properties named `foo`; a bare query
212    /// matches every declaration kind. Returns a zero-width line `Location`.
213    pub fn find_declaration(
214        &self,
215        name: &str,
216        exclude: Option<&Url>,
217    ) -> Option<tower_lsp::lsp_types::Location> {
218        let bare = crate::text::strip_variable_sigil(name);
219        let sigil = bare != name;
220        let refs = self.decls_by_name.get(bare)?;
221        for r in refs {
222            if sigil
223                && !matches!(
224                    r.kind,
225                    DeclKind::Function | DeclKind::Class | DeclKind::Property
226                )
227            {
228                continue;
229            }
230            let (uri, _) = self.files.get(r.file as usize)?;
231            if exclude.is_some_and(|e| e == uri) {
232                continue;
233            }
234            return Some(tower_lsp::lsp_types::Location {
235                uri: uri.clone(),
236                range: crate::text::zero_width_range(r.line),
237            });
238        }
239        None
240    }
241
242    /// Constructor that builds the reverse maps from an already-materialised
243    /// `(Url, Arc<FileIndex>)` slice. Exposed so callers that don't want to
244    /// spin up a full `AnalysisHost` (unit tests of
245    /// `find_implementations_from_workspace`, benchmark crates) can exercise
246    /// the aggregate-shaped helpers directly. Production code goes through
247    /// the `workspace_index` salsa query instead.
248    pub fn from_files(files: Vec<(Url, Arc<FileIndex>)>) -> Self {
249        let (classes_by_name, subtypes_of, decls_by_name) = build_maps(&files);
250        Self {
251            files,
252            classes_by_name,
253            subtypes_of,
254            decls_by_name,
255        }
256    }
257}
258
259/// Arc wrapper with the same `Arc::ptr_eq`-based `Update` impl used throughout
260/// `src/db/`. The inner `WorkspaceIndexData` never compares structurally.
261#[derive(Clone)]
262pub struct WorkspaceIndexArc(pub Arc<WorkspaceIndexData>);
263
264impl WorkspaceIndexArc {
265    #[cfg(test)]
266    pub fn get(&self) -> &WorkspaceIndexData {
267        &self.0
268    }
269}
270
271// SAFETY: same contract as other `*Arc` newtypes — ptr_eq is sufficient because
272// every rebuild allocates a fresh `Arc`.
273crate::impl_arc_update!(WorkspaceIndexArc);
274
275/// Build the aggregate workspace index.
276///
277/// Depends on `Workspace::files` and every per-file `file_index` query;
278/// editing any file invalidates this via normal salsa dependency tracking.
279/// The `Arc<FileIndex>` values are the same ones served by `file_index` —
280/// callers that already held one are guaranteed pointer-equality here.
281#[salsa::tracked(no_eq)]
282pub fn workspace_index(db: &dyn Database, ws: Workspace) -> WorkspaceIndexArc {
283    let files_input = crate::db::input::workspace_files(db, ws);
284
285    let mut files: Vec<(Url, Arc<FileIndex>)> = Vec::with_capacity(files_input.len());
286    for sf in files_input.iter() {
287        let uri_arc = sf.uri(db);
288        // Fall back to inserting any well-formed entry — salsa inputs carry
289        // whatever string the caller mirrored; if parsing ever fails (test
290        // harness with a non-URL string) we simply skip that file for
291        // cross-workspace queries rather than panic.
292        let Ok(url) = Url::parse(&uri_arc) else {
293            continue;
294        };
295        let idx = file_index(db, *sf).0.clone();
296        files.push((url, idx));
297    }
298
299    let (classes_by_name, subtypes_of, decls_by_name) = build_maps(&files);
300
301    WorkspaceIndexArc(Arc::new(WorkspaceIndexData {
302        files,
303        classes_by_name,
304        subtypes_of,
305        decls_by_name,
306    }))
307}
308
309#[cfg(test)]
310mod tests {
311    use std::sync::Arc;
312
313    use super::*;
314    use crate::db::analysis::AnalysisHost;
315    use crate::db::input::FileText;
316    use salsa::Setter;
317
318    fn new_file(host: &AnalysisHost, uri: &str, src: &str) -> (Arc<str>, FileText) {
319        let ft = FileText::new(host.db(), Arc::<str>::from(src), None);
320        (Arc::<str>::from(uri), ft)
321    }
322
323    #[test]
324    fn workspace_index_builds_name_and_subtype_maps() {
325        let host = AnalysisHost::new();
326        let f1 = new_file(&host, "file:///a.php", "<?php\nclass Animal {}");
327        let f2 = new_file(
328            &host,
329            "file:///b.php",
330            "<?php\nclass Dog extends Animal {}\nclass Cat extends Animal {}",
331        );
332        let ws = Workspace::new(
333            host.db(),
334            Arc::from([f1, f2]),
335            mir_analyzer::PhpVersion::LATEST,
336        );
337
338        let wi = workspace_index(host.db(), ws);
339        let data = wi.get();
340
341        assert!(data.classes_by_name.contains_key("Animal"));
342        assert!(data.classes_by_name.contains_key("Dog"));
343
344        let subs = data
345            .subtypes_of
346            .get("Animal")
347            .expect("Animal must have subtype entries");
348        assert_eq!(subs.len(), 2, "Dog + Cat extend Animal");
349
350        let names: Vec<_> = subs
351            .iter()
352            .filter_map(|r| data.at(*r).map(|(_, c)| c.name.clone()))
353            .collect();
354        assert!(names.iter().any(|n| n.as_ref() == "Dog"));
355        assert!(names.iter().any(|n| n.as_ref() == "Cat"));
356    }
357
358    #[test]
359    fn workspace_index_memoizes_and_invalidates() {
360        let mut host = AnalysisHost::new();
361        let (uri_arc, ft1) = new_file(&host, "file:///a.php", "<?php\nclass A {}");
362        let ws = Workspace::new(
363            host.db(),
364            Arc::from([(uri_arc, ft1)]),
365            mir_analyzer::PhpVersion::LATEST,
366        );
367
368        let a = workspace_index(host.db(), ws);
369        let b = workspace_index(host.db(), ws);
370        assert!(
371            Arc::ptr_eq(&a.0, &b.0),
372            "unchanged inputs must return the memoized Arc"
373        );
374
375        ft1.set_text(host.db_mut())
376            .to(Arc::<str>::from("<?php\nclass B {}"));
377        let c = workspace_index(host.db(), ws);
378        assert!(!Arc::ptr_eq(&a.0, &c.0), "an edit must produce a fresh Arc");
379        assert!(c.get().classes_by_name.contains_key("B"));
380        assert!(!c.get().classes_by_name.contains_key("A"));
381    }
382
383    #[test]
384    fn workspace_index_collects_interface_and_trait_subtypes() {
385        let host = AnalysisHost::new();
386        let src = concat!(
387            "<?php\n",
388            "interface Greeter {}\n",
389            "trait Shouting {}\n",
390            "class Hi implements Greeter { use Shouting; }\n",
391        );
392        let f = new_file(&host, "file:///m.php", src);
393        let ws = Workspace::new(host.db(), Arc::from([f]), mir_analyzer::PhpVersion::LATEST);
394        let wi = workspace_index(host.db(), ws);
395        let data = wi.get();
396
397        let greeter_subs = data.subtypes_of.get("Greeter").expect("Greeter subs");
398        assert_eq!(greeter_subs.len(), 1);
399        let shouting_subs = data.subtypes_of.get("Shouting").expect("Shouting subs");
400        assert_eq!(shouting_subs.len(), 1);
401    }
402}