Skip to main content

mir_codebase/
codebase.rs

1use std::sync::Arc;
2
3use dashmap::{DashMap, DashSet};
4
5use crate::interner::Interner;
6
/// Maps symbol ID → flat list of `(file_id, start_byte, end_byte)`.
///
/// Entries are appended during Pass 2. Duplicates (e.g. from union receivers like
/// `Foo|Foo->method()`) are filtered at insert time by scanning the symbol's
/// existing spans. IDs come from
/// `Codebase::symbol_interner` / `Codebase::file_interner`.
///
/// Compared with the previous `DashMap<u32, HashMap<u32, HashSet<(u32, u32)>>>`,
/// this eliminates two levels of hash-map overhead (a `HashMap` per symbol and a
/// `HashSet` per file). Each entry is now 12 bytes (`u32` × 3) with no per-entry
/// allocator overhead beyond the `Vec` backing store.
type ReferenceLocations = DashMap<u32, Vec<(u32, u32, u32)>>;
18
19use crate::storage::{
20    ClassStorage, EnumStorage, FunctionStorage, InterfaceStorage, MethodStorage, TraitStorage,
21};
22use mir_types::Union;
23
// ---------------------------------------------------------------------------
// Private helpers — case-insensitive method lookup and reference-index inserts
// ---------------------------------------------------------------------------
27
28/// Case-insensitive method lookup within a single `own_methods` map.
29///
30/// Tries an exact key match first (O(1)), then falls back to a linear
31/// case-insensitive scan for stubs that store keys in original case.
32#[inline]
33fn lookup_method<'a>(
34    map: &'a indexmap::IndexMap<Arc<str>, Arc<MethodStorage>>,
35    name: &str,
36) -> Option<&'a Arc<MethodStorage>> {
37    map.get(name).or_else(|| {
38        map.iter()
39            .find(|(k, _)| k.as_ref().eq_ignore_ascii_case(name))
40            .map(|(_, v)| v)
41    })
42}
43
44/// Append `(sym_id, file_id, start, end)` to the reference index, skipping
45/// exact duplicates so union receivers like `Foo|Foo->method()` don't inflate
46/// the span list.
47///
48/// Both maps are updated atomically under their respective DashMap shard locks.
49#[inline]
50fn record_ref(
51    sym_locs: &ReferenceLocations,
52    file_refs: &DashMap<u32, Vec<u32>>,
53    sym_id: u32,
54    file_id: u32,
55    start: u32,
56    end: u32,
57) {
58    {
59        let mut entries = sym_locs.entry(sym_id).or_default();
60        let span = (file_id, start, end);
61        if !entries.contains(&span) {
62            entries.push(span);
63        }
64    }
65    {
66        let mut refs = file_refs.entry(file_id).or_default();
67        if !refs.contains(&sym_id) {
68            refs.push(sym_id);
69        }
70    }
71}
72
73// ---------------------------------------------------------------------------
74// Compact CSR reference index (post-Pass-2 read-optimised form)
75// ---------------------------------------------------------------------------
76
/// Read-optimised Compressed Sparse Row representation of the reference index.
///
/// Built once by [`Codebase::compact_reference_index`] after Pass 2 finishes.
/// After compaction the build-phase [`DashMap`]s are cleared, freeing the
/// per-entry allocator overhead (~72 bytes per (symbol, file) pair).
///
/// Two CSR views are maintained over the same flat `entries` array:
/// - by symbol: `entries[sym_offsets[id]..sym_offsets[id+1]]`
/// - by file: `by_file[file_offsets[id]..file_offsets[id+1]]` (indirect indices)
///
/// Both offset arrays are prefix sums of per-key entry counts, so a key with no
/// entries yields an empty range rather than a missing slot.
#[derive(Debug, Default)]
struct CompactRefIndex {
    /// All spans as `(sym_id, file_id, start, end)`, sorted in that tuple order
    /// and deduplicated.
    /// Each entry is 16 bytes; total size = `n_refs × 16` with no hash overhead.
    entries: Vec<(u32, u32, u32, u32)>,
    /// CSR offsets keyed by sym_id (length = max_sym_id + 2).
    sym_offsets: Vec<u32>,
    /// Indices into `entries` sorted by `(file_id, sym_id, start, end)`.
    /// Allows file-keyed lookups without duplicating the payload.
    by_file: Vec<u32>,
    /// CSR offsets keyed by file_id into `by_file` (length = max_file_id + 2).
    file_offsets: Vec<u32>,
}
99
100// ---------------------------------------------------------------------------
101// StructuralSnapshot — inheritance data captured before file removal
102// ---------------------------------------------------------------------------
103
/// Inheritance-relevant fields of one class, as captured by
/// [`Codebase::file_structural_snapshot`].
struct ClassInheritance {
    /// Direct parent class, if any.
    parent: Option<Arc<str>>,
    interfaces: Vec<Arc<str>>, // sorted for order-insensitive comparison
    traits: Vec<Arc<str>>,     // sorted
    /// The class's `all_parents` list at snapshot time, copied verbatim so it
    /// can be written back by [`Codebase::restore_all_parents`].
    all_parents: Vec<Arc<str>>,
}
110
/// Inheritance-relevant fields of one interface, as captured by
/// [`Codebase::file_structural_snapshot`].
struct InterfaceInheritance {
    extends: Vec<Arc<str>>, // sorted
    /// The interface's `all_parents` list at snapshot time, copied verbatim so
    /// it can be written back by [`Codebase::restore_all_parents`].
    all_parents: Vec<Arc<str>>,
}
115
/// Snapshot of the inheritance structure of all symbols defined in a file.
///
/// Produced by [`Codebase::file_structural_snapshot`] before
/// [`Codebase::remove_file_definitions`], and consumed by
/// [`Codebase::structural_unchanged_after_pass1`] /
/// [`Codebase::restore_all_parents`] to skip an expensive `finalize()` call
/// when only method bodies (not class hierarchies) changed.
///
/// Only classes and interfaces are recorded; other symbol kinds defined in the
/// file are not part of the snapshot.
pub struct StructuralSnapshot {
    /// Per-class inheritance data, keyed by the class's FQCN.
    classes: std::collections::HashMap<Arc<str>, ClassInheritance>,
    /// Per-interface inheritance data, keyed by the interface's FQCN.
    interfaces: std::collections::HashMap<Arc<str>, InterfaceInheritance>,
}
127
128// ---------------------------------------------------------------------------
129// Codebase — thread-safe global symbol registry
130// ---------------------------------------------------------------------------
131
/// Global symbol registry.
///
/// Every collection is a concurrent `DashMap`/`DashSet` (or an atomic / lock
/// wrapped value), so all methods take `&self`.
#[derive(Debug, Default)]
pub struct Codebase {
    /// Class definitions keyed by FQCN.
    pub classes: DashMap<Arc<str>, ClassStorage>,
    /// Interface definitions keyed by FQCN.
    pub interfaces: DashMap<Arc<str>, InterfaceStorage>,
    /// Trait definitions keyed by FQCN.
    pub traits: DashMap<Arc<str>, TraitStorage>,
    /// Enum definitions keyed by FQCN.
    pub enums: DashMap<Arc<str>, EnumStorage>,
    /// Free-function definitions keyed by FQN.
    pub functions: DashMap<Arc<str>, FunctionStorage>,
    /// Global constants: name → type.
    pub constants: DashMap<Arc<str>, Union>,

    /// Types of `@var`-annotated global variables, collected in Pass 1.
    /// Key: variable name without the `$` prefix.
    pub global_vars: DashMap<Arc<str>, Union>,
    /// Maps file path → variable names declared with `@var` in that file.
    /// Used by `remove_file_definitions` to purge stale entries on re-analysis.
    file_global_vars: DashMap<Arc<str>, Vec<Arc<str>>>,

    /// Methods referenced during Pass 2 — stored as interned symbol IDs.
    /// Used by the dead-code detector (M18).
    referenced_methods: DashSet<u32>,
    /// Properties referenced during Pass 2 — stored as interned symbol IDs.
    referenced_properties: DashSet<u32>,
    /// Free functions referenced during Pass 2 — stored as interned symbol IDs.
    referenced_functions: DashSet<u32>,

    /// Interner for symbol keys (`"ClassName::method"`, `"ClassName::prop"`, FQN).
    /// Replaces repeated `Arc<str>` copies (16 bytes) with compact `u32` IDs (4 bytes).
    pub symbol_interner: Interner,
    /// Interner for file paths. Same memory rationale as `symbol_interner`.
    pub file_interner: Interner,

    /// Maps symbol ID → flat list of `(file ID, start_byte, end_byte)` spans.
    /// IDs come from `symbol_interner` / `file_interner`.
    /// Duplicate spans from union receivers (e.g. `Foo|Foo->method()`) are
    /// rejected at insert time; per-file cleanup is driven by the reverse index
    /// `file_symbol_references` below.
    symbol_reference_locations: ReferenceLocations,
    /// Reverse index: file ID → symbol IDs referenced in that file.
    /// Used by `remove_file_definitions` to avoid a full scan of all symbols.
    /// A `Vec` rather than `HashSet`: duplicate sym_ids are guarded at insert time
    /// (same as `symbol_reference_locations`) for the same structural simplicity.
    file_symbol_references: DashMap<u32, Vec<u32>>,

    /// Compact CSR view of the reference index, built by `compact_reference_index()`.
    /// When `Some`, the build-phase DashMaps above are empty and this is the
    /// authoritative source for all reference queries.
    compact_ref_index: std::sync::RwLock<Option<CompactRefIndex>>,
    /// `true` iff `compact_ref_index` is `Some`. Checked atomically before
    /// acquiring any lock, so the fast path during Pass 2 is a single load.
    is_compacted: std::sync::atomic::AtomicBool,

    /// Maps every FQCN (class, interface, trait, enum, function) to the absolute
    /// path of the file that defines it. Populated during Pass 1.
    pub symbol_to_file: DashMap<Arc<str>, Arc<str>>,

    /// Lightweight FQCN index populated by `SymbolTable` before Pass 1.
    /// Enables O(1) "does this symbol exist?" checks before full definitions
    /// are available.
    pub known_symbols: DashSet<Arc<str>>,

    /// Per-file `use` alias maps: alias → FQCN.  Populated during Pass 1.
    ///
    /// Key: absolute file path (as `Arc<str>`).
    /// Value: map of `alias → fully-qualified class name`.
    ///
    /// Exposed as `pub` so that external consumers (e.g. `php-lsp`) can read
    /// import data that mir already collects, instead of reimplementing it.
    pub file_imports: DashMap<Arc<str>, std::collections::HashMap<String, String>>,
    /// Per-file current namespace (if any).  Populated during Pass 1.
    ///
    /// Key: absolute file path (as `Arc<str>`).
    /// Value: the declared namespace string (e.g. `"App\\Controller"`).
    ///
    /// Exposed as `pub` so that external consumers (e.g. `php-lsp`) can read
    /// namespace data that mir already collects, instead of reimplementing it.
    pub file_namespaces: DashMap<Arc<str>, String>,

    /// Whether finalize() has been called. Cleared by
    /// `invalidate_finalization()` and set again by `restore_all_parents()`.
    finalized: std::sync::atomic::AtomicBool,
}
210
211impl Codebase {
212    pub fn new() -> Self {
213        Self::default()
214    }
215
216    // -----------------------------------------------------------------------
217    // Stub injection
218    // -----------------------------------------------------------------------
219
220    /// Insert all definitions from `slice` into this codebase.
221    ///
222    /// Called by generated stub modules (`src/generated/stubs_*.rs`) to register
223    /// their pre-compiled definitions. Later insertions overwrite earlier ones,
224    /// so custom stubs loaded after PHPStorm stubs act as overrides.
225    pub fn inject_stub_slice(&self, slice: crate::storage::StubSlice) {
226        let file = slice.file.clone();
227        for cls in slice.classes {
228            if let Some(f) = &file {
229                self.symbol_to_file.insert(cls.fqcn.clone(), f.clone());
230            }
231            self.classes.insert(cls.fqcn.clone(), cls);
232        }
233        for iface in slice.interfaces {
234            if let Some(f) = &file {
235                self.symbol_to_file.insert(iface.fqcn.clone(), f.clone());
236            }
237            self.interfaces.insert(iface.fqcn.clone(), iface);
238        }
239        for tr in slice.traits {
240            if let Some(f) = &file {
241                self.symbol_to_file.insert(tr.fqcn.clone(), f.clone());
242            }
243            self.traits.insert(tr.fqcn.clone(), tr);
244        }
245        for en in slice.enums {
246            if let Some(f) = &file {
247                self.symbol_to_file.insert(en.fqcn.clone(), f.clone());
248            }
249            self.enums.insert(en.fqcn.clone(), en);
250        }
251        for func in slice.functions {
252            if let Some(f) = &file {
253                self.symbol_to_file.insert(func.fqn.clone(), f.clone());
254            }
255            self.functions.insert(func.fqn.clone(), func);
256        }
257        for (name, ty) in slice.constants {
258            self.constants.insert(name, ty);
259        }
260        if let Some(f) = &file {
261            for (name, ty) in slice.global_vars {
262                self.register_global_var(f, name, ty);
263            }
264        }
265    }
266
267    // -----------------------------------------------------------------------
268    // Compact reference index
269    // -----------------------------------------------------------------------
270
271    /// Convert the build-phase `DashMap` reference index into a compact CSR form.
272    ///
273    /// Call this once after Pass 2 completes on all files. The method:
274    /// 1. Drains the two build-phase `DashMap`s into a single flat `Vec`.
275    /// 2. Sorts and deduplicates entries.
276    /// 3. Builds two CSR offset arrays (by symbol and by file).
277    /// 4. Clears the `DashMap`s (freeing their allocations).
278    ///
279    /// After this call all reference queries use the compact index. Incremental
280    /// re-analysis via [`Self::re_analyze_file`] will automatically decompress the
281    /// index back into `DashMap`s on the first write, then recompact can be called
282    /// again at the end of that analysis pass.
283    pub fn compact_reference_index(&self) {
284        // Collect all entries from the build-phase DashMap.
285        let mut entries: Vec<(u32, u32, u32, u32)> = self
286            .symbol_reference_locations
287            .iter()
288            .flat_map(|entry| {
289                let sym_id = *entry.key();
290                entry
291                    .value()
292                    .iter()
293                    .map(move |&(file_id, start, end)| (sym_id, file_id, start, end))
294                    .collect::<Vec<_>>()
295            })
296            .collect();
297
298        if entries.is_empty() {
299            return;
300        }
301
302        // Sort by (sym_id, file_id, start, end) and drop exact duplicates.
303        entries.sort_unstable();
304        entries.dedup();
305
306        let n = entries.len();
307
308        // ---- Build symbol-keyed CSR offsets --------------------------------
309        let max_sym = entries.iter().map(|&(s, ..)| s).max().unwrap_or(0) as usize;
310        let mut sym_offsets = vec![0u32; max_sym + 2];
311        for &(sym_id, ..) in &entries {
312            sym_offsets[sym_id as usize + 1] += 1;
313        }
314        for i in 1..sym_offsets.len() {
315            sym_offsets[i] += sym_offsets[i - 1];
316        }
317
318        // ---- Build file-keyed indirect index --------------------------------
319        // `by_file[i]` is an index into `entries`; the slice is sorted by
320        // `(file_id, sym_id, start, end)` so CSR offsets can be computed cheaply.
321        let max_file = entries.iter().map(|&(_, f, ..)| f).max().unwrap_or(0) as usize;
322        let mut by_file: Vec<u32> = (0..n as u32).collect();
323        by_file.sort_unstable_by_key(|&i| {
324            let (sym_id, file_id, start, end) = entries[i as usize];
325            (file_id, sym_id, start, end)
326        });
327
328        let mut file_offsets = vec![0u32; max_file + 2];
329        for &idx in &by_file {
330            let file_id = entries[idx as usize].1;
331            file_offsets[file_id as usize + 1] += 1;
332        }
333        for i in 1..file_offsets.len() {
334            file_offsets[i] += file_offsets[i - 1];
335        }
336
337        *self.compact_ref_index.write().unwrap() = Some(CompactRefIndex {
338            entries,
339            sym_offsets,
340            by_file,
341            file_offsets,
342        });
343        self.is_compacted
344            .store(true, std::sync::atomic::Ordering::Release);
345
346        // Free build-phase allocations.
347        self.symbol_reference_locations.clear();
348        self.file_symbol_references.clear();
349    }
350
351    /// Decompress the compact index back into the build-phase `DashMap`s.
352    ///
353    /// Called automatically by write methods when the compact index is live.
354    /// This makes incremental re-analysis transparent: callers never need to
355    /// know whether the index is compacted or not.
356    fn ensure_expanded(&self) {
357        // Fast path: not compacted — one atomic load, no lock.
358        if !self.is_compacted.load(std::sync::atomic::Ordering::Acquire) {
359            return;
360        }
361        // Slow path: acquire write lock and decompress.
362        let mut guard = self.compact_ref_index.write().unwrap();
363        if let Some(ci) = guard.take() {
364            for &(sym_id, file_id, start, end) in &ci.entries {
365                record_ref(
366                    &self.symbol_reference_locations,
367                    &self.file_symbol_references,
368                    sym_id,
369                    file_id,
370                    start,
371                    end,
372                );
373            }
374            self.is_compacted
375                .store(false, std::sync::atomic::Ordering::Release);
376        }
377        // If another thread already decompressed (guard is now None), we're done.
378    }
379
380    /// Reset the finalization flag so that `finalize()` will run again.
381    ///
382    /// Use this when new class definitions have been added after an initial
383    /// `finalize()` call (e.g., lazily loaded via PSR-4) and the inheritance
384    /// graph needs to be rebuilt.
385    pub fn invalidate_finalization(&self) {
386        self.finalized
387            .store(false, std::sync::atomic::Ordering::SeqCst);
388    }
389
390    // -----------------------------------------------------------------------
391    // Incremental: remove all definitions from a single file
392    // -----------------------------------------------------------------------
393
394    /// Remove all definitions and outgoing reference locations contributed by the given file.
395    /// This clears classes, interfaces, traits, enums, functions, and constants
396    /// whose defining file matches `file_path`, the file's import and namespace entries,
397    /// and all entries in symbol_reference_locations that originated from this file.
398    /// After calling this, `invalidate_finalization()` is called so the next `finalize()`
399    /// rebuilds inheritance.
400    pub fn remove_file_definitions(&self, file_path: &str) {
401        // Collect all symbols defined in this file
402        let symbols: Vec<Arc<str>> = self
403            .symbol_to_file
404            .iter()
405            .filter(|entry| entry.value().as_ref() == file_path)
406            .map(|entry| entry.key().clone())
407            .collect();
408
409        // Remove each symbol from its respective map and from symbol_to_file
410        for sym in &symbols {
411            self.classes.remove(sym.as_ref());
412            self.interfaces.remove(sym.as_ref());
413            self.traits.remove(sym.as_ref());
414            self.enums.remove(sym.as_ref());
415            self.functions.remove(sym.as_ref());
416            self.constants.remove(sym.as_ref());
417            self.symbol_to_file.remove(sym.as_ref());
418            self.known_symbols.remove(sym.as_ref());
419        }
420
421        // Remove file-level metadata
422        self.file_imports.remove(file_path);
423        self.file_namespaces.remove(file_path);
424
425        // Remove @var-annotated global variables declared in this file
426        if let Some((_, var_names)) = self.file_global_vars.remove(file_path) {
427            for name in var_names {
428                self.global_vars.remove(name.as_ref());
429            }
430        }
431
432        // Ensure the reference index is in DashMap form so the removal below works.
433        self.ensure_expanded();
434
435        // Remove reference locations contributed by this file.
436        // Use the reverse index to avoid a full scan of all symbols.
437        if let Some(file_id) = self.file_interner.get_id(file_path) {
438            if let Some((_, sym_ids)) = self.file_symbol_references.remove(&file_id) {
439                for sym_id in sym_ids {
440                    if let Some(mut entries) = self.symbol_reference_locations.get_mut(&sym_id) {
441                        entries.retain(|&(fid, _, _)| fid != file_id);
442                    }
443                }
444            }
445        }
446
447        self.invalidate_finalization();
448    }
449
450    // -----------------------------------------------------------------------
451    // Structural snapshot — skip finalize() on body-only changes
452    // -----------------------------------------------------------------------
453
454    /// Capture the inheritance structure of all symbols defined in `file_path`.
455    ///
456    /// Call this *before* `remove_file_definitions` to preserve the data that
457    /// `finalize()` would otherwise have to recompute.  The snapshot records, for
458    /// each class/interface in the file, the fields that feed into
459    /// `all_parents` (parent class, implemented interfaces, used traits, extended
460    /// interfaces) as well as the already-computed `all_parents` list itself.
461    pub fn file_structural_snapshot(&self, file_path: &str) -> StructuralSnapshot {
462        let symbols: Vec<Arc<str>> = self
463            .symbol_to_file
464            .iter()
465            .filter(|e| e.value().as_ref() == file_path)
466            .map(|e| e.key().clone())
467            .collect();
468
469        let mut classes = std::collections::HashMap::new();
470        let mut interfaces = std::collections::HashMap::new();
471
472        for sym in symbols {
473            if let Some(cls) = self.classes.get(sym.as_ref()) {
474                let mut ifaces = cls.interfaces.clone();
475                ifaces.sort_unstable_by(|a, b| a.as_ref().cmp(b.as_ref()));
476                let mut traits = cls.traits.clone();
477                traits.sort_unstable_by(|a, b| a.as_ref().cmp(b.as_ref()));
478                classes.insert(
479                    sym,
480                    ClassInheritance {
481                        parent: cls.parent.clone(),
482                        interfaces: ifaces,
483                        traits,
484                        all_parents: cls.all_parents.clone(),
485                    },
486                );
487            } else if let Some(iface) = self.interfaces.get(sym.as_ref()) {
488                let mut extends = iface.extends.clone();
489                extends.sort_unstable_by(|a, b| a.as_ref().cmp(b.as_ref()));
490                interfaces.insert(
491                    sym,
492                    InterfaceInheritance {
493                        extends,
494                        all_parents: iface.all_parents.clone(),
495                    },
496                );
497            }
498        }
499
500        StructuralSnapshot {
501            classes,
502            interfaces,
503        }
504    }
505
506    /// After Pass 1 completes, check whether the inheritance structure in
507    /// `file_path` matches the snapshot taken before `remove_file_definitions`.
508    ///
509    /// Returns `true` if `finalize()` can be skipped — i.e. only method bodies,
510    /// properties, or annotations changed, not any class/interface hierarchy.
511    pub fn structural_unchanged_after_pass1(
512        &self,
513        file_path: &str,
514        old: &StructuralSnapshot,
515    ) -> bool {
516        let symbols: Vec<Arc<str>> = self
517            .symbol_to_file
518            .iter()
519            .filter(|e| e.value().as_ref() == file_path)
520            .map(|e| e.key().clone())
521            .collect();
522
523        let mut seen_classes = 0usize;
524        let mut seen_interfaces = 0usize;
525
526        for sym in &symbols {
527            if let Some(cls) = self.classes.get(sym.as_ref()) {
528                seen_classes += 1;
529                let Some(old_cls) = old.classes.get(sym.as_ref()) else {
530                    return false; // new class added
531                };
532                if old_cls.parent != cls.parent {
533                    return false;
534                }
535                let mut new_ifaces = cls.interfaces.clone();
536                new_ifaces.sort_unstable_by(|a, b| a.as_ref().cmp(b.as_ref()));
537                if old_cls.interfaces != new_ifaces {
538                    return false;
539                }
540                let mut new_traits = cls.traits.clone();
541                new_traits.sort_unstable_by(|a, b| a.as_ref().cmp(b.as_ref()));
542                if old_cls.traits != new_traits {
543                    return false;
544                }
545            } else if let Some(iface) = self.interfaces.get(sym.as_ref()) {
546                seen_interfaces += 1;
547                let Some(old_iface) = old.interfaces.get(sym.as_ref()) else {
548                    return false; // new interface added
549                };
550                let mut new_extends = iface.extends.clone();
551                new_extends.sort_unstable_by(|a, b| a.as_ref().cmp(b.as_ref()));
552                if old_iface.extends != new_extends {
553                    return false;
554                }
555            }
556            // Traits, enums, functions, constants: not finalization-relevant, skip.
557        }
558
559        // Check for removed classes or interfaces.
560        seen_classes == old.classes.len() && seen_interfaces == old.interfaces.len()
561    }
562
563    /// Restore `all_parents` from a snapshot and mark the codebase as finalized.
564    ///
565    /// Call this instead of `finalize()` when `structural_unchanged_after_pass1`
566    /// returns `true`.  The newly re-registered symbols (written by Pass 1) have
567    /// `all_parents = []`; this method repopulates them from the snapshot so that
568    /// all downstream lookups that depend on `all_parents` keep working correctly.
569    pub fn restore_all_parents(&self, file_path: &str, snapshot: &StructuralSnapshot) {
570        let symbols: Vec<Arc<str>> = self
571            .symbol_to_file
572            .iter()
573            .filter(|e| e.value().as_ref() == file_path)
574            .map(|e| e.key().clone())
575            .collect();
576
577        for sym in &symbols {
578            if let Some(old_cls) = snapshot.classes.get(sym.as_ref()) {
579                if let Some(mut cls) = self.classes.get_mut(sym.as_ref()) {
580                    cls.all_parents = old_cls.all_parents.clone();
581                }
582            } else if let Some(old_iface) = snapshot.interfaces.get(sym.as_ref()) {
583                if let Some(mut iface) = self.interfaces.get_mut(sym.as_ref()) {
584                    iface.all_parents = old_iface.all_parents.clone();
585                }
586            }
587        }
588
589        self.finalized
590            .store(true, std::sync::atomic::Ordering::SeqCst);
591    }
592
593    // -----------------------------------------------------------------------
594    // Global variable registry
595    // -----------------------------------------------------------------------
596
597    /// Record an `@var`-annotated global variable type discovered in Pass 1.
598    /// If the same variable is annotated in multiple files, the last write wins.
599    pub fn register_global_var(&self, file: &Arc<str>, name: Arc<str>, ty: Union) {
600        self.file_global_vars
601            .entry(file.clone())
602            .or_default()
603            .push(name.clone());
604        self.global_vars.insert(name, ty);
605    }
606
607    // -----------------------------------------------------------------------
608    // Lookups
609    // -----------------------------------------------------------------------
610
611    /// Resolve a property, walking up the inheritance chain (parent classes and traits).
612    pub fn get_property(
613        &self,
614        fqcn: &str,
615        prop_name: &str,
616    ) -> Option<crate::storage::PropertyStorage> {
617        // Check direct class own_properties
618        if let Some(cls) = self.classes.get(fqcn) {
619            if let Some(p) = cls.own_properties.get(prop_name) {
620                return Some(p.clone());
621            }
622        }
623
624        // Walk all ancestors (collected during finalize)
625        let all_parents = {
626            if let Some(cls) = self.classes.get(fqcn) {
627                cls.all_parents.clone()
628            } else {
629                return None;
630            }
631        };
632
633        for ancestor_fqcn in &all_parents {
634            if let Some(ancestor_cls) = self.classes.get(ancestor_fqcn.as_ref()) {
635                if let Some(p) = ancestor_cls.own_properties.get(prop_name) {
636                    return Some(p.clone());
637                }
638            }
639        }
640
641        // Check traits
642        let trait_list = {
643            if let Some(cls) = self.classes.get(fqcn) {
644                cls.traits.clone()
645            } else {
646                vec![]
647            }
648        };
649        for trait_fqcn in &trait_list {
650            if let Some(tr) = self.traits.get(trait_fqcn.as_ref()) {
651                if let Some(p) = tr.own_properties.get(prop_name) {
652                    return Some(p.clone());
653                }
654            }
655        }
656
657        None
658    }
659
660    /// Resolve a class constant by name, walking up the inheritance chain.
661    pub fn get_class_constant(
662        &self,
663        fqcn: &str,
664        const_name: &str,
665    ) -> Option<crate::storage::ConstantStorage> {
666        // Class: own → traits → ancestors → interfaces
667        if let Some(cls) = self.classes.get(fqcn) {
668            if let Some(c) = cls.own_constants.get(const_name) {
669                return Some(c.clone());
670            }
671            let all_parents = cls.all_parents.clone();
672            let interfaces = cls.interfaces.clone();
673            let traits = cls.traits.clone();
674            drop(cls);
675
676            for tr_fqcn in &traits {
677                if let Some(tr) = self.traits.get(tr_fqcn.as_ref()) {
678                    if let Some(c) = tr.own_constants.get(const_name) {
679                        return Some(c.clone());
680                    }
681                }
682            }
683
684            for ancestor_fqcn in &all_parents {
685                if let Some(ancestor) = self.classes.get(ancestor_fqcn.as_ref()) {
686                    if let Some(c) = ancestor.own_constants.get(const_name) {
687                        return Some(c.clone());
688                    }
689                }
690                if let Some(iface) = self.interfaces.get(ancestor_fqcn.as_ref()) {
691                    if let Some(c) = iface.own_constants.get(const_name) {
692                        return Some(c.clone());
693                    }
694                }
695            }
696
697            for iface_fqcn in &interfaces {
698                if let Some(iface) = self.interfaces.get(iface_fqcn.as_ref()) {
699                    if let Some(c) = iface.own_constants.get(const_name) {
700                        return Some(c.clone());
701                    }
702                }
703            }
704
705            return None;
706        }
707
708        // Interface: own → parent interfaces
709        if let Some(iface) = self.interfaces.get(fqcn) {
710            if let Some(c) = iface.own_constants.get(const_name) {
711                return Some(c.clone());
712            }
713            let parents = iface.all_parents.clone();
714            drop(iface);
715            for p in &parents {
716                if let Some(parent_iface) = self.interfaces.get(p.as_ref()) {
717                    if let Some(c) = parent_iface.own_constants.get(const_name) {
718                        return Some(c.clone());
719                    }
720                }
721            }
722            return None;
723        }
724
725        // Enum: own constants + cases
726        if let Some(en) = self.enums.get(fqcn) {
727            if let Some(c) = en.own_constants.get(const_name) {
728                return Some(c.clone());
729            }
730            if en.cases.contains_key(const_name) {
731                return Some(crate::storage::ConstantStorage {
732                    name: Arc::from(const_name),
733                    ty: mir_types::Union::mixed(),
734                    visibility: None,
735                    location: None,
736                });
737            }
738            return None;
739        }
740
741        // Trait: own constants only
742        if let Some(tr) = self.traits.get(fqcn) {
743            if let Some(c) = tr.own_constants.get(const_name) {
744                return Some(c.clone());
745            }
746            return None;
747        }
748
749        None
750    }
751
752    /// Resolve a method, walking up the full inheritance chain (own → traits → ancestors).
753    pub fn get_method(&self, fqcn: &str, method_name: &str) -> Option<Arc<MethodStorage>> {
754        // PHP method names are case-insensitive — normalize to lowercase for all lookups.
755        let method_lower = method_name.to_lowercase();
756        let method_name = method_lower.as_str();
757
758        // --- Class: own methods → own traits → ancestor classes/traits/interfaces ---
759        if let Some(cls) = self.classes.get(fqcn) {
760            // 1. Own methods (highest priority)
761            if let Some(m) = lookup_method(&cls.own_methods, method_name) {
762                return Some(Arc::clone(m));
763            }
764            // Collect chain info before dropping the DashMap guard.
765            let own_traits = cls.traits.clone();
766            let ancestors = cls.all_parents.clone();
767            drop(cls);
768
769            // 2. Own trait methods (recursive into trait-of-trait)
770            for tr_fqcn in &own_traits {
771                if let Some(m) = self.get_method_in_trait(tr_fqcn, method_name) {
772                    return Some(m);
773                }
774            }
775
776            // 3. Ancestor chain (all_parents is closest-first: parent, grandparent, …)
777            for ancestor_fqcn in &ancestors {
778                if let Some(anc) = self.classes.get(ancestor_fqcn.as_ref()) {
779                    if let Some(m) = lookup_method(&anc.own_methods, method_name) {
780                        return Some(Arc::clone(m));
781                    }
782                    let anc_traits = anc.traits.clone();
783                    drop(anc);
784                    for tr_fqcn in &anc_traits {
785                        if let Some(m) = self.get_method_in_trait(tr_fqcn, method_name) {
786                            return Some(m);
787                        }
788                    }
789                } else if let Some(iface) = self.interfaces.get(ancestor_fqcn.as_ref()) {
790                    if let Some(m) = lookup_method(&iface.own_methods, method_name) {
791                        let mut ms = (**m).clone();
792                        ms.is_abstract = true;
793                        return Some(Arc::new(ms));
794                    }
795                }
796                // Traits listed in all_parents are already covered via their owning class above.
797            }
798            return None;
799        }
800
801        // --- Interface: own methods + parent interfaces ---
802        if let Some(iface) = self.interfaces.get(fqcn) {
803            if let Some(m) = lookup_method(&iface.own_methods, method_name) {
804                return Some(Arc::clone(m));
805            }
806            let parents = iface.all_parents.clone();
807            drop(iface);
808            for parent_fqcn in &parents {
809                if let Some(parent_iface) = self.interfaces.get(parent_fqcn.as_ref()) {
810                    if let Some(m) = lookup_method(&parent_iface.own_methods, method_name) {
811                        return Some(Arc::clone(m));
812                    }
813                }
814            }
815            return None;
816        }
817
818        // --- Trait (variable annotated with a trait type) ---
819        if let Some(tr) = self.traits.get(fqcn) {
820            if let Some(m) = lookup_method(&tr.own_methods, method_name) {
821                return Some(Arc::clone(m));
822            }
823            return None;
824        }
825
826        // --- Enum ---
827        if let Some(e) = self.enums.get(fqcn) {
828            if let Some(m) = lookup_method(&e.own_methods, method_name) {
829                return Some(Arc::clone(m));
830            }
831            // PHP 8.1 built-in enum methods: cases(), from(), tryFrom()
832            if matches!(method_name, "cases" | "from" | "tryfrom") {
833                return Some(Arc::new(crate::storage::MethodStorage {
834                    fqcn: Arc::from(fqcn),
835                    name: Arc::from(method_name),
836                    params: vec![],
837                    return_type: Some(mir_types::Union::mixed()),
838                    inferred_return_type: None,
839                    visibility: crate::storage::Visibility::Public,
840                    is_static: true,
841                    is_abstract: false,
842                    is_constructor: false,
843                    template_params: vec![],
844                    assertions: vec![],
845                    throws: vec![],
846                    is_final: false,
847                    is_internal: false,
848                    is_pure: false,
849                    is_deprecated: false,
850                    location: None,
851                }));
852            }
853        }
854
855        None
856    }
857
858    /// Returns true if `child` extends or implements `ancestor` (transitively).
859    pub fn extends_or_implements(&self, child: &str, ancestor: &str) -> bool {
860        if child == ancestor {
861            return true;
862        }
863        if let Some(cls) = self.classes.get(child) {
864            return cls.implements_or_extends(ancestor);
865        }
866        if let Some(iface) = self.interfaces.get(child) {
867            return iface.all_parents.iter().any(|p| p.as_ref() == ancestor);
868        }
869        // Enum: backed enums implicitly implement BackedEnum (and UnitEnum);
870        // pure enums implicitly implement UnitEnum.
871        if let Some(en) = self.enums.get(child) {
872            // Check explicitly declared interfaces (e.g. implements SomeInterface)
873            if en.interfaces.iter().any(|i| i.as_ref() == ancestor) {
874                return true;
875            }
876            // PHP built-in: every enum implements UnitEnum
877            if ancestor == "UnitEnum" || ancestor == "\\UnitEnum" {
878                return true;
879            }
880            // Backed enums implement BackedEnum
881            if (ancestor == "BackedEnum" || ancestor == "\\BackedEnum") && en.scalar_type.is_some()
882            {
883                return true;
884            }
885        }
886        false
887    }
888
889    /// Whether a class/interface/trait/enum with this FQCN exists.
890    pub fn type_exists(&self, fqcn: &str) -> bool {
891        self.classes.contains_key(fqcn)
892            || self.interfaces.contains_key(fqcn)
893            || self.traits.contains_key(fqcn)
894            || self.enums.contains_key(fqcn)
895    }
896
897    pub fn function_exists(&self, fqn: &str) -> bool {
898        self.functions.contains_key(fqn)
899    }
900
901    /// Returns true if the class is declared abstract.
902    /// Used to suppress `UndefinedMethod` on abstract class receivers: the concrete
903    /// subclass is expected to implement the method, matching Psalm errorLevel=3 behaviour.
904    pub fn is_abstract_class(&self, fqcn: &str) -> bool {
905        self.classes.get(fqcn).is_some_and(|c| c.is_abstract)
906    }
907
908    /// Return the declared template params for `fqcn` (class or interface), or
909    /// an empty vec if the type is not found or has no templates.
910    pub fn get_class_template_params(&self, fqcn: &str) -> Vec<crate::storage::TemplateParam> {
911        if let Some(cls) = self.classes.get(fqcn) {
912            return cls.template_params.clone();
913        }
914        if let Some(iface) = self.interfaces.get(fqcn) {
915            return iface.template_params.clone();
916        }
917        if let Some(tr) = self.traits.get(fqcn) {
918            return tr.template_params.clone();
919        }
920        vec![]
921    }
922
923    /// Walk the parent chain collecting template bindings from `@extends` type args.
924    ///
925    /// For `class UserRepo extends BaseRepo` with `@extends BaseRepo<User>`, this returns
926    /// `{ T → User }` where `T` is `BaseRepo`'s declared template parameter.
927    pub fn get_inherited_template_bindings(
928        &self,
929        fqcn: &str,
930    ) -> std::collections::HashMap<Arc<str>, Union> {
931        let mut bindings = std::collections::HashMap::new();
932        let mut current = fqcn.to_string();
933
934        loop {
935            let (parent_fqcn, extends_type_args) = {
936                let cls = match self.classes.get(current.as_str()) {
937                    Some(c) => c,
938                    None => break,
939                };
940                let parent = match &cls.parent {
941                    Some(p) => p.clone(),
942                    None => break,
943                };
944                let args = cls.extends_type_args.clone();
945                (parent, args)
946            };
947
948            if !extends_type_args.is_empty() {
949                let parent_tps = self.get_class_template_params(&parent_fqcn);
950                for (tp, ty) in parent_tps.iter().zip(extends_type_args.iter()) {
951                    bindings
952                        .entry(tp.name.clone())
953                        .or_insert_with(|| ty.clone());
954                }
955            }
956
957            current = parent_fqcn.to_string();
958        }
959
960        bindings
961    }
962
963    /// Returns true if the class (or any ancestor/trait) defines a `__get` magic method.
964    /// Such classes allow arbitrary property access, suppressing UndefinedProperty.
965    pub fn has_magic_get(&self, fqcn: &str) -> bool {
966        self.get_method(fqcn, "__get").is_some()
967    }
968
969    /// Returns true if the class (or any of its ancestors) has a parent/interface/trait
970    /// that is NOT present in the codebase.  Used to suppress `UndefinedMethod` false
971    /// positives: if a method might be inherited from an unscanned external class we
972    /// cannot confirm or deny its existence.
973    ///
974    /// We use the pre-computed `all_parents` list (built during finalization) rather
975    /// than recursive DashMap lookups to avoid potential deadlocks.
976    pub fn has_unknown_ancestor(&self, fqcn: &str) -> bool {
977        // For interfaces: check whether any parent interface is unknown.
978        if let Some(iface) = self.interfaces.get(fqcn) {
979            let parents = iface.all_parents.clone();
980            drop(iface);
981            for p in &parents {
982                if !self.type_exists(p.as_ref()) {
983                    return true;
984                }
985            }
986            return false;
987        }
988
989        // Clone the data we need so the DashMap ref is dropped before any further lookups.
990        let (parent, interfaces, traits, all_parents) = {
991            let Some(cls) = self.classes.get(fqcn) else {
992                return false;
993            };
994            (
995                cls.parent.clone(),
996                cls.interfaces.clone(),
997                cls.traits.clone(),
998                cls.all_parents.clone(),
999            )
1000        };
1001
1002        // Fast path: check direct parent/interfaces/traits
1003        if let Some(ref p) = parent {
1004            if !self.type_exists(p.as_ref()) {
1005                return true;
1006            }
1007        }
1008        for iface in &interfaces {
1009            if !self.type_exists(iface.as_ref()) {
1010                return true;
1011            }
1012        }
1013        for tr in &traits {
1014            if !self.type_exists(tr.as_ref()) {
1015                return true;
1016            }
1017        }
1018
1019        // Also check the full ancestor chain (pre-computed during finalization)
1020        for ancestor in &all_parents {
1021            if !self.type_exists(ancestor.as_ref()) {
1022                return true;
1023            }
1024        }
1025
1026        false
1027    }
1028
1029    /// Resolve a short class/function name to its FQCN using the import table
1030    /// and namespace recorded for `file` during Pass 1.
1031    ///
1032    /// - Names already containing `\` (after stripping a leading `\`) are
1033    ///   returned as-is (already fully qualified).
1034    /// - `self`, `parent`, `static` are returned unchanged (caller handles them).
1035    pub fn resolve_class_name(&self, file: &str, name: &str) -> String {
1036        let name = name.trim_start_matches('\\');
1037        if name.is_empty() {
1038            return name.to_string();
1039        }
1040        // Fully qualified absolute paths start with '\' (already stripped above).
1041        // Names containing '\' but not starting with it may be:
1042        //   - Already-resolved FQCNs (e.g. Frontify\Util\Foo) — check type_exists
1043        //   - Qualified relative names (e.g. Option\Some from within Frontify\Utility) — need namespace prefix
1044        if name.contains('\\') {
1045            // Check if the leading segment matches a use-import alias
1046            let first_segment = name.split('\\').next().unwrap_or(name);
1047            if let Some(imports) = self.file_imports.get(file) {
1048                if let Some(resolved_prefix) = imports.get(first_segment) {
1049                    let rest = &name[first_segment.len()..]; // includes leading '\'
1050                    return format!("{}{}", resolved_prefix, rest);
1051                }
1052            }
1053            // If already known in codebase as-is, it's FQCN — trust it
1054            if self.type_exists(name) {
1055                return name.to_string();
1056            }
1057            // Otherwise it's a relative qualified name — prepend the file namespace
1058            if let Some(ns) = self.file_namespaces.get(file) {
1059                let qualified = format!("{}\\{}", *ns, name);
1060                if self.type_exists(&qualified) {
1061                    return qualified;
1062                }
1063            }
1064            return name.to_string();
1065        }
1066        // Built-in pseudo-types / keywords handled by the caller
1067        match name {
1068            "self" | "parent" | "static" | "this" => return name.to_string(),
1069            _ => {}
1070        }
1071        // Check use aliases for this file (PHP class names are case-insensitive)
1072        if let Some(imports) = self.file_imports.get(file) {
1073            if let Some(resolved) = imports.get(name) {
1074                return resolved.clone();
1075            }
1076            // Fall back to case-insensitive alias lookup
1077            let name_lower = name.to_lowercase();
1078            for (alias, resolved) in imports.iter() {
1079                if alias.to_lowercase() == name_lower {
1080                    return resolved.clone();
1081                }
1082            }
1083        }
1084        // Qualify with the file's namespace if one exists
1085        if let Some(ns) = self.file_namespaces.get(file) {
1086            let qualified = format!("{}\\{}", *ns, name);
1087            // If the namespaced version exists in the codebase, use it.
1088            // Otherwise fall back to the global (unqualified) name if that exists.
1089            // This handles `DateTimeInterface`, `Exception`, etc. used without import
1090            // while not overriding user-defined classes in namespaces.
1091            if self.type_exists(&qualified) {
1092                return qualified;
1093            }
1094            if self.type_exists(name) {
1095                return name.to_string();
1096            }
1097            return qualified;
1098        }
1099        name.to_string()
1100    }
1101
1102    // -----------------------------------------------------------------------
1103    // Definition location lookups
1104    // -----------------------------------------------------------------------
1105
1106    /// Look up the definition location of any symbol (class, interface, trait, enum, function).
1107    /// Returns the file path and byte offsets.
1108    pub fn get_symbol_location(&self, fqcn: &str) -> Option<crate::storage::Location> {
1109        if let Some(cls) = self.classes.get(fqcn) {
1110            return cls.location.clone();
1111        }
1112        if let Some(iface) = self.interfaces.get(fqcn) {
1113            return iface.location.clone();
1114        }
1115        if let Some(tr) = self.traits.get(fqcn) {
1116            return tr.location.clone();
1117        }
1118        if let Some(en) = self.enums.get(fqcn) {
1119            return en.location.clone();
1120        }
1121        if let Some(func) = self.functions.get(fqcn) {
1122            return func.location.clone();
1123        }
1124        None
1125    }
1126
1127    /// Look up the definition location of a class member (method, property, constant).
1128    pub fn get_member_location(
1129        &self,
1130        fqcn: &str,
1131        member_name: &str,
1132    ) -> Option<crate::storage::Location> {
1133        // Check methods
1134        if let Some(method) = self.get_method(fqcn, member_name) {
1135            return method.location.clone();
1136        }
1137        // Check properties
1138        if let Some(prop) = self.get_property(fqcn, member_name) {
1139            return prop.location.clone();
1140        }
1141        // Check class constants
1142        if let Some(cls) = self.classes.get(fqcn) {
1143            if let Some(c) = cls.own_constants.get(member_name) {
1144                return c.location.clone();
1145            }
1146        }
1147        // Check interface constants
1148        if let Some(iface) = self.interfaces.get(fqcn) {
1149            if let Some(c) = iface.own_constants.get(member_name) {
1150                return c.location.clone();
1151            }
1152        }
1153        // Check trait constants
1154        if let Some(tr) = self.traits.get(fqcn) {
1155            if let Some(c) = tr.own_constants.get(member_name) {
1156                return c.location.clone();
1157            }
1158        }
1159        // Check enum constants and cases
1160        if let Some(en) = self.enums.get(fqcn) {
1161            if let Some(c) = en.own_constants.get(member_name) {
1162                return c.location.clone();
1163            }
1164            if let Some(case) = en.cases.get(member_name) {
1165                return case.location.clone();
1166            }
1167        }
1168        None
1169    }
1170
1171    // -----------------------------------------------------------------------
1172    // Reference tracking (M18 dead-code detection)
1173    // -----------------------------------------------------------------------
1174
1175    /// Mark a method as referenced from user code.
1176    pub fn mark_method_referenced(&self, fqcn: &str, method_name: &str) {
1177        let key = format!("{}::{}", fqcn, method_name.to_lowercase());
1178        let id = self.symbol_interner.intern_str(&key);
1179        self.referenced_methods.insert(id);
1180    }
1181
1182    /// Mark a property as referenced from user code.
1183    pub fn mark_property_referenced(&self, fqcn: &str, prop_name: &str) {
1184        let key = format!("{}::{}", fqcn, prop_name);
1185        let id = self.symbol_interner.intern_str(&key);
1186        self.referenced_properties.insert(id);
1187    }
1188
1189    /// Mark a free function as referenced from user code.
1190    pub fn mark_function_referenced(&self, fqn: &str) {
1191        let id = self.symbol_interner.intern_str(fqn);
1192        self.referenced_functions.insert(id);
1193    }
1194
1195    pub fn is_method_referenced(&self, fqcn: &str, method_name: &str) -> bool {
1196        let key = format!("{}::{}", fqcn, method_name.to_lowercase());
1197        match self.symbol_interner.get_id(&key) {
1198            Some(id) => self.referenced_methods.contains(&id),
1199            None => false,
1200        }
1201    }
1202
1203    pub fn is_property_referenced(&self, fqcn: &str, prop_name: &str) -> bool {
1204        let key = format!("{}::{}", fqcn, prop_name);
1205        match self.symbol_interner.get_id(&key) {
1206            Some(id) => self.referenced_properties.contains(&id),
1207            None => false,
1208        }
1209    }
1210
1211    pub fn is_function_referenced(&self, fqn: &str) -> bool {
1212        match self.symbol_interner.get_id(fqn) {
1213            Some(id) => self.referenced_functions.contains(&id),
1214            None => false,
1215        }
1216    }
1217
1218    /// Record a method reference with its source location.
1219    /// Also updates the referenced_methods DashSet for dead-code detection.
1220    pub fn mark_method_referenced_at(
1221        &self,
1222        fqcn: &str,
1223        method_name: &str,
1224        file: Arc<str>,
1225        start: u32,
1226        end: u32,
1227    ) {
1228        let key = format!("{}::{}", fqcn, method_name.to_lowercase());
1229        self.ensure_expanded();
1230        let sym_id = self.symbol_interner.intern_str(&key);
1231        let file_id = self.file_interner.intern(file);
1232        self.referenced_methods.insert(sym_id);
1233        record_ref(
1234            &self.symbol_reference_locations,
1235            &self.file_symbol_references,
1236            sym_id,
1237            file_id,
1238            start,
1239            end,
1240        );
1241    }
1242
1243    /// Record a property reference with its source location.
1244    /// Also updates the referenced_properties DashSet for dead-code detection.
1245    pub fn mark_property_referenced_at(
1246        &self,
1247        fqcn: &str,
1248        prop_name: &str,
1249        file: Arc<str>,
1250        start: u32,
1251        end: u32,
1252    ) {
1253        let key = format!("{}::{}", fqcn, prop_name);
1254        self.ensure_expanded();
1255        let sym_id = self.symbol_interner.intern_str(&key);
1256        let file_id = self.file_interner.intern(file);
1257        self.referenced_properties.insert(sym_id);
1258        record_ref(
1259            &self.symbol_reference_locations,
1260            &self.file_symbol_references,
1261            sym_id,
1262            file_id,
1263            start,
1264            end,
1265        );
1266    }
1267
1268    /// Record a function reference with its source location.
1269    /// Also updates the referenced_functions DashSet for dead-code detection.
1270    pub fn mark_function_referenced_at(&self, fqn: &str, file: Arc<str>, start: u32, end: u32) {
1271        self.ensure_expanded();
1272        let sym_id = self.symbol_interner.intern_str(fqn);
1273        let file_id = self.file_interner.intern(file);
1274        self.referenced_functions.insert(sym_id);
1275        record_ref(
1276            &self.symbol_reference_locations,
1277            &self.file_symbol_references,
1278            sym_id,
1279            file_id,
1280            start,
1281            end,
1282        );
1283    }
1284
1285    /// Record a class reference (e.g. `new Foo()`) with its source location.
1286    /// Does not update any dead-code DashSet — class instantiation tracking is
1287    /// separate from method/property/function dead-code detection.
1288    pub fn mark_class_referenced_at(&self, fqcn: &str, file: Arc<str>, start: u32, end: u32) {
1289        self.ensure_expanded();
1290        let sym_id = self.symbol_interner.intern_str(fqcn);
1291        let file_id = self.file_interner.intern(file);
1292        record_ref(
1293            &self.symbol_reference_locations,
1294            &self.file_symbol_references,
1295            sym_id,
1296            file_id,
1297            start,
1298            end,
1299        );
1300    }
1301
1302    /// Replay cached reference locations for a file into the reference index.
1303    /// Called on cache hits to avoid re-running Pass 2 just to rebuild the index.
1304    /// `locs` is a slice of `(symbol_key, start_byte, end_byte)` as stored in the cache.
1305    pub fn replay_reference_locations(&self, file: Arc<str>, locs: &[(String, u32, u32)]) {
1306        if locs.is_empty() {
1307            return;
1308        }
1309        self.ensure_expanded();
1310        let file_id = self.file_interner.intern(file);
1311        for (symbol_key, start, end) in locs {
1312            let sym_id = self.symbol_interner.intern_str(symbol_key);
1313            record_ref(
1314                &self.symbol_reference_locations,
1315                &self.file_symbol_references,
1316                sym_id,
1317                file_id,
1318                *start,
1319                *end,
1320            );
1321        }
1322    }
1323
1324    /// Return all reference locations for `symbol` as a flat `Vec<(file, start, end)>`.
1325    /// Returns an empty Vec if the symbol has no recorded references.
1326    pub fn get_reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u32)> {
1327        let Some(sym_id) = self.symbol_interner.get_id(symbol) else {
1328            return Vec::new();
1329        };
1330        // Fast path: compact CSR index.
1331        if let Some(ref ci) = *self.compact_ref_index.read().unwrap() {
1332            let id = sym_id as usize;
1333            if id + 1 >= ci.sym_offsets.len() {
1334                return Vec::new();
1335            }
1336            let start = ci.sym_offsets[id] as usize;
1337            let end = ci.sym_offsets[id + 1] as usize;
1338            return ci.entries[start..end]
1339                .iter()
1340                .map(|&(_, file_id, s, e)| (self.file_interner.get(file_id), s, e))
1341                .collect();
1342        }
1343        // Slow path: build-phase DashMap.
1344        let Some(entries) = self.symbol_reference_locations.get(&sym_id) else {
1345            return Vec::new();
1346        };
1347        entries
1348            .iter()
1349            .map(|&(file_id, start, end)| (self.file_interner.get(file_id), start, end))
1350            .collect()
1351    }
1352
1353    /// Extract all reference locations recorded for `file` as `(symbol_key, start, end)` triples.
1354    /// Used by the cache layer to persist per-file reference data between runs.
1355    pub fn extract_file_reference_locations(&self, file: &str) -> Vec<(Arc<str>, u32, u32)> {
1356        let Some(file_id) = self.file_interner.get_id(file) else {
1357            return Vec::new();
1358        };
1359        // Fast path: compact CSR index.
1360        if let Some(ref ci) = *self.compact_ref_index.read().unwrap() {
1361            let id = file_id as usize;
1362            if id + 1 >= ci.file_offsets.len() {
1363                return Vec::new();
1364            }
1365            let start = ci.file_offsets[id] as usize;
1366            let end = ci.file_offsets[id + 1] as usize;
1367            return ci.by_file[start..end]
1368                .iter()
1369                .map(|&entry_idx| {
1370                    let (sym_id, _, s, e) = ci.entries[entry_idx as usize];
1371                    (self.symbol_interner.get(sym_id), s, e)
1372                })
1373                .collect();
1374        }
1375        // Slow path: build-phase DashMaps.
1376        let Some(sym_ids) = self.file_symbol_references.get(&file_id) else {
1377            return Vec::new();
1378        };
1379        let mut out = Vec::new();
1380        for &sym_id in sym_ids.iter() {
1381            let Some(entries) = self.symbol_reference_locations.get(&sym_id) else {
1382                continue;
1383            };
1384            let sym_key = self.symbol_interner.get(sym_id);
1385            for &(entry_file_id, start, end) in entries.iter() {
1386                if entry_file_id == file_id {
1387                    out.push((sym_key.clone(), start, end));
1388                }
1389            }
1390        }
1391        out
1392    }
1393
1394    /// Returns true if the given file has any recorded symbol references.
1395    pub fn file_has_symbol_references(&self, file: &str) -> bool {
1396        let Some(file_id) = self.file_interner.get_id(file) else {
1397            return false;
1398        };
1399        // Check compact index first.
1400        if let Some(ref ci) = *self.compact_ref_index.read().unwrap() {
1401            let id = file_id as usize;
1402            return id + 1 < ci.file_offsets.len() && ci.file_offsets[id] < ci.file_offsets[id + 1];
1403        }
1404        self.file_symbol_references.contains_key(&file_id)
1405    }
1406
1407    // -----------------------------------------------------------------------
1408    // Finalization
1409    // -----------------------------------------------------------------------
1410
1411    /// Must be called after all files have been parsed (pass 1 complete).
1412    /// Resolves inheritance chains and builds method dispatch tables.
1413    pub fn finalize(&self) {
1414        if self.finalized.load(std::sync::atomic::Ordering::SeqCst) {
1415            return;
1416        }
1417
1418        // 1. Resolve all_parents for classes
1419        let class_keys: Vec<Arc<str>> = self.classes.iter().map(|e| e.key().clone()).collect();
1420        for fqcn in &class_keys {
1421            let parents = self.collect_class_ancestors(fqcn);
1422            if let Some(mut cls) = self.classes.get_mut(fqcn.as_ref()) {
1423                cls.all_parents = parents;
1424            }
1425        }
1426
1427        // 2. Resolve all_parents for interfaces
1428        let iface_keys: Vec<Arc<str>> = self.interfaces.iter().map(|e| e.key().clone()).collect();
1429        for fqcn in &iface_keys {
1430            let parents = self.collect_interface_ancestors(fqcn);
1431            if let Some(mut iface) = self.interfaces.get_mut(fqcn.as_ref()) {
1432                iface.all_parents = parents;
1433            }
1434        }
1435
1436        self.finalized
1437            .store(true, std::sync::atomic::Ordering::SeqCst);
1438    }
1439
1440    // -----------------------------------------------------------------------
1441    // Private helpers
1442    // -----------------------------------------------------------------------
1443
1444    /// Look up `method_name` in a trait's own methods, then recursively in any
1445    /// traits that the trait itself uses (`use OtherTrait;` inside a trait body).
1446    /// A visited set prevents infinite loops on pathological mutual trait use.
1447    fn get_method_in_trait(
1448        &self,
1449        tr_fqcn: &Arc<str>,
1450        method_name: &str,
1451    ) -> Option<Arc<MethodStorage>> {
1452        let mut visited = std::collections::HashSet::new();
1453        self.get_method_in_trait_inner(tr_fqcn, method_name, &mut visited)
1454    }
1455
1456    fn get_method_in_trait_inner(
1457        &self,
1458        tr_fqcn: &Arc<str>,
1459        method_name: &str,
1460        visited: &mut std::collections::HashSet<String>,
1461    ) -> Option<Arc<MethodStorage>> {
1462        if !visited.insert(tr_fqcn.to_string()) {
1463            return None; // cycle guard
1464        }
1465        let tr = self.traits.get(tr_fqcn.as_ref())?;
1466        if let Some(m) = lookup_method(&tr.own_methods, method_name) {
1467            return Some(Arc::clone(m));
1468        }
1469        let used_traits = tr.traits.clone();
1470        drop(tr);
1471        for used_fqcn in &used_traits {
1472            if let Some(m) = self.get_method_in_trait_inner(used_fqcn, method_name, visited) {
1473                return Some(m);
1474            }
1475        }
1476        None
1477    }
1478
1479    fn collect_class_ancestors(&self, fqcn: &str) -> Vec<Arc<str>> {
1480        let mut result = Vec::new();
1481        let mut visited = std::collections::HashSet::new();
1482        self.collect_class_ancestors_inner(fqcn, &mut result, &mut visited);
1483        result
1484    }
1485
1486    fn collect_class_ancestors_inner(
1487        &self,
1488        fqcn: &str,
1489        out: &mut Vec<Arc<str>>,
1490        visited: &mut std::collections::HashSet<String>,
1491    ) {
1492        if !visited.insert(fqcn.to_string()) {
1493            return; // cycle guard
1494        }
1495        let (parent, interfaces, traits) = {
1496            if let Some(cls) = self.classes.get(fqcn) {
1497                (
1498                    cls.parent.clone(),
1499                    cls.interfaces.clone(),
1500                    cls.traits.clone(),
1501                )
1502            } else {
1503                return;
1504            }
1505        };
1506
1507        if let Some(p) = parent {
1508            out.push(p.clone());
1509            self.collect_class_ancestors_inner(&p, out, visited);
1510        }
1511        for iface in interfaces {
1512            out.push(iface.clone());
1513            self.collect_interface_ancestors_inner(&iface, out, visited);
1514        }
1515        for t in traits {
1516            out.push(t);
1517        }
1518    }
1519
1520    fn collect_interface_ancestors(&self, fqcn: &str) -> Vec<Arc<str>> {
1521        let mut result = Vec::new();
1522        let mut visited = std::collections::HashSet::new();
1523        self.collect_interface_ancestors_inner(fqcn, &mut result, &mut visited);
1524        result
1525    }
1526
1527    fn collect_interface_ancestors_inner(
1528        &self,
1529        fqcn: &str,
1530        out: &mut Vec<Arc<str>>,
1531        visited: &mut std::collections::HashSet<String>,
1532    ) {
1533        if !visited.insert(fqcn.to_string()) {
1534            return;
1535        }
1536        let extends = {
1537            if let Some(iface) = self.interfaces.get(fqcn) {
1538                iface.extends.clone()
1539            } else {
1540                return;
1541            }
1542        };
1543        for e in extends {
1544            out.push(e.clone());
1545            self.collect_interface_ancestors_inner(&e, out, visited);
1546        }
1547    }
1548}
1549
// ---------------------------------------------------------------------------
// CodebaseBuilder — compose a finalized Codebase from per-file StubSlices
// ---------------------------------------------------------------------------
1553
1554/// Incremental builder that accumulates [`crate::storage::StubSlice`] values
1555/// into a fresh [`Codebase`] and finalizes it on demand.
1556///
1557/// Designed for callers (e.g. salsa queries in downstream consumers) that want
1558/// to treat Pass-1 definition collection as a pure function from source to
1559/// `StubSlice`, then compose the slices into a full codebase outside the
1560/// collector.
1561pub struct CodebaseBuilder {
1562    cb: Codebase,
1563}
1564
1565impl CodebaseBuilder {
1566    pub fn new() -> Self {
1567        Self {
1568            cb: Codebase::new(),
1569        }
1570    }
1571
1572    /// Inject a single slice. Later injections overwrite earlier definitions
1573    /// with the same FQN, matching [`Codebase::inject_stub_slice`] semantics.
1574    pub fn add(&mut self, slice: crate::storage::StubSlice) {
1575        self.cb.inject_stub_slice(slice);
1576    }
1577
1578    /// Finalize inheritance graphs and return the built `Codebase`.
1579    pub fn finalize(self) -> Codebase {
1580        self.cb.finalize();
1581        self.cb
1582    }
1583
1584    /// Access the in-progress codebase without consuming the builder.
1585    pub fn codebase(&self) -> &Codebase {
1586        &self.cb
1587    }
1588}
1589
1590impl Default for CodebaseBuilder {
1591    fn default() -> Self {
1592        Self::new()
1593    }
1594}
1595
1596/// One-shot: build a finalized [`Codebase`] from a set of per-file slices.
1597pub fn codebase_from_parts(parts: Vec<crate::storage::StubSlice>) -> Codebase {
1598    let mut b = CodebaseBuilder::new();
1599    for p in parts {
1600        b.add(p);
1601    }
1602    b.finalize()
1603}
1604
#[cfg(test)]
mod tests {
    use super::*;

    /// Turn a string literal into the `Arc<str>` the reference API expects.
    fn arc(s: &str) -> Arc<str> {
        s.into()
    }

    /// Count the entries of `locs` whose file name equals `file`.
    fn spans_in<S, E>(locs: &[(Arc<str>, S, E)], file: &str) -> usize {
        locs.iter().filter(|(f, _, _)| &**f == file).count()
    }

    /// Three method references across two files: every span is kept and the
    /// "referenced" flag for the method is set.
    #[test]
    fn method_referenced_at_groups_spans_by_file() {
        let cb = Codebase::new();
        cb.mark_method_referenced_at("Foo", "bar", arc("a.php"), 0, 5);
        cb.mark_method_referenced_at("Foo", "bar", arc("a.php"), 10, 15);
        cb.mark_method_referenced_at("Foo", "bar", arc("b.php"), 20, 25);

        let locs = cb.get_reference_locations("Foo::bar");

        let distinct_files: std::collections::HashSet<&str> =
            locs.iter().map(|(file, _, _)| file.as_ref()).collect();
        assert_eq!(distinct_files.len(), 2, "two files, not three spans");

        let first_in_a = (arc("a.php"), 0, 5);
        let second_in_a = (arc("a.php"), 10, 15);
        let only_in_b = (arc("b.php"), 20, 25);
        assert!(locs.contains(&first_in_a));
        assert!(locs.contains(&second_in_a));
        assert_eq!(spans_in(&locs, "a.php"), 2);
        assert!(locs.contains(&only_in_b));

        let referenced = cb.is_method_referenced("Foo", "bar");
        assert!(referenced, "DashSet also updated");
    }

    /// Recording the exact same span twice leaves a single entry.
    #[test]
    fn duplicate_spans_are_deduplicated() {
        let cb = Codebase::new();
        // Same call site recorded twice (e.g. union receiver Foo|Foo)
        for _ in 0..2 {
            cb.mark_method_referenced_at("Foo", "bar", arc("a.php"), 0, 5);
        }

        let locs = cb.get_reference_locations("Foo::bar");
        assert_eq!(spans_in(&locs, "a.php"), 1, "duplicate span deduplicated");
    }

    /// A method marked as `MyMethod` is found under the lowercase key.
    #[test]
    fn method_key_is_lowercased() {
        let cb = Codebase::new();
        cb.mark_method_referenced_at("Cls", "MyMethod", arc("f.php"), 0, 3);

        let locs = cb.get_reference_locations("Cls::mymethod");
        assert!(!locs.is_empty());
    }

    /// Property references are stored under `Class::prop` and flag the
    /// property as referenced.
    #[test]
    fn property_referenced_at_records_location() {
        let cb = Codebase::new();
        cb.mark_property_referenced_at("Bar", "count", arc("x.php"), 5, 10);

        let locs = cb.get_reference_locations("Bar::count");
        assert!(locs.contains(&(arc("x.php"), 5, 10)));
        assert!(cb.is_property_referenced("Bar", "count"));
    }

    /// Function references are stored under the function name and flag the
    /// function as referenced.
    #[test]
    fn function_referenced_at_records_location() {
        let cb = Codebase::new();
        cb.mark_function_referenced_at("my_fn", arc("a.php"), 10, 15);

        let locs = cb.get_reference_locations("my_fn");
        assert!(locs.contains(&(arc("a.php"), 10, 15)));
        assert!(cb.is_function_referenced("my_fn"));
    }

    /// Class references are stored under the class name.
    #[test]
    fn class_referenced_at_records_location() {
        let cb = Codebase::new();
        cb.mark_class_referenced_at("Foo", arc("a.php"), 5, 8);

        let locs = cb.get_reference_locations("Foo");
        assert!(locs.contains(&(arc("a.php"), 5, 8)));
    }

    /// References from different files come back in one flat list.
    #[test]
    fn get_reference_locations_flattens_all_files() {
        let cb = Codebase::new();
        cb.mark_function_referenced_at("fn1", arc("a.php"), 0, 5);
        cb.mark_function_referenced_at("fn1", arc("b.php"), 10, 15);

        // Order by start offset so the comparison below is deterministic.
        let mut locs = cb.get_reference_locations("fn1");
        locs.sort_by(|lhs, rhs| lhs.1.cmp(&rhs.1));

        assert_eq!(locs.len(), 2);
        assert_eq!(locs[0], (arc("a.php"), 0, 5));
        assert_eq!(locs[1], (arc("b.php"), 10, 15));
    }

    /// Replaying saved `(symbol, start, end)` triples for a file makes them
    /// visible again through the lookup API.
    #[test]
    fn replay_reference_locations_restores_index() {
        let cb = Codebase::new();
        let saved = vec![
            (String::from("Foo::bar"), 0u32, 5u32),
            (String::from("Foo::bar"), 10, 15),
            (String::from("greet"), 20, 25),
        ];
        cb.replay_reference_locations(arc("a.php"), &saved);

        let bar_locs = cb.get_reference_locations("Foo::bar");
        assert!(bar_locs.contains(&(arc("a.php"), 0, 5)));
        assert!(bar_locs.contains(&(arc("a.php"), 10, 15)));

        let greet_locs = cb.get_reference_locations("greet");
        assert!(greet_locs.contains(&(arc("a.php"), 20, 25)));

        assert!(cb.file_has_symbol_references("a.php"));
    }

    /// Removing one file drops only that file's spans for a function symbol.
    #[test]
    fn remove_file_clears_its_spans_only() {
        let cb = Codebase::new();
        cb.mark_function_referenced_at("fn1", arc("a.php"), 0, 5);
        cb.mark_function_referenced_at("fn1", arc("b.php"), 10, 15);

        cb.remove_file_definitions("a.php");

        let locs = cb.get_reference_locations("fn1");
        let none_from_a = locs.iter().all(|(f, _, _)| f.as_ref() != "a.php");
        assert!(none_from_a, "a.php spans removed");

        let b_span_kept = locs.contains(&(arc("b.php"), 10, 15));
        assert!(b_span_kept, "b.php spans untouched");

        assert!(!cb.file_has_symbol_references("a.php"));
    }

    /// Same as above, but for a property symbol.
    #[test]
    fn remove_file_does_not_affect_other_files() {
        let cb = Codebase::new();
        cb.mark_property_referenced_at("Cls", "prop", arc("x.php"), 1, 4);
        cb.mark_property_referenced_at("Cls", "prop", arc("y.php"), 7, 10);

        cb.remove_file_definitions("x.php");

        let locs = cb.get_reference_locations("Cls::prop");
        assert!(locs.iter().all(|(f, _, _)| f.as_ref() != "x.php"));
        assert!(locs.contains(&(arc("y.php"), 7, 10)));
    }

    /// Removing a file that was never recorded leaves existing data alone.
    #[test]
    fn remove_file_definitions_on_never_analyzed_file_is_noop() {
        let cb = Codebase::new();
        cb.mark_function_referenced_at("fn1", arc("a.php"), 0, 5);

        // "ghost.php" was never analyzed — removing it must not panic or corrupt state.
        cb.remove_file_definitions("ghost.php");

        // Existing data must be untouched.
        let locs = cb.get_reference_locations("fn1");
        assert!(locs.contains(&(arc("a.php"), 0, 5)));
        assert!(!cb.file_has_symbol_references("ghost.php"));
    }

    /// Replaying an empty slice neither registers the file nor disturbs
    /// spans recorded for other files.
    #[test]
    fn replay_reference_locations_with_empty_list_is_noop() {
        let cb = Codebase::new();
        cb.mark_function_referenced_at("fn1", arc("a.php"), 0, 5);

        // Replaying an empty list must not touch existing entries.
        cb.replay_reference_locations(arc("b.php"), &[]);

        let b_registered = cb.file_has_symbol_references("b.php");
        assert!(!b_registered, "empty replay must not create a file entry");

        let a_span_kept = cb
            .get_reference_locations("fn1")
            .contains(&(arc("a.php"), 0, 5));
        assert!(a_span_kept, "existing spans untouched");
    }

    /// Replaying the same saved locations twice is idempotent.
    #[test]
    fn replay_reference_locations_twice_does_not_duplicate_spans() {
        let cb = Codebase::new();
        let saved = vec![(String::from("fn1"), 0u32, 5u32)];

        for _ in 0..2 {
            cb.replay_reference_locations(arc("a.php"), &saved);
        }

        let locs = cb.get_reference_locations("fn1");
        assert_eq!(
            spans_in(&locs, "a.php"),
            1,
            "replaying the same location twice must not create duplicate spans"
        );
    }
}