Skip to main content

mir_codebase/
codebase.rs

1use std::sync::Arc;
2
3use dashmap::{DashMap, DashSet};
4
5use crate::interner::Interner;
6
/// Build-phase reference index: maps a symbol ID to a flat list of
/// `(file_id, start_byte, end_byte)` spans.
///
/// Entries are appended during Pass 2. Duplicates (e.g. from union receivers like
/// `Foo|Foo->method()`) are filtered at insert time by `record_ref`, which scans
/// the existing list before pushing. IDs come from
/// `Codebase::symbol_interner` / `Codebase::file_interner`.
///
/// Compared with the previous `DashMap<u32, HashMap<u32, HashSet<(u32, u32)>>>`,
/// this eliminates two levels of hash-map overhead (a `HashMap` per symbol and a
/// `HashSet` per file). Each entry is now 12 bytes (`u32` × 3) with no per-entry
/// allocator overhead beyond the `Vec` backing store.
type ReferenceLocations = DashMap<u32, Vec<(u32, u32, u32)>>;
18
19use crate::storage::{
20    ClassStorage, EnumStorage, FunctionStorage, InterfaceStorage, MethodStorage, TraitStorage,
21};
22use mir_types::Union;
23
// ---------------------------------------------------------------------------
// Private helpers — method lookup and shared insert logic for reference tracking
// ---------------------------------------------------------------------------
27
28/// Case-insensitive method lookup within a single `own_methods` map.
29///
30/// Tries an exact key match first (O(1)), then falls back to a linear
31/// case-insensitive scan for stubs that store keys in original case.
32#[inline]
33fn lookup_method<'a>(
34    map: &'a indexmap::IndexMap<Arc<str>, Arc<MethodStorage>>,
35    name: &str,
36) -> Option<&'a Arc<MethodStorage>> {
37    map.get(name).or_else(|| {
38        map.iter()
39            .find(|(k, _)| k.as_ref().eq_ignore_ascii_case(name))
40            .map(|(_, v)| v)
41    })
42}
43
44/// Append `(sym_id, file_id, start, end)` to the reference index, skipping
45/// exact duplicates so union receivers like `Foo|Foo->method()` don't inflate
46/// the span list.
47///
48/// Both maps are updated atomically under their respective DashMap shard locks.
49#[inline]
50fn record_ref(
51    sym_locs: &ReferenceLocations,
52    file_refs: &DashMap<u32, Vec<u32>>,
53    sym_id: u32,
54    file_id: u32,
55    start: u32,
56    end: u32,
57) {
58    {
59        let mut entries = sym_locs.entry(sym_id).or_default();
60        let span = (file_id, start, end);
61        if !entries.contains(&span) {
62            entries.push(span);
63        }
64    }
65    {
66        let mut refs = file_refs.entry(file_id).or_default();
67        if !refs.contains(&sym_id) {
68            refs.push(sym_id);
69        }
70    }
71}
72
73// ---------------------------------------------------------------------------
74// Compact CSR reference index (post-Pass-2 read-optimised form)
75// ---------------------------------------------------------------------------
76
/// Read-optimised Compressed Sparse Row representation of the reference index.
///
/// Built once by [`Codebase::compact_reference_index`] after Pass 2 finishes.
/// After compaction the build-phase [`DashMap`]s are cleared, freeing the
/// per-entry allocator overhead (~72 bytes per (symbol, file) pair).
///
/// Two CSR views are maintained over the same flat `entries` array:
/// - by symbol: `entries[sym_offsets[id]..sym_offsets[id+1]]`
/// - by file: `by_file[file_offsets[id]..file_offsets[id+1]]` (indirect indices)
///
/// Both offset tables are prefix sums of per-id entry counts, so an id with no
/// references yields an empty range.
#[derive(Debug, Default)]
struct CompactRefIndex {
    /// All spans sorted by `(sym_id, file_id, start, end)`, deduplicated.
    /// Each entry is 16 bytes; total size = `n_refs × 16` with no hash overhead.
    entries: Vec<(u32, u32, u32, u32)>,
    /// CSR offsets keyed by sym_id (length = max_sym_id + 2).
    sym_offsets: Vec<u32>,
    /// Indices into `entries` sorted by `(file_id, sym_id, start, end)`.
    /// Allows O(log n) file-keyed lookups without duplicating the payload.
    by_file: Vec<u32>,
    /// CSR offsets keyed by file_id into `by_file` (length = max_file_id + 2).
    file_offsets: Vec<u32>,
}
99
100// ---------------------------------------------------------------------------
101// StructuralSnapshot — inheritance data captured before file removal
102// ---------------------------------------------------------------------------
103
/// Inheritance-relevant fields of one class, as captured in a
/// [`StructuralSnapshot`].
#[derive(Debug)]
struct ClassInheritance {
    /// Direct parent class, if any.
    parent: Option<Arc<str>>,
    /// Implemented interfaces, sorted for order-insensitive comparison.
    interfaces: Vec<Arc<str>>,
    /// Used traits, sorted for order-insensitive comparison.
    traits: Vec<Arc<str>>,
    /// The class's already-computed `all_parents` list at snapshot time.
    all_parents: Vec<Arc<str>>,
}

/// Inheritance-relevant fields of one interface, as captured in a
/// [`StructuralSnapshot`].
#[derive(Debug)]
struct InterfaceInheritance {
    /// Extended interfaces, sorted for order-insensitive comparison.
    extends: Vec<Arc<str>>,
    /// The interface's already-computed `all_parents` list at snapshot time.
    all_parents: Vec<Arc<str>>,
}

/// Snapshot of the inheritance structure of all symbols defined in a file.
///
/// Produced by [`Codebase::file_structural_snapshot`] before
/// [`Codebase::remove_file_definitions`], and consumed by
/// [`Codebase::structural_unchanged_after_pass1`] /
/// [`Codebase::restore_all_parents`] to skip an expensive `finalize()` call
/// when only method bodies (not class hierarchies) changed.
///
/// Implements `Debug` so a snapshot can be logged or embedded in other
/// `Debug` types, matching the other index types in this module.
#[derive(Debug)]
pub struct StructuralSnapshot {
    /// Per-class inheritance data, keyed by class FQCN.
    classes: std::collections::HashMap<Arc<str>, ClassInheritance>,
    /// Per-interface inheritance data, keyed by interface FQCN.
    interfaces: std::collections::HashMap<Arc<str>, InterfaceInheritance>,
}
127
128// ---------------------------------------------------------------------------
129// Codebase — thread-safe global symbol registry
130// ---------------------------------------------------------------------------
131
/// Thread-safe global symbol registry shared by all analysis passes.
#[derive(Debug, Default)]
pub struct Codebase {
    pub classes: DashMap<Arc<str>, ClassStorage>,
    pub interfaces: DashMap<Arc<str>, InterfaceStorage>,
    pub traits: DashMap<Arc<str>, TraitStorage>,
    pub enums: DashMap<Arc<str>, EnumStorage>,
    pub functions: DashMap<Arc<str>, FunctionStorage>,
    pub constants: DashMap<Arc<str>, Union>,

    /// Types of `@var`-annotated global variables, collected in Pass 1.
    /// Key: variable name without the `$` prefix.
    pub global_vars: DashMap<Arc<str>, Union>,
    /// Maps file path → variable names declared with `@var` in that file.
    /// Used by `remove_file_definitions` to purge stale entries on re-analysis.
    file_global_vars: DashMap<Arc<str>, Vec<Arc<str>>>,

    /// Methods referenced during Pass 2 — stored as interned symbol IDs.
    /// Used by the dead-code detector (M18).
    referenced_methods: DashSet<u32>,
    /// Properties referenced during Pass 2 — stored as interned symbol IDs.
    referenced_properties: DashSet<u32>,
    /// Free functions referenced during Pass 2 — stored as interned symbol IDs.
    referenced_functions: DashSet<u32>,

    /// Interner for symbol keys (`"ClassName::method"`, `"ClassName::prop"`, FQN).
    /// Replaces repeated `Arc<str>` copies (16 bytes) with compact `u32` IDs (4 bytes).
    pub symbol_interner: Interner,
    /// Interner for file paths. Same memory rationale as `symbol_interner`.
    pub file_interner: Interner,

    /// Maps symbol ID → flat list of `(file_id, start_byte, end_byte)` spans.
    /// IDs come from `symbol_interner` / `file_interner`.
    /// Exact duplicate spans (e.g. from union receivers like `Foo|Foo->method()`)
    /// are skipped at insert time; per-file cleanup filters a symbol's list by
    /// `file_id`.
    symbol_reference_locations: ReferenceLocations,
    /// Reverse index: file ID → symbol IDs referenced in that file.
    /// Used by `remove_file_definitions` to avoid a full scan of all symbols.
    /// A `Vec` rather than `HashSet`: duplicate sym_ids are guarded at insert time
    /// (same as `symbol_reference_locations`) for the same structural simplicity.
    file_symbol_references: DashMap<u32, Vec<u32>>,

    /// Compact CSR view of the reference index, built by `compact_reference_index()`.
    /// When `Some`, the build-phase DashMaps above are empty and this is the
    /// authoritative source for all reference queries.
    compact_ref_index: std::sync::RwLock<Option<CompactRefIndex>>,
    /// `true` iff `compact_ref_index` is `Some`. Checked atomically before
    /// acquiring any lock, so the fast path during Pass 2 is a single load.
    is_compacted: std::sync::atomic::AtomicBool,

    /// Maps every FQCN (class, interface, trait, enum, function) to the absolute
    /// path of the file that defines it. Populated during Pass 1.
    pub symbol_to_file: DashMap<Arc<str>, Arc<str>>,

    /// Lightweight FQCN index populated by `SymbolTable` before Pass 1.
    /// Enables O(1) "does this symbol exist?" checks before full definitions
    /// are available.
    pub known_symbols: DashSet<Arc<str>>,

    /// Per-file `use` alias maps: alias → FQCN.  Populated during Pass 1.
    ///
    /// Key: absolute file path (as `Arc<str>`).
    /// Value: map of `alias → fully-qualified class name`.
    ///
    /// Exposed as `pub` so that external consumers (e.g. `php-lsp`) can read
    /// import data that mir already collects, instead of reimplementing it.
    pub file_imports: DashMap<Arc<str>, std::collections::HashMap<String, String>>,
    /// Per-file current namespace (if any).  Populated during Pass 1.
    ///
    /// Key: absolute file path (as `Arc<str>`).
    /// Value: the declared namespace string (e.g. `"App\\Controller"`).
    ///
    /// Exposed as `pub` so that external consumers (e.g. `php-lsp`) can read
    /// namespace data that mir already collects, instead of reimplementing it.
    pub file_namespaces: DashMap<Arc<str>, String>,

    /// Whether finalize() has been called.
    finalized: std::sync::atomic::AtomicBool,
}
210
211impl Codebase {
212    pub fn new() -> Self {
213        Self::default()
214    }
215
216    // -----------------------------------------------------------------------
217    // Stub injection
218    // -----------------------------------------------------------------------
219
220    /// Insert all definitions from `slice` into this codebase.
221    ///
222    /// Called by generated stub modules (`src/generated/stubs_*.rs`) to register
223    /// their pre-compiled definitions. Later insertions overwrite earlier ones,
224    /// so custom stubs loaded after PHPStorm stubs act as overrides.
225    pub fn inject_stub_slice(&self, slice: crate::storage::StubSlice) {
226        let file = slice.file.clone();
227        for cls in slice.classes {
228            if let Some(f) = &file {
229                self.symbol_to_file.insert(cls.fqcn.clone(), f.clone());
230            }
231            self.classes.insert(cls.fqcn.clone(), cls);
232        }
233        for iface in slice.interfaces {
234            if let Some(f) = &file {
235                self.symbol_to_file.insert(iface.fqcn.clone(), f.clone());
236            }
237            self.interfaces.insert(iface.fqcn.clone(), iface);
238        }
239        for tr in slice.traits {
240            if let Some(f) = &file {
241                self.symbol_to_file.insert(tr.fqcn.clone(), f.clone());
242            }
243            self.traits.insert(tr.fqcn.clone(), tr);
244        }
245        for en in slice.enums {
246            if let Some(f) = &file {
247                self.symbol_to_file.insert(en.fqcn.clone(), f.clone());
248            }
249            self.enums.insert(en.fqcn.clone(), en);
250        }
251        for func in slice.functions {
252            if let Some(f) = &file {
253                self.symbol_to_file.insert(func.fqn.clone(), f.clone());
254            }
255            self.functions.insert(func.fqn.clone(), func);
256        }
257        for (name, ty) in slice.constants {
258            self.constants.insert(name, ty);
259        }
260        if let Some(f) = &file {
261            for (name, ty) in slice.global_vars {
262                self.register_global_var(f, name, ty);
263            }
264        }
265    }
266
267    // -----------------------------------------------------------------------
268    // Compact reference index
269    // -----------------------------------------------------------------------
270
271    /// Convert the build-phase `DashMap` reference index into a compact CSR form.
272    ///
273    /// Call this once after Pass 2 completes on all files. The method:
274    /// 1. Drains the two build-phase `DashMap`s into a single flat `Vec`.
275    /// 2. Sorts and deduplicates entries.
276    /// 3. Builds two CSR offset arrays (by symbol and by file).
277    /// 4. Clears the `DashMap`s (freeing their allocations).
278    ///
279    /// After this call all reference queries use the compact index. Incremental
280    /// re-analysis via [`Self::re_analyze_file`] will automatically decompress the
281    /// index back into `DashMap`s on the first write, then recompact can be called
282    /// again at the end of that analysis pass.
283    pub fn compact_reference_index(&self) {
284        // Collect all entries from the build-phase DashMap.
285        let mut entries: Vec<(u32, u32, u32, u32)> = self
286            .symbol_reference_locations
287            .iter()
288            .flat_map(|entry| {
289                let sym_id = *entry.key();
290                entry
291                    .value()
292                    .iter()
293                    .map(move |&(file_id, start, end)| (sym_id, file_id, start, end))
294                    .collect::<Vec<_>>()
295            })
296            .collect();
297
298        if entries.is_empty() {
299            return;
300        }
301
302        // Sort by (sym_id, file_id, start, end) and drop exact duplicates.
303        entries.sort_unstable();
304        entries.dedup();
305
306        let n = entries.len();
307
308        // ---- Build symbol-keyed CSR offsets --------------------------------
309        let max_sym = entries.iter().map(|&(s, ..)| s).max().unwrap_or(0) as usize;
310        let mut sym_offsets = vec![0u32; max_sym + 2];
311        for &(sym_id, ..) in &entries {
312            sym_offsets[sym_id as usize + 1] += 1;
313        }
314        for i in 1..sym_offsets.len() {
315            sym_offsets[i] += sym_offsets[i - 1];
316        }
317
318        // ---- Build file-keyed indirect index --------------------------------
319        // `by_file[i]` is an index into `entries`; the slice is sorted by
320        // `(file_id, sym_id, start, end)` so CSR offsets can be computed cheaply.
321        let max_file = entries.iter().map(|&(_, f, ..)| f).max().unwrap_or(0) as usize;
322        let mut by_file: Vec<u32> = (0..n as u32).collect();
323        by_file.sort_unstable_by_key(|&i| {
324            let (sym_id, file_id, start, end) = entries[i as usize];
325            (file_id, sym_id, start, end)
326        });
327
328        let mut file_offsets = vec![0u32; max_file + 2];
329        for &idx in &by_file {
330            let file_id = entries[idx as usize].1;
331            file_offsets[file_id as usize + 1] += 1;
332        }
333        for i in 1..file_offsets.len() {
334            file_offsets[i] += file_offsets[i - 1];
335        }
336
337        *self.compact_ref_index.write().unwrap() = Some(CompactRefIndex {
338            entries,
339            sym_offsets,
340            by_file,
341            file_offsets,
342        });
343        self.is_compacted
344            .store(true, std::sync::atomic::Ordering::Release);
345
346        // Free build-phase allocations.
347        self.symbol_reference_locations.clear();
348        self.file_symbol_references.clear();
349    }
350
351    /// Decompress the compact index back into the build-phase `DashMap`s.
352    ///
353    /// Called automatically by write methods when the compact index is live.
354    /// This makes incremental re-analysis transparent: callers never need to
355    /// know whether the index is compacted or not.
356    fn ensure_expanded(&self) {
357        // Fast path: not compacted — one atomic load, no lock.
358        if !self.is_compacted.load(std::sync::atomic::Ordering::Acquire) {
359            return;
360        }
361        // Slow path: acquire write lock and decompress.
362        let mut guard = self.compact_ref_index.write().unwrap();
363        if let Some(ci) = guard.take() {
364            for &(sym_id, file_id, start, end) in &ci.entries {
365                record_ref(
366                    &self.symbol_reference_locations,
367                    &self.file_symbol_references,
368                    sym_id,
369                    file_id,
370                    start,
371                    end,
372                );
373            }
374            self.is_compacted
375                .store(false, std::sync::atomic::Ordering::Release);
376        }
377        // If another thread already decompressed (guard is now None), we're done.
378    }
379
380    /// Reset the finalization flag so that `finalize()` will run again.
381    ///
382    /// Use this when new class definitions have been added after an initial
383    /// `finalize()` call (e.g., lazily loaded via PSR-4) and the inheritance
384    /// graph needs to be rebuilt.
385    pub fn invalidate_finalization(&self) {
386        self.finalized
387            .store(false, std::sync::atomic::Ordering::SeqCst);
388    }
389
390    // -----------------------------------------------------------------------
391    // Incremental: remove all definitions from a single file
392    // -----------------------------------------------------------------------
393
394    /// Remove all definitions and outgoing reference locations contributed by the given file.
395    /// This clears classes, interfaces, traits, enums, functions, and constants
396    /// whose defining file matches `file_path`, the file's import and namespace entries,
397    /// and all entries in symbol_reference_locations that originated from this file.
398    /// After calling this, `invalidate_finalization()` is called so the next `finalize()`
399    /// rebuilds inheritance.
400    pub fn remove_file_definitions(&self, file_path: &str) {
401        // Collect all symbols defined in this file
402        let symbols: Vec<Arc<str>> = self
403            .symbol_to_file
404            .iter()
405            .filter(|entry| entry.value().as_ref() == file_path)
406            .map(|entry| entry.key().clone())
407            .collect();
408
409        // Remove each symbol from its respective map and from symbol_to_file
410        for sym in &symbols {
411            self.classes.remove(sym.as_ref());
412            self.interfaces.remove(sym.as_ref());
413            self.traits.remove(sym.as_ref());
414            self.enums.remove(sym.as_ref());
415            self.functions.remove(sym.as_ref());
416            self.constants.remove(sym.as_ref());
417            self.symbol_to_file.remove(sym.as_ref());
418            self.known_symbols.remove(sym.as_ref());
419        }
420
421        // Remove file-level metadata
422        self.file_imports.remove(file_path);
423        self.file_namespaces.remove(file_path);
424
425        // Remove @var-annotated global variables declared in this file
426        if let Some((_, var_names)) = self.file_global_vars.remove(file_path) {
427            for name in var_names {
428                self.global_vars.remove(name.as_ref());
429            }
430        }
431
432        // Ensure the reference index is in DashMap form so the removal below works.
433        self.ensure_expanded();
434
435        // Remove reference locations contributed by this file.
436        // Use the reverse index to avoid a full scan of all symbols.
437        if let Some(file_id) = self.file_interner.get_id(file_path) {
438            if let Some((_, sym_ids)) = self.file_symbol_references.remove(&file_id) {
439                for sym_id in sym_ids {
440                    if let Some(mut entries) = self.symbol_reference_locations.get_mut(&sym_id) {
441                        entries.retain(|&(fid, _, _)| fid != file_id);
442                    }
443                }
444            }
445        }
446
447        self.invalidate_finalization();
448    }
449
450    // -----------------------------------------------------------------------
451    // Structural snapshot — skip finalize() on body-only changes
452    // -----------------------------------------------------------------------
453
454    /// Capture the inheritance structure of all symbols defined in `file_path`.
455    ///
456    /// Call this *before* `remove_file_definitions` to preserve the data that
457    /// `finalize()` would otherwise have to recompute.  The snapshot records, for
458    /// each class/interface in the file, the fields that feed into
459    /// `all_parents` (parent class, implemented interfaces, used traits, extended
460    /// interfaces) as well as the already-computed `all_parents` list itself.
461    pub fn file_structural_snapshot(&self, file_path: &str) -> StructuralSnapshot {
462        let symbols: Vec<Arc<str>> = self
463            .symbol_to_file
464            .iter()
465            .filter(|e| e.value().as_ref() == file_path)
466            .map(|e| e.key().clone())
467            .collect();
468
469        let mut classes = std::collections::HashMap::new();
470        let mut interfaces = std::collections::HashMap::new();
471
472        for sym in symbols {
473            if let Some(cls) = self.classes.get(sym.as_ref()) {
474                let mut ifaces = cls.interfaces.clone();
475                ifaces.sort_unstable_by(|a, b| a.as_ref().cmp(b.as_ref()));
476                let mut traits = cls.traits.clone();
477                traits.sort_unstable_by(|a, b| a.as_ref().cmp(b.as_ref()));
478                classes.insert(
479                    sym,
480                    ClassInheritance {
481                        parent: cls.parent.clone(),
482                        interfaces: ifaces,
483                        traits,
484                        all_parents: cls.all_parents.clone(),
485                    },
486                );
487            } else if let Some(iface) = self.interfaces.get(sym.as_ref()) {
488                let mut extends = iface.extends.clone();
489                extends.sort_unstable_by(|a, b| a.as_ref().cmp(b.as_ref()));
490                interfaces.insert(
491                    sym,
492                    InterfaceInheritance {
493                        extends,
494                        all_parents: iface.all_parents.clone(),
495                    },
496                );
497            }
498        }
499
500        StructuralSnapshot {
501            classes,
502            interfaces,
503        }
504    }
505
506    /// After Pass 1 completes, check whether the inheritance structure in
507    /// `file_path` matches the snapshot taken before `remove_file_definitions`.
508    ///
509    /// Returns `true` if `finalize()` can be skipped — i.e. only method bodies,
510    /// properties, or annotations changed, not any class/interface hierarchy.
511    pub fn structural_unchanged_after_pass1(
512        &self,
513        file_path: &str,
514        old: &StructuralSnapshot,
515    ) -> bool {
516        let symbols: Vec<Arc<str>> = self
517            .symbol_to_file
518            .iter()
519            .filter(|e| e.value().as_ref() == file_path)
520            .map(|e| e.key().clone())
521            .collect();
522
523        let mut seen_classes = 0usize;
524        let mut seen_interfaces = 0usize;
525
526        for sym in &symbols {
527            if let Some(cls) = self.classes.get(sym.as_ref()) {
528                seen_classes += 1;
529                let Some(old_cls) = old.classes.get(sym.as_ref()) else {
530                    return false; // new class added
531                };
532                if old_cls.parent != cls.parent {
533                    return false;
534                }
535                let mut new_ifaces = cls.interfaces.clone();
536                new_ifaces.sort_unstable_by(|a, b| a.as_ref().cmp(b.as_ref()));
537                if old_cls.interfaces != new_ifaces {
538                    return false;
539                }
540                let mut new_traits = cls.traits.clone();
541                new_traits.sort_unstable_by(|a, b| a.as_ref().cmp(b.as_ref()));
542                if old_cls.traits != new_traits {
543                    return false;
544                }
545            } else if let Some(iface) = self.interfaces.get(sym.as_ref()) {
546                seen_interfaces += 1;
547                let Some(old_iface) = old.interfaces.get(sym.as_ref()) else {
548                    return false; // new interface added
549                };
550                let mut new_extends = iface.extends.clone();
551                new_extends.sort_unstable_by(|a, b| a.as_ref().cmp(b.as_ref()));
552                if old_iface.extends != new_extends {
553                    return false;
554                }
555            }
556            // Traits, enums, functions, constants: not finalization-relevant, skip.
557        }
558
559        // Check for removed classes or interfaces.
560        seen_classes == old.classes.len() && seen_interfaces == old.interfaces.len()
561    }
562
563    /// Restore `all_parents` from a snapshot and mark the codebase as finalized.
564    ///
565    /// Call this instead of `finalize()` when `structural_unchanged_after_pass1`
566    /// returns `true`.  The newly re-registered symbols (written by Pass 1) have
567    /// `all_parents = []`; this method repopulates them from the snapshot so that
568    /// all downstream lookups that depend on `all_parents` keep working correctly.
569    pub fn restore_all_parents(&self, file_path: &str, snapshot: &StructuralSnapshot) {
570        let symbols: Vec<Arc<str>> = self
571            .symbol_to_file
572            .iter()
573            .filter(|e| e.value().as_ref() == file_path)
574            .map(|e| e.key().clone())
575            .collect();
576
577        for sym in &symbols {
578            if let Some(old_cls) = snapshot.classes.get(sym.as_ref()) {
579                if let Some(mut cls) = self.classes.get_mut(sym.as_ref()) {
580                    cls.all_parents = old_cls.all_parents.clone();
581                }
582            } else if let Some(old_iface) = snapshot.interfaces.get(sym.as_ref()) {
583                if let Some(mut iface) = self.interfaces.get_mut(sym.as_ref()) {
584                    iface.all_parents = old_iface.all_parents.clone();
585                }
586            }
587        }
588
589        self.finalized
590            .store(true, std::sync::atomic::Ordering::SeqCst);
591    }
592
593    // -----------------------------------------------------------------------
594    // Global variable registry
595    // -----------------------------------------------------------------------
596
597    /// Record an `@var`-annotated global variable type discovered in Pass 1.
598    /// If the same variable is annotated in multiple files, the last write wins.
599    pub fn register_global_var(&self, file: &Arc<str>, name: Arc<str>, ty: Union) {
600        self.file_global_vars
601            .entry(file.clone())
602            .or_default()
603            .push(name.clone());
604        self.global_vars.insert(name, ty);
605    }
606
607    // -----------------------------------------------------------------------
608    // Lookups
609    // -----------------------------------------------------------------------
610
611    /// Resolve a property, walking up the inheritance chain (parent classes and traits).
612    pub fn get_property(
613        &self,
614        fqcn: &str,
615        prop_name: &str,
616    ) -> Option<crate::storage::PropertyStorage> {
617        // Check direct class own_properties
618        if let Some(cls) = self.classes.get(fqcn) {
619            if let Some(p) = cls.own_properties.get(prop_name) {
620                return Some(p.clone());
621            }
622            let mixins = cls.mixins.clone();
623            drop(cls);
624            for mixin in &mixins {
625                if let Some(p) = self.get_property(mixin.as_ref(), prop_name) {
626                    return Some(p);
627                }
628            }
629        }
630
631        // Walk all ancestors (collected during finalize)
632        let all_parents = {
633            if let Some(cls) = self.classes.get(fqcn) {
634                cls.all_parents.clone()
635            } else {
636                return None;
637            }
638        };
639
640        for ancestor_fqcn in &all_parents {
641            if let Some(ancestor_cls) = self.classes.get(ancestor_fqcn.as_ref()) {
642                if let Some(p) = ancestor_cls.own_properties.get(prop_name) {
643                    return Some(p.clone());
644                }
645            }
646        }
647
648        // Check traits
649        let trait_list = {
650            if let Some(cls) = self.classes.get(fqcn) {
651                cls.traits.clone()
652            } else {
653                vec![]
654            }
655        };
656        for trait_fqcn in &trait_list {
657            if let Some(tr) = self.traits.get(trait_fqcn.as_ref()) {
658                if let Some(p) = tr.own_properties.get(prop_name) {
659                    return Some(p.clone());
660                }
661            }
662        }
663
664        None
665    }
666
667    /// Resolve a class constant by name, walking up the inheritance chain.
668    pub fn get_class_constant(
669        &self,
670        fqcn: &str,
671        const_name: &str,
672    ) -> Option<crate::storage::ConstantStorage> {
673        // Class: own → traits → ancestors → interfaces
674        if let Some(cls) = self.classes.get(fqcn) {
675            if let Some(c) = cls.own_constants.get(const_name) {
676                return Some(c.clone());
677            }
678            let all_parents = cls.all_parents.clone();
679            let interfaces = cls.interfaces.clone();
680            let traits = cls.traits.clone();
681            drop(cls);
682
683            for tr_fqcn in &traits {
684                if let Some(tr) = self.traits.get(tr_fqcn.as_ref()) {
685                    if let Some(c) = tr.own_constants.get(const_name) {
686                        return Some(c.clone());
687                    }
688                }
689            }
690
691            for ancestor_fqcn in &all_parents {
692                if let Some(ancestor) = self.classes.get(ancestor_fqcn.as_ref()) {
693                    if let Some(c) = ancestor.own_constants.get(const_name) {
694                        return Some(c.clone());
695                    }
696                }
697                if let Some(iface) = self.interfaces.get(ancestor_fqcn.as_ref()) {
698                    if let Some(c) = iface.own_constants.get(const_name) {
699                        return Some(c.clone());
700                    }
701                }
702            }
703
704            for iface_fqcn in &interfaces {
705                if let Some(iface) = self.interfaces.get(iface_fqcn.as_ref()) {
706                    if let Some(c) = iface.own_constants.get(const_name) {
707                        return Some(c.clone());
708                    }
709                }
710            }
711
712            return None;
713        }
714
715        // Interface: own → parent interfaces
716        if let Some(iface) = self.interfaces.get(fqcn) {
717            if let Some(c) = iface.own_constants.get(const_name) {
718                return Some(c.clone());
719            }
720            let parents = iface.all_parents.clone();
721            drop(iface);
722            for p in &parents {
723                if let Some(parent_iface) = self.interfaces.get(p.as_ref()) {
724                    if let Some(c) = parent_iface.own_constants.get(const_name) {
725                        return Some(c.clone());
726                    }
727                }
728            }
729            return None;
730        }
731
732        // Enum: own constants + cases
733        if let Some(en) = self.enums.get(fqcn) {
734            if let Some(c) = en.own_constants.get(const_name) {
735                return Some(c.clone());
736            }
737            if en.cases.contains_key(const_name) {
738                return Some(crate::storage::ConstantStorage {
739                    name: Arc::from(const_name),
740                    ty: mir_types::Union::mixed(),
741                    visibility: None,
742                    location: None,
743                });
744            }
745            return None;
746        }
747
748        // Trait: own constants only
749        if let Some(tr) = self.traits.get(fqcn) {
750            if let Some(c) = tr.own_constants.get(const_name) {
751                return Some(c.clone());
752            }
753            return None;
754        }
755
756        None
757    }
758
759    /// Resolve a method, walking up the full inheritance chain (own → traits → ancestors).
760    pub fn get_method(&self, fqcn: &str, method_name: &str) -> Option<Arc<MethodStorage>> {
761        // PHP method names are case-insensitive — normalize to lowercase for all lookups.
762        let method_lower = method_name.to_lowercase();
763        let method_name = method_lower.as_str();
764
765        // --- Class: own methods → own traits → ancestor classes/traits/interfaces ---
766        if let Some(cls) = self.classes.get(fqcn) {
767            // 1. Own methods (highest priority)
768            if let Some(m) = lookup_method(&cls.own_methods, method_name) {
769                return Some(Arc::clone(m));
770            }
771            // Collect chain info before dropping the DashMap guard.
772            let own_traits = cls.traits.clone();
773            let ancestors = cls.all_parents.clone();
774            let mixins = cls.mixins.clone();
775            drop(cls);
776
777            // 2. Docblock mixins (delegated magic lookup)
778            for mixin_fqcn in &mixins {
779                if let Some(m) = self.get_method(mixin_fqcn, method_name) {
780                    return Some(m);
781                }
782            }
783
784            // 3. Own trait methods (recursive into trait-of-trait)
785            for tr_fqcn in &own_traits {
786                if let Some(m) = self.get_method_in_trait(tr_fqcn, method_name) {
787                    return Some(m);
788                }
789            }
790
791            // 4. Ancestor chain (all_parents is closest-first: parent, grandparent, …)
792            for ancestor_fqcn in &ancestors {
793                if let Some(anc) = self.classes.get(ancestor_fqcn.as_ref()) {
794                    if let Some(m) = lookup_method(&anc.own_methods, method_name) {
795                        return Some(Arc::clone(m));
796                    }
797                    let anc_traits = anc.traits.clone();
798                    drop(anc);
799                    for tr_fqcn in &anc_traits {
800                        if let Some(m) = self.get_method_in_trait(tr_fqcn, method_name) {
801                            return Some(m);
802                        }
803                    }
804                } else if let Some(iface) = self.interfaces.get(ancestor_fqcn.as_ref()) {
805                    if let Some(m) = lookup_method(&iface.own_methods, method_name) {
806                        let mut ms = (**m).clone();
807                        ms.is_abstract = true;
808                        return Some(Arc::new(ms));
809                    }
810                }
811                // Traits listed in all_parents are already covered via their owning class above.
812            }
813            return None;
814        }
815
816        // --- Interface: own methods + parent interfaces ---
817        if let Some(iface) = self.interfaces.get(fqcn) {
818            if let Some(m) = lookup_method(&iface.own_methods, method_name) {
819                return Some(Arc::clone(m));
820            }
821            let parents = iface.all_parents.clone();
822            drop(iface);
823            for parent_fqcn in &parents {
824                if let Some(parent_iface) = self.interfaces.get(parent_fqcn.as_ref()) {
825                    if let Some(m) = lookup_method(&parent_iface.own_methods, method_name) {
826                        return Some(Arc::clone(m));
827                    }
828                }
829            }
830            return None;
831        }
832
833        // --- Trait (variable annotated with a trait type) ---
834        if let Some(tr) = self.traits.get(fqcn) {
835            if let Some(m) = lookup_method(&tr.own_methods, method_name) {
836                return Some(Arc::clone(m));
837            }
838            return None;
839        }
840
841        // --- Enum ---
842        if let Some(e) = self.enums.get(fqcn) {
843            if let Some(m) = lookup_method(&e.own_methods, method_name) {
844                return Some(Arc::clone(m));
845            }
846            // PHP 8.1 built-in enum methods: cases(), from(), tryFrom()
847            if matches!(method_name, "cases" | "from" | "tryfrom") {
848                return Some(Arc::new(crate::storage::MethodStorage {
849                    fqcn: Arc::from(fqcn),
850                    name: Arc::from(method_name),
851                    params: vec![],
852                    return_type: Some(mir_types::Union::mixed()),
853                    inferred_return_type: None,
854                    visibility: crate::storage::Visibility::Public,
855                    is_static: true,
856                    is_abstract: false,
857                    is_constructor: false,
858                    template_params: vec![],
859                    assertions: vec![],
860                    throws: vec![],
861                    is_final: false,
862                    is_internal: false,
863                    is_pure: false,
864                    deprecated: None,
865                    location: None,
866                }));
867            }
868        }
869
870        None
871    }
872
873    /// Returns true if `child` extends or implements `ancestor` (transitively).
874    pub fn extends_or_implements(&self, child: &str, ancestor: &str) -> bool {
875        if child == ancestor {
876            return true;
877        }
878        if let Some(cls) = self.classes.get(child) {
879            return cls.implements_or_extends(ancestor);
880        }
881        if let Some(iface) = self.interfaces.get(child) {
882            return iface.all_parents.iter().any(|p| p.as_ref() == ancestor);
883        }
884        // Enum: backed enums implicitly implement BackedEnum (and UnitEnum);
885        // pure enums implicitly implement UnitEnum.
886        if let Some(en) = self.enums.get(child) {
887            // Check explicitly declared interfaces (e.g. implements SomeInterface)
888            if en.interfaces.iter().any(|i| i.as_ref() == ancestor) {
889                return true;
890            }
891            // PHP built-in: every enum implements UnitEnum
892            if ancestor == "UnitEnum" || ancestor == "\\UnitEnum" {
893                return true;
894            }
895            // Backed enums implement BackedEnum
896            if (ancestor == "BackedEnum" || ancestor == "\\BackedEnum") && en.scalar_type.is_some()
897            {
898                return true;
899            }
900        }
901        false
902    }
903
904    /// Whether a class/interface/trait/enum with this FQCN exists.
905    pub fn type_exists(&self, fqcn: &str) -> bool {
906        self.classes.contains_key(fqcn)
907            || self.interfaces.contains_key(fqcn)
908            || self.traits.contains_key(fqcn)
909            || self.enums.contains_key(fqcn)
910    }
911
912    pub fn function_exists(&self, fqn: &str) -> bool {
913        self.functions.contains_key(fqn)
914    }
915
916    /// Returns true if the class is declared abstract.
917    /// Used to suppress `UndefinedMethod` on abstract class receivers: the concrete
918    /// subclass is expected to implement the method, matching Psalm errorLevel=3 behaviour.
919    pub fn is_abstract_class(&self, fqcn: &str) -> bool {
920        self.classes.get(fqcn).is_some_and(|c| c.is_abstract)
921    }
922
923    /// Return the declared template params for `fqcn` (class or interface), or
924    /// an empty vec if the type is not found or has no templates.
925    pub fn get_class_template_params(&self, fqcn: &str) -> Vec<crate::storage::TemplateParam> {
926        if let Some(cls) = self.classes.get(fqcn) {
927            return cls.template_params.clone();
928        }
929        if let Some(iface) = self.interfaces.get(fqcn) {
930            return iface.template_params.clone();
931        }
932        if let Some(tr) = self.traits.get(fqcn) {
933            return tr.template_params.clone();
934        }
935        vec![]
936    }
937
938    /// Walk the parent chain collecting template bindings from `@extends` type args.
939    ///
940    /// For `class UserRepo extends BaseRepo` with `@extends BaseRepo<User>`, this returns
941    /// `{ T → User }` where `T` is `BaseRepo`'s declared template parameter.
942    pub fn get_inherited_template_bindings(
943        &self,
944        fqcn: &str,
945    ) -> std::collections::HashMap<Arc<str>, Union> {
946        let mut bindings = std::collections::HashMap::new();
947        let mut current = fqcn.to_string();
948
949        loop {
950            let (parent_fqcn, extends_type_args) = {
951                let cls = match self.classes.get(current.as_str()) {
952                    Some(c) => c,
953                    None => break,
954                };
955                let parent = match &cls.parent {
956                    Some(p) => p.clone(),
957                    None => break,
958                };
959                let args = cls.extends_type_args.clone();
960                (parent, args)
961            };
962
963            if !extends_type_args.is_empty() {
964                let parent_tps = self.get_class_template_params(&parent_fqcn);
965                for (tp, ty) in parent_tps.iter().zip(extends_type_args.iter()) {
966                    bindings
967                        .entry(tp.name.clone())
968                        .or_insert_with(|| ty.clone());
969                }
970            }
971
972            current = parent_fqcn.to_string();
973        }
974
975        bindings
976    }
977
978    /// Returns true if the class (or any ancestor/trait) defines a `__get` magic method.
979    /// Such classes allow arbitrary property access, suppressing UndefinedProperty.
980    pub fn has_magic_get(&self, fqcn: &str) -> bool {
981        self.get_method(fqcn, "__get").is_some()
982    }
983
984    /// Returns true if the class (or any of its ancestors) has a parent/interface/trait
985    /// that is NOT present in the codebase.  Used to suppress `UndefinedMethod` false
986    /// positives: if a method might be inherited from an unscanned external class we
987    /// cannot confirm or deny its existence.
988    ///
989    /// We use the pre-computed `all_parents` list (built during finalization) rather
990    /// than recursive DashMap lookups to avoid potential deadlocks.
991    pub fn has_unknown_ancestor(&self, fqcn: &str) -> bool {
992        // For interfaces: check whether any parent interface is unknown.
993        if let Some(iface) = self.interfaces.get(fqcn) {
994            let parents = iface.all_parents.clone();
995            drop(iface);
996            for p in &parents {
997                if !self.type_exists(p.as_ref()) {
998                    return true;
999                }
1000            }
1001            return false;
1002        }
1003
1004        // Clone the data we need so the DashMap ref is dropped before any further lookups.
1005        let (parent, interfaces, traits, all_parents) = {
1006            let Some(cls) = self.classes.get(fqcn) else {
1007                return false;
1008            };
1009            (
1010                cls.parent.clone(),
1011                cls.interfaces.clone(),
1012                cls.traits.clone(),
1013                cls.all_parents.clone(),
1014            )
1015        };
1016
1017        // Fast path: check direct parent/interfaces/traits
1018        if let Some(ref p) = parent {
1019            if !self.type_exists(p.as_ref()) {
1020                return true;
1021            }
1022        }
1023        for iface in &interfaces {
1024            if !self.type_exists(iface.as_ref()) {
1025                return true;
1026            }
1027        }
1028        for tr in &traits {
1029            if !self.type_exists(tr.as_ref()) {
1030                return true;
1031            }
1032        }
1033
1034        // Also check the full ancestor chain (pre-computed during finalization)
1035        for ancestor in &all_parents {
1036            if !self.type_exists(ancestor.as_ref()) {
1037                return true;
1038            }
1039        }
1040
1041        false
1042    }
1043
1044    /// Resolve a short class/function name to its FQCN using the import table
1045    /// and namespace recorded for `file` during Pass 1.
1046    ///
1047    /// - Names already containing `\` (after stripping a leading `\`) are
1048    ///   returned as-is (already fully qualified).
1049    /// - `self`, `parent`, `static` are returned unchanged (caller handles them).
1050    pub fn resolve_class_name(&self, file: &str, name: &str) -> String {
1051        let name = name.trim_start_matches('\\');
1052        if name.is_empty() {
1053            return name.to_string();
1054        }
1055        // Fully qualified absolute paths start with '\' (already stripped above).
1056        // Names containing '\' but not starting with it may be:
1057        //   - Already-resolved FQCNs (e.g. Frontify\Util\Foo) — check type_exists
1058        //   - Qualified relative names (e.g. Option\Some from within Frontify\Utility) — need namespace prefix
1059        if name.contains('\\') {
1060            // Check if the leading segment matches a use-import alias
1061            let first_segment = name.split('\\').next().unwrap_or(name);
1062            if let Some(imports) = self.file_imports.get(file) {
1063                if let Some(resolved_prefix) = imports.get(first_segment) {
1064                    let rest = &name[first_segment.len()..]; // includes leading '\'
1065                    return format!("{}{}", resolved_prefix, rest);
1066                }
1067            }
1068            // If already known in codebase as-is, it's FQCN — trust it
1069            if self.type_exists(name) {
1070                return name.to_string();
1071            }
1072            // Otherwise it's a relative qualified name — prepend the file namespace
1073            if let Some(ns) = self.file_namespaces.get(file) {
1074                let qualified = format!("{}\\{}", *ns, name);
1075                if self.type_exists(&qualified) {
1076                    return qualified;
1077                }
1078            }
1079            return name.to_string();
1080        }
1081        // Built-in pseudo-types / keywords handled by the caller
1082        match name {
1083            "self" | "parent" | "static" | "this" => return name.to_string(),
1084            _ => {}
1085        }
1086        // Check use aliases for this file (PHP class names are case-insensitive)
1087        if let Some(imports) = self.file_imports.get(file) {
1088            if let Some(resolved) = imports.get(name) {
1089                return resolved.clone();
1090            }
1091            // Fall back to case-insensitive alias lookup
1092            let name_lower = name.to_lowercase();
1093            for (alias, resolved) in imports.iter() {
1094                if alias.to_lowercase() == name_lower {
1095                    return resolved.clone();
1096                }
1097            }
1098        }
1099        // Qualify with the file's namespace if one exists
1100        if let Some(ns) = self.file_namespaces.get(file) {
1101            let qualified = format!("{}\\{}", *ns, name);
1102            // If the namespaced version exists in the codebase, use it.
1103            // Otherwise fall back to the global (unqualified) name if that exists.
1104            // This handles `DateTimeInterface`, `Exception`, etc. used without import
1105            // while not overriding user-defined classes in namespaces.
1106            if self.type_exists(&qualified) {
1107                return qualified;
1108            }
1109            if self.type_exists(name) {
1110                return name.to_string();
1111            }
1112            return qualified;
1113        }
1114        name.to_string()
1115    }
1116
1117    // -----------------------------------------------------------------------
1118    // Definition location lookups
1119    // -----------------------------------------------------------------------
1120
1121    /// Look up the definition location of any symbol (class, interface, trait, enum, function).
1122    /// Returns the file path and byte offsets.
1123    pub fn get_symbol_location(&self, fqcn: &str) -> Option<crate::storage::Location> {
1124        if let Some(cls) = self.classes.get(fqcn) {
1125            return cls.location.clone();
1126        }
1127        if let Some(iface) = self.interfaces.get(fqcn) {
1128            return iface.location.clone();
1129        }
1130        if let Some(tr) = self.traits.get(fqcn) {
1131            return tr.location.clone();
1132        }
1133        if let Some(en) = self.enums.get(fqcn) {
1134            return en.location.clone();
1135        }
1136        if let Some(func) = self.functions.get(fqcn) {
1137            return func.location.clone();
1138        }
1139        None
1140    }
1141
1142    /// Look up the definition location of a class member (method, property, constant).
1143    pub fn get_member_location(
1144        &self,
1145        fqcn: &str,
1146        member_name: &str,
1147    ) -> Option<crate::storage::Location> {
1148        // Check methods
1149        if let Some(method) = self.get_method(fqcn, member_name) {
1150            return method.location.clone();
1151        }
1152        // Check properties
1153        if let Some(prop) = self.get_property(fqcn, member_name) {
1154            return prop.location.clone();
1155        }
1156        // Check class constants
1157        if let Some(cls) = self.classes.get(fqcn) {
1158            if let Some(c) = cls.own_constants.get(member_name) {
1159                return c.location.clone();
1160            }
1161        }
1162        // Check interface constants
1163        if let Some(iface) = self.interfaces.get(fqcn) {
1164            if let Some(c) = iface.own_constants.get(member_name) {
1165                return c.location.clone();
1166            }
1167        }
1168        // Check trait constants
1169        if let Some(tr) = self.traits.get(fqcn) {
1170            if let Some(c) = tr.own_constants.get(member_name) {
1171                return c.location.clone();
1172            }
1173        }
1174        // Check enum constants and cases
1175        if let Some(en) = self.enums.get(fqcn) {
1176            if let Some(c) = en.own_constants.get(member_name) {
1177                return c.location.clone();
1178            }
1179            if let Some(case) = en.cases.get(member_name) {
1180                return case.location.clone();
1181            }
1182        }
1183        None
1184    }
1185
1186    // -----------------------------------------------------------------------
1187    // Reference tracking (M18 dead-code detection)
1188    // -----------------------------------------------------------------------
1189
1190    /// Mark a method as referenced from user code.
1191    pub fn mark_method_referenced(&self, fqcn: &str, method_name: &str) {
1192        let key = format!("{}::{}", fqcn, method_name.to_lowercase());
1193        let id = self.symbol_interner.intern_str(&key);
1194        self.referenced_methods.insert(id);
1195    }
1196
1197    /// Mark a property as referenced from user code.
1198    pub fn mark_property_referenced(&self, fqcn: &str, prop_name: &str) {
1199        let key = format!("{}::{}", fqcn, prop_name);
1200        let id = self.symbol_interner.intern_str(&key);
1201        self.referenced_properties.insert(id);
1202    }
1203
1204    /// Mark a free function as referenced from user code.
1205    pub fn mark_function_referenced(&self, fqn: &str) {
1206        let id = self.symbol_interner.intern_str(fqn);
1207        self.referenced_functions.insert(id);
1208    }
1209
1210    pub fn is_method_referenced(&self, fqcn: &str, method_name: &str) -> bool {
1211        let key = format!("{}::{}", fqcn, method_name.to_lowercase());
1212        match self.symbol_interner.get_id(&key) {
1213            Some(id) => self.referenced_methods.contains(&id),
1214            None => false,
1215        }
1216    }
1217
1218    pub fn is_property_referenced(&self, fqcn: &str, prop_name: &str) -> bool {
1219        let key = format!("{}::{}", fqcn, prop_name);
1220        match self.symbol_interner.get_id(&key) {
1221            Some(id) => self.referenced_properties.contains(&id),
1222            None => false,
1223        }
1224    }
1225
1226    pub fn is_function_referenced(&self, fqn: &str) -> bool {
1227        match self.symbol_interner.get_id(fqn) {
1228            Some(id) => self.referenced_functions.contains(&id),
1229            None => false,
1230        }
1231    }
1232
1233    /// Record a method reference with its source location.
1234    /// Also updates the referenced_methods DashSet for dead-code detection.
1235    pub fn mark_method_referenced_at(
1236        &self,
1237        fqcn: &str,
1238        method_name: &str,
1239        file: Arc<str>,
1240        start: u32,
1241        end: u32,
1242    ) {
1243        let key = format!("{}::{}", fqcn, method_name.to_lowercase());
1244        self.ensure_expanded();
1245        let sym_id = self.symbol_interner.intern_str(&key);
1246        let file_id = self.file_interner.intern(file);
1247        self.referenced_methods.insert(sym_id);
1248        record_ref(
1249            &self.symbol_reference_locations,
1250            &self.file_symbol_references,
1251            sym_id,
1252            file_id,
1253            start,
1254            end,
1255        );
1256    }
1257
1258    /// Record a property reference with its source location.
1259    /// Also updates the referenced_properties DashSet for dead-code detection.
1260    pub fn mark_property_referenced_at(
1261        &self,
1262        fqcn: &str,
1263        prop_name: &str,
1264        file: Arc<str>,
1265        start: u32,
1266        end: u32,
1267    ) {
1268        let key = format!("{}::{}", fqcn, prop_name);
1269        self.ensure_expanded();
1270        let sym_id = self.symbol_interner.intern_str(&key);
1271        let file_id = self.file_interner.intern(file);
1272        self.referenced_properties.insert(sym_id);
1273        record_ref(
1274            &self.symbol_reference_locations,
1275            &self.file_symbol_references,
1276            sym_id,
1277            file_id,
1278            start,
1279            end,
1280        );
1281    }
1282
1283    /// Record a function reference with its source location.
1284    /// Also updates the referenced_functions DashSet for dead-code detection.
1285    pub fn mark_function_referenced_at(&self, fqn: &str, file: Arc<str>, start: u32, end: u32) {
1286        self.ensure_expanded();
1287        let sym_id = self.symbol_interner.intern_str(fqn);
1288        let file_id = self.file_interner.intern(file);
1289        self.referenced_functions.insert(sym_id);
1290        record_ref(
1291            &self.symbol_reference_locations,
1292            &self.file_symbol_references,
1293            sym_id,
1294            file_id,
1295            start,
1296            end,
1297        );
1298    }
1299
1300    /// Record a class reference (e.g. `new Foo()`) with its source location.
1301    /// Does not update any dead-code DashSet — class instantiation tracking is
1302    /// separate from method/property/function dead-code detection.
1303    pub fn mark_class_referenced_at(&self, fqcn: &str, file: Arc<str>, start: u32, end: u32) {
1304        self.ensure_expanded();
1305        let sym_id = self.symbol_interner.intern_str(fqcn);
1306        let file_id = self.file_interner.intern(file);
1307        record_ref(
1308            &self.symbol_reference_locations,
1309            &self.file_symbol_references,
1310            sym_id,
1311            file_id,
1312            start,
1313            end,
1314        );
1315    }
1316
1317    /// Replay cached reference locations for a file into the reference index.
1318    /// Called on cache hits to avoid re-running Pass 2 just to rebuild the index.
1319    /// `locs` is a slice of `(symbol_key, start_byte, end_byte)` as stored in the cache.
1320    pub fn replay_reference_locations(&self, file: Arc<str>, locs: &[(String, u32, u32)]) {
1321        if locs.is_empty() {
1322            return;
1323        }
1324        self.ensure_expanded();
1325        let file_id = self.file_interner.intern(file);
1326        for (symbol_key, start, end) in locs {
1327            let sym_id = self.symbol_interner.intern_str(symbol_key);
1328            record_ref(
1329                &self.symbol_reference_locations,
1330                &self.file_symbol_references,
1331                sym_id,
1332                file_id,
1333                *start,
1334                *end,
1335            );
1336        }
1337    }
1338
1339    /// Return all reference locations for `symbol` as a flat `Vec<(file, start, end)>`.
1340    /// Returns an empty Vec if the symbol has no recorded references.
1341    pub fn get_reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u32)> {
1342        let Some(sym_id) = self.symbol_interner.get_id(symbol) else {
1343            return Vec::new();
1344        };
1345        // Fast path: compact CSR index.
1346        if let Some(ref ci) = *self.compact_ref_index.read().unwrap() {
1347            let id = sym_id as usize;
1348            if id + 1 >= ci.sym_offsets.len() {
1349                return Vec::new();
1350            }
1351            let start = ci.sym_offsets[id] as usize;
1352            let end = ci.sym_offsets[id + 1] as usize;
1353            return ci.entries[start..end]
1354                .iter()
1355                .map(|&(_, file_id, s, e)| (self.file_interner.get(file_id), s, e))
1356                .collect();
1357        }
1358        // Slow path: build-phase DashMap.
1359        let Some(entries) = self.symbol_reference_locations.get(&sym_id) else {
1360            return Vec::new();
1361        };
1362        entries
1363            .iter()
1364            .map(|&(file_id, start, end)| (self.file_interner.get(file_id), start, end))
1365            .collect()
1366    }
1367
1368    /// Extract all reference locations recorded for `file` as `(symbol_key, start, end)` triples.
1369    /// Used by the cache layer to persist per-file reference data between runs.
1370    pub fn extract_file_reference_locations(&self, file: &str) -> Vec<(Arc<str>, u32, u32)> {
1371        let Some(file_id) = self.file_interner.get_id(file) else {
1372            return Vec::new();
1373        };
1374        // Fast path: compact CSR index.
1375        if let Some(ref ci) = *self.compact_ref_index.read().unwrap() {
1376            let id = file_id as usize;
1377            if id + 1 >= ci.file_offsets.len() {
1378                return Vec::new();
1379            }
1380            let start = ci.file_offsets[id] as usize;
1381            let end = ci.file_offsets[id + 1] as usize;
1382            return ci.by_file[start..end]
1383                .iter()
1384                .map(|&entry_idx| {
1385                    let (sym_id, _, s, e) = ci.entries[entry_idx as usize];
1386                    (self.symbol_interner.get(sym_id), s, e)
1387                })
1388                .collect();
1389        }
1390        // Slow path: build-phase DashMaps.
1391        let Some(sym_ids) = self.file_symbol_references.get(&file_id) else {
1392            return Vec::new();
1393        };
1394        let mut out = Vec::new();
1395        for &sym_id in sym_ids.iter() {
1396            let Some(entries) = self.symbol_reference_locations.get(&sym_id) else {
1397                continue;
1398            };
1399            let sym_key = self.symbol_interner.get(sym_id);
1400            for &(entry_file_id, start, end) in entries.iter() {
1401                if entry_file_id == file_id {
1402                    out.push((sym_key.clone(), start, end));
1403                }
1404            }
1405        }
1406        out
1407    }
1408
1409    /// Returns true if the given file has any recorded symbol references.
1410    pub fn file_has_symbol_references(&self, file: &str) -> bool {
1411        let Some(file_id) = self.file_interner.get_id(file) else {
1412            return false;
1413        };
1414        // Check compact index first.
1415        if let Some(ref ci) = *self.compact_ref_index.read().unwrap() {
1416            let id = file_id as usize;
1417            return id + 1 < ci.file_offsets.len() && ci.file_offsets[id] < ci.file_offsets[id + 1];
1418        }
1419        self.file_symbol_references.contains_key(&file_id)
1420    }
1421
1422    // -----------------------------------------------------------------------
1423    // Finalization
1424    // -----------------------------------------------------------------------
1425
1426    /// Must be called after all files have been parsed (pass 1 complete).
1427    /// Resolves inheritance chains and builds method dispatch tables.
1428    pub fn finalize(&self) {
1429        if self.finalized.load(std::sync::atomic::Ordering::SeqCst) {
1430            return;
1431        }
1432
1433        // 1. Resolve all_parents for classes
1434        let class_keys: Vec<Arc<str>> = self.classes.iter().map(|e| e.key().clone()).collect();
1435        for fqcn in &class_keys {
1436            let parents = self.collect_class_ancestors(fqcn);
1437            if let Some(mut cls) = self.classes.get_mut(fqcn.as_ref()) {
1438                cls.all_parents = parents;
1439            }
1440        }
1441
1442        // 2. Resolve all_parents for interfaces
1443        let iface_keys: Vec<Arc<str>> = self.interfaces.iter().map(|e| e.key().clone()).collect();
1444        for fqcn in &iface_keys {
1445            let parents = self.collect_interface_ancestors(fqcn);
1446            if let Some(mut iface) = self.interfaces.get_mut(fqcn.as_ref()) {
1447                iface.all_parents = parents;
1448            }
1449        }
1450
1451        self.finalized
1452            .store(true, std::sync::atomic::Ordering::SeqCst);
1453    }
1454
1455    // -----------------------------------------------------------------------
1456    // Private helpers
1457    // -----------------------------------------------------------------------
1458
1459    /// Look up `method_name` in a trait's own methods, then recursively in any
1460    /// traits that the trait itself uses (`use OtherTrait;` inside a trait body).
1461    /// A visited set prevents infinite loops on pathological mutual trait use.
1462    fn get_method_in_trait(
1463        &self,
1464        tr_fqcn: &Arc<str>,
1465        method_name: &str,
1466    ) -> Option<Arc<MethodStorage>> {
1467        let mut visited = std::collections::HashSet::new();
1468        self.get_method_in_trait_inner(tr_fqcn, method_name, &mut visited)
1469    }
1470
1471    fn get_method_in_trait_inner(
1472        &self,
1473        tr_fqcn: &Arc<str>,
1474        method_name: &str,
1475        visited: &mut std::collections::HashSet<String>,
1476    ) -> Option<Arc<MethodStorage>> {
1477        if !visited.insert(tr_fqcn.to_string()) {
1478            return None; // cycle guard
1479        }
1480        let tr = self.traits.get(tr_fqcn.as_ref())?;
1481        if let Some(m) = lookup_method(&tr.own_methods, method_name) {
1482            return Some(Arc::clone(m));
1483        }
1484        let used_traits = tr.traits.clone();
1485        drop(tr);
1486        for used_fqcn in &used_traits {
1487            if let Some(m) = self.get_method_in_trait_inner(used_fqcn, method_name, visited) {
1488                return Some(m);
1489            }
1490        }
1491        None
1492    }
1493
1494    fn collect_class_ancestors(&self, fqcn: &str) -> Vec<Arc<str>> {
1495        let mut result = Vec::new();
1496        let mut visited = std::collections::HashSet::new();
1497        self.collect_class_ancestors_inner(fqcn, &mut result, &mut visited);
1498        result
1499    }
1500
1501    fn collect_class_ancestors_inner(
1502        &self,
1503        fqcn: &str,
1504        out: &mut Vec<Arc<str>>,
1505        visited: &mut std::collections::HashSet<String>,
1506    ) {
1507        if !visited.insert(fqcn.to_string()) {
1508            return; // cycle guard
1509        }
1510        let (parent, interfaces, traits) = {
1511            if let Some(cls) = self.classes.get(fqcn) {
1512                (
1513                    cls.parent.clone(),
1514                    cls.interfaces.clone(),
1515                    cls.traits.clone(),
1516                )
1517            } else {
1518                return;
1519            }
1520        };
1521
1522        if let Some(p) = parent {
1523            out.push(p.clone());
1524            self.collect_class_ancestors_inner(&p, out, visited);
1525        }
1526        for iface in interfaces {
1527            out.push(iface.clone());
1528            self.collect_interface_ancestors_inner(&iface, out, visited);
1529        }
1530        for t in traits {
1531            out.push(t);
1532        }
1533    }
1534
1535    fn collect_interface_ancestors(&self, fqcn: &str) -> Vec<Arc<str>> {
1536        let mut result = Vec::new();
1537        let mut visited = std::collections::HashSet::new();
1538        self.collect_interface_ancestors_inner(fqcn, &mut result, &mut visited);
1539        result
1540    }
1541
1542    fn collect_interface_ancestors_inner(
1543        &self,
1544        fqcn: &str,
1545        out: &mut Vec<Arc<str>>,
1546        visited: &mut std::collections::HashSet<String>,
1547    ) {
1548        if !visited.insert(fqcn.to_string()) {
1549            return;
1550        }
1551        let extends = {
1552            if let Some(iface) = self.interfaces.get(fqcn) {
1553                iface.extends.clone()
1554            } else {
1555                return;
1556            }
1557        };
1558        for e in extends {
1559            out.push(e.clone());
1560            self.collect_interface_ancestors_inner(&e, out, visited);
1561        }
1562    }
1563}
1564
1565// ---------------------------------------------------------------------------
1566// CodebaseBuilder — compose a finalized Codebase from per-file StubSlices
1567// ---------------------------------------------------------------------------
1568
/// Incremental builder that accumulates [`crate::storage::StubSlice`] values
/// into a fresh [`Codebase`] and finalizes it on demand.
///
/// Designed for callers (e.g. salsa queries in downstream consumers) that want
/// to treat Pass-1 definition collection as a pure function from source to
/// `StubSlice`, then compose the slices into a full codebase outside the
/// collector.
///
/// Typical use: create the builder, feed it one slice per file, then consume
/// it to obtain the finalized [`Codebase`].
pub struct CodebaseBuilder {
    /// The codebase under construction; slices are injected directly into it.
    cb: Codebase,
}
1579
1580impl CodebaseBuilder {
1581    pub fn new() -> Self {
1582        Self {
1583            cb: Codebase::new(),
1584        }
1585    }
1586
1587    /// Inject a single slice. Later injections overwrite earlier definitions
1588    /// with the same FQN, matching [`Codebase::inject_stub_slice`] semantics.
1589    pub fn add(&mut self, slice: crate::storage::StubSlice) {
1590        self.cb.inject_stub_slice(slice);
1591    }
1592
1593    /// Finalize inheritance graphs and return the built `Codebase`.
1594    pub fn finalize(self) -> Codebase {
1595        self.cb.finalize();
1596        self.cb
1597    }
1598
1599    /// Access the in-progress codebase without consuming the builder.
1600    pub fn codebase(&self) -> &Codebase {
1601        &self.cb
1602    }
1603}
1604
1605impl Default for CodebaseBuilder {
1606    fn default() -> Self {
1607        Self::new()
1608    }
1609}
1610
1611/// One-shot: build a finalized [`Codebase`] from a set of per-file slices.
1612pub fn codebase_from_parts(parts: Vec<crate::storage::StubSlice>) -> Codebase {
1613    let mut b = CodebaseBuilder::new();
1614    for p in parts {
1615        b.add(p);
1616    }
1617    b.finalize()
1618}
1619
#[cfg(test)]
mod tests {
    use super::*;
    use crate::storage::StubSlice;

    /// Shorthand for turning a string literal into an `Arc<str>`.
    fn arc(s: &str) -> Arc<str> {
        s.into()
    }

    // -----------------------------------------------------------------------
    // Reference tracking
    // -----------------------------------------------------------------------

    /// Spans recorded for one method from several files are all retrievable,
    /// and the method is also flagged as referenced.
    #[test]
    fn method_referenced_at_groups_spans_by_file() {
        let codebase = Codebase::new();
        for (file, start, end) in [("a.php", 0, 5), ("a.php", 10, 15), ("b.php", 20, 25)] {
            codebase.mark_method_referenced_at("Foo", "bar", arc(file), start, end);
        }

        let spans = codebase.get_reference_locations("Foo::bar");

        let distinct_files: std::collections::HashSet<&str> =
            spans.iter().map(|(file, _, _)| file.as_ref()).collect();
        assert_eq!(distinct_files.len(), 2, "two files, not three spans");

        assert!(spans.contains(&(arc("a.php"), 0, 5)));
        assert!(spans.contains(&(arc("a.php"), 10, 15)));
        let spans_in_a = spans
            .iter()
            .filter(|(file, _, _)| file.as_ref() == "a.php")
            .count();
        assert_eq!(spans_in_a, 2);
        assert!(spans.contains(&(arc("b.php"), 20, 25)));
        assert!(
            codebase.is_method_referenced("Foo", "bar"),
            "DashSet also updated"
        );
    }

    /// Recording the identical span twice yields a single stored span.
    #[test]
    fn duplicate_spans_are_deduplicated() {
        let codebase = Codebase::new();
        // Same call site recorded twice (e.g. union receiver Foo|Foo)
        for _ in 0..2 {
            codebase.mark_method_referenced_at("Foo", "bar", arc("a.php"), 0, 5);
        }

        let spans = codebase.get_reference_locations("Foo::bar");
        let hits = spans
            .iter()
            .filter(|(file, _, _)| file.as_ref() == "a.php")
            .count();
        assert_eq!(hits, 1, "duplicate span deduplicated");
    }

    /// A method recorded with mixed case is found under its lowercased key.
    #[test]
    fn method_key_is_lowercased() {
        let codebase = Codebase::new();
        codebase.mark_method_referenced_at("Cls", "MyMethod", arc("f.php"), 0, 3);

        let spans = codebase.get_reference_locations("Cls::mymethod");
        assert!(!spans.is_empty());
    }

    /// Property references are stored under `Class::prop` and flag the property.
    #[test]
    fn property_referenced_at_records_location() {
        let codebase = Codebase::new();
        codebase.mark_property_referenced_at("Bar", "count", arc("x.php"), 5, 10);

        let spans = codebase.get_reference_locations("Bar::count");
        assert!(spans.contains(&(arc("x.php"), 5, 10)));
        assert!(codebase.is_property_referenced("Bar", "count"));
    }

    /// Function references are stored under the function name and flag the function.
    #[test]
    fn function_referenced_at_records_location() {
        let codebase = Codebase::new();
        codebase.mark_function_referenced_at("my_fn", arc("a.php"), 10, 15);

        let spans = codebase.get_reference_locations("my_fn");
        assert!(spans.contains(&(arc("a.php"), 10, 15)));
        assert!(codebase.is_function_referenced("my_fn"));
    }

    /// Class references are stored under the class name.
    #[test]
    fn class_referenced_at_records_location() {
        let codebase = Codebase::new();
        codebase.mark_class_referenced_at("Foo", arc("a.php"), 5, 8);

        let spans = codebase.get_reference_locations("Foo");
        assert!(spans.contains(&(arc("a.php"), 5, 8)));
    }

    /// Spans from different files come back together in one flat list.
    #[test]
    fn get_reference_locations_flattens_all_files() {
        let codebase = Codebase::new();
        codebase.mark_function_referenced_at("fn1", arc("a.php"), 0, 5);
        codebase.mark_function_referenced_at("fn1", arc("b.php"), 10, 15);

        // Order by start offset so the comparison below is deterministic.
        let mut spans = codebase.get_reference_locations("fn1");
        spans.sort_by_key(|(_, start, _)| *start);

        assert_eq!(spans.len(), 2);
        assert_eq!(spans[0], (arc("a.php"), 0, 5));
        assert_eq!(spans[1], (arc("b.php"), 10, 15));
    }

    /// Replaying recorded locations for a file makes them queryable again.
    #[test]
    fn replay_reference_locations_restores_index() {
        let codebase = Codebase::new();
        let recorded = vec![
            (String::from("Foo::bar"), 0u32, 5u32),
            (String::from("Foo::bar"), 10, 15),
            (String::from("greet"), 20, 25),
        ];
        codebase.replay_reference_locations(arc("a.php"), &recorded);

        let bar_spans = codebase.get_reference_locations("Foo::bar");
        assert!(bar_spans.contains(&(arc("a.php"), 0, 5)));
        assert!(bar_spans.contains(&(arc("a.php"), 10, 15)));

        let greet_spans = codebase.get_reference_locations("greet");
        assert!(greet_spans.contains(&(arc("a.php"), 20, 25)));

        assert!(codebase.file_has_symbol_references("a.php"));
    }

    /// Removing a file drops that file's spans and leaves other files' spans.
    #[test]
    fn remove_file_clears_its_spans_only() {
        let codebase = Codebase::new();
        codebase.mark_function_referenced_at("fn1", arc("a.php"), 0, 5);
        codebase.mark_function_referenced_at("fn1", arc("b.php"), 10, 15);

        codebase.remove_file_definitions("a.php");

        let remaining = codebase.get_reference_locations("fn1");
        let a_still_present = remaining
            .iter()
            .any(|(file, _, _)| file.as_ref() == "a.php");
        assert!(!a_still_present, "a.php spans removed");
        assert!(
            remaining.contains(&(arc("b.php"), 10, 15)),
            "b.php spans untouched"
        );
        assert!(!codebase.file_has_symbol_references("a.php"));
    }

    /// Same as above, exercised through property references.
    #[test]
    fn remove_file_does_not_affect_other_files() {
        let codebase = Codebase::new();
        codebase.mark_property_referenced_at("Cls", "prop", arc("x.php"), 1, 4);
        codebase.mark_property_referenced_at("Cls", "prop", arc("y.php"), 7, 10);

        codebase.remove_file_definitions("x.php");

        let remaining = codebase.get_reference_locations("Cls::prop");
        let x_still_present = remaining
            .iter()
            .any(|(file, _, _)| file.as_ref() == "x.php");
        assert!(!x_still_present);
        assert!(remaining.contains(&(arc("y.php"), 7, 10)));
    }

    /// Removing a file that was never seen leaves existing data alone.
    #[test]
    fn remove_file_definitions_on_never_analyzed_file_is_noop() {
        let codebase = Codebase::new();
        codebase.mark_function_referenced_at("fn1", arc("a.php"), 0, 5);

        // "ghost.php" was never analyzed; removing it must not panic or corrupt state.
        codebase.remove_file_definitions("ghost.php");

        // Existing data must be untouched.
        let spans = codebase.get_reference_locations("fn1");
        assert!(spans.contains(&(arc("a.php"), 0, 5)));
        assert!(!codebase.file_has_symbol_references("ghost.php"));
    }

    /// Replaying nothing for a file neither registers the file nor disturbs others.
    #[test]
    fn replay_reference_locations_with_empty_list_is_noop() {
        let codebase = Codebase::new();
        codebase.mark_function_referenced_at("fn1", arc("a.php"), 0, 5);

        // Replaying an empty list must not touch existing entries.
        codebase.replay_reference_locations(arc("b.php"), &[]);

        let b_registered = codebase.file_has_symbol_references("b.php");
        assert!(!b_registered, "empty replay must not create a file entry");
        assert!(
            codebase
                .get_reference_locations("fn1")
                .contains(&(arc("a.php"), 0, 5)),
            "existing spans untouched"
        );
    }

    /// Replaying the same locations twice stores each span once.
    #[test]
    fn replay_reference_locations_twice_does_not_duplicate_spans() {
        let codebase = Codebase::new();
        let recorded = vec![(String::from("fn1"), 0u32, 5u32)];

        for _ in 0..2 {
            codebase.replay_reference_locations(arc("a.php"), &recorded);
        }

        let spans = codebase.get_reference_locations("fn1");
        let hits = spans
            .iter()
            .filter(|(file, _, _)| file.as_ref() == "a.php")
            .count();
        assert_eq!(
            hits, 1,
            "replaying the same location twice must not create duplicate spans"
        );
    }

    // -----------------------------------------------------------------------
    // inject_stub_slice — correctness-critical tests
    // -----------------------------------------------------------------------

    /// Build a minimal `FunctionStorage` carrying only the two given names;
    /// every other field is empty / `None` / `false`.
    fn make_fn(fqn: &str, short_name: &str) -> crate::storage::FunctionStorage {
        crate::storage::FunctionStorage {
            fqn: arc(fqn),
            short_name: arc(short_name),
            params: Vec::new(),
            return_type: None,
            inferred_return_type: None,
            template_params: Vec::new(),
            assertions: Vec::new(),
            throws: Vec::new(),
            deprecated: None,
            is_pure: false,
            location: None,
        }
    }

    /// A second slice defining the same function replaces the first definition
    /// and re-points `symbol_to_file` at the second slice's file.
    #[test]
    fn inject_stub_slice_later_injection_overwrites_earlier() {
        let codebase = Codebase::new();

        let phpstorm_slice = StubSlice {
            functions: vec![make_fn("strlen", "phpstorm_version")],
            file: Some(arc("phpstorm/standard.php")),
            ..StubSlice::default()
        };
        codebase.inject_stub_slice(phpstorm_slice);
        // Map lookups are kept as temporaries inside the assertion so no map
        // guard is still alive when the next slice is injected.
        assert_eq!(
            codebase.functions.get("strlen").unwrap().short_name.as_ref(),
            "phpstorm_version"
        );

        let custom_slice = StubSlice {
            functions: vec![make_fn("strlen", "custom_version")],
            file: Some(arc("stubs/standard/basic.php")),
            ..StubSlice::default()
        };
        codebase.inject_stub_slice(custom_slice);

        assert_eq!(
            codebase.functions.get("strlen").unwrap().short_name.as_ref(),
            "custom_version",
            "custom stub must overwrite phpstorm stub"
        );
        assert_eq!(
            codebase.symbol_to_file.get("strlen").unwrap().as_ref(),
            "stubs/standard/basic.php",
            "symbol_to_file must point to the overriding file"
        );
    }

    /// Constants land in the constants map but are not tracked in `symbol_to_file`.
    #[test]
    fn inject_stub_slice_constants_not_added_to_symbol_to_file() {
        let codebase = Codebase::new();

        let slice = StubSlice {
            constants: vec![(arc("PHP_EOL"), mir_types::Union::empty())],
            file: Some(arc("stubs/core/constants.php")),
            ..StubSlice::default()
        };
        codebase.inject_stub_slice(slice);

        let in_constants = codebase.constants.contains_key("PHP_EOL");
        assert!(
            in_constants,
            "constant must be registered in constants map"
        );
        let in_symbol_map = codebase.symbol_to_file.contains_key("PHP_EOL");
        assert!(
            !in_symbol_map,
            "constants must not appear in symbol_to_file — go-to-definition is not supported for them"
        );
    }

    /// Global variables injected from a file disappear when that file is removed.
    #[test]
    fn remove_file_definitions_purges_injected_global_vars() {
        let codebase = Codebase::new();

        let slice = StubSlice {
            global_vars: vec![(arc("db_connection"), mir_types::Union::empty())],
            file: Some(arc("src/bootstrap.php")),
            ..StubSlice::default()
        };
        codebase.inject_stub_slice(slice);
        let present_after_inject = codebase.global_vars.contains_key("db_connection");
        assert!(
            present_after_inject,
            "global var must be registered after injection"
        );

        codebase.remove_file_definitions("src/bootstrap.php");

        let present_after_remove = codebase.global_vars.contains_key("db_connection");
        assert!(
            !present_after_remove,
            "global var must be removed when its defining file is removed"
        );
    }

    /// A slice with no file does not register its global variables.
    #[test]
    fn inject_stub_slice_without_file_discards_global_vars() {
        let codebase = Codebase::new();

        let slice = StubSlice {
            global_vars: vec![(arc("orphan_var"), mir_types::Union::empty())],
            file: None,
            ..StubSlice::default()
        };
        codebase.inject_stub_slice(slice);

        let registered = codebase.global_vars.contains_key("orphan_var");
        assert!(
            !registered,
            "global_vars must not be registered when slice.file is None"
        );
    }
}