Skip to main content

mir_codebase/
codebase.rs

1use std::sync::Arc;
2
3use dashmap::{DashMap, DashSet};
4
5use crate::interner::Interner;
6
7/// Maps symbol ID → flat list of `(file_id, start_byte, end_byte)`.
8///
9/// Entries are appended during Pass 2. Duplicates (e.g. from union receivers like
10/// `Foo|Foo->method()`) are filtered at insert time. IDs come from
11/// `Codebase::symbol_interner` / `Codebase::file_interner`.
12///
13/// Compared with the previous `DashMap<u32, HashMap<u32, HashSet<(u32, u32)>>>`,
14/// this eliminates two levels of hash-map overhead (a `HashMap` per symbol and a
15/// `HashSet` per file). Each entry is now 12 bytes (`u32` × 3) with no per-entry
16/// allocator overhead beyond the `Vec` backing store.
17type ReferenceLocations = DashMap<u32, Vec<(u32, u32, u32)>>;
18
19use crate::storage::{
20    ClassStorage, EnumStorage, FunctionStorage, InterfaceStorage, MethodStorage, TraitStorage,
21};
22use mir_types::Union;
23
// ---------------------------------------------------------------------------
// Private helper — shared insert logic for reference tracking
// ---------------------------------------------------------------------------
27
28/// Append `(sym_id, file_id, start, end)` to the reference index, skipping
29/// exact duplicates so union receivers like `Foo|Foo->method()` don't inflate
30/// the span list.
31///
32/// Both maps are updated atomically under their respective DashMap shard locks.
33#[inline]
34fn record_ref(
35    sym_locs: &ReferenceLocations,
36    file_refs: &DashMap<u32, Vec<u32>>,
37    sym_id: u32,
38    file_id: u32,
39    start: u32,
40    end: u32,
41) {
42    {
43        let mut entries = sym_locs.entry(sym_id).or_default();
44        let span = (file_id, start, end);
45        if !entries.contains(&span) {
46            entries.push(span);
47        }
48    }
49    {
50        let mut refs = file_refs.entry(file_id).or_default();
51        if !refs.contains(&sym_id) {
52            refs.push(sym_id);
53        }
54    }
55}
56
// ---------------------------------------------------------------------------
// Compact CSR reference index (post-Pass-2 read-optimised form)
// ---------------------------------------------------------------------------
60
/// Compressed Sparse Row (CSR) form of the reference index, optimised for reads.
///
/// Produced by [`Codebase::compact_reference_index`] once Pass 2 is done; that
/// method clears the build-phase [`DashMap`]s after storing this structure.
///
/// The same flat `entries` array is addressable in two ways:
/// - by symbol: `entries[sym_offsets[id]..sym_offsets[id + 1]]`
/// - by file: `by_file[file_offsets[id]..file_offsets[id + 1]]`, where each
///   element of that slice is an index into `entries`
#[derive(Debug, Default)]
struct CompactRefIndex {
    /// Every reference as `(sym_id, file_id, start, end)`, sorted in that
    /// order and free of duplicates. Four `u32`s, i.e. 16 bytes per entry.
    entries: Vec<(u32, u32, u32, u32)>,
    /// Prefix-sum offsets into `entries`, indexed by `sym_id`
    /// (length = largest sym_id + 2).
    sym_offsets: Vec<u32>,
    /// Positions in `entries`, ordered by `(file_id, sym_id, start, end)`.
    /// Gives a per-file view without storing the spans a second time.
    by_file: Vec<u32>,
    /// Prefix-sum offsets into `by_file`, indexed by `file_id`
    /// (length = largest file_id + 2).
    file_offsets: Vec<u32>,
}
83
// ---------------------------------------------------------------------------
// Codebase — thread-safe global symbol registry
// ---------------------------------------------------------------------------
87
88#[derive(Debug, Default)]
89pub struct Codebase {
90    pub classes: DashMap<Arc<str>, ClassStorage>,
91    pub interfaces: DashMap<Arc<str>, InterfaceStorage>,
92    pub traits: DashMap<Arc<str>, TraitStorage>,
93    pub enums: DashMap<Arc<str>, EnumStorage>,
94    pub functions: DashMap<Arc<str>, FunctionStorage>,
95    pub constants: DashMap<Arc<str>, Union>,
96
97    /// Types of `@var`-annotated global variables, collected in Pass 1.
98    /// Key: variable name without the `$` prefix.
99    pub global_vars: DashMap<Arc<str>, Union>,
100    /// Maps file path → variable names declared with `@var` in that file.
101    /// Used by `remove_file_definitions` to purge stale entries on re-analysis.
102    file_global_vars: DashMap<Arc<str>, Vec<Arc<str>>>,
103
104    /// Methods referenced during Pass 2 — stored as interned symbol IDs.
105    /// Used by the dead-code detector (M18).
106    referenced_methods: DashSet<u32>,
107    /// Properties referenced during Pass 2 — stored as interned symbol IDs.
108    referenced_properties: DashSet<u32>,
109    /// Free functions referenced during Pass 2 — stored as interned symbol IDs.
110    referenced_functions: DashSet<u32>,
111
112    /// Interner for symbol keys (`"ClassName::method"`, `"ClassName::prop"`, FQN).
113    /// Replaces repeated `Arc<str>` copies (16 bytes) with compact `u32` IDs (4 bytes).
114    pub symbol_interner: Interner,
115    /// Interner for file paths. Same memory rationale as `symbol_interner`.
116    pub file_interner: Interner,
117
118    /// Maps symbol ID → { file ID → {(start_byte, end_byte)} }.
119    /// IDs come from `symbol_interner` / `file_interner`.
120    /// The inner HashMap groups spans by file for O(1) per-file cleanup.
121    /// HashSet deduplicates spans from union receivers (e.g. Foo|Foo->method()).
122    symbol_reference_locations: ReferenceLocations,
123    /// Reverse index: file ID → symbol IDs referenced in that file.
124    /// Used by `remove_file_definitions` to avoid a full scan of all symbols.
125    /// A `Vec` rather than `HashSet`: duplicate sym_ids are guarded at insert time
126    /// (same as `symbol_reference_locations`) for the same structural simplicity.
127    file_symbol_references: DashMap<u32, Vec<u32>>,
128
129    /// Compact CSR view of the reference index, built by `compact_reference_index()`.
130    /// When `Some`, the build-phase DashMaps above are empty and this is the
131    /// authoritative source for all reference queries.
132    compact_ref_index: std::sync::RwLock<Option<CompactRefIndex>>,
133    /// `true` iff `compact_ref_index` is `Some`. Checked atomically before
134    /// acquiring any lock, so the fast path during Pass 2 is a single load.
135    is_compacted: std::sync::atomic::AtomicBool,
136
137    /// Maps every FQCN (class, interface, trait, enum, function) to the absolute
138    /// path of the file that defines it. Populated during Pass 1.
139    pub symbol_to_file: DashMap<Arc<str>, Arc<str>>,
140
141    /// Lightweight FQCN index populated by `SymbolTable` before Pass 1.
142    /// Enables O(1) "does this symbol exist?" checks before full definitions
143    /// are available.
144    pub known_symbols: DashSet<Arc<str>>,
145
146    /// Per-file `use` alias maps: alias → FQCN.  Populated during Pass 1.
147    ///
148    /// Key: absolute file path (as `Arc<str>`).
149    /// Value: map of `alias → fully-qualified class name`.
150    ///
151    /// Exposed as `pub` so that external consumers (e.g. `php-lsp`) can read
152    /// import data that mir already collects, instead of reimplementing it.
153    pub file_imports: DashMap<Arc<str>, std::collections::HashMap<String, String>>,
154    /// Per-file current namespace (if any).  Populated during Pass 1.
155    ///
156    /// Key: absolute file path (as `Arc<str>`).
157    /// Value: the declared namespace string (e.g. `"App\\Controller"`).
158    ///
159    /// Exposed as `pub` so that external consumers (e.g. `php-lsp`) can read
160    /// namespace data that mir already collects, instead of reimplementing it.
161    pub file_namespaces: DashMap<Arc<str>, String>,
162
163    /// Whether finalize() has been called.
164    finalized: std::sync::atomic::AtomicBool,
165}
166
167impl Codebase {
168    pub fn new() -> Self {
169        Self::default()
170    }
171
    // -----------------------------------------------------------------------
    // Compact reference index
    // -----------------------------------------------------------------------
175
176    /// Convert the build-phase `DashMap` reference index into a compact CSR form.
177    ///
178    /// Call this once after Pass 2 completes on all files. The method:
179    /// 1. Drains the two build-phase `DashMap`s into a single flat `Vec`.
180    /// 2. Sorts and deduplicates entries.
181    /// 3. Builds two CSR offset arrays (by symbol and by file).
182    /// 4. Clears the `DashMap`s (freeing their allocations).
183    ///
184    /// After this call all reference queries use the compact index. Incremental
185    /// re-analysis via [`Self::re_analyze_file`] will automatically decompress the
186    /// index back into `DashMap`s on the first write, then recompact can be called
187    /// again at the end of that analysis pass.
188    pub fn compact_reference_index(&self) {
189        // Collect all entries from the build-phase DashMap.
190        let mut entries: Vec<(u32, u32, u32, u32)> = self
191            .symbol_reference_locations
192            .iter()
193            .flat_map(|entry| {
194                let sym_id = *entry.key();
195                entry
196                    .value()
197                    .iter()
198                    .map(move |&(file_id, start, end)| (sym_id, file_id, start, end))
199                    .collect::<Vec<_>>()
200            })
201            .collect();
202
203        if entries.is_empty() {
204            return;
205        }
206
207        // Sort by (sym_id, file_id, start, end) and drop exact duplicates.
208        entries.sort_unstable();
209        entries.dedup();
210
211        let n = entries.len();
212
213        // ---- Build symbol-keyed CSR offsets --------------------------------
214        let max_sym = entries.iter().map(|&(s, ..)| s).max().unwrap_or(0) as usize;
215        let mut sym_offsets = vec![0u32; max_sym + 2];
216        for &(sym_id, ..) in &entries {
217            sym_offsets[sym_id as usize + 1] += 1;
218        }
219        for i in 1..sym_offsets.len() {
220            sym_offsets[i] += sym_offsets[i - 1];
221        }
222
223        // ---- Build file-keyed indirect index --------------------------------
224        // `by_file[i]` is an index into `entries`; the slice is sorted by
225        // `(file_id, sym_id, start, end)` so CSR offsets can be computed cheaply.
226        let max_file = entries.iter().map(|&(_, f, ..)| f).max().unwrap_or(0) as usize;
227        let mut by_file: Vec<u32> = (0..n as u32).collect();
228        by_file.sort_unstable_by_key(|&i| {
229            let (sym_id, file_id, start, end) = entries[i as usize];
230            (file_id, sym_id, start, end)
231        });
232
233        let mut file_offsets = vec![0u32; max_file + 2];
234        for &idx in &by_file {
235            let file_id = entries[idx as usize].1;
236            file_offsets[file_id as usize + 1] += 1;
237        }
238        for i in 1..file_offsets.len() {
239            file_offsets[i] += file_offsets[i - 1];
240        }
241
242        *self.compact_ref_index.write().unwrap() = Some(CompactRefIndex {
243            entries,
244            sym_offsets,
245            by_file,
246            file_offsets,
247        });
248        self.is_compacted
249            .store(true, std::sync::atomic::Ordering::Release);
250
251        // Free build-phase allocations.
252        self.symbol_reference_locations.clear();
253        self.file_symbol_references.clear();
254    }
255
256    /// Decompress the compact index back into the build-phase `DashMap`s.
257    ///
258    /// Called automatically by write methods when the compact index is live.
259    /// This makes incremental re-analysis transparent: callers never need to
260    /// know whether the index is compacted or not.
261    fn ensure_expanded(&self) {
262        // Fast path: not compacted — one atomic load, no lock.
263        if !self.is_compacted.load(std::sync::atomic::Ordering::Acquire) {
264            return;
265        }
266        // Slow path: acquire write lock and decompress.
267        let mut guard = self.compact_ref_index.write().unwrap();
268        if let Some(ci) = guard.take() {
269            for &(sym_id, file_id, start, end) in &ci.entries {
270                record_ref(
271                    &self.symbol_reference_locations,
272                    &self.file_symbol_references,
273                    sym_id,
274                    file_id,
275                    start,
276                    end,
277                );
278            }
279            self.is_compacted
280                .store(false, std::sync::atomic::Ordering::Release);
281        }
282        // If another thread already decompressed (guard is now None), we're done.
283    }
284
285    /// Reset the finalization flag so that `finalize()` will run again.
286    ///
287    /// Use this when new class definitions have been added after an initial
288    /// `finalize()` call (e.g., lazily loaded via PSR-4) and the inheritance
289    /// graph needs to be rebuilt.
290    pub fn invalidate_finalization(&self) {
291        self.finalized
292            .store(false, std::sync::atomic::Ordering::SeqCst);
293    }
294
    // -----------------------------------------------------------------------
    // Incremental: remove all definitions from a single file
    // -----------------------------------------------------------------------
298
299    /// Remove all definitions and outgoing reference locations contributed by the given file.
300    /// This clears classes, interfaces, traits, enums, functions, and constants
301    /// whose defining file matches `file_path`, the file's import and namespace entries,
302    /// and all entries in symbol_reference_locations that originated from this file.
303    /// After calling this, `invalidate_finalization()` is called so the next `finalize()`
304    /// rebuilds inheritance.
305    pub fn remove_file_definitions(&self, file_path: &str) {
306        // Collect all symbols defined in this file
307        let symbols: Vec<Arc<str>> = self
308            .symbol_to_file
309            .iter()
310            .filter(|entry| entry.value().as_ref() == file_path)
311            .map(|entry| entry.key().clone())
312            .collect();
313
314        // Remove each symbol from its respective map and from symbol_to_file
315        for sym in &symbols {
316            self.classes.remove(sym.as_ref());
317            self.interfaces.remove(sym.as_ref());
318            self.traits.remove(sym.as_ref());
319            self.enums.remove(sym.as_ref());
320            self.functions.remove(sym.as_ref());
321            self.constants.remove(sym.as_ref());
322            self.symbol_to_file.remove(sym.as_ref());
323            self.known_symbols.remove(sym.as_ref());
324        }
325
326        // Remove file-level metadata
327        self.file_imports.remove(file_path);
328        self.file_namespaces.remove(file_path);
329
330        // Remove @var-annotated global variables declared in this file
331        if let Some((_, var_names)) = self.file_global_vars.remove(file_path) {
332            for name in var_names {
333                self.global_vars.remove(name.as_ref());
334            }
335        }
336
337        // Ensure the reference index is in DashMap form so the removal below works.
338        self.ensure_expanded();
339
340        // Remove reference locations contributed by this file.
341        // Use the reverse index to avoid a full scan of all symbols.
342        if let Some(file_id) = self.file_interner.get_id(file_path) {
343            if let Some((_, sym_ids)) = self.file_symbol_references.remove(&file_id) {
344                for sym_id in sym_ids {
345                    if let Some(mut entries) = self.symbol_reference_locations.get_mut(&sym_id) {
346                        entries.retain(|&(fid, _, _)| fid != file_id);
347                    }
348                }
349            }
350        }
351
352        self.invalidate_finalization();
353    }
354
    // -----------------------------------------------------------------------
    // Global variable registry
    // -----------------------------------------------------------------------
358
359    /// Record an `@var`-annotated global variable type discovered in Pass 1.
360    /// If the same variable is annotated in multiple files, the last write wins.
361    pub fn register_global_var(&self, file: &Arc<str>, name: Arc<str>, ty: Union) {
362        self.file_global_vars
363            .entry(file.clone())
364            .or_default()
365            .push(name.clone());
366        self.global_vars.insert(name, ty);
367    }
368
    // -----------------------------------------------------------------------
    // Lookups
    // -----------------------------------------------------------------------
372
373    /// Resolve a property, walking up the inheritance chain (parent classes and traits).
374    pub fn get_property(
375        &self,
376        fqcn: &str,
377        prop_name: &str,
378    ) -> Option<crate::storage::PropertyStorage> {
379        // Check direct class own_properties
380        if let Some(cls) = self.classes.get(fqcn) {
381            if let Some(p) = cls.own_properties.get(prop_name) {
382                return Some(p.clone());
383            }
384        }
385
386        // Walk all ancestors (collected during finalize)
387        let all_parents = {
388            if let Some(cls) = self.classes.get(fqcn) {
389                cls.all_parents.clone()
390            } else {
391                return None;
392            }
393        };
394
395        for ancestor_fqcn in &all_parents {
396            if let Some(ancestor_cls) = self.classes.get(ancestor_fqcn.as_ref()) {
397                if let Some(p) = ancestor_cls.own_properties.get(prop_name) {
398                    return Some(p.clone());
399                }
400            }
401        }
402
403        // Check traits
404        let trait_list = {
405            if let Some(cls) = self.classes.get(fqcn) {
406                cls.traits.clone()
407            } else {
408                vec![]
409            }
410        };
411        for trait_fqcn in &trait_list {
412            if let Some(tr) = self.traits.get(trait_fqcn.as_ref()) {
413                if let Some(p) = tr.own_properties.get(prop_name) {
414                    return Some(p.clone());
415                }
416            }
417        }
418
419        None
420    }
421
422    /// Resolve a method, walking up the inheritance chain.
423    pub fn get_method(&self, fqcn: &str, method_name: &str) -> Option<MethodStorage> {
424        // PHP method names are case-insensitive — normalize to lowercase for all lookups.
425        let method_lower = method_name.to_lowercase();
426        let method_name = method_lower.as_str();
427        // Check class methods first
428        if let Some(cls) = self.classes.get(fqcn) {
429            if let Some(m) = cls.get_method(method_name) {
430                return Some(m.clone());
431            }
432        }
433        // Check interface methods (including parent interfaces via all_parents)
434        if let Some(iface) = self.interfaces.get(fqcn) {
435            if let Some(m) = iface.own_methods.get(method_name).or_else(|| {
436                iface
437                    .own_methods
438                    .iter()
439                    .find(|(k, _)| k.as_ref().eq_ignore_ascii_case(method_name))
440                    .map(|(_, v)| v)
441            }) {
442                return Some(m.clone());
443            }
444            // Traverse parent interfaces
445            let parents = iface.all_parents.clone();
446            for parent_fqcn in &parents {
447                if let Some(parent_iface) = self.interfaces.get(parent_fqcn.as_ref()) {
448                    if let Some(m) = parent_iface.own_methods.get(method_name).or_else(|| {
449                        parent_iface
450                            .own_methods
451                            .iter()
452                            .find(|(k, _)| k.as_ref().eq_ignore_ascii_case(method_name))
453                            .map(|(_, v)| v)
454                    }) {
455                        return Some(m.clone());
456                    }
457                }
458            }
459        }
460        // Check trait methods (when a variable is annotated with a trait type)
461        if let Some(tr) = self.traits.get(fqcn) {
462            if let Some(m) = tr.own_methods.get(method_name).or_else(|| {
463                tr.own_methods
464                    .iter()
465                    .find(|(k, _)| k.as_ref().eq_ignore_ascii_case(method_name))
466                    .map(|(_, v)| v)
467            }) {
468                return Some(m.clone());
469            }
470        }
471        // Check enum methods
472        if let Some(e) = self.enums.get(fqcn) {
473            if let Some(m) = e.own_methods.get(method_name).or_else(|| {
474                e.own_methods
475                    .iter()
476                    .find(|(k, _)| k.as_ref().eq_ignore_ascii_case(method_name))
477                    .map(|(_, v)| v)
478            }) {
479                return Some(m.clone());
480            }
481            // PHP 8.1 built-in enum methods: cases(), from(), tryFrom()
482            if matches!(method_name, "cases" | "from" | "tryfrom") {
483                return Some(crate::storage::MethodStorage {
484                    fqcn: Arc::from(fqcn),
485                    name: Arc::from(method_name),
486                    params: vec![],
487                    return_type: Some(mir_types::Union::mixed()),
488                    inferred_return_type: None,
489                    visibility: crate::storage::Visibility::Public,
490                    is_static: true,
491                    is_abstract: false,
492                    is_constructor: false,
493                    template_params: vec![],
494                    assertions: vec![],
495                    throws: vec![],
496                    is_final: false,
497                    is_internal: false,
498                    is_pure: false,
499                    is_deprecated: false,
500                    location: None,
501                });
502            }
503        }
504        None
505    }
506
507    /// Returns true if `child` extends or implements `ancestor` (transitively).
508    pub fn extends_or_implements(&self, child: &str, ancestor: &str) -> bool {
509        if child == ancestor {
510            return true;
511        }
512        if let Some(cls) = self.classes.get(child) {
513            return cls.implements_or_extends(ancestor);
514        }
515        if let Some(iface) = self.interfaces.get(child) {
516            return iface.all_parents.iter().any(|p| p.as_ref() == ancestor);
517        }
518        // Enum: backed enums implicitly implement BackedEnum (and UnitEnum);
519        // pure enums implicitly implement UnitEnum.
520        if let Some(en) = self.enums.get(child) {
521            // Check explicitly declared interfaces (e.g. implements SomeInterface)
522            if en.interfaces.iter().any(|i| i.as_ref() == ancestor) {
523                return true;
524            }
525            // PHP built-in: every enum implements UnitEnum
526            if ancestor == "UnitEnum" || ancestor == "\\UnitEnum" {
527                return true;
528            }
529            // Backed enums implement BackedEnum
530            if (ancestor == "BackedEnum" || ancestor == "\\BackedEnum") && en.scalar_type.is_some()
531            {
532                return true;
533            }
534        }
535        false
536    }
537
538    /// Whether a class/interface/trait/enum with this FQCN exists.
539    pub fn type_exists(&self, fqcn: &str) -> bool {
540        self.classes.contains_key(fqcn)
541            || self.interfaces.contains_key(fqcn)
542            || self.traits.contains_key(fqcn)
543            || self.enums.contains_key(fqcn)
544    }
545
546    pub fn function_exists(&self, fqn: &str) -> bool {
547        self.functions.contains_key(fqn)
548    }
549
550    /// Returns true if the class is declared abstract.
551    /// Used to suppress `UndefinedMethod` on abstract class receivers: the concrete
552    /// subclass is expected to implement the method, matching Psalm errorLevel=3 behaviour.
553    pub fn is_abstract_class(&self, fqcn: &str) -> bool {
554        self.classes.get(fqcn).is_some_and(|c| c.is_abstract)
555    }
556
557    /// Return the declared template params for `fqcn` (class or interface), or
558    /// an empty vec if the type is not found or has no templates.
559    pub fn get_class_template_params(&self, fqcn: &str) -> Vec<crate::storage::TemplateParam> {
560        if let Some(cls) = self.classes.get(fqcn) {
561            return cls.template_params.clone();
562        }
563        if let Some(iface) = self.interfaces.get(fqcn) {
564            return iface.template_params.clone();
565        }
566        if let Some(tr) = self.traits.get(fqcn) {
567            return tr.template_params.clone();
568        }
569        vec![]
570    }
571
572    /// Returns true if the class (or any ancestor/trait) defines a `__get` magic method.
573    /// Such classes allow arbitrary property access, suppressing UndefinedProperty.
574    pub fn has_magic_get(&self, fqcn: &str) -> bool {
575        if let Some(cls) = self.classes.get(fqcn) {
576            if cls.own_methods.contains_key("__get") || cls.all_methods.contains_key("__get") {
577                return true;
578            }
579            // Check traits
580            let traits = cls.traits.clone();
581            drop(cls);
582            for tr in &traits {
583                if let Some(t) = self.traits.get(tr.as_ref()) {
584                    if t.own_methods.contains_key("__get") {
585                        return true;
586                    }
587                }
588            }
589            // Check ancestors
590            let all_parents = {
591                if let Some(c) = self.classes.get(fqcn) {
592                    c.all_parents.clone()
593                } else {
594                    vec![]
595                }
596            };
597            for ancestor in &all_parents {
598                if let Some(anc) = self.classes.get(ancestor.as_ref()) {
599                    if anc.own_methods.contains_key("__get") {
600                        return true;
601                    }
602                }
603            }
604        }
605        false
606    }
607
608    /// Returns true if the class (or any of its ancestors) has a parent/interface/trait
609    /// that is NOT present in the codebase.  Used to suppress `UndefinedMethod` false
610    /// positives: if a method might be inherited from an unscanned external class we
611    /// cannot confirm or deny its existence.
612    ///
613    /// We use the pre-computed `all_parents` list (built during finalization) rather
614    /// than recursive DashMap lookups to avoid potential deadlocks.
615    pub fn has_unknown_ancestor(&self, fqcn: &str) -> bool {
616        // For interfaces: check whether any parent interface is unknown.
617        if let Some(iface) = self.interfaces.get(fqcn) {
618            let parents = iface.all_parents.clone();
619            drop(iface);
620            for p in &parents {
621                if !self.type_exists(p.as_ref()) {
622                    return true;
623                }
624            }
625            return false;
626        }
627
628        // Clone the data we need so the DashMap ref is dropped before any further lookups.
629        let (parent, interfaces, traits, all_parents) = {
630            let Some(cls) = self.classes.get(fqcn) else {
631                return false;
632            };
633            (
634                cls.parent.clone(),
635                cls.interfaces.clone(),
636                cls.traits.clone(),
637                cls.all_parents.clone(),
638            )
639        };
640
641        // Fast path: check direct parent/interfaces/traits
642        if let Some(ref p) = parent {
643            if !self.type_exists(p.as_ref()) {
644                return true;
645            }
646        }
647        for iface in &interfaces {
648            if !self.type_exists(iface.as_ref()) {
649                return true;
650            }
651        }
652        for tr in &traits {
653            if !self.type_exists(tr.as_ref()) {
654                return true;
655            }
656        }
657
658        // Also check the full ancestor chain (pre-computed during finalization)
659        for ancestor in &all_parents {
660            if !self.type_exists(ancestor.as_ref()) {
661                return true;
662            }
663        }
664
665        false
666    }
667
668    /// Resolve a short class/function name to its FQCN using the import table
669    /// and namespace recorded for `file` during Pass 1.
670    ///
671    /// - Names already containing `\` (after stripping a leading `\`) are
672    ///   returned as-is (already fully qualified).
673    /// - `self`, `parent`, `static` are returned unchanged (caller handles them).
674    pub fn resolve_class_name(&self, file: &str, name: &str) -> String {
675        let name = name.trim_start_matches('\\');
676        if name.is_empty() {
677            return name.to_string();
678        }
679        // Fully qualified absolute paths start with '\' (already stripped above).
680        // Names containing '\' but not starting with it may be:
681        //   - Already-resolved FQCNs (e.g. Frontify\Util\Foo) — check type_exists
682        //   - Qualified relative names (e.g. Option\Some from within Frontify\Utility) — need namespace prefix
683        if name.contains('\\') {
684            // Check if the leading segment matches a use-import alias
685            let first_segment = name.split('\\').next().unwrap_or(name);
686            if let Some(imports) = self.file_imports.get(file) {
687                if let Some(resolved_prefix) = imports.get(first_segment) {
688                    let rest = &name[first_segment.len()..]; // includes leading '\'
689                    return format!("{}{}", resolved_prefix, rest);
690                }
691            }
692            // If already known in codebase as-is, it's FQCN — trust it
693            if self.type_exists(name) {
694                return name.to_string();
695            }
696            // Otherwise it's a relative qualified name — prepend the file namespace
697            if let Some(ns) = self.file_namespaces.get(file) {
698                let qualified = format!("{}\\{}", *ns, name);
699                if self.type_exists(&qualified) {
700                    return qualified;
701                }
702            }
703            return name.to_string();
704        }
705        // Built-in pseudo-types / keywords handled by the caller
706        match name {
707            "self" | "parent" | "static" | "this" => return name.to_string(),
708            _ => {}
709        }
710        // Check use aliases for this file (PHP class names are case-insensitive)
711        if let Some(imports) = self.file_imports.get(file) {
712            if let Some(resolved) = imports.get(name) {
713                return resolved.clone();
714            }
715            // Fall back to case-insensitive alias lookup
716            let name_lower = name.to_lowercase();
717            for (alias, resolved) in imports.iter() {
718                if alias.to_lowercase() == name_lower {
719                    return resolved.clone();
720                }
721            }
722        }
723        // Qualify with the file's namespace if one exists
724        if let Some(ns) = self.file_namespaces.get(file) {
725            let qualified = format!("{}\\{}", *ns, name);
726            // If the namespaced version exists in the codebase, use it.
727            // Otherwise fall back to the global (unqualified) name if that exists.
728            // This handles `DateTimeInterface`, `Exception`, etc. used without import
729            // while not overriding user-defined classes in namespaces.
730            if self.type_exists(&qualified) {
731                return qualified;
732            }
733            if self.type_exists(name) {
734                return name.to_string();
735            }
736            return qualified;
737        }
738        name.to_string()
739    }
740
741    // -----------------------------------------------------------------------
742    // Definition location lookups
743    // -----------------------------------------------------------------------
744
745    /// Look up the definition location of any symbol (class, interface, trait, enum, function).
746    /// Returns the file path and byte offsets.
747    pub fn get_symbol_location(&self, fqcn: &str) -> Option<crate::storage::Location> {
748        if let Some(cls) = self.classes.get(fqcn) {
749            return cls.location.clone();
750        }
751        if let Some(iface) = self.interfaces.get(fqcn) {
752            return iface.location.clone();
753        }
754        if let Some(tr) = self.traits.get(fqcn) {
755            return tr.location.clone();
756        }
757        if let Some(en) = self.enums.get(fqcn) {
758            return en.location.clone();
759        }
760        if let Some(func) = self.functions.get(fqcn) {
761            return func.location.clone();
762        }
763        None
764    }
765
766    /// Look up the definition location of a class member (method, property, constant).
767    pub fn get_member_location(
768        &self,
769        fqcn: &str,
770        member_name: &str,
771    ) -> Option<crate::storage::Location> {
772        // Check methods
773        if let Some(method) = self.get_method(fqcn, member_name) {
774            return method.location.clone();
775        }
776        // Check properties
777        if let Some(prop) = self.get_property(fqcn, member_name) {
778            return prop.location.clone();
779        }
780        // Check class constants
781        if let Some(cls) = self.classes.get(fqcn) {
782            if let Some(c) = cls.own_constants.get(member_name) {
783                return c.location.clone();
784            }
785        }
786        // Check interface constants
787        if let Some(iface) = self.interfaces.get(fqcn) {
788            if let Some(c) = iface.own_constants.get(member_name) {
789                return c.location.clone();
790            }
791        }
792        // Check trait constants
793        if let Some(tr) = self.traits.get(fqcn) {
794            if let Some(c) = tr.own_constants.get(member_name) {
795                return c.location.clone();
796            }
797        }
798        // Check enum constants and cases
799        if let Some(en) = self.enums.get(fqcn) {
800            if let Some(c) = en.own_constants.get(member_name) {
801                return c.location.clone();
802            }
803            if let Some(case) = en.cases.get(member_name) {
804                return case.location.clone();
805            }
806        }
807        None
808    }
809
810    // -----------------------------------------------------------------------
811    // Reference tracking (M18 dead-code detection)
812    // -----------------------------------------------------------------------
813
814    /// Mark a method as referenced from user code.
815    pub fn mark_method_referenced(&self, fqcn: &str, method_name: &str) {
816        let key = format!("{}::{}", fqcn, method_name.to_lowercase());
817        let id = self.symbol_interner.intern_str(&key);
818        self.referenced_methods.insert(id);
819    }
820
821    /// Mark a property as referenced from user code.
822    pub fn mark_property_referenced(&self, fqcn: &str, prop_name: &str) {
823        let key = format!("{}::{}", fqcn, prop_name);
824        let id = self.symbol_interner.intern_str(&key);
825        self.referenced_properties.insert(id);
826    }
827
828    /// Mark a free function as referenced from user code.
829    pub fn mark_function_referenced(&self, fqn: &str) {
830        let id = self.symbol_interner.intern_str(fqn);
831        self.referenced_functions.insert(id);
832    }
833
834    pub fn is_method_referenced(&self, fqcn: &str, method_name: &str) -> bool {
835        let key = format!("{}::{}", fqcn, method_name.to_lowercase());
836        match self.symbol_interner.get_id(&key) {
837            Some(id) => self.referenced_methods.contains(&id),
838            None => false,
839        }
840    }
841
842    pub fn is_property_referenced(&self, fqcn: &str, prop_name: &str) -> bool {
843        let key = format!("{}::{}", fqcn, prop_name);
844        match self.symbol_interner.get_id(&key) {
845            Some(id) => self.referenced_properties.contains(&id),
846            None => false,
847        }
848    }
849
850    pub fn is_function_referenced(&self, fqn: &str) -> bool {
851        match self.symbol_interner.get_id(fqn) {
852            Some(id) => self.referenced_functions.contains(&id),
853            None => false,
854        }
855    }
856
857    /// Record a method reference with its source location.
858    /// Also updates the referenced_methods DashSet for dead-code detection.
859    pub fn mark_method_referenced_at(
860        &self,
861        fqcn: &str,
862        method_name: &str,
863        file: Arc<str>,
864        start: u32,
865        end: u32,
866    ) {
867        let key = format!("{}::{}", fqcn, method_name.to_lowercase());
868        self.ensure_expanded();
869        let sym_id = self.symbol_interner.intern_str(&key);
870        let file_id = self.file_interner.intern(file);
871        self.referenced_methods.insert(sym_id);
872        record_ref(
873            &self.symbol_reference_locations,
874            &self.file_symbol_references,
875            sym_id,
876            file_id,
877            start,
878            end,
879        );
880    }
881
882    /// Record a property reference with its source location.
883    /// Also updates the referenced_properties DashSet for dead-code detection.
884    pub fn mark_property_referenced_at(
885        &self,
886        fqcn: &str,
887        prop_name: &str,
888        file: Arc<str>,
889        start: u32,
890        end: u32,
891    ) {
892        let key = format!("{}::{}", fqcn, prop_name);
893        self.ensure_expanded();
894        let sym_id = self.symbol_interner.intern_str(&key);
895        let file_id = self.file_interner.intern(file);
896        self.referenced_properties.insert(sym_id);
897        record_ref(
898            &self.symbol_reference_locations,
899            &self.file_symbol_references,
900            sym_id,
901            file_id,
902            start,
903            end,
904        );
905    }
906
907    /// Record a function reference with its source location.
908    /// Also updates the referenced_functions DashSet for dead-code detection.
909    pub fn mark_function_referenced_at(&self, fqn: &str, file: Arc<str>, start: u32, end: u32) {
910        self.ensure_expanded();
911        let sym_id = self.symbol_interner.intern_str(fqn);
912        let file_id = self.file_interner.intern(file);
913        self.referenced_functions.insert(sym_id);
914        record_ref(
915            &self.symbol_reference_locations,
916            &self.file_symbol_references,
917            sym_id,
918            file_id,
919            start,
920            end,
921        );
922    }
923
924    /// Record a class reference (e.g. `new Foo()`) with its source location.
925    /// Does not update any dead-code DashSet — class instantiation tracking is
926    /// separate from method/property/function dead-code detection.
927    pub fn mark_class_referenced_at(&self, fqcn: &str, file: Arc<str>, start: u32, end: u32) {
928        self.ensure_expanded();
929        let sym_id = self.symbol_interner.intern_str(fqcn);
930        let file_id = self.file_interner.intern(file);
931        record_ref(
932            &self.symbol_reference_locations,
933            &self.file_symbol_references,
934            sym_id,
935            file_id,
936            start,
937            end,
938        );
939    }
940
941    /// Replay cached reference locations for a file into the reference index.
942    /// Called on cache hits to avoid re-running Pass 2 just to rebuild the index.
943    /// `locs` is a slice of `(symbol_key, start_byte, end_byte)` as stored in the cache.
944    pub fn replay_reference_locations(&self, file: Arc<str>, locs: &[(String, u32, u32)]) {
945        if locs.is_empty() {
946            return;
947        }
948        self.ensure_expanded();
949        let file_id = self.file_interner.intern(file);
950        for (symbol_key, start, end) in locs {
951            let sym_id = self.symbol_interner.intern_str(symbol_key);
952            record_ref(
953                &self.symbol_reference_locations,
954                &self.file_symbol_references,
955                sym_id,
956                file_id,
957                *start,
958                *end,
959            );
960        }
961    }
962
963    /// Return all reference locations for `symbol` as a flat `Vec<(file, start, end)>`.
964    /// Returns an empty Vec if the symbol has no recorded references.
965    pub fn get_reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u32)> {
966        let Some(sym_id) = self.symbol_interner.get_id(symbol) else {
967            return Vec::new();
968        };
969        // Fast path: compact CSR index.
970        if let Some(ref ci) = *self.compact_ref_index.read().unwrap() {
971            let id = sym_id as usize;
972            if id + 1 >= ci.sym_offsets.len() {
973                return Vec::new();
974            }
975            let start = ci.sym_offsets[id] as usize;
976            let end = ci.sym_offsets[id + 1] as usize;
977            return ci.entries[start..end]
978                .iter()
979                .map(|&(_, file_id, s, e)| (self.file_interner.get(file_id), s, e))
980                .collect();
981        }
982        // Slow path: build-phase DashMap.
983        let Some(entries) = self.symbol_reference_locations.get(&sym_id) else {
984            return Vec::new();
985        };
986        entries
987            .iter()
988            .map(|&(file_id, start, end)| (self.file_interner.get(file_id), start, end))
989            .collect()
990    }
991
992    /// Extract all reference locations recorded for `file` as `(symbol_key, start, end)` triples.
993    /// Used by the cache layer to persist per-file reference data between runs.
994    pub fn extract_file_reference_locations(&self, file: &str) -> Vec<(Arc<str>, u32, u32)> {
995        let Some(file_id) = self.file_interner.get_id(file) else {
996            return Vec::new();
997        };
998        // Fast path: compact CSR index.
999        if let Some(ref ci) = *self.compact_ref_index.read().unwrap() {
1000            let id = file_id as usize;
1001            if id + 1 >= ci.file_offsets.len() {
1002                return Vec::new();
1003            }
1004            let start = ci.file_offsets[id] as usize;
1005            let end = ci.file_offsets[id + 1] as usize;
1006            return ci.by_file[start..end]
1007                .iter()
1008                .map(|&entry_idx| {
1009                    let (sym_id, _, s, e) = ci.entries[entry_idx as usize];
1010                    (self.symbol_interner.get(sym_id), s, e)
1011                })
1012                .collect();
1013        }
1014        // Slow path: build-phase DashMaps.
1015        let Some(sym_ids) = self.file_symbol_references.get(&file_id) else {
1016            return Vec::new();
1017        };
1018        let mut out = Vec::new();
1019        for &sym_id in sym_ids.iter() {
1020            let Some(entries) = self.symbol_reference_locations.get(&sym_id) else {
1021                continue;
1022            };
1023            let sym_key = self.symbol_interner.get(sym_id);
1024            for &(entry_file_id, start, end) in entries.iter() {
1025                if entry_file_id == file_id {
1026                    out.push((sym_key.clone(), start, end));
1027                }
1028            }
1029        }
1030        out
1031    }
1032
1033    /// Returns true if the given file has any recorded symbol references.
1034    pub fn file_has_symbol_references(&self, file: &str) -> bool {
1035        let Some(file_id) = self.file_interner.get_id(file) else {
1036            return false;
1037        };
1038        // Check compact index first.
1039        if let Some(ref ci) = *self.compact_ref_index.read().unwrap() {
1040            let id = file_id as usize;
1041            return id + 1 < ci.file_offsets.len() && ci.file_offsets[id] < ci.file_offsets[id + 1];
1042        }
1043        self.file_symbol_references.contains_key(&file_id)
1044    }
1045
1046    // -----------------------------------------------------------------------
1047    // Finalization
1048    // -----------------------------------------------------------------------
1049
1050    /// Must be called after all files have been parsed (pass 1 complete).
1051    /// Resolves inheritance chains and builds method dispatch tables.
1052    pub fn finalize(&self) {
1053        if self.finalized.load(std::sync::atomic::Ordering::SeqCst) {
1054            return;
1055        }
1056
1057        // 1. Resolve all_parents for classes
1058        let class_keys: Vec<Arc<str>> = self.classes.iter().map(|e| e.key().clone()).collect();
1059        for fqcn in &class_keys {
1060            let parents = self.collect_class_ancestors(fqcn);
1061            if let Some(mut cls) = self.classes.get_mut(fqcn.as_ref()) {
1062                cls.all_parents = parents;
1063            }
1064        }
1065
1066        // 2. Build method dispatch tables for classes (own methods override inherited)
1067        for fqcn in &class_keys {
1068            let all_methods = self.build_method_table(fqcn);
1069            if let Some(mut cls) = self.classes.get_mut(fqcn.as_ref()) {
1070                cls.all_methods = all_methods;
1071            }
1072        }
1073
1074        // 3. Resolve all_parents for interfaces
1075        let iface_keys: Vec<Arc<str>> = self.interfaces.iter().map(|e| e.key().clone()).collect();
1076        for fqcn in &iface_keys {
1077            let parents = self.collect_interface_ancestors(fqcn);
1078            if let Some(mut iface) = self.interfaces.get_mut(fqcn.as_ref()) {
1079                iface.all_parents = parents;
1080            }
1081        }
1082
1083        self.finalized
1084            .store(true, std::sync::atomic::Ordering::SeqCst);
1085    }
1086
1087    // -----------------------------------------------------------------------
1088    // Private helpers
1089    // -----------------------------------------------------------------------
1090
1091    fn collect_class_ancestors(&self, fqcn: &str) -> Vec<Arc<str>> {
1092        let mut result = Vec::new();
1093        let mut visited = std::collections::HashSet::new();
1094        self.collect_class_ancestors_inner(fqcn, &mut result, &mut visited);
1095        result
1096    }
1097
1098    fn collect_class_ancestors_inner(
1099        &self,
1100        fqcn: &str,
1101        out: &mut Vec<Arc<str>>,
1102        visited: &mut std::collections::HashSet<String>,
1103    ) {
1104        if !visited.insert(fqcn.to_string()) {
1105            return; // cycle guard
1106        }
1107        let (parent, interfaces, traits) = {
1108            if let Some(cls) = self.classes.get(fqcn) {
1109                (
1110                    cls.parent.clone(),
1111                    cls.interfaces.clone(),
1112                    cls.traits.clone(),
1113                )
1114            } else {
1115                return;
1116            }
1117        };
1118
1119        if let Some(p) = parent {
1120            out.push(p.clone());
1121            self.collect_class_ancestors_inner(&p, out, visited);
1122        }
1123        for iface in interfaces {
1124            out.push(iface.clone());
1125            self.collect_interface_ancestors_inner(&iface, out, visited);
1126        }
1127        for t in traits {
1128            out.push(t);
1129        }
1130    }
1131
1132    fn collect_interface_ancestors(&self, fqcn: &str) -> Vec<Arc<str>> {
1133        let mut result = Vec::new();
1134        let mut visited = std::collections::HashSet::new();
1135        self.collect_interface_ancestors_inner(fqcn, &mut result, &mut visited);
1136        result
1137    }
1138
1139    fn collect_interface_ancestors_inner(
1140        &self,
1141        fqcn: &str,
1142        out: &mut Vec<Arc<str>>,
1143        visited: &mut std::collections::HashSet<String>,
1144    ) {
1145        if !visited.insert(fqcn.to_string()) {
1146            return;
1147        }
1148        let extends = {
1149            if let Some(iface) = self.interfaces.get(fqcn) {
1150                iface.extends.clone()
1151            } else {
1152                return;
1153            }
1154        };
1155        for e in extends {
1156            out.push(e.clone());
1157            self.collect_interface_ancestors_inner(&e, out, visited);
1158        }
1159    }
1160
1161    /// Build the full method dispatch table for a class, with own methods taking
1162    /// priority over inherited ones.
1163    fn build_method_table(&self, fqcn: &str) -> indexmap::IndexMap<Arc<str>, MethodStorage> {
1164        use indexmap::IndexMap;
1165        let mut table: IndexMap<Arc<str>, MethodStorage> = IndexMap::new();
1166
1167        // Walk ancestor chain (broad-first from root → child, so child overrides root)
1168        let ancestors = {
1169            if let Some(cls) = self.classes.get(fqcn) {
1170                cls.all_parents.clone()
1171            } else {
1172                return table;
1173            }
1174        };
1175
1176        // Insert ancestor methods (deepest ancestor first, so closer ancestors override).
1177        // Also insert trait methods from ancestor classes.
1178        for ancestor_fqcn in ancestors.iter().rev() {
1179            if let Some(ancestor) = self.classes.get(ancestor_fqcn.as_ref()) {
1180                // First insert ancestor's own trait methods (lower priority)
1181                let ancestor_traits = ancestor.traits.clone();
1182                for trait_fqcn in ancestor_traits.iter().rev() {
1183                    if let Some(tr) = self.traits.get(trait_fqcn.as_ref()) {
1184                        for (name, method) in &tr.own_methods {
1185                            table.insert(name.clone(), method.clone());
1186                        }
1187                    }
1188                }
1189                // Then ancestor's own methods (override trait methods)
1190                for (name, method) in &ancestor.own_methods {
1191                    table.insert(name.clone(), method.clone());
1192                }
1193            } else if let Some(iface) = self.interfaces.get(ancestor_fqcn.as_ref()) {
1194                for (name, method) in &iface.own_methods {
1195                    // Interface methods are implicitly abstract — mark them so that
1196                    // ClassAnalyzer::check_interface_methods_implemented can detect
1197                    // a concrete class that fails to provide an implementation.
1198                    let mut m = method.clone();
1199                    m.is_abstract = true;
1200                    table.insert(name.clone(), m);
1201                }
1202            }
1203        }
1204
1205        // Insert the class's own trait methods
1206        let trait_list = {
1207            if let Some(cls) = self.classes.get(fqcn) {
1208                cls.traits.clone()
1209            } else {
1210                vec![]
1211            }
1212        };
1213        for trait_fqcn in &trait_list {
1214            if let Some(tr) = self.traits.get(trait_fqcn.as_ref()) {
1215                for (name, method) in &tr.own_methods {
1216                    table.insert(name.clone(), method.clone());
1217                }
1218            }
1219        }
1220
1221        // Own methods override everything
1222        if let Some(cls) = self.classes.get(fqcn) {
1223            for (name, method) in &cls.own_methods {
1224                table.insert(name.clone(), method.clone());
1225            }
1226        }
1227
1228        table
1229    }
1230}
1231
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for turning a string literal into an `Arc<str>`.
    fn arc(s: &str) -> Arc<str> {
        s.into()
    }

    /// Spans for one method are kept per call site, across several files.
    #[test]
    fn method_referenced_at_groups_spans_by_file() {
        let codebase = Codebase::new();
        for (file, start, end) in [("a.php", 0, 5), ("a.php", 10, 15), ("b.php", 20, 25)] {
            codebase.mark_method_referenced_at("Foo", "bar", arc(file), start, end);
        }

        let recorded = codebase.get_reference_locations("Foo::bar");
        let distinct_files: std::collections::HashSet<&str> =
            recorded.iter().map(|loc| &*loc.0).collect();
        assert_eq!(distinct_files.len(), 2, "two files, not three spans");
        assert!(recorded.contains(&(arc("a.php"), 0, 5)));
        assert!(recorded.contains(&(arc("a.php"), 10, 15)));
        let in_a = recorded.iter().filter(|loc| &*loc.0 == "a.php").count();
        assert_eq!(in_a, 2);
        assert!(recorded.contains(&(arc("b.php"), 20, 25)));
        assert!(
            codebase.is_method_referenced("Foo", "bar"),
            "DashSet also updated"
        );
    }

    /// Recording the identical span twice stores it once.
    #[test]
    fn duplicate_spans_are_deduplicated() {
        let codebase = Codebase::new();
        // The same call site reported twice, as a `Foo|Foo` union receiver would.
        for _ in 0..2 {
            codebase.mark_method_referenced_at("Foo", "bar", arc("a.php"), 0, 5);
        }

        let recorded = codebase.get_reference_locations("Foo::bar");
        let in_a = recorded.iter().filter(|loc| &*loc.0 == "a.php").count();
        assert_eq!(in_a, 1, "duplicate span deduplicated");
    }

    /// A mixed-case method name is found under its lower-cased key.
    #[test]
    fn method_key_is_lowercased() {
        let codebase = Codebase::new();
        codebase.mark_method_referenced_at("Cls", "MyMethod", arc("f.php"), 0, 3);

        let recorded = codebase.get_reference_locations("Cls::mymethod");
        assert!(!recorded.is_empty());
    }

    /// A property reference stores its span and flags the property as used.
    #[test]
    fn property_referenced_at_records_location() {
        let codebase = Codebase::new();
        codebase.mark_property_referenced_at("Bar", "count", arc("x.php"), 5, 10);

        let recorded = codebase.get_reference_locations("Bar::count");
        assert!(recorded.contains(&(arc("x.php"), 5, 10)));
        assert!(codebase.is_property_referenced("Bar", "count"));
    }

    /// A function reference stores its span and flags the function as used.
    #[test]
    fn function_referenced_at_records_location() {
        let codebase = Codebase::new();
        codebase.mark_function_referenced_at("my_fn", arc("a.php"), 10, 15);

        let recorded = codebase.get_reference_locations("my_fn");
        assert!(recorded.contains(&(arc("a.php"), 10, 15)));
        assert!(codebase.is_function_referenced("my_fn"));
    }

    /// A class reference stores its span.
    #[test]
    fn class_referenced_at_records_location() {
        let codebase = Codebase::new();
        codebase.mark_class_referenced_at("Foo", arc("a.php"), 5, 8);

        let recorded = codebase.get_reference_locations("Foo");
        assert!(recorded.contains(&(arc("a.php"), 5, 8)));
    }

    /// Spans from different files come back in one flat list.
    #[test]
    fn get_reference_locations_flattens_all_files() {
        let codebase = Codebase::new();
        codebase.mark_function_referenced_at("fn1", arc("a.php"), 0, 5);
        codebase.mark_function_referenced_at("fn1", arc("b.php"), 10, 15);

        let mut recorded = codebase.get_reference_locations("fn1");
        // Order by start offset so the comparison does not depend on storage order.
        recorded.sort_by_key(|loc| loc.1);
        assert_eq!(recorded.len(), 2);
        assert_eq!(recorded[0], (arc("a.php"), 0, 5));
        assert_eq!(recorded[1], (arc("b.php"), 10, 15));
    }

    /// Replaying cached triples makes them visible through the normal queries.
    #[test]
    fn replay_reference_locations_restores_index() {
        let codebase = Codebase::new();
        let cached = vec![
            (String::from("Foo::bar"), 0u32, 5u32),
            (String::from("Foo::bar"), 10, 15),
            (String::from("greet"), 20, 25),
        ];
        codebase.replay_reference_locations(arc("a.php"), &cached);

        let for_bar = codebase.get_reference_locations("Foo::bar");
        assert!(for_bar.contains(&(arc("a.php"), 0, 5)));
        assert!(for_bar.contains(&(arc("a.php"), 10, 15)));

        let for_greet = codebase.get_reference_locations("greet");
        assert!(for_greet.contains(&(arc("a.php"), 20, 25)));

        assert!(codebase.file_has_symbol_references("a.php"));
    }

    /// Removing a file drops its spans and leaves other files' spans alone.
    #[test]
    fn remove_file_clears_its_spans_only() {
        let codebase = Codebase::new();
        codebase.mark_function_referenced_at("fn1", arc("a.php"), 0, 5);
        codebase.mark_function_referenced_at("fn1", arc("b.php"), 10, 15);

        codebase.remove_file_definitions("a.php");

        let remaining = codebase.get_reference_locations("fn1");
        assert!(
            remaining.iter().all(|loc| &*loc.0 != "a.php"),
            "a.php spans removed"
        );
        assert!(
            remaining.contains(&(arc("b.php"), 10, 15)),
            "b.php spans untouched"
        );
        assert!(!codebase.file_has_symbol_references("a.php"));
    }

    /// Same as above, exercised through property references.
    #[test]
    fn remove_file_does_not_affect_other_files() {
        let codebase = Codebase::new();
        codebase.mark_property_referenced_at("Cls", "prop", arc("x.php"), 1, 4);
        codebase.mark_property_referenced_at("Cls", "prop", arc("y.php"), 7, 10);

        codebase.remove_file_definitions("x.php");

        let remaining = codebase.get_reference_locations("Cls::prop");
        assert!(remaining.iter().all(|loc| &*loc.0 != "x.php"));
        assert!(remaining.contains(&(arc("y.php"), 7, 10)));
    }

    /// Removing a file that was never seen neither panics nor disturbs data.
    #[test]
    fn remove_file_definitions_on_never_analyzed_file_is_noop() {
        let codebase = Codebase::new();
        codebase.mark_function_referenced_at("fn1", arc("a.php"), 0, 5);

        // "ghost.php" was never analyzed, so removing it has nothing to do.
        codebase.remove_file_definitions("ghost.php");

        // What was recorded before is still there.
        let recorded = codebase.get_reference_locations("fn1");
        assert!(recorded.contains(&(arc("a.php"), 0, 5)));
        assert!(!codebase.file_has_symbol_references("ghost.php"));
    }

    /// Replaying nothing creates nothing and leaves existing spans alone.
    #[test]
    fn replay_reference_locations_with_empty_list_is_noop() {
        let codebase = Codebase::new();
        codebase.mark_function_referenced_at("fn1", arc("a.php"), 0, 5);

        // An empty replay for a different file.
        codebase.replay_reference_locations(arc("b.php"), &[]);

        assert!(
            !codebase.file_has_symbol_references("b.php"),
            "empty replay must not create a file entry"
        );
        let recorded = codebase.get_reference_locations("fn1");
        assert!(
            recorded.contains(&(arc("a.php"), 0, 5)),
            "existing spans untouched"
        );
    }

    /// Replaying the same cached data twice is idempotent.
    #[test]
    fn replay_reference_locations_twice_does_not_duplicate_spans() {
        let codebase = Codebase::new();
        let cached = vec![(String::from("fn1"), 0u32, 5u32)];

        for _ in 0..2 {
            codebase.replay_reference_locations(arc("a.php"), &cached);
        }

        let recorded = codebase.get_reference_locations("fn1");
        let in_a = recorded.iter().filter(|loc| &*loc.0 == "a.php").count();
        assert_eq!(
            in_a, 1,
            "replaying the same location twice must not create duplicate spans"
        );
    }
}