Skip to main content

mir_codebase/
codebase.rs

1use std::collections::{HashMap, HashSet};
2use std::sync::Arc;
3
4use dashmap::{DashMap, DashSet};
5
/// Reference index: symbol key → { file_path → {(start_byte, end_byte)} }.
///
/// This is the type of the `Codebase::symbol_reference_locations` field. The
/// outer map is a concurrent `DashMap`; the per-symbol inner map and the span
/// sets are plain `HashMap` / `HashSet` values stored inside it.
type ReferenceLocations = DashMap<Arc<str>, HashMap<Arc<str>, HashSet<(u32, u32)>>>;
9
10use crate::storage::{
11    ClassStorage, EnumStorage, FunctionStorage, InterfaceStorage, MethodStorage, TraitStorage,
12};
13use mir_types::Union;
14
15// ---------------------------------------------------------------------------
16// Codebase — thread-safe global symbol registry
17// ---------------------------------------------------------------------------
18
19#[derive(Debug, Default)]
20pub struct Codebase {
21    pub classes: DashMap<Arc<str>, ClassStorage>,
22    pub interfaces: DashMap<Arc<str>, InterfaceStorage>,
23    pub traits: DashMap<Arc<str>, TraitStorage>,
24    pub enums: DashMap<Arc<str>, EnumStorage>,
25    pub functions: DashMap<Arc<str>, FunctionStorage>,
26    pub constants: DashMap<Arc<str>, Union>,
27
28    /// Types of `@var`-annotated global variables, collected in Pass 1.
29    /// Key: variable name without the `$` prefix.
30    pub global_vars: DashMap<Arc<str>, Union>,
31    /// Maps file path → variable names declared with `@var` in that file.
32    /// Used by `remove_file_definitions` to purge stale entries on re-analysis.
33    file_global_vars: DashMap<Arc<str>, Vec<Arc<str>>>,
34
35    /// Methods referenced during Pass 2 — key format: `"ClassName::methodName"`.
36    /// Used by the dead-code detector (M18).
37    pub referenced_methods: DashSet<Arc<str>>,
38    /// Properties referenced during Pass 2 — key format: `"ClassName::propName"`.
39    pub referenced_properties: DashSet<Arc<str>>,
40    /// Free functions referenced during Pass 2 — key: fully-qualified name.
41    pub referenced_functions: DashSet<Arc<str>>,
42
43    /// Maps symbol key → { file_path → {(start_byte, end_byte)} }.
44    /// Key format mirrors referenced_methods / referenced_properties / referenced_functions.
45    /// The inner HashMap groups all spans from the same file under a single key,
46    /// avoiding Arc<str> duplication per span and enabling O(1) per-file cleanup.
47    /// HashSet deduplicates spans from union receivers (e.g. Foo|Foo->method()).
48    pub symbol_reference_locations: ReferenceLocations,
49    /// Reverse index: file_path → unique symbol keys referenced in that file.
50    /// Used by remove_file_definitions for O(1) cleanup without a full map scan.
51    pub file_symbol_references: DashMap<Arc<str>, HashSet<Arc<str>>>,
52
53    /// Maps every FQCN (class, interface, trait, enum, function) to the absolute
54    /// path of the file that defines it. Populated during Pass 1.
55    pub symbol_to_file: DashMap<Arc<str>, Arc<str>>,
56
57    /// Lightweight FQCN index populated by `SymbolTable` before Pass 1.
58    /// Enables O(1) "does this symbol exist?" checks before full definitions
59    /// are available.
60    pub known_symbols: DashSet<Arc<str>>,
61
62    /// Per-file `use` alias maps: alias → FQCN.  Populated during Pass 1.
63    ///
64    /// Key: absolute file path (as `Arc<str>`).
65    /// Value: map of `alias → fully-qualified class name`.
66    ///
67    /// Exposed as `pub` so that external consumers (e.g. `php-lsp`) can read
68    /// import data that mir already collects, instead of reimplementing it.
69    pub file_imports: DashMap<Arc<str>, std::collections::HashMap<String, String>>,
70    /// Per-file current namespace (if any).  Populated during Pass 1.
71    ///
72    /// Key: absolute file path (as `Arc<str>`).
73    /// Value: the declared namespace string (e.g. `"App\\Controller"`).
74    ///
75    /// Exposed as `pub` so that external consumers (e.g. `php-lsp`) can read
76    /// namespace data that mir already collects, instead of reimplementing it.
77    pub file_namespaces: DashMap<Arc<str>, String>,
78
79    /// Whether finalize() has been called.
80    finalized: std::sync::atomic::AtomicBool,
81}
82
83impl Codebase {
84    pub fn new() -> Self {
85        Self::default()
86    }
87
88    /// Reset the finalization flag so that `finalize()` will run again.
89    ///
90    /// Use this when new class definitions have been added after an initial
91    /// `finalize()` call (e.g., lazily loaded via PSR-4) and the inheritance
92    /// graph needs to be rebuilt.
93    pub fn invalidate_finalization(&self) {
94        self.finalized
95            .store(false, std::sync::atomic::Ordering::SeqCst);
96    }
97
98    // -----------------------------------------------------------------------
99    // Incremental: remove all definitions from a single file
100    // -----------------------------------------------------------------------
101
102    /// Remove all definitions and outgoing reference locations contributed by the given file.
103    /// This clears classes, interfaces, traits, enums, functions, and constants
104    /// whose defining file matches `file_path`, the file's import and namespace entries,
105    /// and all entries in symbol_reference_locations that originated from this file.
106    /// After calling this, `invalidate_finalization()` is called so the next `finalize()`
107    /// rebuilds inheritance.
108    pub fn remove_file_definitions(&self, file_path: &str) {
109        // Collect all symbols defined in this file
110        let symbols: Vec<Arc<str>> = self
111            .symbol_to_file
112            .iter()
113            .filter(|entry| entry.value().as_ref() == file_path)
114            .map(|entry| entry.key().clone())
115            .collect();
116
117        // Remove each symbol from its respective map and from symbol_to_file
118        for sym in &symbols {
119            self.classes.remove(sym.as_ref());
120            self.interfaces.remove(sym.as_ref());
121            self.traits.remove(sym.as_ref());
122            self.enums.remove(sym.as_ref());
123            self.functions.remove(sym.as_ref());
124            self.constants.remove(sym.as_ref());
125            self.symbol_to_file.remove(sym.as_ref());
126            self.known_symbols.remove(sym.as_ref());
127        }
128
129        // Remove file-level metadata
130        self.file_imports.remove(file_path);
131        self.file_namespaces.remove(file_path);
132
133        // Remove @var-annotated global variables declared in this file
134        if let Some((_, var_names)) = self.file_global_vars.remove(file_path) {
135            for name in var_names {
136                self.global_vars.remove(name.as_ref());
137            }
138        }
139
140        // Remove reference locations contributed by this file.
141        // Use the reverse index to avoid a full scan of all symbols.
142        if let Some((_, symbol_keys)) = self.file_symbol_references.remove(file_path) {
143            for key in symbol_keys {
144                if let Some(mut locs) = self.symbol_reference_locations.get_mut(&key) {
145                    locs.remove(file_path);
146                }
147            }
148        }
149
150        self.invalidate_finalization();
151    }
152
153    // -----------------------------------------------------------------------
154    // Global variable registry
155    // -----------------------------------------------------------------------
156
157    /// Record an `@var`-annotated global variable type discovered in Pass 1.
158    /// If the same variable is annotated in multiple files, the last write wins.
159    pub fn register_global_var(&self, file: &Arc<str>, name: Arc<str>, ty: Union) {
160        self.file_global_vars
161            .entry(file.clone())
162            .or_default()
163            .push(name.clone());
164        self.global_vars.insert(name, ty);
165    }
166
167    // -----------------------------------------------------------------------
168    // Lookups
169    // -----------------------------------------------------------------------
170
171    /// Resolve a property, walking up the inheritance chain (parent classes and traits).
172    pub fn get_property(
173        &self,
174        fqcn: &str,
175        prop_name: &str,
176    ) -> Option<crate::storage::PropertyStorage> {
177        // Check direct class own_properties
178        if let Some(cls) = self.classes.get(fqcn) {
179            if let Some(p) = cls.own_properties.get(prop_name) {
180                return Some(p.clone());
181            }
182        }
183
184        // Walk all ancestors (collected during finalize)
185        let all_parents = {
186            if let Some(cls) = self.classes.get(fqcn) {
187                cls.all_parents.clone()
188            } else {
189                return None;
190            }
191        };
192
193        for ancestor_fqcn in &all_parents {
194            if let Some(ancestor_cls) = self.classes.get(ancestor_fqcn.as_ref()) {
195                if let Some(p) = ancestor_cls.own_properties.get(prop_name) {
196                    return Some(p.clone());
197                }
198            }
199        }
200
201        // Check traits
202        let trait_list = {
203            if let Some(cls) = self.classes.get(fqcn) {
204                cls.traits.clone()
205            } else {
206                vec![]
207            }
208        };
209        for trait_fqcn in &trait_list {
210            if let Some(tr) = self.traits.get(trait_fqcn.as_ref()) {
211                if let Some(p) = tr.own_properties.get(prop_name) {
212                    return Some(p.clone());
213                }
214            }
215        }
216
217        None
218    }
219
220    /// Resolve a method, walking up the inheritance chain.
221    pub fn get_method(&self, fqcn: &str, method_name: &str) -> Option<MethodStorage> {
222        // PHP method names are case-insensitive — normalize to lowercase for all lookups.
223        let method_lower = method_name.to_lowercase();
224        let method_name = method_lower.as_str();
225        // Check class methods first
226        if let Some(cls) = self.classes.get(fqcn) {
227            if let Some(m) = cls.get_method(method_name) {
228                return Some(m.clone());
229            }
230        }
231        // Check interface methods (including parent interfaces via all_parents)
232        if let Some(iface) = self.interfaces.get(fqcn) {
233            if let Some(m) = iface.own_methods.get(method_name).or_else(|| {
234                iface
235                    .own_methods
236                    .iter()
237                    .find(|(k, _)| k.as_ref().eq_ignore_ascii_case(method_name))
238                    .map(|(_, v)| v)
239            }) {
240                return Some(m.clone());
241            }
242            // Traverse parent interfaces
243            let parents = iface.all_parents.clone();
244            for parent_fqcn in &parents {
245                if let Some(parent_iface) = self.interfaces.get(parent_fqcn.as_ref()) {
246                    if let Some(m) = parent_iface.own_methods.get(method_name).or_else(|| {
247                        parent_iface
248                            .own_methods
249                            .iter()
250                            .find(|(k, _)| k.as_ref().eq_ignore_ascii_case(method_name))
251                            .map(|(_, v)| v)
252                    }) {
253                        return Some(m.clone());
254                    }
255                }
256            }
257        }
258        // Check trait methods (when a variable is annotated with a trait type)
259        if let Some(tr) = self.traits.get(fqcn) {
260            if let Some(m) = tr.own_methods.get(method_name).or_else(|| {
261                tr.own_methods
262                    .iter()
263                    .find(|(k, _)| k.as_ref().eq_ignore_ascii_case(method_name))
264                    .map(|(_, v)| v)
265            }) {
266                return Some(m.clone());
267            }
268        }
269        // Check enum methods
270        if let Some(e) = self.enums.get(fqcn) {
271            if let Some(m) = e.own_methods.get(method_name).or_else(|| {
272                e.own_methods
273                    .iter()
274                    .find(|(k, _)| k.as_ref().eq_ignore_ascii_case(method_name))
275                    .map(|(_, v)| v)
276            }) {
277                return Some(m.clone());
278            }
279            // PHP 8.1 built-in enum methods: cases(), from(), tryFrom()
280            if matches!(method_name, "cases" | "from" | "tryfrom") {
281                return Some(crate::storage::MethodStorage {
282                    fqcn: Arc::from(fqcn),
283                    name: Arc::from(method_name),
284                    params: vec![],
285                    return_type: Some(mir_types::Union::mixed()),
286                    inferred_return_type: None,
287                    visibility: crate::storage::Visibility::Public,
288                    is_static: true,
289                    is_abstract: false,
290                    is_constructor: false,
291                    template_params: vec![],
292                    assertions: vec![],
293                    throws: vec![],
294                    is_final: false,
295                    is_internal: false,
296                    is_pure: false,
297                    is_deprecated: false,
298                    location: None,
299                });
300            }
301        }
302        None
303    }
304
305    /// Returns true if `child` extends or implements `ancestor` (transitively).
306    pub fn extends_or_implements(&self, child: &str, ancestor: &str) -> bool {
307        if child == ancestor {
308            return true;
309        }
310        if let Some(cls) = self.classes.get(child) {
311            return cls.implements_or_extends(ancestor);
312        }
313        if let Some(iface) = self.interfaces.get(child) {
314            return iface.all_parents.iter().any(|p| p.as_ref() == ancestor);
315        }
316        // Enum: backed enums implicitly implement BackedEnum (and UnitEnum);
317        // pure enums implicitly implement UnitEnum.
318        if let Some(en) = self.enums.get(child) {
319            // Check explicitly declared interfaces (e.g. implements SomeInterface)
320            if en.interfaces.iter().any(|i| i.as_ref() == ancestor) {
321                return true;
322            }
323            // PHP built-in: every enum implements UnitEnum
324            if ancestor == "UnitEnum" || ancestor == "\\UnitEnum" {
325                return true;
326            }
327            // Backed enums implement BackedEnum
328            if (ancestor == "BackedEnum" || ancestor == "\\BackedEnum") && en.scalar_type.is_some()
329            {
330                return true;
331            }
332        }
333        false
334    }
335
336    /// Whether a class/interface/trait/enum with this FQCN exists.
337    pub fn type_exists(&self, fqcn: &str) -> bool {
338        self.classes.contains_key(fqcn)
339            || self.interfaces.contains_key(fqcn)
340            || self.traits.contains_key(fqcn)
341            || self.enums.contains_key(fqcn)
342    }
343
344    pub fn function_exists(&self, fqn: &str) -> bool {
345        self.functions.contains_key(fqn)
346    }
347
348    /// Returns true if the class is declared abstract.
349    /// Used to suppress `UndefinedMethod` on abstract class receivers: the concrete
350    /// subclass is expected to implement the method, matching Psalm errorLevel=3 behaviour.
351    pub fn is_abstract_class(&self, fqcn: &str) -> bool {
352        self.classes.get(fqcn).is_some_and(|c| c.is_abstract)
353    }
354
355    /// Return the declared template params for `fqcn` (class or interface), or
356    /// an empty vec if the type is not found or has no templates.
357    pub fn get_class_template_params(&self, fqcn: &str) -> Vec<crate::storage::TemplateParam> {
358        if let Some(cls) = self.classes.get(fqcn) {
359            return cls.template_params.clone();
360        }
361        if let Some(iface) = self.interfaces.get(fqcn) {
362            return iface.template_params.clone();
363        }
364        if let Some(tr) = self.traits.get(fqcn) {
365            return tr.template_params.clone();
366        }
367        vec![]
368    }
369
370    /// Returns true if the class (or any ancestor/trait) defines a `__get` magic method.
371    /// Such classes allow arbitrary property access, suppressing UndefinedProperty.
372    pub fn has_magic_get(&self, fqcn: &str) -> bool {
373        if let Some(cls) = self.classes.get(fqcn) {
374            if cls.own_methods.contains_key("__get") || cls.all_methods.contains_key("__get") {
375                return true;
376            }
377            // Check traits
378            let traits = cls.traits.clone();
379            drop(cls);
380            for tr in &traits {
381                if let Some(t) = self.traits.get(tr.as_ref()) {
382                    if t.own_methods.contains_key("__get") {
383                        return true;
384                    }
385                }
386            }
387            // Check ancestors
388            let all_parents = {
389                if let Some(c) = self.classes.get(fqcn) {
390                    c.all_parents.clone()
391                } else {
392                    vec![]
393                }
394            };
395            for ancestor in &all_parents {
396                if let Some(anc) = self.classes.get(ancestor.as_ref()) {
397                    if anc.own_methods.contains_key("__get") {
398                        return true;
399                    }
400                }
401            }
402        }
403        false
404    }
405
406    /// Returns true if the class (or any of its ancestors) has a parent/interface/trait
407    /// that is NOT present in the codebase.  Used to suppress `UndefinedMethod` false
408    /// positives: if a method might be inherited from an unscanned external class we
409    /// cannot confirm or deny its existence.
410    ///
411    /// We use the pre-computed `all_parents` list (built during finalization) rather
412    /// than recursive DashMap lookups to avoid potential deadlocks.
413    pub fn has_unknown_ancestor(&self, fqcn: &str) -> bool {
414        // For interfaces: check whether any parent interface is unknown.
415        if let Some(iface) = self.interfaces.get(fqcn) {
416            let parents = iface.all_parents.clone();
417            drop(iface);
418            for p in &parents {
419                if !self.type_exists(p.as_ref()) {
420                    return true;
421                }
422            }
423            return false;
424        }
425
426        // Clone the data we need so the DashMap ref is dropped before any further lookups.
427        let (parent, interfaces, traits, all_parents) = {
428            let Some(cls) = self.classes.get(fqcn) else {
429                return false;
430            };
431            (
432                cls.parent.clone(),
433                cls.interfaces.clone(),
434                cls.traits.clone(),
435                cls.all_parents.clone(),
436            )
437        };
438
439        // Fast path: check direct parent/interfaces/traits
440        if let Some(ref p) = parent {
441            if !self.type_exists(p.as_ref()) {
442                return true;
443            }
444        }
445        for iface in &interfaces {
446            if !self.type_exists(iface.as_ref()) {
447                return true;
448            }
449        }
450        for tr in &traits {
451            if !self.type_exists(tr.as_ref()) {
452                return true;
453            }
454        }
455
456        // Also check the full ancestor chain (pre-computed during finalization)
457        for ancestor in &all_parents {
458            if !self.type_exists(ancestor.as_ref()) {
459                return true;
460            }
461        }
462
463        false
464    }
465
466    /// Resolve a short class/function name to its FQCN using the import table
467    /// and namespace recorded for `file` during Pass 1.
468    ///
469    /// - Names already containing `\` (after stripping a leading `\`) are
470    ///   returned as-is (already fully qualified).
471    /// - `self`, `parent`, `static` are returned unchanged (caller handles them).
472    pub fn resolve_class_name(&self, file: &str, name: &str) -> String {
473        let name = name.trim_start_matches('\\');
474        if name.is_empty() {
475            return name.to_string();
476        }
477        // Fully qualified absolute paths start with '\' (already stripped above).
478        // Names containing '\' but not starting with it may be:
479        //   - Already-resolved FQCNs (e.g. Frontify\Util\Foo) — check type_exists
480        //   - Qualified relative names (e.g. Option\Some from within Frontify\Utility) — need namespace prefix
481        if name.contains('\\') {
482            // Check if the leading segment matches a use-import alias
483            let first_segment = name.split('\\').next().unwrap_or(name);
484            if let Some(imports) = self.file_imports.get(file) {
485                if let Some(resolved_prefix) = imports.get(first_segment) {
486                    let rest = &name[first_segment.len()..]; // includes leading '\'
487                    return format!("{}{}", resolved_prefix, rest);
488                }
489            }
490            // If already known in codebase as-is, it's FQCN — trust it
491            if self.type_exists(name) {
492                return name.to_string();
493            }
494            // Otherwise it's a relative qualified name — prepend the file namespace
495            if let Some(ns) = self.file_namespaces.get(file) {
496                let qualified = format!("{}\\{}", *ns, name);
497                if self.type_exists(&qualified) {
498                    return qualified;
499                }
500            }
501            return name.to_string();
502        }
503        // Built-in pseudo-types / keywords handled by the caller
504        match name {
505            "self" | "parent" | "static" | "this" => return name.to_string(),
506            _ => {}
507        }
508        // Check use aliases for this file (PHP class names are case-insensitive)
509        if let Some(imports) = self.file_imports.get(file) {
510            if let Some(resolved) = imports.get(name) {
511                return resolved.clone();
512            }
513            // Fall back to case-insensitive alias lookup
514            let name_lower = name.to_lowercase();
515            for (alias, resolved) in imports.iter() {
516                if alias.to_lowercase() == name_lower {
517                    return resolved.clone();
518                }
519            }
520        }
521        // Qualify with the file's namespace if one exists
522        if let Some(ns) = self.file_namespaces.get(file) {
523            let qualified = format!("{}\\{}", *ns, name);
524            // If the namespaced version exists in the codebase, use it.
525            // Otherwise fall back to the global (unqualified) name if that exists.
526            // This handles `DateTimeInterface`, `Exception`, etc. used without import
527            // while not overriding user-defined classes in namespaces.
528            if self.type_exists(&qualified) {
529                return qualified;
530            }
531            if self.type_exists(name) {
532                return name.to_string();
533            }
534            return qualified;
535        }
536        name.to_string()
537    }
538
539    // -----------------------------------------------------------------------
540    // Definition location lookups
541    // -----------------------------------------------------------------------
542
543    /// Look up the definition location of any symbol (class, interface, trait, enum, function).
544    /// Returns the file path and byte offsets.
545    pub fn get_symbol_location(&self, fqcn: &str) -> Option<crate::storage::Location> {
546        if let Some(cls) = self.classes.get(fqcn) {
547            return cls.location.clone();
548        }
549        if let Some(iface) = self.interfaces.get(fqcn) {
550            return iface.location.clone();
551        }
552        if let Some(tr) = self.traits.get(fqcn) {
553            return tr.location.clone();
554        }
555        if let Some(en) = self.enums.get(fqcn) {
556            return en.location.clone();
557        }
558        if let Some(func) = self.functions.get(fqcn) {
559            return func.location.clone();
560        }
561        None
562    }
563
564    /// Look up the definition location of a class member (method, property, constant).
565    pub fn get_member_location(
566        &self,
567        fqcn: &str,
568        member_name: &str,
569    ) -> Option<crate::storage::Location> {
570        // Check methods
571        if let Some(method) = self.get_method(fqcn, member_name) {
572            return method.location.clone();
573        }
574        // Check properties
575        if let Some(prop) = self.get_property(fqcn, member_name) {
576            return prop.location.clone();
577        }
578        // Check class constants
579        if let Some(cls) = self.classes.get(fqcn) {
580            if let Some(c) = cls.own_constants.get(member_name) {
581                return c.location.clone();
582            }
583        }
584        // Check interface constants
585        if let Some(iface) = self.interfaces.get(fqcn) {
586            if let Some(c) = iface.own_constants.get(member_name) {
587                return c.location.clone();
588            }
589        }
590        // Check trait constants
591        if let Some(tr) = self.traits.get(fqcn) {
592            if let Some(c) = tr.own_constants.get(member_name) {
593                return c.location.clone();
594            }
595        }
596        // Check enum constants and cases
597        if let Some(en) = self.enums.get(fqcn) {
598            if let Some(c) = en.own_constants.get(member_name) {
599                return c.location.clone();
600            }
601            if let Some(case) = en.cases.get(member_name) {
602                return case.location.clone();
603            }
604        }
605        None
606    }
607
608    // -----------------------------------------------------------------------
609    // Reference tracking (M18 dead-code detection)
610    // -----------------------------------------------------------------------
611
612    /// Mark a method as referenced from user code.
613    pub fn mark_method_referenced(&self, fqcn: &str, method_name: &str) {
614        let key: Arc<str> = Arc::from(format!("{}::{}", fqcn, method_name.to_lowercase()).as_str());
615        self.referenced_methods.insert(key);
616    }
617
618    /// Mark a property as referenced from user code.
619    pub fn mark_property_referenced(&self, fqcn: &str, prop_name: &str) {
620        let key: Arc<str> = Arc::from(format!("{}::{}", fqcn, prop_name).as_str());
621        self.referenced_properties.insert(key);
622    }
623
624    /// Mark a free function as referenced from user code.
625    pub fn mark_function_referenced(&self, fqn: &str) {
626        self.referenced_functions.insert(Arc::from(fqn));
627    }
628
629    pub fn is_method_referenced(&self, fqcn: &str, method_name: &str) -> bool {
630        let key = format!("{}::{}", fqcn, method_name.to_lowercase());
631        self.referenced_methods.contains(key.as_str())
632    }
633
634    pub fn is_property_referenced(&self, fqcn: &str, prop_name: &str) -> bool {
635        let key = format!("{}::{}", fqcn, prop_name);
636        self.referenced_properties.contains(key.as_str())
637    }
638
639    pub fn is_function_referenced(&self, fqn: &str) -> bool {
640        self.referenced_functions.contains(fqn)
641    }
642
643    /// Record a method reference with its source location.
644    /// Also updates the referenced_methods DashSet for dead-code detection.
645    pub fn mark_method_referenced_at(
646        &self,
647        fqcn: &str,
648        method_name: &str,
649        file: Arc<str>,
650        start: u32,
651        end: u32,
652    ) {
653        let key: Arc<str> = Arc::from(format!("{}::{}", fqcn, method_name.to_lowercase()).as_str());
654        self.referenced_methods.insert(key.clone());
655        self.symbol_reference_locations
656            .entry(key.clone())
657            .or_default()
658            .entry(file.clone())
659            .or_default()
660            .insert((start, end));
661        self.file_symbol_references
662            .entry(file)
663            .or_default()
664            .insert(key);
665    }
666
667    /// Record a property reference with its source location.
668    /// Also updates the referenced_properties DashSet for dead-code detection.
669    pub fn mark_property_referenced_at(
670        &self,
671        fqcn: &str,
672        prop_name: &str,
673        file: Arc<str>,
674        start: u32,
675        end: u32,
676    ) {
677        let key: Arc<str> = Arc::from(format!("{}::{}", fqcn, prop_name).as_str());
678        self.referenced_properties.insert(key.clone());
679        self.symbol_reference_locations
680            .entry(key.clone())
681            .or_default()
682            .entry(file.clone())
683            .or_default()
684            .insert((start, end));
685        self.file_symbol_references
686            .entry(file)
687            .or_default()
688            .insert(key);
689    }
690
691    /// Record a function reference with its source location.
692    /// Also updates the referenced_functions DashSet for dead-code detection.
693    pub fn mark_function_referenced_at(&self, fqn: &str, file: Arc<str>, start: u32, end: u32) {
694        let key: Arc<str> = Arc::from(fqn);
695        self.referenced_functions.insert(key.clone());
696        self.symbol_reference_locations
697            .entry(key.clone())
698            .or_default()
699            .entry(file.clone())
700            .or_default()
701            .insert((start, end));
702        self.file_symbol_references
703            .entry(file)
704            .or_default()
705            .insert(key);
706    }
707
708    /// Record a class reference (e.g. `new Foo()`) with its source location.
709    /// Does not update any dead-code DashSet — class instantiation tracking is
710    /// separate from method/property/function dead-code detection.
711    pub fn mark_class_referenced_at(&self, fqcn: &str, file: Arc<str>, start: u32, end: u32) {
712        let key: Arc<str> = Arc::from(fqcn);
713        self.symbol_reference_locations
714            .entry(key.clone())
715            .or_default()
716            .entry(file.clone())
717            .or_default()
718            .insert((start, end));
719        self.file_symbol_references
720            .entry(file)
721            .or_default()
722            .insert(key);
723    }
724
725    /// Replay cached reference locations for a file into symbol_reference_locations
726    /// and file_symbol_references. Called on cache hits to avoid re-running Pass 2
727    /// just to rebuild the reference index.
728    /// `locs` is a slice of `(symbol_key, start_byte, end_byte)` as stored in the cache.
729    pub fn replay_reference_locations(&self, file: Arc<str>, locs: &[(String, u32, u32)]) {
730        for (symbol_key, start, end) in locs {
731            let key: Arc<str> = Arc::from(symbol_key.as_str());
732            self.symbol_reference_locations
733                .entry(key.clone())
734                .or_default()
735                .entry(file.clone())
736                .or_default()
737                .insert((*start, *end));
738            self.file_symbol_references
739                .entry(file.clone())
740                .or_default()
741                .insert(key);
742        }
743    }
744
745    /// Return all reference locations for `symbol` as a flat `Vec<(file, start, end)>`.
746    /// Returns an empty Vec if the symbol has no recorded references.
747    pub fn get_reference_locations(&self, symbol: &str) -> Vec<(Arc<str>, u32, u32)> {
748        match self.symbol_reference_locations.get(symbol) {
749            None => Vec::new(),
750            Some(by_file) => by_file
751                .iter()
752                .flat_map(|(file, spans)| {
753                    spans.iter().map(|&(start, end)| (file.clone(), start, end))
754                })
755                .collect(),
756        }
757    }
758
759    // -----------------------------------------------------------------------
760    // Finalization
761    // -----------------------------------------------------------------------
762
763    /// Must be called after all files have been parsed (pass 1 complete).
764    /// Resolves inheritance chains and builds method dispatch tables.
765    pub fn finalize(&self) {
766        if self.finalized.load(std::sync::atomic::Ordering::SeqCst) {
767            return;
768        }
769
770        // 1. Resolve all_parents for classes
771        let class_keys: Vec<Arc<str>> = self.classes.iter().map(|e| e.key().clone()).collect();
772        for fqcn in &class_keys {
773            let parents = self.collect_class_ancestors(fqcn);
774            if let Some(mut cls) = self.classes.get_mut(fqcn.as_ref()) {
775                cls.all_parents = parents;
776            }
777        }
778
779        // 2. Build method dispatch tables for classes (own methods override inherited)
780        for fqcn in &class_keys {
781            let all_methods = self.build_method_table(fqcn);
782            if let Some(mut cls) = self.classes.get_mut(fqcn.as_ref()) {
783                cls.all_methods = all_methods;
784            }
785        }
786
787        // 3. Resolve all_parents for interfaces
788        let iface_keys: Vec<Arc<str>> = self.interfaces.iter().map(|e| e.key().clone()).collect();
789        for fqcn in &iface_keys {
790            let parents = self.collect_interface_ancestors(fqcn);
791            if let Some(mut iface) = self.interfaces.get_mut(fqcn.as_ref()) {
792                iface.all_parents = parents;
793            }
794        }
795
796        self.finalized
797            .store(true, std::sync::atomic::Ordering::SeqCst);
798    }
799
800    // -----------------------------------------------------------------------
801    // Private helpers
802    // -----------------------------------------------------------------------
803
804    fn collect_class_ancestors(&self, fqcn: &str) -> Vec<Arc<str>> {
805        let mut result = Vec::new();
806        let mut visited = std::collections::HashSet::new();
807        self.collect_class_ancestors_inner(fqcn, &mut result, &mut visited);
808        result
809    }
810
811    fn collect_class_ancestors_inner(
812        &self,
813        fqcn: &str,
814        out: &mut Vec<Arc<str>>,
815        visited: &mut std::collections::HashSet<String>,
816    ) {
817        if !visited.insert(fqcn.to_string()) {
818            return; // cycle guard
819        }
820        let (parent, interfaces, traits) = {
821            if let Some(cls) = self.classes.get(fqcn) {
822                (
823                    cls.parent.clone(),
824                    cls.interfaces.clone(),
825                    cls.traits.clone(),
826                )
827            } else {
828                return;
829            }
830        };
831
832        if let Some(p) = parent {
833            out.push(p.clone());
834            self.collect_class_ancestors_inner(&p, out, visited);
835        }
836        for iface in interfaces {
837            out.push(iface.clone());
838            self.collect_interface_ancestors_inner(&iface, out, visited);
839        }
840        for t in traits {
841            out.push(t);
842        }
843    }
844
845    fn collect_interface_ancestors(&self, fqcn: &str) -> Vec<Arc<str>> {
846        let mut result = Vec::new();
847        let mut visited = std::collections::HashSet::new();
848        self.collect_interface_ancestors_inner(fqcn, &mut result, &mut visited);
849        result
850    }
851
852    fn collect_interface_ancestors_inner(
853        &self,
854        fqcn: &str,
855        out: &mut Vec<Arc<str>>,
856        visited: &mut std::collections::HashSet<String>,
857    ) {
858        if !visited.insert(fqcn.to_string()) {
859            return;
860        }
861        let extends = {
862            if let Some(iface) = self.interfaces.get(fqcn) {
863                iface.extends.clone()
864            } else {
865                return;
866            }
867        };
868        for e in extends {
869            out.push(e.clone());
870            self.collect_interface_ancestors_inner(&e, out, visited);
871        }
872    }
873
874    /// Build the full method dispatch table for a class, with own methods taking
875    /// priority over inherited ones.
876    fn build_method_table(&self, fqcn: &str) -> indexmap::IndexMap<Arc<str>, MethodStorage> {
877        use indexmap::IndexMap;
878        let mut table: IndexMap<Arc<str>, MethodStorage> = IndexMap::new();
879
880        // Walk ancestor chain (broad-first from root → child, so child overrides root)
881        let ancestors = {
882            if let Some(cls) = self.classes.get(fqcn) {
883                cls.all_parents.clone()
884            } else {
885                return table;
886            }
887        };
888
889        // Insert ancestor methods (deepest ancestor first, so closer ancestors override).
890        // Also insert trait methods from ancestor classes.
891        for ancestor_fqcn in ancestors.iter().rev() {
892            if let Some(ancestor) = self.classes.get(ancestor_fqcn.as_ref()) {
893                // First insert ancestor's own trait methods (lower priority)
894                let ancestor_traits = ancestor.traits.clone();
895                for trait_fqcn in ancestor_traits.iter().rev() {
896                    if let Some(tr) = self.traits.get(trait_fqcn.as_ref()) {
897                        for (name, method) in &tr.own_methods {
898                            table.insert(name.clone(), method.clone());
899                        }
900                    }
901                }
902                // Then ancestor's own methods (override trait methods)
903                for (name, method) in &ancestor.own_methods {
904                    table.insert(name.clone(), method.clone());
905                }
906            } else if let Some(iface) = self.interfaces.get(ancestor_fqcn.as_ref()) {
907                for (name, method) in &iface.own_methods {
908                    // Interface methods are implicitly abstract — mark them so that
909                    // ClassAnalyzer::check_interface_methods_implemented can detect
910                    // a concrete class that fails to provide an implementation.
911                    let mut m = method.clone();
912                    m.is_abstract = true;
913                    table.insert(name.clone(), m);
914                }
915            }
916        }
917
918        // Insert the class's own trait methods
919        let trait_list = {
920            if let Some(cls) = self.classes.get(fqcn) {
921                cls.traits.clone()
922            } else {
923                vec![]
924            }
925        };
926        for trait_fqcn in &trait_list {
927            if let Some(tr) = self.traits.get(trait_fqcn.as_ref()) {
928                for (name, method) in &tr.own_methods {
929                    table.insert(name.clone(), method.clone());
930                }
931            }
932        }
933
934        // Own methods override everything
935        if let Some(cls) = self.classes.get(fqcn) {
936            for (name, method) in &cls.own_methods {
937                table.insert(name.clone(), method.clone());
938            }
939        }
940
941        table
942    }
943}
944
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for turning a string literal into an `Arc<str>`.
    fn arc(s: &str) -> Arc<str> {
        s.into()
    }

    /// Spans of one method are bucketed per file, and the dead-code set is updated too.
    #[test]
    fn method_referenced_at_groups_spans_by_file() {
        let codebase = Codebase::new();
        let file_a = arc("a.php");
        let file_b = arc("b.php");
        codebase.mark_method_referenced_at("Foo", "bar", file_a.clone(), 0, 5);
        codebase.mark_method_referenced_at("Foo", "bar", file_a.clone(), 10, 15);
        codebase.mark_method_referenced_at("Foo", "bar", file_b.clone(), 20, 25);

        let by_file = codebase
            .symbol_reference_locations
            .get("Foo::bar")
            .unwrap();
        assert_eq!(by_file.len(), 2, "two files, not three spans");

        let spans_a = &by_file[&file_a];
        assert!(spans_a.contains(&(0, 5)));
        assert!(spans_a.contains(&(10, 15)));
        assert_eq!(spans_a.len(), 2);

        assert!(by_file[&file_b].contains(&(20, 25)));
        assert!(
            codebase.is_method_referenced("Foo", "bar"),
            "DashSet also updated"
        );
    }

    /// Recording the identical span twice keeps a single entry.
    #[test]
    fn duplicate_spans_are_deduplicated() {
        let codebase = Codebase::new();
        let file = arc("a.php");
        // The same call site reported twice (e.g. union receiver Foo|Foo).
        for _ in 0..2 {
            codebase.mark_method_referenced_at("Foo", "bar", file.clone(), 0, 5);
        }

        let by_file = codebase
            .symbol_reference_locations
            .get("Foo::bar")
            .unwrap();
        assert_eq!(by_file[&file].len(), 1, "duplicate span deduplicated");
    }

    /// The method part of the symbol key is stored lowercased.
    #[test]
    fn method_key_is_lowercased() {
        let codebase = Codebase::new();
        codebase.mark_method_referenced_at("Cls", "MyMethod", arc("f.php"), 0, 3);

        let has_lowercased_key = codebase
            .symbol_reference_locations
            .contains_key("Cls::mymethod");
        assert!(has_lowercased_key);
    }

    /// A property reference lands in the location index and the dead-code set.
    #[test]
    fn property_referenced_at_records_location() {
        let codebase = Codebase::new();
        let file = arc("x.php");
        codebase.mark_property_referenced_at("Bar", "count", file.clone(), 5, 10);

        let by_file = codebase
            .symbol_reference_locations
            .get("Bar::count")
            .unwrap();
        assert!(by_file[&file].contains(&(5, 10)));
        assert!(codebase.is_property_referenced("Bar", "count"));
    }

    /// A function reference lands in the location index and the dead-code set.
    #[test]
    fn function_referenced_at_records_location() {
        let codebase = Codebase::new();
        let file = arc("a.php");
        codebase.mark_function_referenced_at("my_fn", file.clone(), 10, 15);

        let by_file = codebase.symbol_reference_locations.get("my_fn").unwrap();
        assert!(by_file[&file].contains(&(10, 15)));
        assert!(codebase.is_function_referenced("my_fn"));
    }

    /// A class reference lands in the location index.
    #[test]
    fn class_referenced_at_records_location() {
        let codebase = Codebase::new();
        let file = arc("a.php");
        codebase.mark_class_referenced_at("Foo", file.clone(), 5, 8);

        let by_file = codebase.symbol_reference_locations.get("Foo").unwrap();
        assert!(by_file[&file].contains(&(5, 8)));
    }

    /// `get_reference_locations` merges the spans of every file into one list.
    #[test]
    fn get_reference_locations_flattens_all_files() {
        let codebase = Codebase::new();
        codebase.mark_function_referenced_at("fn1", arc("a.php"), 0, 5);
        codebase.mark_function_referenced_at("fn1", arc("b.php"), 10, 15);

        // Sort by start offset to get a deterministic order for comparison.
        let mut flat = codebase.get_reference_locations("fn1");
        flat.sort_by_key(|loc| loc.1);

        assert_eq!(flat.len(), 2);
        assert_eq!(flat[0], (arc("a.php"), 0, 5));
        assert_eq!(flat[1], (arc("b.php"), 10, 15));
    }

    /// Replaying cached triples rebuilds both indexes.
    #[test]
    fn replay_reference_locations_restores_index() {
        let codebase = Codebase::new();
        let file = arc("a.php");
        let cached = vec![
            ("Foo::bar".to_string(), 0u32, 5u32),
            ("Foo::bar".to_string(), 10, 15),
            ("greet".to_string(), 20, 25),
        ];
        codebase.replay_reference_locations(file.clone(), &cached);

        let bar_by_file = codebase
            .symbol_reference_locations
            .get("Foo::bar")
            .unwrap();
        assert!(bar_by_file[&file].contains(&(0, 5)));
        assert!(bar_by_file[&file].contains(&(10, 15)));

        let greet_by_file = codebase.symbol_reference_locations.get("greet").unwrap();
        assert!(greet_by_file[&file].contains(&(20, 25)));

        let symbols = codebase.file_symbol_references.get(&file).unwrap();
        assert!(symbols.contains(&arc("Foo::bar")));
        assert!(symbols.contains(&arc("greet")));
    }

    /// Removing a file drops its spans and leaves the other file's spans alone.
    #[test]
    fn remove_file_clears_its_spans_only() {
        let codebase = Codebase::new();
        let file_b = arc("b.php");
        codebase.mark_function_referenced_at("fn1", arc("a.php"), 0, 5);
        codebase.mark_function_referenced_at("fn1", file_b.clone(), 10, 15);

        codebase.remove_file_definitions("a.php");

        let by_file = codebase.symbol_reference_locations.get("fn1").unwrap();
        assert!(!by_file.contains_key("a.php"), "a.php spans removed");
        assert!(
            by_file[&file_b].contains(&(10, 15)),
            "b.php spans untouched"
        );
        assert!(!codebase.file_symbol_references.contains_key("a.php"));
    }

    /// Same as above, exercised through a property symbol.
    #[test]
    fn remove_file_does_not_affect_other_files() {
        let codebase = Codebase::new();
        let file_y = arc("y.php");
        codebase.mark_property_referenced_at("Cls", "prop", arc("x.php"), 1, 4);
        codebase.mark_property_referenced_at("Cls", "prop", file_y.clone(), 7, 10);

        codebase.remove_file_definitions("x.php");

        let by_file = codebase
            .symbol_reference_locations
            .get("Cls::prop")
            .unwrap();
        assert!(!by_file.contains_key("x.php"));
        assert!(by_file[&file_y].contains(&(7, 10)));
    }

    /// Removing a file that was never recorded changes nothing.
    #[test]
    fn remove_file_definitions_on_never_analyzed_file_is_noop() {
        let codebase = Codebase::new();
        let file = arc("a.php");
        codebase.mark_function_referenced_at("fn1", file.clone(), 0, 5);

        // "ghost.php" was never analyzed: removal must neither panic nor corrupt state.
        codebase.remove_file_definitions("ghost.php");

        // Data recorded earlier is still there.
        let by_file = codebase.symbol_reference_locations.get("fn1").unwrap();
        assert!(by_file[&file].contains(&(0, 5)));
        assert!(!codebase.file_symbol_references.contains_key("ghost.php"));
    }

    /// Replaying an empty slice creates no entries and disturbs none.
    #[test]
    fn replay_reference_locations_with_empty_list_is_noop() {
        let codebase = Codebase::new();
        let file = arc("a.php");
        codebase.mark_function_referenced_at("fn1", file.clone(), 0, 5);

        // Nothing to replay for b.php.
        codebase.replay_reference_locations(arc("b.php"), &[]);

        assert!(
            !codebase.file_symbol_references.contains_key("b.php"),
            "empty replay must not create a file_symbol_references entry"
        );
        let by_file = codebase.symbol_reference_locations.get("fn1").unwrap();
        assert!(by_file[&file].contains(&(0, 5)), "existing spans untouched");
    }

    /// Replaying the same triple twice still yields one span.
    #[test]
    fn replay_reference_locations_twice_does_not_duplicate_spans() {
        let codebase = Codebase::new();
        let file = arc("a.php");
        let cached = vec![("fn1".to_string(), 0u32, 5u32)];

        for _ in 0..2 {
            codebase.replay_reference_locations(file.clone(), &cached);
        }

        let by_file = codebase.symbol_reference_locations.get("fn1").unwrap();
        assert_eq!(
            by_file[&file].len(),
            1,
            "replaying the same location twice must not create duplicate spans"
        );
    }
}