Skip to main content

mago_codex/
reference.rs

1use foldhash::HashMap;
2use foldhash::HashSet;
3use mago_atom::ascii_lowercase_atom;
4use mago_atom::empty_atom;
5use serde::Deserialize;
6use serde::Serialize;
7
8use mago_atom::Atom;
9use mago_atom::AtomSet;
10
11use crate::context::ScopeContext;
12use crate::diff::CodebaseDiff;
13use crate::identifier::function_like::FunctionLikeIdentifier;
14use crate::identifier::method::MethodIdentifier;
15use crate::symbol::SymbolIdentifier;
16
17/// Represents the source of a reference, distinguishing between top-level symbols
18/// and members within a class-like structure.
19#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
20pub enum ReferenceSource {
21    /// A reference from a top-level symbol (function, class, enum, trait, interface, constant).
22    /// The bool indicates if the reference occurs within a signature context (true) or body (false).
23    /// The Atom is the name (FQCN or FQN) of the referencing symbol.
24    Symbol(bool, Atom),
25    /// A reference from a member within a class-like structure (method, property, class constant, enum case).
26    /// The bool indicates if the reference occurs within a signature context (true) or body (false).
27    /// The first Atom is the FQCN of the class-like structure.
28    /// The second Atom is the name of the member.
29    ClassLikeMember(bool, Atom, Atom),
30}
31
32/// Holds sets of symbols and members identified as invalid during analysis,
33/// often due to changes detected in `CodebaseDiff`.
34#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
35pub struct InvalidSymbols {
36    /// Set of (Symbol, Member) pairs whose *signatures* are considered invalid.
37    /// An empty member name usually indicates the symbol itself.
38    invalid_symbol_and_member_signatures: HashSet<SymbolIdentifier>,
39    /// Set of (Symbol, Member) pairs whose *bodies* are considered invalid.
40    /// An empty member name usually indicates the symbol itself.
41    invalid_symbol_and_member_bodies: HashSet<SymbolIdentifier>,
42    /// Set of top-level symbols (class FQCN, function FQN) that are partially invalid,
43    /// meaning at least one member's signature or body is invalid, but not necessarily the whole symbol.
44    partially_invalid_symbols: AtomSet,
45}
46
47/// Stores various maps tracking references between symbols (classes, functions, etc.)
48/// and class-like members (methods, properties, constants, etc.) within the codebase.
49///
50/// This is primarily used for dependency analysis, understanding code structure,
51/// and potentially for tasks like dead code detection or impact analysis.
52#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Default)]
53pub struct SymbolReferences {
54    /// Maps a referencing symbol/member `(RefSymbol, RefMember)` to a set of referenced symbols/members `(Symbol, Member)`
55    /// found within the *body* of the referencing context.
56    /// `RefMember` or `Member` being empty usually signifies the symbol itself.
57    symbol_references_to_symbols: HashMap<SymbolIdentifier, HashSet<SymbolIdentifier>>,
58
59    /// Maps a referencing symbol/member `(RefSymbol, RefMember)` to a set of referenced symbols/members `(Symbol, Member)`
60    /// found within the *signature* (e.g., type hints, attributes) of the referencing context.
61    symbol_references_to_symbols_in_signature: HashMap<SymbolIdentifier, HashSet<SymbolIdentifier>>,
62
63    /// Maps a referencing symbol/member `(RefSymbol, RefMember)` to a set of *overridden* members `(ParentSymbol, Member)`
64    /// that it directly references (e.g., via `parent::method()`).
65    symbol_references_to_overridden_members: HashMap<SymbolIdentifier, HashSet<SymbolIdentifier>>,
66
67    /// Maps a referencing function/method (`FunctionLikeIdentifier`) to a set of functions/methods (`FunctionLikeIdentifier`)
68    /// whose return values it references/uses. Used for dead code analysis on return values.
69    functionlike_references_to_functionlike_returns: HashMap<FunctionLikeIdentifier, HashSet<FunctionLikeIdentifier>>,
70
71    /// Maps a file (represented by its hash as an Atom) to a set of referenced symbols/members `(Symbol, Member)`
72    /// found within the file's global scope (outside any symbol). This tracks references from top-level code.
73    /// Used for incremental analysis to determine which files need re-analysis when a symbol changes.
74    file_references_to_symbols: HashMap<Atom, HashSet<SymbolIdentifier>>,
75
76    /// Maps a file (represented by its hash as an Atom) to a set of referenced symbols/members `(Symbol, Member)`
77    /// found within the file's global scope signatures (e.g., top-level type declarations).
78    file_references_to_symbols_in_signature: HashMap<Atom, HashSet<SymbolIdentifier>>,
79
80    /// Maps a referencing symbol/member to a set of properties that are *written* (assigned to).
81    /// This is separate from read references to enable detection of write-only properties.
82    /// The key is the referencing symbol/member, the value is the set of properties being written.
83    property_write_references: HashMap<SymbolIdentifier, HashSet<SymbolIdentifier>>,
84
85    /// Maps a referencing symbol/member to a set of properties that are *read* (accessed for value).
86    /// This is separate from write references to enable accurate read/write tracking.
87    /// The key is the referencing symbol/member, the value is the set of properties being read.
88    property_read_references: HashMap<SymbolIdentifier, HashSet<SymbolIdentifier>>,
89}
90
91impl SymbolReferences {
92    /// Creates a new, empty `SymbolReferences` collection.
93    #[inline]
94    #[must_use]
95    pub fn new() -> Self {
96        Self {
97            symbol_references_to_symbols: HashMap::default(),
98            symbol_references_to_symbols_in_signature: HashMap::default(),
99            symbol_references_to_overridden_members: HashMap::default(),
100            functionlike_references_to_functionlike_returns: HashMap::default(),
101            file_references_to_symbols: HashMap::default(),
102            file_references_to_symbols_in_signature: HashMap::default(),
103            property_write_references: HashMap::default(),
104            property_read_references: HashMap::default(),
105        }
106    }
107
108    /// Counts the total number of symbol-to-symbol body references.
109    #[inline]
110    pub fn count_body_references(&self) -> usize {
111        self.symbol_references_to_symbols.values().map(std::collections::HashSet::len).sum()
112    }
113
114    /// Counts the total number of symbol-to-symbol signature references.
115    #[inline]
116    pub fn count_signature_references(&self) -> usize {
117        self.symbol_references_to_symbols_in_signature.values().map(std::collections::HashSet::len).sum()
118    }
119
120    /// Returns the total number of map entries (keys) across all reference maps.
121    /// Useful for memory auditing — this count should remain stable across cycles
122    /// in a long-running process.
123    #[inline]
124    #[must_use]
125    pub fn total_map_entries(&self) -> usize {
126        self.symbol_references_to_symbols.len()
127            + self.symbol_references_to_symbols_in_signature.len()
128            + self.symbol_references_to_overridden_members.len()
129            + self.functionlike_references_to_functionlike_returns.len()
130            + self.file_references_to_symbols.len()
131            + self.file_references_to_symbols_in_signature.len()
132            + self.property_write_references.len()
133            + self.property_read_references.len()
134    }
135
136    /// Counts how many symbols reference the given symbol.
137    ///
138    /// # Arguments
139    /// * `symbol` - The symbol to check references to
140    /// * `in_signature` - If true, count signature references; if false, count body references
141    ///
142    /// # Returns
143    /// The number of symbols that reference the given symbol
144    #[inline]
145    #[must_use]
146    pub fn count_referencing_symbols(&self, symbol: &SymbolIdentifier, in_signature: bool) -> usize {
147        let map = if in_signature {
148            &self.symbol_references_to_symbols_in_signature
149        } else {
150            &self.symbol_references_to_symbols
151        };
152
153        map.values().filter(|referenced_set| referenced_set.contains(symbol)).count()
154    }
155
156    /// Counts how many symbols have a *read* reference to the given property.
157    ///
158    /// # Arguments
159    ///
160    /// * `property` - The property symbol identifier `(ClassName, PropertyName)` to check
161    ///
162    /// # Returns
163    ///
164    /// The number of symbols that read the given property
165    #[inline]
166    #[must_use]
167    pub fn count_property_reads(&self, property: &SymbolIdentifier) -> usize {
168        self.property_read_references.values().filter(|read_set| read_set.contains(property)).count()
169    }
170
171    /// Counts how many symbols have a *write* reference to the given property.
172    ///
173    /// # Arguments
174    ///
175    /// * `property` - The property symbol identifier `(ClassName, PropertyName)` to check
176    ///
177    /// # Returns
178    ///
179    /// The number of symbols that write to the given property
180    #[inline]
181    #[must_use]
182    pub fn count_property_writes(&self, property: &SymbolIdentifier) -> usize {
183        self.property_write_references.values().filter(|write_set| write_set.contains(property)).count()
184    }
185
186    /// Records that a top-level symbol (e.g., a function) references a class member.
187    ///
188    /// Automatically adds a reference from the referencing symbol to the member's class.
189    ///
190    /// # Arguments
191    ///
192    /// * `referencing_symbol`: The FQN of the function or global const making the reference.
193    /// * `class_member`: A tuple `(ClassName, MemberName)` being referenced.
194    /// * `in_signature`: `true` if the reference occurs in a signature context, `false` if in the body.
195    #[inline]
196    pub fn add_symbol_reference_to_class_member(
197        &mut self,
198        referencing_symbol: Atom,
199        class_member: SymbolIdentifier,
200        in_signature: bool,
201    ) {
202        // Reference the class itself implicitly (in body context)
203        self.add_symbol_reference_to_symbol(referencing_symbol, class_member.0, false);
204
205        // Use empty member for the referencing symbol key
206        let key = (referencing_symbol, empty_atom());
207        if in_signature {
208            self.symbol_references_to_symbols_in_signature.entry(key).or_default().insert(class_member);
209        } else {
210            self.symbol_references_to_symbols.entry(key).or_default().insert(class_member);
211        }
212    }
213
214    /// Records that a top-level symbol references another top-level symbol.
215    ///
216    /// Skips self-references. Skips body references if already referenced in signature.
217    ///
218    /// # Arguments
219    /// * `referencing_symbol`: The FQN of the symbol making the reference.
220    /// * `symbol`: The FQN of the symbol being referenced.
221    /// * `in_signature`: `true` if the reference occurs in a signature context, `false` if in the body.
222    #[inline]
223    pub fn add_symbol_reference_to_symbol(&mut self, referencing_symbol: Atom, symbol: Atom, in_signature: bool) {
224        if referencing_symbol == symbol {
225            return;
226        }
227
228        // Represent top-level symbols with an empty member identifier
229        let referencing_key = (referencing_symbol, empty_atom());
230        let referenced_key = (symbol, empty_atom());
231
232        if in_signature {
233            self.symbol_references_to_symbols_in_signature.entry(referencing_key).or_default().insert(referenced_key);
234        } else {
235            // If it's already referenced in the signature, don't add as a body reference
236            if let Some(sig_refs) = self.symbol_references_to_symbols_in_signature.get(&referencing_key)
237                && sig_refs.contains(&referenced_key)
238            {
239                return;
240            }
241            self.symbol_references_to_symbols.entry(referencing_key).or_default().insert(referenced_key);
242        }
243    }
244
245    /// Records that a class member references another class member.
246    ///
247    /// Automatically adds references from the referencing member's class to the referenced member's class,
248    /// and from the referencing member to the referenced member's class. Skips self-references.
249    ///
250    /// # Arguments
251    /// * `referencing_class_member`: Tuple `(ClassName, MemberName)` making the reference.
252    /// * `class_member`: Tuple `(ClassName, MemberName)` being referenced.
253    /// * `in_signature`: `true` if the reference occurs in a signature context, `false` if in the body.
254    #[inline]
255    pub fn add_class_member_reference_to_class_member(
256        &mut self,
257        referencing_class_member: SymbolIdentifier,
258        class_member: SymbolIdentifier,
259        in_signature: bool,
260    ) {
261        if referencing_class_member == class_member {
262            return;
263        }
264
265        // Add implicit references between the classes/symbols involved
266        self.add_symbol_reference_to_symbol(referencing_class_member.0, class_member.0, false);
267        self.add_class_member_reference_to_symbol(referencing_class_member, class_member.0, false);
268
269        // Add the direct member-to-member reference
270        if in_signature {
271            self.symbol_references_to_symbols_in_signature
272                .entry(referencing_class_member)
273                .or_default()
274                .insert(class_member);
275        } else {
276            // Check signature refs first? (Consistency with add_symbol_reference_to_symbol might be needed)
277            // Current logic adds to body refs regardless of signature refs for member->member.
278            self.symbol_references_to_symbols.entry(referencing_class_member).or_default().insert(class_member);
279        }
280    }
281
282    /// Records that a class member references a top-level symbol.
283    ///
284    /// Automatically adds a reference from the referencing member's class to the referenced symbol.
285    /// Skips references to the member's own class. Skips body references if already referenced in signature.
286    ///
287    /// # Arguments
288    /// * `referencing_class_member`: Tuple `(ClassName, MemberName)` making the reference.
289    /// * `symbol`: The FQN of the symbol being referenced.
290    /// * `in_signature`: `true` if the reference occurs in a signature context, `false` if in the body.
291    #[inline]
292    pub fn add_class_member_reference_to_symbol(
293        &mut self,
294        referencing_class_member: SymbolIdentifier,
295        symbol: Atom,
296        in_signature: bool,
297    ) {
298        if referencing_class_member.0 == symbol {
299            return;
300        }
301
302        // Add implicit reference from the class to the symbol
303        self.add_symbol_reference_to_symbol(referencing_class_member.0, symbol, false);
304
305        // Represent the referenced symbol with an empty member identifier
306        let referenced_key = (symbol, empty_atom());
307
308        if in_signature {
309            self.symbol_references_to_symbols_in_signature
310                .entry(referencing_class_member)
311                .or_default()
312                .insert(referenced_key);
313        } else {
314            // If already referenced in signature, don't add as body reference
315            if let Some(sig_refs) = self.symbol_references_to_symbols_in_signature.get(&referencing_class_member)
316                && sig_refs.contains(&referenced_key)
317            {
318                return;
319            }
320            self.symbol_references_to_symbols.entry(referencing_class_member).or_default().insert(referenced_key);
321        }
322    }
323
324    /// Adds a file-level reference to a class member.
325    /// This is used for references from global/top-level scope that aren't within any symbol.
326    #[inline]
327    pub fn add_file_reference_to_class_member(
328        &mut self,
329        file_hash: Atom,
330        class_member: SymbolIdentifier,
331        in_signature: bool,
332    ) {
333        if in_signature {
334            self.file_references_to_symbols_in_signature.entry(file_hash).or_default().insert(class_member);
335        } else {
336            // Check if already in signature to avoid duplicate tracking
337            if let Some(sig_refs) = self.file_references_to_symbols_in_signature.get(&file_hash)
338                && sig_refs.contains(&class_member)
339            {
340                return;
341            }
342            self.file_references_to_symbols.entry(file_hash).or_default().insert(class_member);
343        }
344    }
345
346    /// Convenience method to add a reference *from* the current function context *to* a class member.
347    /// Delegates to appropriate `add_*` methods based on the function context.
348    #[inline]
349    pub fn add_reference_to_class_member(
350        &mut self,
351        scope: &ScopeContext<'_>,
352        class_member: SymbolIdentifier,
353        in_signature: bool,
354    ) {
355        self.add_reference_to_class_member_with_file(scope, class_member, in_signature, None);
356    }
357
358    /// Convenience method to add a reference *from* the current function context *to* a class member.
359    /// Delegates to appropriate `add_*` methods based on the function context.
360    /// If `file_hash` is provided and the reference is from global scope, uses file-level tracking.
361    ///
362    /// # Note on Normalization
363    ///
364    /// This method assumes that symbol names (`class_member`, `function_name`, `class_name`) are already
365    /// normalized to lowercase, as they come from the codebase which stores all symbols in lowercase form.
366    /// No additional normalization is performed to avoid redundant overhead.
367    #[inline]
368    pub fn add_reference_to_class_member_with_file(
369        &mut self,
370        scope: &ScopeContext<'_>,
371        class_member: SymbolIdentifier,
372        in_signature: bool,
373        file_hash: Option<Atom>,
374    ) {
375        if let Some(referencing_functionlike) = scope.get_function_like_identifier() {
376            match referencing_functionlike {
377                FunctionLikeIdentifier::Function(function_name) => {
378                    self.add_symbol_reference_to_class_member(function_name, class_member, in_signature);
379                }
380                FunctionLikeIdentifier::Method(class_name, function_name) => self
381                    .add_class_member_reference_to_class_member(
382                        (class_name, function_name),
383                        class_member,
384                        in_signature,
385                    ),
386                _ => {
387                    // A reference from a closure or arrow function
388                    // If we have a file hash, track it at file level; otherwise use empty_atom()
389                    if let Some(hash) = file_hash {
390                        self.add_file_reference_to_class_member(hash, class_member, in_signature);
391                    } else {
392                        self.add_symbol_reference_to_class_member(empty_atom(), class_member, in_signature);
393                    }
394                }
395            }
396        } else if let Some(calling_class) = scope.get_class_like_name() {
397            // Reference from the class scope itself (e.g., property default)
398            self.add_symbol_reference_to_class_member(calling_class, class_member, in_signature);
399        } else {
400            // No function or class scope - this is a top-level/global reference
401            // Track it at file level if we have a file hash
402            if let Some(hash) = file_hash {
403                self.add_file_reference_to_class_member(hash, class_member, in_signature);
404            } else {
405                self.add_symbol_reference_to_class_member(empty_atom(), class_member, in_signature);
406            }
407        }
408    }
409
410    #[inline]
411    pub fn add_reference_for_method_call(&mut self, scope: &ScopeContext<'_>, method: &MethodIdentifier) {
412        self.add_reference_to_class_member(
413            scope,
414            (ascii_lowercase_atom(&method.get_class_name()), method.get_method_name()),
415            false,
416        );
417    }
418
419    /// Records a read reference to a property (e.g., `$this->prop` used as a value).
420    #[inline]
421    pub fn add_reference_for_property_read(&mut self, scope: &ScopeContext<'_>, class_name: Atom, property_name: Atom) {
422        let normalized_class_name = ascii_lowercase_atom(&class_name);
423        let class_member = (normalized_class_name, property_name);
424
425        self.add_reference_to_class_member(scope, class_member, false);
426
427        let referencing_key = self.get_referencing_key_from_scope(scope);
428        self.property_read_references.entry(referencing_key).or_default().insert(class_member);
429    }
430
431    /// Records a write reference to a property (e.g., `$this->prop = value`).
432    /// This is tracked separately from read references to enable write-only property detection.
433    #[inline]
434    pub fn add_reference_for_property_write(
435        &mut self,
436        scope: &ScopeContext<'_>,
437        class_name: Atom,
438        property_name: Atom,
439    ) {
440        let normalized_class_name = ascii_lowercase_atom(&class_name);
441        let class_member = (normalized_class_name, property_name);
442
443        self.add_reference_to_class_member(scope, class_member, false);
444
445        let referencing_key = self.get_referencing_key_from_scope(scope);
446        self.property_write_references.entry(referencing_key).or_default().insert(class_member);
447    }
448
449    /// Helper to get the referencing key from the current scope context.
450    #[inline]
451    fn get_referencing_key_from_scope(&self, scope: &ScopeContext<'_>) -> SymbolIdentifier {
452        if let Some(referencing_functionlike) = scope.get_function_like_identifier() {
453            match referencing_functionlike {
454                FunctionLikeIdentifier::Function(function_name) => (function_name, empty_atom()),
455                FunctionLikeIdentifier::Method(class_name, function_name) => (class_name, function_name),
456                _ => (empty_atom(), empty_atom()),
457            }
458        } else if let Some(calling_class) = scope.get_class_like_name() {
459            (ascii_lowercase_atom(&calling_class), empty_atom())
460        } else {
461            (empty_atom(), empty_atom())
462        }
463    }
464
465    /// Convenience method to add a reference *from* the current function context *to* an overridden class member (e.g., `parent::foo`).
466    /// Delegates based on the function context.
467    #[inline]
468    pub fn add_reference_to_overridden_class_member(&mut self, scope: &ScopeContext, class_member: SymbolIdentifier) {
469        let referencing_key = if let Some(referencing_functionlike) = scope.get_function_like_identifier() {
470            match referencing_functionlike {
471                FunctionLikeIdentifier::Function(function_name) => (empty_atom(), function_name),
472                FunctionLikeIdentifier::Method(class_name, function_name) => (class_name, function_name),
473                _ => {
474                    // A reference from a closure can be ignored for now.
475                    return;
476                }
477            }
478        } else if let Some(calling_class) = scope.get_class_like_name() {
479            (ascii_lowercase_atom(&calling_class), empty_atom())
480        } else {
481            return; // Cannot record reference without a source context
482        };
483
484        self.symbol_references_to_overridden_members.entry(referencing_key).or_default().insert(class_member);
485    }
486
487    /// Convenience method to add a reference *from* the current function context *to* a top-level symbol.
488    /// Delegates to appropriate `add_*` methods based on the function context.
489    #[inline]
490    pub fn add_reference_to_symbol(&mut self, scope: &ScopeContext, symbol: Atom, in_signature: bool) {
491        if let Some(referencing_functionlike) = scope.get_function_like_identifier() {
492            match referencing_functionlike {
493                FunctionLikeIdentifier::Function(function_name) => {
494                    self.add_symbol_reference_to_symbol(function_name, symbol, in_signature);
495                }
496                FunctionLikeIdentifier::Method(class_name, function_name) => {
497                    self.add_class_member_reference_to_symbol((class_name, function_name), symbol, in_signature);
498                }
499                _ => {
500                    // Ignore references from closures.
501                }
502            }
503        } else if let Some(calling_class) = scope.get_class_like_name() {
504            self.add_symbol_reference_to_symbol(ascii_lowercase_atom(&calling_class), symbol, in_signature);
505        }
506    }
507
508    /// Records that one function/method references the return value of another. Used for dead code analysis.
509    #[inline]
510    pub fn add_reference_to_functionlike_return(
511        &mut self,
512        referencing_functionlike: FunctionLikeIdentifier,
513        referenced_functionlike: FunctionLikeIdentifier,
514    ) {
515        if referencing_functionlike == referenced_functionlike {
516            return;
517        }
518
519        self.functionlike_references_to_functionlike_returns
520            .entry(referencing_functionlike)
521            .or_default()
522            .insert(referenced_functionlike);
523    }
524
525    /// Merges references from another `SymbolReferences` instance into this one.
526    /// Existing references are extended, not replaced.
527    #[inline]
528    pub fn extend(&mut self, other: Self) {
529        for (k, v) in other.symbol_references_to_symbols {
530            self.symbol_references_to_symbols.entry(k).or_default().extend(v);
531        }
532        for (k, v) in other.symbol_references_to_symbols_in_signature {
533            self.symbol_references_to_symbols_in_signature.entry(k).or_default().extend(v);
534        }
535        for (k, v) in other.symbol_references_to_overridden_members {
536            self.symbol_references_to_overridden_members.entry(k).or_default().extend(v);
537        }
538        for (k, v) in other.functionlike_references_to_functionlike_returns {
539            self.functionlike_references_to_functionlike_returns.entry(k).or_default().extend(v);
540        }
541
542        for (k, v) in other.file_references_to_symbols {
543            self.file_references_to_symbols.entry(k).or_default().extend(v);
544        }
545
546        for (k, v) in other.file_references_to_symbols_in_signature {
547            self.file_references_to_symbols_in_signature.entry(k).or_default().extend(v);
548        }
549
550        for (k, v) in other.property_write_references {
551            self.property_write_references.entry(k).or_default().extend(v);
552        }
553
554        for (k, v) in other.property_read_references {
555            self.property_read_references.entry(k).or_default().extend(v);
556        }
557    }
558
559    /// Computes the set of all unique symbols and members that are referenced *by* any symbol/member
560    /// tracked in the body or signature reference maps.
561    ///
562    /// # Returns
563    ///
564    /// A `HashSet` containing `&(SymbolName, MemberName)` tuples of all referenced items.
565    #[inline]
566    #[must_use]
567    pub fn get_referenced_symbols_and_members(&self) -> HashSet<&SymbolIdentifier> {
568        let mut referenced_items = HashSet::default();
569        for refs in self.symbol_references_to_symbols.values() {
570            referenced_items.extend(refs.iter());
571        }
572        for refs in self.symbol_references_to_symbols_in_signature.values() {
573            referenced_items.extend(refs.iter());
574        }
575
576        referenced_items
577    }
578
579    /// Computes the inverse of the body and signature reference maps.
580    ///
581    /// # Returns
582    ///
583    /// A `HashMap` where the key is the referenced symbol/member `(Symbol, Member)` and the value
584    /// is a `HashSet` of referencing symbols/members `(RefSymbol, RefMember)`.
585    #[inline]
586    #[must_use]
587    pub fn get_back_references(&self) -> HashMap<SymbolIdentifier, HashSet<SymbolIdentifier>> {
588        let mut back_refs: HashMap<SymbolIdentifier, HashSet<SymbolIdentifier>> = HashMap::default();
589
590        for (referencing_item, referenced_items) in &self.symbol_references_to_symbols {
591            for referenced_item in referenced_items {
592                back_refs.entry(*referenced_item).or_default().insert(*referencing_item);
593            }
594        }
595        for (referencing_item, referenced_items) in &self.symbol_references_to_symbols_in_signature {
596            for referenced_item in referenced_items {
597                back_refs.entry(*referenced_item).or_default().insert(*referencing_item);
598            }
599        }
600        back_refs
601    }
602
603    /// Finds all symbols/members that reference a specific target symbol/member.
604    /// Checks both body and signature references.
605    ///
606    /// # Arguments
607    ///
608    /// * `target_symbol`: The `(SymbolName, MemberName)` tuple being referenced.
609    ///
610    /// # Returns
611    ///
612    /// A `HashSet` containing `&(RefSymbol, RefMember)` tuples of all items referencing the target.
613    #[inline]
614    #[must_use]
615    pub fn get_references_to_symbol(&self, target_symbol: SymbolIdentifier) -> HashSet<&SymbolIdentifier> {
616        let mut referencing_items = HashSet::default();
617        for (referencing_item, referenced_items) in &self.symbol_references_to_symbols {
618            if referenced_items.contains(&target_symbol) {
619                referencing_items.insert(referencing_item);
620            }
621        }
622        for (referencing_item, referenced_items) in &self.symbol_references_to_symbols_in_signature {
623            if referenced_items.contains(&target_symbol) {
624                referencing_items.insert(referencing_item);
625            }
626        }
627        referencing_items
628    }
629
630    /// Computes the count of references for each unique symbol/member referenced in bodies or signatures.
631    ///
632    /// # Returns
633    ///
634    /// A `HashMap` where the key is the referenced symbol/member `(Symbol, Member)` and the value
635    /// is the total count (`u32`) of references to it.
636    #[inline]
637    #[must_use]
638    pub fn get_referenced_symbols_and_members_with_counts(&self) -> HashMap<SymbolIdentifier, u32> {
639        let mut counts = HashMap::default();
640        for referenced_items in self.symbol_references_to_symbols.values() {
641            for referenced_item in referenced_items {
642                *counts.entry(*referenced_item).or_insert(0) += 1;
643            }
644        }
645        for referenced_items in self.symbol_references_to_symbols_in_signature.values() {
646            for referenced_item in referenced_items {
647                *counts.entry(*referenced_item).or_insert(0) += 1;
648            }
649        }
650        counts
651    }
652
653    /// Computes the inverse of the overridden member reference map.
654    ///
655    /// # Returns
656    ///
657    /// A `HashMap` where the key is the overridden member `(ParentSymbol, Member)` and the value
658    /// is a `HashSet` of referencing symbols/members `(RefSymbol, RefMember)` that call it via `parent::`.
659    #[inline]
660    #[must_use]
661    pub fn get_referenced_overridden_class_members(&self) -> HashMap<SymbolIdentifier, HashSet<SymbolIdentifier>> {
662        let mut back_refs: HashMap<SymbolIdentifier, HashSet<SymbolIdentifier>> = HashMap::default();
663
664        for (referencing_item, referenced_items) in &self.symbol_references_to_overridden_members {
665            for referenced_item in referenced_items {
666                back_refs.entry(*referenced_item).or_default().insert(*referencing_item);
667            }
668        }
669        back_refs
670    }
671
672    /// Calculates sets of invalid symbols and members based on detected code changes (`CodebaseDiff`).
673    /// Propagates invalidation through the dependency graph stored in signature references.
674    /// Limits propagation expense to avoid excessive computation on large changes.
675    ///
676    /// # Arguments
677    ///
678    /// * `codebase_diff`: Information about added, deleted, or modified symbols/signatures.
679    ///
680    /// # Returns
681    ///
682    /// `Some((invalid_signatures, partially_invalid))` on success, where `invalid_signatures` contains
683    /// all symbol/member pairs whose signature is invalid (including propagated ones), and `partially_invalid`
684    /// contains symbols with at least one invalid member.
685    /// Returns `None` if the propagation exceeds an expense limit (currently 5000 steps).
686    #[inline]
687    #[must_use]
688    pub fn get_invalid_symbols(&self, codebase_diff: &CodebaseDiff) -> Option<(HashSet<SymbolIdentifier>, AtomSet)> {
689        let mut invalid_signatures = HashSet::default();
690        let mut partially_invalid_symbols = AtomSet::default();
691
692        let mut sig_reverse_index: HashMap<SymbolIdentifier, Vec<SymbolIdentifier>> = HashMap::default();
693        for (referencing_item, referenced_items) in &self.symbol_references_to_symbols_in_signature {
694            let containing_symbol = (referencing_item.0, empty_atom());
695            if codebase_diff.contains_changed_entry(&containing_symbol) {
696                invalid_signatures.insert(*referencing_item);
697                partially_invalid_symbols.insert(referencing_item.0);
698            }
699
700            for referenced in referenced_items {
701                sig_reverse_index.entry(*referenced).or_default().push(*referencing_item);
702            }
703        }
704
705        // Start with symbols directly added/deleted in the diff.
706        let mut symbols_to_process = codebase_diff.get_changed().iter().copied().collect::<Vec<_>>();
707        let mut processed_symbols = HashSet::default();
708        let mut expense_counter = 0;
709
710        const EXPENSE_LIMIT: usize = 5000;
711        while let Some(invalidated_item) = symbols_to_process.pop() {
712            if processed_symbols.contains(&invalidated_item) {
713                continue;
714            }
715
716            expense_counter += 1;
717            if expense_counter > EXPENSE_LIMIT {
718                return None;
719            }
720
721            // Mark this item as invalid (signature) and processed
722            invalid_signatures.insert(invalidated_item);
723            processed_symbols.insert(invalidated_item);
724            if !invalidated_item.1.is_empty() {
725                // If it's a member, also mark its containing symbol for processing.
726                partially_invalid_symbols.insert(invalidated_item.0);
727                let containing_symbol = (invalidated_item.0, empty_atom());
728                if !processed_symbols.contains(&containing_symbol) {
729                    symbols_to_process.push(containing_symbol);
730                }
731            }
732
733            // Find all items that reference this now-invalid item *in their signature*
734            if let Some(referencing_items) = sig_reverse_index.get(&invalidated_item) {
735                for referencing_item in referencing_items {
736                    if !processed_symbols.contains(referencing_item) {
737                        symbols_to_process.push(*referencing_item);
738                    }
739
740                    invalid_signatures.insert(*referencing_item);
741                    if !referencing_item.1.is_empty() {
742                        partially_invalid_symbols.insert(referencing_item.0);
743                    }
744                }
745            }
746        }
747
748        // An item's body is invalid if it references (anywhere, body or sig) an item with an invalid signature.
749        // Check both body and signature reference maps in a single pass where possible.
750        let mut invalid_bodies = HashSet::default();
751
752        for (referencing_item, referenced_items) in &self.symbol_references_to_symbols {
753            if referenced_items.iter().any(|r| invalid_signatures.contains(r)) {
754                invalid_bodies.insert(*referencing_item);
755                if !referencing_item.1.is_empty() {
756                    partially_invalid_symbols.insert(referencing_item.0);
757                }
758            }
759        }
760
761        for (referencing_item, referenced_items) in &self.symbol_references_to_symbols_in_signature {
762            if referenced_items.iter().any(|r| invalid_signatures.contains(r)) {
763                invalid_bodies.insert(*referencing_item);
764                if !referencing_item.1.is_empty() {
765                    partially_invalid_symbols.insert(referencing_item.0);
766                }
767            }
768        }
769
770        let mut all_invalid_symbols = invalid_signatures;
771        all_invalid_symbols.extend(invalid_bodies);
772        Some((all_invalid_symbols, partially_invalid_symbols))
773    }
774
775    /// Extracts references originating from safe (skipped) symbols and merges them into this instance.
776    ///
777    /// When incremental analysis runs with `diff = true`, the analyzer skips safe symbols,
778    /// which means their body references are not collected. This method copies those missing
779    /// references from the previous run's reference graph.
780    ///
781    /// Only references from symbols that are in `safe_symbols` or `safe_symbol_members`
782    /// (and not already present in this instance) are copied.
783    ///
784    /// # Arguments
785    ///
786    /// * `previous` - The previous run's complete symbol references
787    /// * `safe_symbols` - Set of safe top-level symbol names
788    /// * `safe_symbol_members` - Set of safe (symbol, member) pairs
789    #[inline]
790    pub fn restore_references_for_safe_symbols(
791        &mut self,
792        previous: &SymbolReferences,
793        safe_symbols: &AtomSet,
794        safe_symbol_members: &HashSet<SymbolIdentifier>,
795    ) {
796        let is_safe = |key: &SymbolIdentifier| -> bool {
797            if key.1.is_empty() { safe_symbols.contains(&key.0) } else { safe_symbol_members.contains(key) }
798        };
799
800        // Restore body references for safe symbols
801        for (key, refs) in &previous.symbol_references_to_symbols {
802            if is_safe(key) && !self.symbol_references_to_symbols.contains_key(key) {
803                self.symbol_references_to_symbols.insert(*key, refs.clone());
804            }
805        }
806
807        // Restore overridden member references for safe symbols
808        for (key, refs) in &previous.symbol_references_to_overridden_members {
809            if is_safe(key) && !self.symbol_references_to_overridden_members.contains_key(key) {
810                self.symbol_references_to_overridden_members.insert(*key, refs.clone());
811            }
812        }
813
814        // Restore function-like return references for safe symbols
815        for (key, refs) in &previous.functionlike_references_to_functionlike_returns {
816            let sym_key = match key {
817                FunctionLikeIdentifier::Function(name) => (*name, mago_atom::empty_atom()),
818                FunctionLikeIdentifier::Method(class, method) => (*class, *method),
819                _ => continue,
820            };
821
822            if is_safe(&sym_key) && !self.functionlike_references_to_functionlike_returns.contains_key(key) {
823                self.functionlike_references_to_functionlike_returns.insert(*key, refs.clone());
824            }
825        }
826
827        // Restore property write references for safe symbols
828        for (key, refs) in &previous.property_write_references {
829            if is_safe(key) && !self.property_write_references.contains_key(key) {
830                self.property_write_references.insert(*key, refs.clone());
831            }
832        }
833
834        // Restore property read references for safe symbols
835        for (key, refs) in &previous.property_read_references {
836            if is_safe(key) && !self.property_read_references.contains_key(key) {
837                self.property_read_references.insert(*key, refs.clone());
838            }
839        }
840    }
841
842    /// Removes **body** references originating from the given symbols/members.
843    ///
844    /// Used by the body-only fast path: when only function/method bodies changed (no signature
845    /// changes), we remove old body references and let the analyzer rebuild them fresh.
846    /// Signature references are kept because signatures didn't change.
847    ///
848    /// Also removes function-like return references and property read/write references from
849    /// the given symbols, as those originate from body code.
850    ///
851    /// File-level references keyed by the given file names are also removed.
852    #[inline]
853    pub fn remove_body_references_for_symbols(
854        &mut self,
855        symbols_and_members: &HashSet<SymbolIdentifier>,
856        file_names: &[Atom],
857    ) {
858        // Remove body (not signature) references
859        for key in symbols_and_members {
860            self.symbol_references_to_symbols.remove(key);
861            self.symbol_references_to_overridden_members.remove(key);
862            self.property_write_references.remove(key);
863            self.property_read_references.remove(key);
864        }
865
866        // Remove function-like return references for matching keys
867        self.functionlike_references_to_functionlike_returns.retain(|key, _| {
868            let sym_key = match key {
869                FunctionLikeIdentifier::Function(name) => (*name, mago_atom::empty_atom()),
870                FunctionLikeIdentifier::Method(class, method) => (*class, *method),
871                _ => return true,
872            };
873
874            !symbols_and_members.contains(&sym_key)
875        });
876
877        // Remove file-level body references (signature refs kept)
878        for name in file_names {
879            self.file_references_to_symbols.remove(name);
880        }
881    }
882
883    /// Removes all references *originating from* symbols/members that are marked as invalid.
884    ///
885    /// # Arguments
886    ///
887    /// * `invalid_symbols_and_members`: A set containing `(SymbolName, MemberName)` tuples for invalid items.
888    #[inline]
889    pub fn remove_references_from_invalid_symbols(&mut self, invalid_symbols_and_members: &HashSet<SymbolIdentifier>) {
890        // Retain only entries where the key (referencing item) is NOT in the invalid set.
891        self.symbol_references_to_symbols
892            .retain(|referencing_item, _| !invalid_symbols_and_members.contains(referencing_item));
893        self.symbol_references_to_symbols_in_signature
894            .retain(|referencing_item, _| !invalid_symbols_and_members.contains(referencing_item));
895        self.symbol_references_to_overridden_members
896            .retain(|referencing_item, _| !invalid_symbols_and_members.contains(referencing_item));
897        self.property_write_references
898            .retain(|referencing_item, _| !invalid_symbols_and_members.contains(referencing_item));
899        self.property_read_references
900            .retain(|referencing_item, _| !invalid_symbols_and_members.contains(referencing_item));
901    }
902
903    /// Retains only references originating from safe (unchanged) symbols, removing all others.
904    ///
905    /// This is the inverse of [`remove_references_from_invalid_symbols`]: instead of
906    /// specifying what to remove, you specify what to keep. References from non-safe symbols
907    /// will be rebuilt by `populate_codebase` and the analyzer.
908    ///
909    /// This method also retains all builtin/prelude references (those where the key symbol
910    /// is not user-defined, i.e., is in the base references).
911    #[inline]
912    pub fn retain_safe_symbol_references(
913        &mut self,
914        safe_symbols: &AtomSet,
915        safe_symbol_members: &HashSet<SymbolIdentifier>,
916    ) {
917        let is_safe = |key: &SymbolIdentifier| -> bool {
918            if key.1.is_empty() { safe_symbols.contains(&key.0) } else { safe_symbol_members.contains(key) }
919        };
920
921        self.symbol_references_to_symbols.retain(|k, _| is_safe(k));
922        self.symbol_references_to_symbols_in_signature.retain(|k, _| is_safe(k));
923        self.symbol_references_to_overridden_members.retain(|k, _| is_safe(k));
924        self.property_write_references.retain(|k, _| is_safe(k));
925        self.property_read_references.retain(|k, _| is_safe(k));
926
927        self.functionlike_references_to_functionlike_returns.retain(|key, _| {
928            let sym_key = match key {
929                FunctionLikeIdentifier::Function(name) => (*name, mago_atom::empty_atom()),
930                FunctionLikeIdentifier::Method(class, method) => (*class, *method),
931                _ => return true, // Keep closures and other non-symbol function-likes
932            };
933
934            is_safe(&sym_key)
935        });
936    }
937
938    /// Removes references for dirty (non-safe) symbols — O(dirty) instead of O(all).
939    ///
940    /// This is the inverse of [`retain_safe_symbol_references`]: instead of iterating all
941    /// entries and keeping safe ones, it directly removes entries for the given dirty set.
942    /// Much faster when the dirty set is small relative to the total number of references.
943    pub fn remove_dirty_symbol_references(&mut self, dirty_symbols: &HashSet<SymbolIdentifier>) {
944        for key in dirty_symbols {
945            self.symbol_references_to_symbols.remove(key);
946            self.symbol_references_to_symbols_in_signature.remove(key);
947            self.symbol_references_to_overridden_members.remove(key);
948            self.property_write_references.remove(key);
949            self.property_read_references.remove(key);
950
951            let fl_key = if key.1.is_empty() {
952                FunctionLikeIdentifier::Function(key.0)
953            } else {
954                FunctionLikeIdentifier::Method(key.0, key.1)
955            };
956
957            self.functionlike_references_to_functionlike_returns.remove(&fl_key);
958        }
959    }
960
961    /// Returns a reference to the map tracking references within symbol/member bodies.
962    #[inline]
963    #[must_use]
964    pub fn get_symbol_references_to_symbols(&self) -> &HashMap<SymbolIdentifier, HashSet<SymbolIdentifier>> {
965        &self.symbol_references_to_symbols
966    }
967
968    /// Returns a reference to the map tracking references within symbol/member signatures.
969    #[inline]
970    #[must_use]
971    pub fn get_symbol_references_to_symbols_in_signature(
972        &self,
973    ) -> &HashMap<SymbolIdentifier, HashSet<SymbolIdentifier>> {
974        &self.symbol_references_to_symbols_in_signature
975    }
976
977    /// Returns a reference to the map tracking references to overridden members.
978    #[inline]
979    #[must_use]
980    pub fn get_symbol_references_to_overridden_members(&self) -> &HashMap<SymbolIdentifier, HashSet<SymbolIdentifier>> {
981        &self.symbol_references_to_overridden_members
982    }
983
984    /// Returns a reference to the map tracking references to function-like return values.
985    #[inline]
986    #[must_use]
987    pub fn get_functionlike_references_to_functionlike_returns(
988        &self,
989    ) -> &HashMap<FunctionLikeIdentifier, HashSet<FunctionLikeIdentifier>> {
990        &self.functionlike_references_to_functionlike_returns
991    }
992
993    /// Returns a reference to the map tracking file-level references to symbols (body).
994    #[inline]
995    #[must_use]
996    pub fn get_file_references_to_symbols(&self) -> &HashMap<Atom, HashSet<SymbolIdentifier>> {
997        &self.file_references_to_symbols
998    }
999
1000    /// Returns a reference to the map tracking file-level references to symbols (signature).
1001    #[inline]
1002    #[must_use]
1003    pub fn get_file_references_to_symbols_in_signature(&self) -> &HashMap<Atom, HashSet<SymbolIdentifier>> {
1004        &self.file_references_to_symbols_in_signature
1005    }
1006}
1007
1008#[cfg(test)]
1009mod tests {
1010    use super::*;
1011    use mago_atom::atom;
1012    use mago_atom::empty_atom;
1013
1014    fn make_refs_with_body(entries: Vec<(SymbolIdentifier, Vec<SymbolIdentifier>)>) -> SymbolReferences {
1015        let mut refs = SymbolReferences::new();
1016        for (key, values) in entries {
1017            let set: HashSet<SymbolIdentifier> = values.into_iter().collect();
1018            refs.symbol_references_to_symbols.insert(key, set);
1019        }
1020        refs
1021    }
1022
1023    #[test]
1024    fn test_restore_references_for_safe_symbols_restores_missing_body_refs() {
1025        let class_a = atom("class_a");
1026        let class_b = atom("class_b");
1027        let method_foo = atom("foo");
1028        let method_bar = atom("bar");
1029
1030        let previous = make_refs_with_body(vec![
1031            ((class_a, method_foo), vec![(class_b, empty_atom())]),
1032            ((class_b, method_bar), vec![(class_a, empty_atom())]),
1033        ]);
1034
1035        let mut current = make_refs_with_body(vec![((class_b, method_bar), vec![(class_a, empty_atom())])]);
1036
1037        let safe_symbols = AtomSet::default();
1038        let mut safe_members = HashSet::default();
1039        safe_members.insert((class_a, method_foo));
1040
1041        current.restore_references_for_safe_symbols(&previous, &safe_symbols, &safe_members);
1042
1043        assert!(current.symbol_references_to_symbols.contains_key(&(class_a, method_foo)));
1044        let restored = &current.symbol_references_to_symbols[&(class_a, method_foo)];
1045        assert!(restored.contains(&(class_b, empty_atom())));
1046
1047        assert!(current.symbol_references_to_symbols.contains_key(&(class_b, method_bar)));
1048    }
1049
1050    #[test]
1051    fn test_restore_references_does_not_overwrite_existing() {
1052        let class_a = atom("class_a");
1053        let class_b = atom("class_b");
1054        let class_c = atom("class_c");
1055        let method_foo = atom("foo");
1056
1057        let previous = make_refs_with_body(vec![((class_a, method_foo), vec![(class_b, empty_atom())])]);
1058
1059        let mut current = make_refs_with_body(vec![((class_a, method_foo), vec![(class_c, empty_atom())])]);
1060
1061        let safe_symbols = AtomSet::default();
1062        let mut safe_members = HashSet::default();
1063        safe_members.insert((class_a, method_foo));
1064
1065        current.restore_references_for_safe_symbols(&previous, &safe_symbols, &safe_members);
1066
1067        let refs = &current.symbol_references_to_symbols[&(class_a, method_foo)];
1068        assert!(refs.contains(&(class_c, empty_atom())));
1069        assert!(!refs.contains(&(class_b, empty_atom())));
1070    }
1071
1072    #[test]
1073    fn test_restore_references_for_safe_top_level_symbols() {
1074        let func_a = atom("func_a");
1075        let class_b = atom("class_b");
1076
1077        let previous = make_refs_with_body(vec![((func_a, empty_atom()), vec![(class_b, empty_atom())])]);
1078
1079        let mut current = SymbolReferences::new();
1080
1081        let mut safe_symbols = AtomSet::default();
1082        safe_symbols.insert(func_a);
1083        let safe_members = HashSet::default();
1084
1085        current.restore_references_for_safe_symbols(&previous, &safe_symbols, &safe_members);
1086
1087        assert!(current.symbol_references_to_symbols.contains_key(&(func_a, empty_atom())));
1088        let restored = &current.symbol_references_to_symbols[&(func_a, empty_atom())];
1089        assert!(restored.contains(&(class_b, empty_atom())));
1090    }
1091
1092    #[test]
1093    fn test_restore_skips_non_safe_symbols() {
1094        let func_a = atom("func_a");
1095        let class_b = atom("class_b");
1096        let previous = make_refs_with_body(vec![((func_a, empty_atom()), vec![(class_b, empty_atom())])]);
1097
1098        let mut current = SymbolReferences::new();
1099
1100        let safe_symbols = AtomSet::default();
1101        let safe_members = HashSet::default();
1102
1103        current.restore_references_for_safe_symbols(&previous, &safe_symbols, &safe_members);
1104
1105        assert!(!current.symbol_references_to_symbols.contains_key(&(func_a, empty_atom())));
1106    }
1107
1108    #[test]
1109    fn test_get_invalid_symbols_basic_cascade() {
1110        let class_a = atom("class_a");
1111        let class_b = atom("class_b");
1112        let method_foo = atom("foo");
1113
1114        let mut refs = SymbolReferences::new();
1115        refs.symbol_references_to_symbols_in_signature.insert((class_b, method_foo), {
1116            let mut set = HashSet::default();
1117            set.insert((class_a, empty_atom()));
1118            set
1119        });
1120
1121        let mut diff = crate::diff::CodebaseDiff::new();
1122        let mut changed = HashSet::default();
1123        changed.insert((class_a, empty_atom()));
1124        diff = diff.with_changed(changed);
1125
1126        let result = refs.get_invalid_symbols(&diff);
1127        assert!(result.is_some());
1128        let (invalid, partially_invalid) = result.unwrap();
1129
1130        assert!(invalid.contains(&(class_a, empty_atom())));
1131        assert!(invalid.contains(&(class_b, method_foo)));
1132        assert!(partially_invalid.contains(&class_b));
1133    }
1134}