Skip to main content

sqry_core/graph/
local_scopes.rs

1//! Shared local scope tracking and reference resolution infrastructure.
2//!
3//! This module provides a generic `ScopeTree<K>` that builds a per-file scope
4//! tree, binds local declarations, and resolves identifier usages to declaration
5//! nodes for `References` edges.
6//!
7//! Language plugins implement the [`ScopeKindTrait`] for their language-specific
8//! `ScopeKind` enum and provide AST-specific `build_scopes_recursive()` and
9//! `bind_declarations_recursive()` functions. The shared infrastructure handles:
10//!
11//! - Scope interval indexing with O(log n) lookup
12//! - Variable binding with overlap detection
13//! - Scope chain resolution with self-reference prevention
14//! - String interning for memory efficiency
15//! - Class member tracking and resolution (for OOP languages)
16//! - Debug log rate limiting
17
18use std::collections::{HashMap, HashSet};
19use std::fmt::Debug;
20use std::sync::Arc;
21
22use log::debug;
23
24use crate::graph::unified::node::NodeId;
25use crate::graph::{GraphBuilderError, Span};
26
27/// Alias for scope indices within the tree.
28pub type ScopeId = usize;
29
30/// Maximum debug log messages per event category to avoid log flooding.
31pub const MAX_DEBUG_LOGS_PER_EVENT: usize = 5;
32
33// ============================================================================
34// ScopeKindTrait — Language plugins implement this
35// ============================================================================
36
37/// Trait that language-specific `ScopeKind` enums must implement.
38///
39/// This trait allows the shared `ScopeTree` to query scope semantics
40/// without knowing the language-specific scope variants.
41pub trait ScopeKindTrait: Copy + Eq + Debug + Send + Sync {
42    /// Returns `true` if this scope kind represents a class-like scope
43    /// (anonymous class, local class, etc.) that acts as a boundary for
44    /// variable capture and overlap detection.
45    fn is_class_scope(&self) -> bool;
46
47    /// Returns `true` if this scope kind is an overlap boundary — i.e.,
48    /// duplicate variable names in enclosing scopes should be rejected
49    /// at this boundary. Typically the same as `is_class_scope()`.
50    fn is_overlap_boundary(&self) -> bool;
51
52    /// Returns `true` if this class scope does NOT capture variables
53    /// from enclosing scopes. For example, Java's `LocalRecord`,
54    /// `LocalEnum`, and `LocalInterface` are non-capturing.
55    ///
56    /// Default: `false` (all class scopes capture by default).
57    fn is_non_capturing_class_scope(&self) -> bool {
58        false
59    }
60
61    /// Returns `true` if this language allows nested variable shadowing
62    /// (same name in nested block scope). Kotlin allows this; Java does not.
63    ///
64    /// When `true`, `is_overlap()` only checks the current scope, not parents.
65    /// Default: `false`.
66    fn allows_nested_shadowing(&self) -> bool {
67        false
68    }
69
70    /// Returns `true` if this scope kind should block capture chain traversal.
71    ///
72    /// When resolving identifiers past a class boundary, the capture chain
73    /// walks outer scopes looking for captured variables. This method controls
74    /// which scopes stop that traversal.
75    ///
76    /// Java overrides to return `is_non_capturing_class_scope()` only, because
77    /// anonymous classes and local classes CAN capture effectively-final
78    /// variables from enclosing scopes through multiple nesting levels.
79    ///
80    /// Default: `self.is_non_capturing_class_scope() || self.is_class_scope()`
81    fn blocks_capture_chain(&self) -> bool {
82        self.is_non_capturing_class_scope() || self.is_class_scope()
83    }
84
85    /// Whether unresolved base types should cause ambiguous member resolution.
86    ///
87    /// When a class has base types that cannot be resolved (e.g., from external
88    /// libraries), this controls whether member lookups return `Ambiguous`
89    /// (conservative) or `None` (allowing the capture chain to proceed).
90    ///
91    /// Java: `true` (strict — returns `Ambiguous` when bases are unresolved).
92    /// Kotlin: `false` (lenient — returns `None` so capture chain continues).
93    ///
94    /// Default: `true` (conservative, matches Java behavior).
95    fn strict_unresolved_bases(&self) -> bool {
96        true
97    }
98}
99
100// ============================================================================
101// Core Data Structures
102// ============================================================================
103
104/// A local variable binding within a scope.
105#[derive(Debug, Clone)]
106pub struct VariableBinding {
107    /// The graph node representing this binding (set after node creation).
108    pub node_id: Option<NodeId>,
109    /// Start byte of the identifier in the source.
110    pub decl_start_byte: usize,
111    /// End byte of the identifier in the source.
112    pub decl_end_byte: usize,
113    /// End byte of the full declarator (e.g., for `int x = 5`, ends after `5`).
114    pub declarator_end_byte: usize,
115    /// Start byte of the initializer expression (if present).
116    /// Used for self-reference prevention: `let x = x + 1` should NOT
117    /// resolve the RHS `x` to this same binding.
118    pub initializer_start_byte: Option<usize>,
119}
120
121/// A scope within the scope tree, parameterized by a language-specific kind.
122#[derive(Debug, Clone)]
123pub struct Scope<K: ScopeKindTrait> {
124    /// Byte offset where this scope begins in the source.
125    pub start_byte: usize,
126    /// Byte offset where this scope ends in the source.
127    pub end_byte: usize,
128    /// Parent scope index (None for root scopes).
129    pub parent: Option<ScopeId>,
130    /// Language-specific scope kind.
131    pub kind: K,
132    /// Nesting depth from root.
133    pub depth: usize,
134    /// Variable bindings indexed by interned name.
135    pub variables: HashMap<Arc<str>, Vec<VariableBinding>>,
136}
137
138/// Interval for binary-search scope lookup.
139#[derive(Debug, Clone)]
140struct ScopeInterval {
141    start: usize,
142    end: usize,
143    scope_id: ScopeId,
144    depth: usize,
145}
146
147/// Sorted interval index for O(log n) innermost-scope queries.
148#[derive(Debug, Clone, Default)]
149struct ScopeIndex {
150    intervals: Vec<ScopeInterval>,
151}
152
153/// String deduplication cache for variable names.
154#[derive(Debug, Clone, Default)]
155pub struct StringInterner {
156    map: HashMap<String, Arc<str>>,
157}
158
159impl StringInterner {
160    /// Intern a string, returning a shared `Arc<str>`.
161    pub fn intern(&mut self, value: &str) -> Arc<str> {
162        if let Some(existing) = self.map.get(value) {
163            return existing.clone();
164        }
165        let arc: Arc<str> = Arc::from(value);
166        self.map.insert(value.to_string(), arc.clone());
167        arc
168    }
169}
170
171// ============================================================================
172// Class Member Tracking (for OOP languages)
173// ============================================================================
174
175/// Source of an inherited member (tracks which base class it came from).
176#[derive(Debug, Clone)]
177pub struct MemberSource {
178    /// Qualified name of the base class that declares/inherits this member.
179    pub qualifier: String,
180}
181
182/// Class member info associated with a scope.
183#[derive(Debug, Clone)]
184pub struct ClassMemberInfo {
185    /// Qualified name of this class (e.g., `"com.example.MyClass"`).
186    pub qualifier: Option<String>,
187    /// Members declared directly in this class.
188    pub declared_members: HashSet<Arc<str>>,
189    /// Members inherited from base classes.
190    pub inherited_members: HashMap<Arc<str>, Vec<MemberSource>>,
191    /// Count of base types that couldn't be resolved.
192    pub unresolved_base_count: usize,
193    /// Count of explicitly declared base types.
194    pub explicit_base_count: usize,
195}
196
197/// Maps scope IDs to class member information.
198#[derive(Debug, Clone, Default)]
199pub struct ClassMemberIndex {
200    /// Per-scope class member information.
201    pub by_scope: HashMap<ScopeId, ClassMemberInfo>,
202}
203
204/// A class's identity and declared members (for type name resolution).
205#[derive(Debug, Clone)]
206pub struct ClassInfo {
207    /// Qualified name of this class.
208    pub qualifier: String,
209    /// Members declared directly in this class.
210    pub declared_members: HashSet<Arc<str>>,
211}
212
213/// Index from class name keys to `ClassInfo` entries.
214#[derive(Debug, Clone, Default)]
215pub struct ClassInfoIndex {
216    by_key: HashMap<String, Vec<usize>>,
217    infos: Vec<ClassInfo>,
218}
219
220impl ClassInfoIndex {
221    /// Insert a class info entry with one or more lookup keys.
222    pub fn insert(&mut self, info: ClassInfo, keys: &[String]) {
223        let idx = self.infos.len();
224        self.infos.push(info);
225        for key in keys {
226            self.by_key.entry(key.clone()).or_default().push(idx);
227        }
228    }
229
230    /// Resolve a class by key. Returns `Some` only if there is exactly one match.
231    #[must_use]
232    pub fn resolve(&self, key: &str) -> Option<&ClassInfo> {
233        let candidates = self.by_key.get(key)?;
234        if candidates.len() == 1 {
235            self.infos.get(candidates[0])
236        } else {
237            None
238        }
239    }
240}
241
242/// Resolve a class info from the index, trying multiple key formats.
243///
244/// Attempts resolution with: the raw key, `::` replaced with `.`, and the
245/// last segment (simple name) only.
246#[must_use]
247pub fn resolve_class_info<'a>(index: &'a ClassInfoIndex, base: &str) -> Option<&'a ClassInfo> {
248    if let Some(info) = index.resolve(base) {
249        return Some(info);
250    }
251    let dotted = base.replace("::", ".");
252    if dotted != base
253        && let Some(info) = index.resolve(&dotted)
254    {
255        return Some(info);
256    }
257    if let Some(last) = base.rsplit(['.', ':']).next()
258        && let Some(info) = index.resolve(last)
259    {
260        return Some(info);
261    }
262    None
263}
264
265// ============================================================================
266// Debug Log Limiter
267// ============================================================================
268
269/// Debug event categories for rate-limited logging.
270#[derive(Debug, Clone, Hash, Eq, PartialEq)]
271pub enum DebugEvent {
272    /// Scope or binding span is invalid (start > end or beyond content length).
273    InvalidSpan,
274    /// Overlap conflict when adding a binding.
275    OverlapConflict,
276    /// Ambiguous inherited member resolution.
277    InheritedAmbiguous,
278    /// Could not find a binding to attach a `NodeId` to.
279    MissingBindingNode,
280}
281
282/// Rate-limited debug logger to avoid flooding logs.
283#[derive(Debug, Default)]
284pub struct DebugLogLimiter {
285    /// Per-event counters (public for test assertions in language plugins).
286    pub counts: HashMap<DebugEvent, usize>,
287}
288
289impl DebugLogLimiter {
290    /// Log a debug message, but only up to `MAX_DEBUG_LOGS_PER_EVENT` per category.
291    pub fn log(&mut self, event: DebugEvent, message: &str) {
292        let entry = self.counts.entry(event).or_insert(0);
293        if *entry < MAX_DEBUG_LOGS_PER_EVENT {
294            *entry += 1;
295            debug!("{message}");
296        }
297    }
298}
299
300// ============================================================================
301// Resolution Types
302// ============================================================================
303
304/// Outcome of resolving an identifier to a declaration.
305#[derive(Debug)]
306pub enum ResolutionOutcome {
307    /// Resolved to a local variable binding.
308    Local(LocalBindingMatch),
309    /// Resolved to a class member (field, property, enum constant, etc.).
310    Member {
311        /// Qualified name of the member, if known.
312        qualified_name: Option<String>,
313    },
314    /// Multiple conflicting resolutions found.
315    Ambiguous,
316    /// No matching declaration found.
317    NoMatch,
318}
319
320/// A successful local variable resolution.
321#[derive(Debug, Clone, Copy)]
322pub struct LocalBindingMatch {
323    /// The graph node ID for the variable declaration (may be None if not yet created).
324    pub node_id: Option<NodeId>,
325    /// Start byte of the declaration identifier.
326    pub decl_start_byte: usize,
327    /// End byte of the declaration identifier.
328    pub decl_end_byte: usize,
329}
330
331// ============================================================================
332// ScopeTree<K> — The shared scope tree
333// ============================================================================
334
335/// Per-file scope tree with variable binding and resolution.
336///
337/// Generic over `K`, a language-specific `ScopeKind` enum that implements
338/// [`ScopeKindTrait`].
339#[derive(Debug)]
340pub struct ScopeTree<K: ScopeKindTrait> {
341    /// All scopes, indexed by `ScopeId`.
342    pub scopes: Vec<Scope<K>>,
343    /// Binary-search interval index.
344    index: ScopeIndex,
345    /// String deduplication cache.
346    pub interner: StringInterner,
347    /// Class member tracking (for OOP languages).
348    pub class_members: ClassMemberIndex,
349    /// Class info index (for type name resolution).
350    pub class_infos: ClassInfoIndex,
351    /// Rate-limited debug logger.
352    pub debug: DebugLogLimiter,
353    /// Total content length in bytes (for span validation).
354    pub content_len: usize,
355}
356
357impl<K: ScopeKindTrait> ScopeTree<K> {
358    /// Create a new empty scope tree for the given content length.
359    ///
360    /// Language plugins should populate the tree using `add_scope()`,
361    /// `add_binding()`, and `rebuild_index()`.
362    #[must_use]
363    pub fn new(content_len: usize) -> Self {
364        ScopeTree {
365            scopes: Vec::new(),
366            index: ScopeIndex::default(),
367            interner: StringInterner::default(),
368            class_members: ClassMemberIndex::default(),
369            class_infos: ClassInfoIndex::default(),
370            debug: DebugLogLimiter::default(),
371            content_len,
372        }
373    }
374
375    // ========================================================================
376    // Node ID attachment
377    // ========================================================================
378
379    /// Attach a graph `NodeId` to an existing variable binding.
380    ///
381    /// First tries exact match by name + byte position. Falls back to
382    /// a fuzzy match (single unattached binding with the same name).
383    /// Returns `true` if a binding was found and updated.
384    pub fn attach_node_id(&mut self, name: &str, decl_start_byte: usize, node_id: NodeId) -> bool {
385        // Phase 1: exact match on name + byte position
386        for scope in &mut self.scopes {
387            if let Some(bindings) = scope.variables.get_mut(name) {
388                for binding in bindings {
389                    if binding.decl_start_byte == decl_start_byte {
390                        if binding.node_id.is_none() {
391                            binding.node_id = Some(node_id);
392                        }
393                        return true;
394                    }
395                }
396            }
397        }
398
399        // Phase 2: fallback — single unattached binding with same name
400        let mut fallback: Option<(ScopeId, usize)> = None;
401        let mut ambiguous = false;
402        for (scope_id, scope) in self.scopes.iter().enumerate() {
403            if let Some(bindings) = scope.variables.get(name) {
404                for (idx, binding) in bindings.iter().enumerate() {
405                    if binding.node_id.is_none() {
406                        if fallback.is_none() {
407                            fallback = Some((scope_id, idx));
408                        } else {
409                            ambiguous = true;
410                            break;
411                        }
412                    }
413                }
414            }
415            if ambiguous {
416                break;
417            }
418        }
419
420        if !ambiguous
421            && let Some((scope_id, idx)) = fallback
422            && let Some(bindings) = self.scopes[scope_id].variables.get_mut(name)
423            && let Some(binding) = bindings.get_mut(idx)
424        {
425            binding.node_id = Some(node_id);
426            return true;
427        }
428
429        self.debug.log(
430            DebugEvent::MissingBindingNode,
431            "Missing binding for node_id attach",
432        );
433        false
434    }
435
436    // ========================================================================
437    // Resolution — with class boundary support
438    // ========================================================================
439
440    /// Resolve an identifier to a local variable, class member, or no-match.
441    ///
442    /// This is the primary entry point for resolution. It:
443    /// 1. Finds the innermost scope at the usage byte position
444    /// 2. Builds a scope chain to the root
445    /// 3. Detects class boundaries and handles capture semantics
446    /// 4. Falls back to class member resolution for OOP languages
447    pub fn resolve_identifier(&mut self, usage_byte: usize, identifier: &str) -> ResolutionOutcome {
448        let Some(innermost) = self.innermost_scope_at(usage_byte) else {
449            return ResolutionOutcome::NoMatch;
450        };
451        let chain = self.scope_chain(innermost);
452
453        let class_boundary = chain
454            .iter()
455            .position(|scope_id| self.scopes[*scope_id].kind.is_class_scope());
456
457        if let Some(boundary_idx) = class_boundary {
458            let current_class_scope = chain[boundary_idx];
459
460            // Try local resolution before the class boundary
461            if let Some(binding) =
462                self.resolve_local_in_chain(identifier, usage_byte, &chain[..boundary_idx])
463            {
464                return ResolutionOutcome::Local(binding);
465            }
466
467            // Try class member resolution
468            if let Some(member_resolution) =
469                self.resolve_class_member(current_class_scope, identifier)
470            {
471                return member_resolution;
472            }
473
474            // Check if class scope blocks capture
475            let class_kind = self.scopes[current_class_scope].kind;
476            if class_kind.is_non_capturing_class_scope() {
477                return ResolutionOutcome::NoMatch;
478            }
479
480            // Try capture chain (variables from enclosing non-class scopes)
481            let mut capture_chain = Vec::new();
482            for scope_id in &chain[boundary_idx + 1..] {
483                if self.scopes[*scope_id].kind.blocks_capture_chain() {
484                    break;
485                }
486                capture_chain.push(*scope_id);
487            }
488            if let Some(binding) =
489                self.resolve_local_in_chain(identifier, usage_byte, &capture_chain)
490            {
491                return ResolutionOutcome::Local(binding);
492            }
493
494            return ResolutionOutcome::NoMatch;
495        }
496
497        // No class boundary — resolve in entire chain
498        if let Some(binding) = self.resolve_local_in_chain(identifier, usage_byte, &chain) {
499            return ResolutionOutcome::Local(binding);
500        }
501
502        ResolutionOutcome::NoMatch
503    }
504
505    /// Quick check if a name is a known type/class name.
506    #[must_use]
507    pub fn is_known_type_name(&self, name: &str) -> bool {
508        self.class_infos.resolve(name).is_some()
509    }
510
511    /// Quick check if a local variable binding exists for `name` at the given byte position.
512    #[must_use]
513    pub fn has_local_binding(&self, name: &str, byte: usize) -> bool {
514        let Some(innermost) = self.innermost_scope_at(byte) else {
515            return false;
516        };
517        let chain = self.scope_chain(innermost);
518        self.resolve_local_in_chain(name, byte, &chain).is_some()
519    }
520
521    // ========================================================================
522    // Internal resolution helpers
523    // ========================================================================
524
525    /// Walk a scope chain looking for a binding matching `identifier` before `usage_byte`.
526    #[must_use]
527    pub fn resolve_local_in_chain(
528        &self,
529        identifier: &str,
530        usage_byte: usize,
531        chain: &[ScopeId],
532    ) -> Option<LocalBindingMatch> {
533        for scope_id in chain {
534            if let Some(bindings) = self.scopes[*scope_id].variables.get(identifier)
535                && let Some(binding) = find_applicable_binding(bindings, usage_byte)
536            {
537                return Some(LocalBindingMatch {
538                    node_id: binding.node_id,
539                    decl_start_byte: binding.decl_start_byte,
540                    decl_end_byte: binding.decl_end_byte,
541                });
542            }
543        }
544        None
545    }
546
547    /// Resolve a class member by scope ID and identifier name.
548    pub fn resolve_class_member(
549        &mut self,
550        scope_id: ScopeId,
551        identifier: &str,
552    ) -> Option<ResolutionOutcome> {
553        let info = self.class_members.by_scope.get(&scope_id)?;
554        if info.declared_members.contains(identifier) {
555            let qualified_name = info
556                .qualifier
557                .as_ref()
558                .map(|qual| format!("{qual}::{identifier}"));
559            return Some(ResolutionOutcome::Member { qualified_name });
560        }
561
562        if let Some(sources) = info.inherited_members.get(identifier) {
563            if sources.len() == 1 {
564                let qualified_name = Some(format!("{}::{}", sources[0].qualifier, identifier));
565                return Some(ResolutionOutcome::Member { qualified_name });
566            }
567            if sources.len() > 1 {
568                self.debug.log(
569                    DebugEvent::InheritedAmbiguous,
570                    "Inherited member ambiguity: multiple matches",
571                );
572                return Some(ResolutionOutcome::Ambiguous);
573            }
574        }
575
576        if info.explicit_base_count > 0
577            && info.unresolved_base_count > 0
578            && self.scopes[scope_id].kind.strict_unresolved_bases()
579        {
580            self.debug.log(
581                DebugEvent::InheritedAmbiguous,
582                "Inherited member ambiguity: unresolved base",
583            );
584            return Some(ResolutionOutcome::Ambiguous);
585        }
586
587        None
588    }
589
590    // ========================================================================
591    // Scope lookup
592    // ========================================================================
593
594    /// Find the innermost (deepest) scope containing the given byte position.
595    ///
596    /// Uses a sorted interval index with binary search + reverse scan for
597    /// O(log n) lookup.
598    #[must_use]
599    pub fn innermost_scope_at(&self, byte: usize) -> Option<ScopeId> {
600        let intervals = &self.index.intervals;
601        if intervals.is_empty() {
602            return None;
603        }
604
605        // partition_point returns the first index where start > byte,
606        // so all candidates with start <= byte are in [0..idx).
607        let idx = intervals.partition_point(|iv| iv.start <= byte);
608        if idx == 0 {
609            return None;
610        }
611
612        // Reverse scan: for properly nested intervals sorted by
613        // (start ASC, depth ASC), the first containing interval is the
614        // deepest. At the same start position, deeper scopes come later
615        // in the array and are seen first during reverse iteration.
616        for iv in intervals[..idx].iter().rev() {
617            if byte < iv.end {
618                return Some(iv.scope_id);
619            }
620        }
621
622        None
623    }
624
625    /// Build the parent chain from a scope to the root.
626    #[must_use]
627    pub fn scope_chain(&self, innermost: ScopeId) -> Vec<ScopeId> {
628        let mut chain = Vec::new();
629        let mut current = Some(innermost);
630        while let Some(scope_id) = current {
631            chain.push(scope_id);
632            current = self.scopes[scope_id].parent;
633        }
634        chain
635    }
636
637    // ========================================================================
638    // Scope and binding construction
639    // ========================================================================
640
641    /// Add a new scope to the tree.
642    ///
643    /// Returns `None` if the span is invalid (start > end or beyond content).
644    pub fn add_scope(
645        &mut self,
646        kind: K,
647        start_byte: usize,
648        end_byte: usize,
649        parent: Option<ScopeId>,
650    ) -> Option<ScopeId> {
651        if !is_valid_span(start_byte, end_byte, self.content_len) {
652            self.debug
653                .log(DebugEvent::InvalidSpan, "Invalid scope span");
654            return None;
655        }
656        let depth = parent.map_or(0, |p| self.scopes[p].depth + 1);
657        let scope_id = self.scopes.len();
658        self.scopes.push(Scope {
659            start_byte,
660            end_byte,
661            parent,
662            kind,
663            depth,
664            variables: HashMap::new(),
665        });
666        Some(scope_id)
667    }
668
669    /// Rebuild the interval index from the current scopes.
670    ///
671    /// Must be called after adding scopes and before performing lookups.
672    pub fn rebuild_index(&mut self) {
673        self.index.intervals = self
674            .scopes
675            .iter()
676            .enumerate()
677            .map(|(scope_id, scope)| ScopeInterval {
678                start: scope.start_byte,
679                end: scope.end_byte,
680                scope_id,
681                depth: scope.depth,
682            })
683            .collect();
684        // Sort by (start ASC, depth ASC) so the deepest scope at each start
685        // position comes last — and is found first during reverse scan.
686        self.index
687            .intervals
688            .sort_by(|a, b| a.start.cmp(&b.start).then(a.depth.cmp(&b.depth)));
689    }
690
691    /// Add a variable binding to a scope.
692    ///
693    /// Performs overlap detection before adding. If the binding would
694    /// overlap with an existing binding in the same or enclosing scope
695    /// (depending on language rules), it is silently rejected.
696    pub fn add_binding(
697        &mut self,
698        scope_id: ScopeId,
699        name: &str,
700        decl_start_byte: usize,
701        decl_end_byte: usize,
702        declarator_end_byte: usize,
703        initializer_start_byte: Option<usize>,
704    ) {
705        if !is_valid_span(decl_start_byte, decl_end_byte, self.content_len)
706            || !is_valid_span(decl_end_byte, declarator_end_byte, self.content_len)
707        {
708            self.debug
709                .log(DebugEvent::InvalidSpan, "Invalid binding span");
710            return;
711        }
712
713        if self.is_overlap(scope_id, name, decl_start_byte) {
714            self.debug
715                .log(DebugEvent::OverlapConflict, "Overlap conflict for binding");
716            return;
717        }
718
719        let key = self.interner.intern(name);
720        let binding = VariableBinding {
721            node_id: None,
722            decl_start_byte,
723            decl_end_byte,
724            declarator_end_byte,
725            initializer_start_byte,
726        };
727
728        let entry = self
729            .scopes
730            .get_mut(scope_id)
731            .map(|scope| scope.variables.entry(key).or_default());
732        if let Some(bindings) = entry {
733            bindings.push(binding);
734            bindings.sort_by_key(|b| b.decl_start_byte);
735        }
736    }
737
738    /// Check if adding a binding in `scope_id` with `name` would overlap.
739    ///
740    /// Language-specific behavior:
741    /// - If `allows_nested_shadowing()` is true (e.g., Kotlin): only checks
742    ///   the current scope for duplicate names.
743    /// - Otherwise (e.g., Java): walks parent scopes up to an overlap boundary.
744    fn is_overlap(&self, scope_id: ScopeId, name: &str, decl_start: usize) -> bool {
745        // Check if the language allows nested shadowing
746        if let Some(scope) = self.scopes.get(scope_id)
747            && scope.kind.allows_nested_shadowing()
748        {
749            // Only check the same scope for duplicates
750            return scope
751                .variables
752                .get(name)
753                .is_some_and(|bindings| !bindings.is_empty());
754        }
755
756        // Walk up parent scopes checking for overlapping bindings
757        let mut current = Some(scope_id);
758        while let Some(scope) = current {
759            if let Some(bindings) = self.scopes[scope].variables.get(name)
760                && !bindings.is_empty()
761                && decl_start <= self.scopes[scope].end_byte
762            {
763                return true;
764            }
765
766            if self.scopes[scope].kind.is_overlap_boundary() {
767                break;
768            }
769
770            current = self.scopes[scope].parent;
771        }
772        false
773    }
774}
775
776// ============================================================================
777// Free functions — language-agnostic utilities
778// ============================================================================
779
780/// Find the most recent applicable binding before `usage_byte`.
781///
782/// A binding is applicable if:
783/// 1. Its declaration comes before the usage (`decl_start_byte <= usage_byte`)
784/// 2. The usage is NOT within the binding's own initializer (self-reference prevention)
785///
786/// Returns the binding with the largest `decl_start_byte` (most recent).
787#[must_use]
788pub fn find_applicable_binding(
789    bindings: &[VariableBinding],
790    usage_byte: usize,
791) -> Option<&VariableBinding> {
792    bindings
793        .iter()
794        .filter(|binding| binding.decl_start_byte <= usage_byte)
795        .filter(|binding| {
796            // Self-reference prevention: skip if usage is within the
797            // binding's own initializer range [self_start, declarator_end).
798            let self_start = binding
799                .initializer_start_byte
800                .unwrap_or(binding.decl_end_byte);
801            !(self_start <= usage_byte && usage_byte < binding.declarator_end_byte)
802        })
803        .max_by_key(|binding| binding.decl_start_byte)
804}
805
806/// Validate a byte span against content bounds.
807#[must_use]
808pub fn is_valid_span(start: usize, end: usize, content_len: usize) -> bool {
809    start <= end && end <= content_len
810}
811
812/// Create a `RecursionGuard` from the configured recursion limits.
813///
814/// # Panics
815///
816/// Panics if recursion limits cannot be loaded or the guard cannot be created.
817#[must_use]
818pub fn load_recursion_guard() -> crate::query::security::RecursionGuard {
819    let recursion_limits =
820        crate::config::RecursionLimits::load_or_default().expect("Failed to load recursion limits");
821    let file_ops_depth = recursion_limits
822        .effective_file_ops_depth()
823        .expect("Invalid file_ops_depth configuration");
824    crate::query::security::RecursionGuard::new(file_ops_depth)
825        .expect("Failed to create recursion guard")
826}
827
828/// Find the first child of a tree-sitter node with the given kind.
829#[must_use]
830pub fn first_child_of_kind<'a>(
831    node: tree_sitter::Node<'a>,
832    kind: &str,
833) -> Option<tree_sitter::Node<'a>> {
834    let mut cursor = node.walk();
835    node.children(&mut cursor)
836        .find(|child| child.kind() == kind)
837}
838
839/// Map a `RecursionError` to a `GraphBuilderError::ParseError`.
840#[must_use]
841pub fn recursion_error_to_graph_error(
842    e: &crate::query::security::RecursionError,
843    node: tree_sitter::Node,
844) -> GraphBuilderError {
845    GraphBuilderError::ParseError {
846        span: Span::from_bytes(node.start_byte(), node.end_byte()),
847        reason: format!("Recursion limit: {e}"),
848    }
849}
850
851// ============================================================================
852// Test Helpers (available to all plugins via sqry-core)
853// ============================================================================
854
855/// Collect all `References` edges as `(source_name, target_name)` pairs.
856///
857/// This helper is for plugin test code that verifies local variable
858/// reference tracking. It uses the `StagingGraph`'s operations and
859/// string lookup to produce human-readable edge pairs.
860#[must_use]
861pub fn collect_reference_edges(
862    staging: &crate::graph::unified::build::StagingGraph,
863) -> Vec<(String, String)> {
864    use crate::graph::unified::build::StagingOp;
865    use crate::graph::unified::edge::EdgeKind;
866
867    let strings = crate::graph::unified::build::test_helpers::build_string_lookup(staging);
868    let node_names = build_node_name_map(staging, &strings);
869
870    staging
871        .operations()
872        .iter()
873        .filter_map(|op| {
874            if let StagingOp::AddEdge {
875                source,
876                target,
877                kind: EdgeKind::References,
878                ..
879            } = op
880            {
881                let from = node_names.get(source)?.clone();
882                let to = node_names.get(target)?.clone();
883                Some((from, to))
884            } else {
885                None
886            }
887        })
888        .collect()
889}
890
891/// Check if any `References` edge targets a variable with the given name
892/// (pattern: `name@*`).
893#[must_use]
894pub fn has_local_ref(edges: &[(String, String)], name: &str) -> bool {
895    let prefix = format!("{name}@");
896    edges.iter().any(|(_, target)| target.starts_with(&prefix))
897}
898
899/// Count `References` edges that target a variable with the given name.
900#[must_use]
901pub fn count_local_refs(edges: &[(String, String)], name: &str) -> usize {
902    let prefix = format!("{name}@");
903    edges
904        .iter()
905        .filter(|(_, target)| target.starts_with(&prefix))
906        .count()
907}
908
909/// Get the unique set of target names matching `name@*`.
910#[must_use]
911pub fn local_ref_targets(edges: &[(String, String)], name: &str) -> HashSet<String> {
912    let prefix = format!("{name}@");
913    edges
914        .iter()
915        .filter(|(_, target)| target.starts_with(&prefix))
916        .map(|(_, target)| target.clone())
917        .collect()
918}
919
920/// Internal: build a `NodeId` → name lookup map from staging operations.
921fn build_node_name_map(
922    staging: &crate::graph::unified::build::StagingGraph,
923    strings: &HashMap<u32, String>,
924) -> HashMap<NodeId, String> {
925    use crate::graph::unified::build::StagingOp;
926
927    staging
928        .operations()
929        .iter()
930        .filter_map(|op| {
931            if let StagingOp::AddNode {
932                entry, expected_id, ..
933            } = op
934            {
935                let node_id = (*expected_id)?;
936                let name_idx = entry.qualified_name.unwrap_or(entry.name).index();
937                let name = strings.get(&name_idx)?.clone();
938                Some((node_id, name))
939            } else {
940                None
941            }
942        })
943        .collect()
944}
945
946/// Build a `StringId.index() → String` lookup from staging operations.
947///
948/// Re-exported from `test_helpers` for convenience.
949#[must_use]
950pub fn build_string_lookup(
951    staging: &crate::graph::unified::build::StagingGraph,
952) -> HashMap<u32, String> {
953    crate::graph::unified::build::test_helpers::build_string_lookup(staging)
954}