sqry_core/graph/local_scopes.rs
1//! Shared local scope tracking and reference resolution infrastructure.
2//!
3//! This module provides a generic `ScopeTree<K>` that builds a per-file scope
4//! tree, binds local declarations, and resolves identifier usages to declaration
5//! nodes for `References` edges.
6//!
7//! Language plugins implement the [`ScopeKindTrait`] for their language-specific
8//! `ScopeKind` enum and provide AST-specific `build_scopes_recursive()` and
9//! `bind_declarations_recursive()` functions. The shared infrastructure handles:
10//!
11//! - Scope interval indexing with O(log n) lookup
12//! - Variable binding with overlap detection
13//! - Scope chain resolution with self-reference prevention
14//! - String interning for memory efficiency
15//! - Class member tracking and resolution (for OOP languages)
16//! - Debug log rate limiting
17
18use std::collections::{HashMap, HashSet};
19use std::fmt::Debug;
20use std::sync::Arc;
21
22use log::debug;
23
24use crate::graph::unified::node::NodeId;
25use crate::graph::{GraphBuilderError, Span};
26
27/// Alias for scope indices within the tree.
28pub type ScopeId = usize;
29
30/// Maximum debug log messages per event category to avoid log flooding.
31pub const MAX_DEBUG_LOGS_PER_EVENT: usize = 5;
32
33// ============================================================================
34// ScopeKindTrait — Language plugins implement this
35// ============================================================================
36
37/// Trait that language-specific `ScopeKind` enums must implement.
38///
39/// This trait allows the shared `ScopeTree` to query scope semantics
40/// without knowing the language-specific scope variants.
41pub trait ScopeKindTrait: Copy + Eq + Debug + Send + Sync {
42 /// Returns `true` if this scope kind represents a class-like scope
43 /// (anonymous class, local class, etc.) that acts as a boundary for
44 /// variable capture and overlap detection.
45 fn is_class_scope(&self) -> bool;
46
47 /// Returns `true` if this scope kind is an overlap boundary — i.e.,
48 /// duplicate variable names in enclosing scopes should be rejected
49 /// at this boundary. Typically the same as `is_class_scope()`.
50 fn is_overlap_boundary(&self) -> bool;
51
52 /// Returns `true` if this class scope does NOT capture variables
53 /// from enclosing scopes. For example, Java's `LocalRecord`,
54 /// `LocalEnum`, and `LocalInterface` are non-capturing.
55 ///
56 /// Default: `false` (all class scopes capture by default).
57 fn is_non_capturing_class_scope(&self) -> bool {
58 false
59 }
60
61 /// Returns `true` if this language allows nested variable shadowing
62 /// (same name in nested block scope). Kotlin allows this; Java does not.
63 ///
64 /// When `true`, `is_overlap()` only checks the current scope, not parents.
65 /// Default: `false`.
66 fn allows_nested_shadowing(&self) -> bool {
67 false
68 }
69
70 /// Returns `true` if this scope kind should block capture chain traversal.
71 ///
72 /// When resolving identifiers past a class boundary, the capture chain
73 /// walks outer scopes looking for captured variables. This method controls
74 /// which scopes stop that traversal.
75 ///
76 /// Java overrides to return `is_non_capturing_class_scope()` only, because
77 /// anonymous classes and local classes CAN capture effectively-final
78 /// variables from enclosing scopes through multiple nesting levels.
79 ///
80 /// Default: `self.is_non_capturing_class_scope() || self.is_class_scope()`
81 fn blocks_capture_chain(&self) -> bool {
82 self.is_non_capturing_class_scope() || self.is_class_scope()
83 }
84
85 /// Whether unresolved base types should cause ambiguous member resolution.
86 ///
87 /// When a class has base types that cannot be resolved (e.g., from external
88 /// libraries), this controls whether member lookups return `Ambiguous`
89 /// (conservative) or `None` (allowing the capture chain to proceed).
90 ///
91 /// Java: `true` (strict — returns `Ambiguous` when bases are unresolved).
92 /// Kotlin: `false` (lenient — returns `None` so capture chain continues).
93 ///
94 /// Default: `true` (conservative, matches Java behavior).
95 fn strict_unresolved_bases(&self) -> bool {
96 true
97 }
98}
99
100// ============================================================================
101// Core Data Structures
102// ============================================================================
103
104/// A local variable binding within a scope.
105#[derive(Debug, Clone)]
106pub struct VariableBinding {
107 /// The graph node representing this binding (set after node creation).
108 pub node_id: Option<NodeId>,
109 /// Start byte of the identifier in the source.
110 pub decl_start_byte: usize,
111 /// End byte of the identifier in the source.
112 pub decl_end_byte: usize,
113 /// End byte of the full declarator (e.g., for `int x = 5`, ends after `5`).
114 pub declarator_end_byte: usize,
115 /// Start byte of the initializer expression (if present).
116 /// Used for self-reference prevention: `let x = x + 1` should NOT
117 /// resolve the RHS `x` to this same binding.
118 pub initializer_start_byte: Option<usize>,
119}
120
121/// A scope within the scope tree, parameterized by a language-specific kind.
122#[derive(Debug, Clone)]
123pub struct Scope<K: ScopeKindTrait> {
124 /// Byte offset where this scope begins in the source.
125 pub start_byte: usize,
126 /// Byte offset where this scope ends in the source.
127 pub end_byte: usize,
128 /// Parent scope index (None for root scopes).
129 pub parent: Option<ScopeId>,
130 /// Language-specific scope kind.
131 pub kind: K,
132 /// Nesting depth from root.
133 pub depth: usize,
134 /// Variable bindings indexed by interned name.
135 pub variables: HashMap<Arc<str>, Vec<VariableBinding>>,
136}
137
138/// Interval for binary-search scope lookup.
139#[derive(Debug, Clone)]
140struct ScopeInterval {
141 start: usize,
142 end: usize,
143 scope_id: ScopeId,
144 depth: usize,
145}
146
147/// Sorted interval index for O(log n) innermost-scope queries.
148#[derive(Debug, Clone, Default)]
149struct ScopeIndex {
150 intervals: Vec<ScopeInterval>,
151}
152
153/// String deduplication cache for variable names.
154#[derive(Debug, Clone, Default)]
155pub struct StringInterner {
156 map: HashMap<String, Arc<str>>,
157}
158
159impl StringInterner {
160 /// Intern a string, returning a shared `Arc<str>`.
161 pub fn intern(&mut self, value: &str) -> Arc<str> {
162 if let Some(existing) = self.map.get(value) {
163 return existing.clone();
164 }
165 let arc: Arc<str> = Arc::from(value);
166 self.map.insert(value.to_string(), arc.clone());
167 arc
168 }
169}
170
171// ============================================================================
172// Class Member Tracking (for OOP languages)
173// ============================================================================
174
175/// Source of an inherited member (tracks which base class it came from).
176#[derive(Debug, Clone)]
177pub struct MemberSource {
178 /// Qualified name of the base class that declares/inherits this member.
179 pub qualifier: String,
180}
181
182/// Class member info associated with a scope.
183#[derive(Debug, Clone)]
184pub struct ClassMemberInfo {
185 /// Qualified name of this class (e.g., `"com.example.MyClass"`).
186 pub qualifier: Option<String>,
187 /// Members declared directly in this class.
188 pub declared_members: HashSet<Arc<str>>,
189 /// Members inherited from base classes.
190 pub inherited_members: HashMap<Arc<str>, Vec<MemberSource>>,
191 /// Count of base types that couldn't be resolved.
192 pub unresolved_base_count: usize,
193 /// Count of explicitly declared base types.
194 pub explicit_base_count: usize,
195}
196
197/// Maps scope IDs to class member information.
198#[derive(Debug, Clone, Default)]
199pub struct ClassMemberIndex {
200 /// Per-scope class member information.
201 pub by_scope: HashMap<ScopeId, ClassMemberInfo>,
202}
203
204/// A class's identity and declared members (for type name resolution).
205#[derive(Debug, Clone)]
206pub struct ClassInfo {
207 /// Qualified name of this class.
208 pub qualifier: String,
209 /// Members declared directly in this class.
210 pub declared_members: HashSet<Arc<str>>,
211}
212
213/// Index from class name keys to `ClassInfo` entries.
214#[derive(Debug, Clone, Default)]
215pub struct ClassInfoIndex {
216 by_key: HashMap<String, Vec<usize>>,
217 infos: Vec<ClassInfo>,
218}
219
220impl ClassInfoIndex {
221 /// Insert a class info entry with one or more lookup keys.
222 pub fn insert(&mut self, info: ClassInfo, keys: &[String]) {
223 let idx = self.infos.len();
224 self.infos.push(info);
225 for key in keys {
226 self.by_key.entry(key.clone()).or_default().push(idx);
227 }
228 }
229
230 /// Resolve a class by key. Returns `Some` only if there is exactly one match.
231 #[must_use]
232 pub fn resolve(&self, key: &str) -> Option<&ClassInfo> {
233 let candidates = self.by_key.get(key)?;
234 if candidates.len() == 1 {
235 self.infos.get(candidates[0])
236 } else {
237 None
238 }
239 }
240}
241
242/// Resolve a class info from the index, trying multiple key formats.
243///
244/// Attempts resolution with: the raw key, `::` replaced with `.`, and the
245/// last segment (simple name) only.
246#[must_use]
247pub fn resolve_class_info<'a>(index: &'a ClassInfoIndex, base: &str) -> Option<&'a ClassInfo> {
248 if let Some(info) = index.resolve(base) {
249 return Some(info);
250 }
251 let dotted = base.replace("::", ".");
252 if dotted != base
253 && let Some(info) = index.resolve(&dotted)
254 {
255 return Some(info);
256 }
257 if let Some(last) = base.rsplit(['.', ':']).next()
258 && let Some(info) = index.resolve(last)
259 {
260 return Some(info);
261 }
262 None
263}
264
265// ============================================================================
266// Debug Log Limiter
267// ============================================================================
268
269/// Debug event categories for rate-limited logging.
270#[derive(Debug, Clone, Hash, Eq, PartialEq)]
271pub enum DebugEvent {
272 /// Scope or binding span is invalid (start > end or beyond content length).
273 InvalidSpan,
274 /// Overlap conflict when adding a binding.
275 OverlapConflict,
276 /// Ambiguous inherited member resolution.
277 InheritedAmbiguous,
278 /// Could not find a binding to attach a `NodeId` to.
279 MissingBindingNode,
280}
281
282/// Rate-limited debug logger to avoid flooding logs.
283#[derive(Debug, Default)]
284pub struct DebugLogLimiter {
285 /// Per-event counters (public for test assertions in language plugins).
286 pub counts: HashMap<DebugEvent, usize>,
287}
288
289impl DebugLogLimiter {
290 /// Log a debug message, but only up to `MAX_DEBUG_LOGS_PER_EVENT` per category.
291 pub fn log(&mut self, event: DebugEvent, message: &str) {
292 let entry = self.counts.entry(event).or_insert(0);
293 if *entry < MAX_DEBUG_LOGS_PER_EVENT {
294 *entry += 1;
295 debug!("{message}");
296 }
297 }
298}
299
300// ============================================================================
301// Resolution Types
302// ============================================================================
303
304/// Outcome of resolving an identifier to a declaration.
305#[derive(Debug)]
306pub enum ResolutionOutcome {
307 /// Resolved to a local variable binding.
308 Local(LocalBindingMatch),
309 /// Resolved to a class member (field, property, enum constant, etc.).
310 Member {
311 /// Qualified name of the member, if known.
312 qualified_name: Option<String>,
313 },
314 /// Multiple conflicting resolutions found.
315 Ambiguous,
316 /// No matching declaration found.
317 NoMatch,
318}
319
320/// A successful local variable resolution.
321#[derive(Debug, Clone, Copy)]
322pub struct LocalBindingMatch {
323 /// The graph node ID for the variable declaration (may be None if not yet created).
324 pub node_id: Option<NodeId>,
325 /// Start byte of the declaration identifier.
326 pub decl_start_byte: usize,
327 /// End byte of the declaration identifier.
328 pub decl_end_byte: usize,
329}
330
331// ============================================================================
332// ScopeTree<K> — The shared scope tree
333// ============================================================================
334
335/// Per-file scope tree with variable binding and resolution.
336///
337/// Generic over `K`, a language-specific `ScopeKind` enum that implements
338/// [`ScopeKindTrait`].
339#[derive(Debug)]
340pub struct ScopeTree<K: ScopeKindTrait> {
341 /// All scopes, indexed by `ScopeId`.
342 pub scopes: Vec<Scope<K>>,
343 /// Binary-search interval index.
344 index: ScopeIndex,
345 /// String deduplication cache.
346 pub interner: StringInterner,
347 /// Class member tracking (for OOP languages).
348 pub class_members: ClassMemberIndex,
349 /// Class info index (for type name resolution).
350 pub class_infos: ClassInfoIndex,
351 /// Rate-limited debug logger.
352 pub debug: DebugLogLimiter,
353 /// Total content length in bytes (for span validation).
354 pub content_len: usize,
355}
356
357impl<K: ScopeKindTrait> ScopeTree<K> {
358 /// Create a new empty scope tree for the given content length.
359 ///
360 /// Language plugins should populate the tree using `add_scope()`,
361 /// `add_binding()`, and `rebuild_index()`.
362 #[must_use]
363 pub fn new(content_len: usize) -> Self {
364 ScopeTree {
365 scopes: Vec::new(),
366 index: ScopeIndex::default(),
367 interner: StringInterner::default(),
368 class_members: ClassMemberIndex::default(),
369 class_infos: ClassInfoIndex::default(),
370 debug: DebugLogLimiter::default(),
371 content_len,
372 }
373 }
374
375 // ========================================================================
376 // Node ID attachment
377 // ========================================================================
378
379 /// Attach a graph `NodeId` to an existing variable binding.
380 ///
381 /// First tries exact match by name + byte position. Falls back to
382 /// a fuzzy match (single unattached binding with the same name).
383 /// Returns `true` if a binding was found and updated.
384 pub fn attach_node_id(&mut self, name: &str, decl_start_byte: usize, node_id: NodeId) -> bool {
385 // Phase 1: exact match on name + byte position
386 for scope in &mut self.scopes {
387 if let Some(bindings) = scope.variables.get_mut(name) {
388 for binding in bindings {
389 if binding.decl_start_byte == decl_start_byte {
390 if binding.node_id.is_none() {
391 binding.node_id = Some(node_id);
392 }
393 return true;
394 }
395 }
396 }
397 }
398
399 // Phase 2: fallback — single unattached binding with same name
400 let mut fallback: Option<(ScopeId, usize)> = None;
401 let mut ambiguous = false;
402 for (scope_id, scope) in self.scopes.iter().enumerate() {
403 if let Some(bindings) = scope.variables.get(name) {
404 for (idx, binding) in bindings.iter().enumerate() {
405 if binding.node_id.is_none() {
406 if fallback.is_none() {
407 fallback = Some((scope_id, idx));
408 } else {
409 ambiguous = true;
410 break;
411 }
412 }
413 }
414 }
415 if ambiguous {
416 break;
417 }
418 }
419
420 if !ambiguous
421 && let Some((scope_id, idx)) = fallback
422 && let Some(bindings) = self.scopes[scope_id].variables.get_mut(name)
423 && let Some(binding) = bindings.get_mut(idx)
424 {
425 binding.node_id = Some(node_id);
426 return true;
427 }
428
429 self.debug.log(
430 DebugEvent::MissingBindingNode,
431 "Missing binding for node_id attach",
432 );
433 false
434 }
435
436 // ========================================================================
437 // Resolution — with class boundary support
438 // ========================================================================
439
440 /// Resolve an identifier to a local variable, class member, or no-match.
441 ///
442 /// This is the primary entry point for resolution. It:
443 /// 1. Finds the innermost scope at the usage byte position
444 /// 2. Builds a scope chain to the root
445 /// 3. Detects class boundaries and handles capture semantics
446 /// 4. Falls back to class member resolution for OOP languages
447 pub fn resolve_identifier(&mut self, usage_byte: usize, identifier: &str) -> ResolutionOutcome {
448 let Some(innermost) = self.innermost_scope_at(usage_byte) else {
449 return ResolutionOutcome::NoMatch;
450 };
451 let chain = self.scope_chain(innermost);
452
453 let class_boundary = chain
454 .iter()
455 .position(|scope_id| self.scopes[*scope_id].kind.is_class_scope());
456
457 if let Some(boundary_idx) = class_boundary {
458 let current_class_scope = chain[boundary_idx];
459
460 // Try local resolution before the class boundary
461 if let Some(binding) =
462 self.resolve_local_in_chain(identifier, usage_byte, &chain[..boundary_idx])
463 {
464 return ResolutionOutcome::Local(binding);
465 }
466
467 // Try class member resolution
468 if let Some(member_resolution) =
469 self.resolve_class_member(current_class_scope, identifier)
470 {
471 return member_resolution;
472 }
473
474 // Check if class scope blocks capture
475 let class_kind = self.scopes[current_class_scope].kind;
476 if class_kind.is_non_capturing_class_scope() {
477 return ResolutionOutcome::NoMatch;
478 }
479
480 // Try capture chain (variables from enclosing non-class scopes)
481 let mut capture_chain = Vec::new();
482 for scope_id in &chain[boundary_idx + 1..] {
483 if self.scopes[*scope_id].kind.blocks_capture_chain() {
484 break;
485 }
486 capture_chain.push(*scope_id);
487 }
488 if let Some(binding) =
489 self.resolve_local_in_chain(identifier, usage_byte, &capture_chain)
490 {
491 return ResolutionOutcome::Local(binding);
492 }
493
494 return ResolutionOutcome::NoMatch;
495 }
496
497 // No class boundary — resolve in entire chain
498 if let Some(binding) = self.resolve_local_in_chain(identifier, usage_byte, &chain) {
499 return ResolutionOutcome::Local(binding);
500 }
501
502 ResolutionOutcome::NoMatch
503 }
504
505 /// Quick check if a name is a known type/class name.
506 #[must_use]
507 pub fn is_known_type_name(&self, name: &str) -> bool {
508 self.class_infos.resolve(name).is_some()
509 }
510
511 /// Quick check if a local variable binding exists for `name` at the given byte position.
512 #[must_use]
513 pub fn has_local_binding(&self, name: &str, byte: usize) -> bool {
514 let Some(innermost) = self.innermost_scope_at(byte) else {
515 return false;
516 };
517 let chain = self.scope_chain(innermost);
518 self.resolve_local_in_chain(name, byte, &chain).is_some()
519 }
520
521 // ========================================================================
522 // Internal resolution helpers
523 // ========================================================================
524
525 /// Walk a scope chain looking for a binding matching `identifier` before `usage_byte`.
526 #[must_use]
527 pub fn resolve_local_in_chain(
528 &self,
529 identifier: &str,
530 usage_byte: usize,
531 chain: &[ScopeId],
532 ) -> Option<LocalBindingMatch> {
533 for scope_id in chain {
534 if let Some(bindings) = self.scopes[*scope_id].variables.get(identifier)
535 && let Some(binding) = find_applicable_binding(bindings, usage_byte)
536 {
537 return Some(LocalBindingMatch {
538 node_id: binding.node_id,
539 decl_start_byte: binding.decl_start_byte,
540 decl_end_byte: binding.decl_end_byte,
541 });
542 }
543 }
544 None
545 }
546
547 /// Resolve a class member by scope ID and identifier name.
548 pub fn resolve_class_member(
549 &mut self,
550 scope_id: ScopeId,
551 identifier: &str,
552 ) -> Option<ResolutionOutcome> {
553 let info = self.class_members.by_scope.get(&scope_id)?;
554 if info.declared_members.contains(identifier) {
555 let qualified_name = info
556 .qualifier
557 .as_ref()
558 .map(|qual| format!("{qual}::{identifier}"));
559 return Some(ResolutionOutcome::Member { qualified_name });
560 }
561
562 if let Some(sources) = info.inherited_members.get(identifier) {
563 if sources.len() == 1 {
564 let qualified_name = Some(format!("{}::{}", sources[0].qualifier, identifier));
565 return Some(ResolutionOutcome::Member { qualified_name });
566 }
567 if sources.len() > 1 {
568 self.debug.log(
569 DebugEvent::InheritedAmbiguous,
570 "Inherited member ambiguity: multiple matches",
571 );
572 return Some(ResolutionOutcome::Ambiguous);
573 }
574 }
575
576 if info.explicit_base_count > 0
577 && info.unresolved_base_count > 0
578 && self.scopes[scope_id].kind.strict_unresolved_bases()
579 {
580 self.debug.log(
581 DebugEvent::InheritedAmbiguous,
582 "Inherited member ambiguity: unresolved base",
583 );
584 return Some(ResolutionOutcome::Ambiguous);
585 }
586
587 None
588 }
589
590 // ========================================================================
591 // Scope lookup
592 // ========================================================================
593
594 /// Find the innermost (deepest) scope containing the given byte position.
595 ///
596 /// Uses a sorted interval index with binary search + reverse scan for
597 /// O(log n) lookup.
598 #[must_use]
599 pub fn innermost_scope_at(&self, byte: usize) -> Option<ScopeId> {
600 let intervals = &self.index.intervals;
601 if intervals.is_empty() {
602 return None;
603 }
604
605 // partition_point returns the first index where start > byte,
606 // so all candidates with start <= byte are in [0..idx).
607 let idx = intervals.partition_point(|iv| iv.start <= byte);
608 if idx == 0 {
609 return None;
610 }
611
612 // Reverse scan: for properly nested intervals sorted by
613 // (start ASC, depth ASC), the first containing interval is the
614 // deepest. At the same start position, deeper scopes come later
615 // in the array and are seen first during reverse iteration.
616 for iv in intervals[..idx].iter().rev() {
617 if byte < iv.end {
618 return Some(iv.scope_id);
619 }
620 }
621
622 None
623 }
624
625 /// Build the parent chain from a scope to the root.
626 #[must_use]
627 pub fn scope_chain(&self, innermost: ScopeId) -> Vec<ScopeId> {
628 let mut chain = Vec::new();
629 let mut current = Some(innermost);
630 while let Some(scope_id) = current {
631 chain.push(scope_id);
632 current = self.scopes[scope_id].parent;
633 }
634 chain
635 }
636
637 // ========================================================================
638 // Scope and binding construction
639 // ========================================================================
640
641 /// Add a new scope to the tree.
642 ///
643 /// Returns `None` if the span is invalid (start > end or beyond content).
644 pub fn add_scope(
645 &mut self,
646 kind: K,
647 start_byte: usize,
648 end_byte: usize,
649 parent: Option<ScopeId>,
650 ) -> Option<ScopeId> {
651 if !is_valid_span(start_byte, end_byte, self.content_len) {
652 self.debug
653 .log(DebugEvent::InvalidSpan, "Invalid scope span");
654 return None;
655 }
656 let depth = parent.map_or(0, |p| self.scopes[p].depth + 1);
657 let scope_id = self.scopes.len();
658 self.scopes.push(Scope {
659 start_byte,
660 end_byte,
661 parent,
662 kind,
663 depth,
664 variables: HashMap::new(),
665 });
666 Some(scope_id)
667 }
668
669 /// Rebuild the interval index from the current scopes.
670 ///
671 /// Must be called after adding scopes and before performing lookups.
672 pub fn rebuild_index(&mut self) {
673 self.index.intervals = self
674 .scopes
675 .iter()
676 .enumerate()
677 .map(|(scope_id, scope)| ScopeInterval {
678 start: scope.start_byte,
679 end: scope.end_byte,
680 scope_id,
681 depth: scope.depth,
682 })
683 .collect();
684 // Sort by (start ASC, depth ASC) so the deepest scope at each start
685 // position comes last — and is found first during reverse scan.
686 self.index
687 .intervals
688 .sort_by(|a, b| a.start.cmp(&b.start).then(a.depth.cmp(&b.depth)));
689 }
690
691 /// Add a variable binding to a scope.
692 ///
693 /// Performs overlap detection before adding. If the binding would
694 /// overlap with an existing binding in the same or enclosing scope
695 /// (depending on language rules), it is silently rejected.
696 pub fn add_binding(
697 &mut self,
698 scope_id: ScopeId,
699 name: &str,
700 decl_start_byte: usize,
701 decl_end_byte: usize,
702 declarator_end_byte: usize,
703 initializer_start_byte: Option<usize>,
704 ) {
705 if !is_valid_span(decl_start_byte, decl_end_byte, self.content_len)
706 || !is_valid_span(decl_end_byte, declarator_end_byte, self.content_len)
707 {
708 self.debug
709 .log(DebugEvent::InvalidSpan, "Invalid binding span");
710 return;
711 }
712
713 if self.is_overlap(scope_id, name, decl_start_byte) {
714 self.debug
715 .log(DebugEvent::OverlapConflict, "Overlap conflict for binding");
716 return;
717 }
718
719 let key = self.interner.intern(name);
720 let binding = VariableBinding {
721 node_id: None,
722 decl_start_byte,
723 decl_end_byte,
724 declarator_end_byte,
725 initializer_start_byte,
726 };
727
728 let entry = self
729 .scopes
730 .get_mut(scope_id)
731 .map(|scope| scope.variables.entry(key).or_default());
732 if let Some(bindings) = entry {
733 bindings.push(binding);
734 bindings.sort_by_key(|b| b.decl_start_byte);
735 }
736 }
737
738 /// Check if adding a binding in `scope_id` with `name` would overlap.
739 ///
740 /// Language-specific behavior:
741 /// - If `allows_nested_shadowing()` is true (e.g., Kotlin): only checks
742 /// the current scope for duplicate names.
743 /// - Otherwise (e.g., Java): walks parent scopes up to an overlap boundary.
744 fn is_overlap(&self, scope_id: ScopeId, name: &str, decl_start: usize) -> bool {
745 // Check if the language allows nested shadowing
746 if let Some(scope) = self.scopes.get(scope_id)
747 && scope.kind.allows_nested_shadowing()
748 {
749 // Only check the same scope for duplicates
750 return scope
751 .variables
752 .get(name)
753 .is_some_and(|bindings| !bindings.is_empty());
754 }
755
756 // Walk up parent scopes checking for overlapping bindings
757 let mut current = Some(scope_id);
758 while let Some(scope) = current {
759 if let Some(bindings) = self.scopes[scope].variables.get(name)
760 && !bindings.is_empty()
761 && decl_start <= self.scopes[scope].end_byte
762 {
763 return true;
764 }
765
766 if self.scopes[scope].kind.is_overlap_boundary() {
767 break;
768 }
769
770 current = self.scopes[scope].parent;
771 }
772 false
773 }
774}
775
776// ============================================================================
777// Free functions — language-agnostic utilities
778// ============================================================================
779
780/// Find the most recent applicable binding before `usage_byte`.
781///
782/// A binding is applicable if:
783/// 1. Its declaration comes before the usage (`decl_start_byte <= usage_byte`)
784/// 2. The usage is NOT within the binding's own initializer (self-reference prevention)
785///
786/// Returns the binding with the largest `decl_start_byte` (most recent).
787#[must_use]
788pub fn find_applicable_binding(
789 bindings: &[VariableBinding],
790 usage_byte: usize,
791) -> Option<&VariableBinding> {
792 bindings
793 .iter()
794 .filter(|binding| binding.decl_start_byte <= usage_byte)
795 .filter(|binding| {
796 // Self-reference prevention: skip if usage is within the
797 // binding's own initializer range [self_start, declarator_end).
798 let self_start = binding
799 .initializer_start_byte
800 .unwrap_or(binding.decl_end_byte);
801 !(self_start <= usage_byte && usage_byte < binding.declarator_end_byte)
802 })
803 .max_by_key(|binding| binding.decl_start_byte)
804}
805
806/// Validate a byte span against content bounds.
807#[must_use]
808pub fn is_valid_span(start: usize, end: usize, content_len: usize) -> bool {
809 start <= end && end <= content_len
810}
811
812/// Create a `RecursionGuard` from the configured recursion limits.
813///
814/// # Panics
815///
816/// Panics if recursion limits cannot be loaded or the guard cannot be created.
817#[must_use]
818pub fn load_recursion_guard() -> crate::query::security::RecursionGuard {
819 let recursion_limits =
820 crate::config::RecursionLimits::load_or_default().expect("Failed to load recursion limits");
821 let file_ops_depth = recursion_limits
822 .effective_file_ops_depth()
823 .expect("Invalid file_ops_depth configuration");
824 crate::query::security::RecursionGuard::new(file_ops_depth)
825 .expect("Failed to create recursion guard")
826}
827
828/// Find the first child of a tree-sitter node with the given kind.
829#[must_use]
830pub fn first_child_of_kind<'a>(
831 node: tree_sitter::Node<'a>,
832 kind: &str,
833) -> Option<tree_sitter::Node<'a>> {
834 let mut cursor = node.walk();
835 node.children(&mut cursor)
836 .find(|child| child.kind() == kind)
837}
838
839/// Map a `RecursionError` to a `GraphBuilderError::ParseError`.
840#[must_use]
841pub fn recursion_error_to_graph_error(
842 e: &crate::query::security::RecursionError,
843 node: tree_sitter::Node,
844) -> GraphBuilderError {
845 GraphBuilderError::ParseError {
846 span: Span::from_bytes(node.start_byte(), node.end_byte()),
847 reason: format!("Recursion limit: {e}"),
848 }
849}
850
851// ============================================================================
852// Test Helpers (available to all plugins via sqry-core)
853// ============================================================================
854
855/// Collect all `References` edges as `(source_name, target_name)` pairs.
856///
857/// This helper is for plugin test code that verifies local variable
858/// reference tracking. It uses the `StagingGraph`'s operations and
859/// string lookup to produce human-readable edge pairs.
860#[must_use]
861pub fn collect_reference_edges(
862 staging: &crate::graph::unified::build::StagingGraph,
863) -> Vec<(String, String)> {
864 use crate::graph::unified::build::StagingOp;
865 use crate::graph::unified::edge::EdgeKind;
866
867 let strings = crate::graph::unified::build::test_helpers::build_string_lookup(staging);
868 let node_names = build_node_name_map(staging, &strings);
869
870 staging
871 .operations()
872 .iter()
873 .filter_map(|op| {
874 if let StagingOp::AddEdge {
875 source,
876 target,
877 kind: EdgeKind::References,
878 ..
879 } = op
880 {
881 let from = node_names.get(source)?.clone();
882 let to = node_names.get(target)?.clone();
883 Some((from, to))
884 } else {
885 None
886 }
887 })
888 .collect()
889}
890
891/// Check if any `References` edge targets a variable with the given name
892/// (pattern: `name@*`).
893#[must_use]
894pub fn has_local_ref(edges: &[(String, String)], name: &str) -> bool {
895 let prefix = format!("{name}@");
896 edges.iter().any(|(_, target)| target.starts_with(&prefix))
897}
898
899/// Count `References` edges that target a variable with the given name.
900#[must_use]
901pub fn count_local_refs(edges: &[(String, String)], name: &str) -> usize {
902 let prefix = format!("{name}@");
903 edges
904 .iter()
905 .filter(|(_, target)| target.starts_with(&prefix))
906 .count()
907}
908
909/// Get the unique set of target names matching `name@*`.
910#[must_use]
911pub fn local_ref_targets(edges: &[(String, String)], name: &str) -> HashSet<String> {
912 let prefix = format!("{name}@");
913 edges
914 .iter()
915 .filter(|(_, target)| target.starts_with(&prefix))
916 .map(|(_, target)| target.clone())
917 .collect()
918}
919
920/// Internal: build a `NodeId` → name lookup map from staging operations.
921fn build_node_name_map(
922 staging: &crate::graph::unified::build::StagingGraph,
923 strings: &HashMap<u32, String>,
924) -> HashMap<NodeId, String> {
925 use crate::graph::unified::build::StagingOp;
926
927 staging
928 .operations()
929 .iter()
930 .filter_map(|op| {
931 if let StagingOp::AddNode {
932 entry, expected_id, ..
933 } = op
934 {
935 let node_id = (*expected_id)?;
936 let name_idx = entry.qualified_name.unwrap_or(entry.name).index();
937 let name = strings.get(&name_idx)?.clone();
938 Some((node_id, name))
939 } else {
940 None
941 }
942 })
943 .collect()
944}
945
946/// Build a `StringId.index() → String` lookup from staging operations.
947///
948/// Re-exported from `test_helpers` for convenience.
949#[must_use]
950pub fn build_string_lookup(
951 staging: &crate::graph::unified::build::StagingGraph,
952) -> HashMap<u32, String> {
953 crate::graph::unified::build::test_helpers::build_string_lookup(staging)
954}