use std::collections::{HashMap, HashSet};
use std::fmt::Debug;
use std::sync::Arc;
use log::debug;
use crate::graph::unified::node::NodeId;
use crate::graph::{GraphBuilderError, Span};
pub type ScopeId = usize;
pub const MAX_DEBUG_LOGS_PER_EVENT: usize = 5;
pub trait ScopeKindTrait: Copy + Eq + Debug + Send + Sync {
fn is_class_scope(&self) -> bool;
fn is_overlap_boundary(&self) -> bool;
fn is_non_capturing_class_scope(&self) -> bool {
false
}
fn allows_nested_shadowing(&self) -> bool {
false
}
fn blocks_capture_chain(&self) -> bool {
self.is_non_capturing_class_scope() || self.is_class_scope()
}
fn strict_unresolved_bases(&self) -> bool {
true
}
}
#[derive(Debug, Clone)]
pub struct VariableBinding {
pub node_id: Option<NodeId>,
pub decl_start_byte: usize,
pub decl_end_byte: usize,
pub declarator_end_byte: usize,
pub initializer_start_byte: Option<usize>,
}
#[derive(Debug, Clone)]
pub struct Scope<K: ScopeKindTrait> {
pub start_byte: usize,
pub end_byte: usize,
pub parent: Option<ScopeId>,
pub kind: K,
pub depth: usize,
pub variables: HashMap<Arc<str>, Vec<VariableBinding>>,
}
#[derive(Debug, Clone)]
struct ScopeInterval {
start: usize,
end: usize,
scope_id: ScopeId,
depth: usize,
}
#[derive(Debug, Clone, Default)]
struct ScopeIndex {
intervals: Vec<ScopeInterval>,
}
#[derive(Debug, Clone, Default)]
pub struct StringInterner {
map: HashMap<String, Arc<str>>,
}
impl StringInterner {
pub fn intern(&mut self, value: &str) -> Arc<str> {
if let Some(existing) = self.map.get(value) {
return existing.clone();
}
let arc: Arc<str> = Arc::from(value);
self.map.insert(value.to_string(), arc.clone());
arc
}
}
#[derive(Debug, Clone)]
pub struct MemberSource {
pub qualifier: String,
}
#[derive(Debug, Clone)]
pub struct ClassMemberInfo {
pub qualifier: Option<String>,
pub declared_members: HashSet<Arc<str>>,
pub inherited_members: HashMap<Arc<str>, Vec<MemberSource>>,
pub unresolved_base_count: usize,
pub explicit_base_count: usize,
}
#[derive(Debug, Clone, Default)]
pub struct ClassMemberIndex {
pub by_scope: HashMap<ScopeId, ClassMemberInfo>,
}
#[derive(Debug, Clone)]
pub struct ClassInfo {
pub qualifier: String,
pub declared_members: HashSet<Arc<str>>,
}
#[derive(Debug, Clone, Default)]
pub struct ClassInfoIndex {
by_key: HashMap<String, Vec<usize>>,
infos: Vec<ClassInfo>,
}
impl ClassInfoIndex {
pub fn insert(&mut self, info: ClassInfo, keys: &[String]) {
let idx = self.infos.len();
self.infos.push(info);
for key in keys {
self.by_key.entry(key.clone()).or_default().push(idx);
}
}
#[must_use]
pub fn resolve(&self, key: &str) -> Option<&ClassInfo> {
let candidates = self.by_key.get(key)?;
if candidates.len() == 1 {
self.infos.get(candidates[0])
} else {
None
}
}
}
#[must_use]
pub fn resolve_class_info<'a>(index: &'a ClassInfoIndex, base: &str) -> Option<&'a ClassInfo> {
if let Some(info) = index.resolve(base) {
return Some(info);
}
let dotted = base.replace("::", ".");
if dotted != base
&& let Some(info) = index.resolve(&dotted)
{
return Some(info);
}
if let Some(last) = base.rsplit(['.', ':']).next()
&& let Some(info) = index.resolve(last)
{
return Some(info);
}
None
}
#[derive(Debug, Clone, Hash, Eq, PartialEq)]
pub enum DebugEvent {
InvalidSpan,
OverlapConflict,
InheritedAmbiguous,
MissingBindingNode,
}
#[derive(Debug, Default)]
pub struct DebugLogLimiter {
pub counts: HashMap<DebugEvent, usize>,
}
impl DebugLogLimiter {
pub fn log(&mut self, event: DebugEvent, message: &str) {
let entry = self.counts.entry(event).or_insert(0);
if *entry < MAX_DEBUG_LOGS_PER_EVENT {
*entry += 1;
debug!("{message}");
}
}
}
#[derive(Debug)]
pub enum ResolutionOutcome {
Local(LocalBindingMatch),
Member {
qualified_name: Option<String>,
},
Ambiguous,
NoMatch,
}
#[derive(Debug, Clone, Copy)]
pub struct LocalBindingMatch {
pub node_id: Option<NodeId>,
pub decl_start_byte: usize,
pub decl_end_byte: usize,
}
#[derive(Debug)]
pub struct ScopeTree<K: ScopeKindTrait> {
pub scopes: Vec<Scope<K>>,
index: ScopeIndex,
pub interner: StringInterner,
pub class_members: ClassMemberIndex,
pub class_infos: ClassInfoIndex,
pub debug: DebugLogLimiter,
pub content_len: usize,
}
impl<K: ScopeKindTrait> ScopeTree<K> {
#[must_use]
pub fn new(content_len: usize) -> Self {
ScopeTree {
scopes: Vec::new(),
index: ScopeIndex::default(),
interner: StringInterner::default(),
class_members: ClassMemberIndex::default(),
class_infos: ClassInfoIndex::default(),
debug: DebugLogLimiter::default(),
content_len,
}
}
pub fn attach_node_id(&mut self, name: &str, decl_start_byte: usize, node_id: NodeId) -> bool {
for scope in &mut self.scopes {
if let Some(bindings) = scope.variables.get_mut(name) {
for binding in bindings {
if binding.decl_start_byte == decl_start_byte {
if binding.node_id.is_none() {
binding.node_id = Some(node_id);
}
return true;
}
}
}
}
let mut fallback: Option<(ScopeId, usize)> = None;
let mut ambiguous = false;
for (scope_id, scope) in self.scopes.iter().enumerate() {
if let Some(bindings) = scope.variables.get(name) {
for (idx, binding) in bindings.iter().enumerate() {
if binding.node_id.is_none() {
if fallback.is_none() {
fallback = Some((scope_id, idx));
} else {
ambiguous = true;
break;
}
}
}
}
if ambiguous {
break;
}
}
if !ambiguous
&& let Some((scope_id, idx)) = fallback
&& let Some(bindings) = self.scopes[scope_id].variables.get_mut(name)
&& let Some(binding) = bindings.get_mut(idx)
{
binding.node_id = Some(node_id);
return true;
}
self.debug.log(
DebugEvent::MissingBindingNode,
"Missing binding for node_id attach",
);
false
}
pub fn resolve_identifier(&mut self, usage_byte: usize, identifier: &str) -> ResolutionOutcome {
let Some(innermost) = self.innermost_scope_at(usage_byte) else {
return ResolutionOutcome::NoMatch;
};
let chain = self.scope_chain(innermost);
let class_boundary = chain
.iter()
.position(|scope_id| self.scopes[*scope_id].kind.is_class_scope());
if let Some(boundary_idx) = class_boundary {
let current_class_scope = chain[boundary_idx];
if let Some(binding) =
self.resolve_local_in_chain(identifier, usage_byte, &chain[..boundary_idx])
{
return ResolutionOutcome::Local(binding);
}
if let Some(member_resolution) =
self.resolve_class_member(current_class_scope, identifier)
{
return member_resolution;
}
let class_kind = self.scopes[current_class_scope].kind;
if class_kind.is_non_capturing_class_scope() {
return ResolutionOutcome::NoMatch;
}
let mut capture_chain = Vec::new();
for scope_id in &chain[boundary_idx + 1..] {
if self.scopes[*scope_id].kind.blocks_capture_chain() {
break;
}
capture_chain.push(*scope_id);
}
if let Some(binding) =
self.resolve_local_in_chain(identifier, usage_byte, &capture_chain)
{
return ResolutionOutcome::Local(binding);
}
return ResolutionOutcome::NoMatch;
}
if let Some(binding) = self.resolve_local_in_chain(identifier, usage_byte, &chain) {
return ResolutionOutcome::Local(binding);
}
ResolutionOutcome::NoMatch
}
#[must_use]
pub fn is_known_type_name(&self, name: &str) -> bool {
self.class_infos.resolve(name).is_some()
}
#[must_use]
pub fn has_local_binding(&self, name: &str, byte: usize) -> bool {
let Some(innermost) = self.innermost_scope_at(byte) else {
return false;
};
let chain = self.scope_chain(innermost);
self.resolve_local_in_chain(name, byte, &chain).is_some()
}
#[must_use]
pub fn resolve_local_in_chain(
&self,
identifier: &str,
usage_byte: usize,
chain: &[ScopeId],
) -> Option<LocalBindingMatch> {
for scope_id in chain {
if let Some(bindings) = self.scopes[*scope_id].variables.get(identifier)
&& let Some(binding) = find_applicable_binding(bindings, usage_byte)
{
return Some(LocalBindingMatch {
node_id: binding.node_id,
decl_start_byte: binding.decl_start_byte,
decl_end_byte: binding.decl_end_byte,
});
}
}
None
}
pub fn resolve_class_member(
&mut self,
scope_id: ScopeId,
identifier: &str,
) -> Option<ResolutionOutcome> {
let info = self.class_members.by_scope.get(&scope_id)?;
if info.declared_members.contains(identifier) {
let qualified_name = info
.qualifier
.as_ref()
.map(|qual| format!("{qual}::{identifier}"));
return Some(ResolutionOutcome::Member { qualified_name });
}
if let Some(sources) = info.inherited_members.get(identifier) {
if sources.len() == 1 {
let qualified_name = Some(format!("{}::{}", sources[0].qualifier, identifier));
return Some(ResolutionOutcome::Member { qualified_name });
}
if sources.len() > 1 {
self.debug.log(
DebugEvent::InheritedAmbiguous,
"Inherited member ambiguity: multiple matches",
);
return Some(ResolutionOutcome::Ambiguous);
}
}
if info.explicit_base_count > 0
&& info.unresolved_base_count > 0
&& self.scopes[scope_id].kind.strict_unresolved_bases()
{
self.debug.log(
DebugEvent::InheritedAmbiguous,
"Inherited member ambiguity: unresolved base",
);
return Some(ResolutionOutcome::Ambiguous);
}
None
}
#[must_use]
pub fn innermost_scope_at(&self, byte: usize) -> Option<ScopeId> {
let intervals = &self.index.intervals;
if intervals.is_empty() {
return None;
}
let idx = intervals.partition_point(|iv| iv.start <= byte);
if idx == 0 {
return None;
}
for iv in intervals[..idx].iter().rev() {
if byte < iv.end {
return Some(iv.scope_id);
}
}
None
}
#[must_use]
pub fn scope_chain(&self, innermost: ScopeId) -> Vec<ScopeId> {
let mut chain = Vec::new();
let mut current = Some(innermost);
while let Some(scope_id) = current {
chain.push(scope_id);
current = self.scopes[scope_id].parent;
}
chain
}
pub fn add_scope(
&mut self,
kind: K,
start_byte: usize,
end_byte: usize,
parent: Option<ScopeId>,
) -> Option<ScopeId> {
if !is_valid_span(start_byte, end_byte, self.content_len) {
self.debug
.log(DebugEvent::InvalidSpan, "Invalid scope span");
return None;
}
let depth = parent.map_or(0, |p| self.scopes[p].depth + 1);
let scope_id = self.scopes.len();
self.scopes.push(Scope {
start_byte,
end_byte,
parent,
kind,
depth,
variables: HashMap::new(),
});
Some(scope_id)
}
pub fn rebuild_index(&mut self) {
self.index.intervals = self
.scopes
.iter()
.enumerate()
.map(|(scope_id, scope)| ScopeInterval {
start: scope.start_byte,
end: scope.end_byte,
scope_id,
depth: scope.depth,
})
.collect();
self.index
.intervals
.sort_by(|a, b| a.start.cmp(&b.start).then(a.depth.cmp(&b.depth)));
}
pub fn add_binding(
&mut self,
scope_id: ScopeId,
name: &str,
decl_start_byte: usize,
decl_end_byte: usize,
declarator_end_byte: usize,
initializer_start_byte: Option<usize>,
) {
if !is_valid_span(decl_start_byte, decl_end_byte, self.content_len)
|| !is_valid_span(decl_end_byte, declarator_end_byte, self.content_len)
{
self.debug
.log(DebugEvent::InvalidSpan, "Invalid binding span");
return;
}
if self.is_overlap(scope_id, name, decl_start_byte) {
self.debug
.log(DebugEvent::OverlapConflict, "Overlap conflict for binding");
return;
}
let key = self.interner.intern(name);
let binding = VariableBinding {
node_id: None,
decl_start_byte,
decl_end_byte,
declarator_end_byte,
initializer_start_byte,
};
let entry = self
.scopes
.get_mut(scope_id)
.map(|scope| scope.variables.entry(key).or_default());
if let Some(bindings) = entry {
bindings.push(binding);
bindings.sort_by_key(|b| b.decl_start_byte);
}
}
fn is_overlap(&self, scope_id: ScopeId, name: &str, decl_start: usize) -> bool {
if let Some(scope) = self.scopes.get(scope_id)
&& scope.kind.allows_nested_shadowing()
{
return scope
.variables
.get(name)
.is_some_and(|bindings| !bindings.is_empty());
}
let mut current = Some(scope_id);
while let Some(scope) = current {
if let Some(bindings) = self.scopes[scope].variables.get(name)
&& !bindings.is_empty()
&& decl_start <= self.scopes[scope].end_byte
{
return true;
}
if self.scopes[scope].kind.is_overlap_boundary() {
break;
}
current = self.scopes[scope].parent;
}
false
}
}
#[must_use]
pub fn find_applicable_binding(
bindings: &[VariableBinding],
usage_byte: usize,
) -> Option<&VariableBinding> {
bindings
.iter()
.filter(|binding| binding.decl_start_byte <= usage_byte)
.filter(|binding| {
let self_start = binding
.initializer_start_byte
.unwrap_or(binding.decl_end_byte);
!(self_start <= usage_byte && usage_byte < binding.declarator_end_byte)
})
.max_by_key(|binding| binding.decl_start_byte)
}
#[must_use]
pub fn is_valid_span(start: usize, end: usize, content_len: usize) -> bool {
start <= end && end <= content_len
}
#[must_use]
pub fn load_recursion_guard() -> crate::query::security::RecursionGuard {
let recursion_limits =
crate::config::RecursionLimits::load_or_default().expect("Failed to load recursion limits");
let file_ops_depth = recursion_limits
.effective_file_ops_depth()
.expect("Invalid file_ops_depth configuration");
crate::query::security::RecursionGuard::new(file_ops_depth)
.expect("Failed to create recursion guard")
}
#[must_use]
pub fn first_child_of_kind<'a>(
node: tree_sitter::Node<'a>,
kind: &str,
) -> Option<tree_sitter::Node<'a>> {
let mut cursor = node.walk();
node.children(&mut cursor)
.find(|child| child.kind() == kind)
}
#[must_use]
pub fn recursion_error_to_graph_error(
e: &crate::query::security::RecursionError,
node: tree_sitter::Node,
) -> GraphBuilderError {
GraphBuilderError::ParseError {
span: Span::from_bytes(node.start_byte(), node.end_byte()),
reason: format!("Recursion limit: {e}"),
}
}
#[must_use]
pub fn collect_reference_edges(
staging: &crate::graph::unified::build::StagingGraph,
) -> Vec<(String, String)> {
use crate::graph::unified::build::StagingOp;
use crate::graph::unified::edge::EdgeKind;
let strings = crate::graph::unified::build::test_helpers::build_string_lookup(staging);
let node_names = build_node_name_map(staging, &strings);
staging
.operations()
.iter()
.filter_map(|op| {
if let StagingOp::AddEdge {
source,
target,
kind: EdgeKind::References,
..
} = op
{
let from = node_names.get(source)?.clone();
let to = node_names.get(target)?.clone();
Some((from, to))
} else {
None
}
})
.collect()
}
#[must_use]
pub fn has_local_ref(edges: &[(String, String)], name: &str) -> bool {
let prefix = format!("{name}@");
edges.iter().any(|(_, target)| target.starts_with(&prefix))
}
#[must_use]
pub fn count_local_refs(edges: &[(String, String)], name: &str) -> usize {
let prefix = format!("{name}@");
edges
.iter()
.filter(|(_, target)| target.starts_with(&prefix))
.count()
}
#[must_use]
pub fn local_ref_targets(edges: &[(String, String)], name: &str) -> HashSet<String> {
let prefix = format!("{name}@");
edges
.iter()
.filter(|(_, target)| target.starts_with(&prefix))
.map(|(_, target)| target.clone())
.collect()
}
fn build_node_name_map(
staging: &crate::graph::unified::build::StagingGraph,
strings: &HashMap<u32, String>,
) -> HashMap<NodeId, String> {
use crate::graph::unified::build::StagingOp;
staging
.operations()
.iter()
.filter_map(|op| {
if let StagingOp::AddNode {
entry, expected_id, ..
} = op
{
let node_id = (*expected_id)?;
let name_idx = entry.qualified_name.unwrap_or(entry.name).index();
let name = strings.get(&name_idx)?.clone();
Some((node_id, name))
} else {
None
}
})
.collect()
}
#[must_use]
pub fn build_string_lookup(
staging: &crate::graph::unified::build::StagingGraph,
) -> HashMap<u32, String> {
crate::graph::unified::build::test_helpers::build_string_lookup(staging)
}