use anyhow::Result;
use rusqlite::params;
use sqlitegraph::{BackendDirection, GraphBackend, NeighborQuery, SnapshotId};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use crate::graph::ambiguity::AmbiguityOps;
use crate::graph::schema::{EdgeEndpoints, SymbolNode};
use crate::ingest::c::CParser;
use crate::ingest::cpp::CppParser;
use crate::ingest::java::JavaParser;
use crate::ingest::{SymbolFact, SymbolKind};
use crate::references::ReferenceFact;
use super::CodeGraph;
fn resolve_query_path(path: &str) -> String {
let path_buf = PathBuf::from(path);
if path_buf.is_absolute() {
return path.to_string();
}
if let Ok(cwd) = std::env::current_dir() {
return cwd.join(&path_buf).to_string_lossy().to_string();
}
path.to_string()
}
pub fn symbols_in_file(graph: &mut CodeGraph, path: &str) -> Result<Vec<SymbolFact>> {
let entries = symbol_nodes_in_file(graph, path)?;
Ok(entries.into_iter().map(|(_, fact)| fact).collect())
}
pub fn symbols_in_file_with_kind(
graph: &mut CodeGraph,
path: &str,
kind: Option<SymbolKind>,
) -> Result<Vec<SymbolFact>> {
let entries = symbol_nodes_in_file(graph, path)?;
let mut symbols = Vec::new();
for (_, fact) in entries {
if let Some(ref filter_kind) = kind {
if fact.kind == *filter_kind {
symbols.push(fact);
}
} else {
symbols.push(fact);
}
}
Ok(symbols)
}
pub fn symbol_nodes_in_file(graph: &mut CodeGraph, path: &str) -> Result<Vec<(i64, SymbolFact)>> {
let resolved_path = resolve_query_path(path);
let file_id = match graph.files.find_file_node(&resolved_path)? {
Some(id) => id,
None => return Ok(Vec::new()),
};
let path_buf = PathBuf::from(&resolved_path);
let snapshot = SnapshotId::current();
let neighbor_ids = graph.files.backend.neighbors(
snapshot,
file_id.as_i64(),
NeighborQuery {
direction: BackendDirection::Outgoing,
edge_type: Some("DEFINES".to_string()),
},
)?;
let mut entries = Vec::new();
for symbol_node_id in neighbor_ids {
if let Ok(Some(fact)) = graph
.files
.symbol_fact_from_node(symbol_node_id, path_buf.clone())
{
entries.push((symbol_node_id, fact));
}
}
entries.sort_by(|(_, a), (_, b)| {
a.start_line
.cmp(&b.start_line)
.then_with(|| a.start_col.cmp(&b.start_col))
.then_with(|| a.byte_start.cmp(&b.byte_start))
});
Ok(entries)
}
pub fn symbol_nodes_in_file_with_ids(
graph: &mut CodeGraph,
path: &str,
) -> Result<Vec<(i64, SymbolFact, Option<String>)>> {
let resolved_path = resolve_query_path(path);
let file_id = match graph.files.find_file_node(&resolved_path)? {
Some(id) => id,
None => return Ok(Vec::new()),
};
let path_buf = PathBuf::from(&resolved_path);
let snapshot = SnapshotId::current();
let neighbor_ids = graph.files.backend.neighbors(
snapshot,
file_id.as_i64(),
NeighborQuery {
direction: BackendDirection::Outgoing,
edge_type: Some("DEFINES".to_string()),
},
)?;
let mut entries = Vec::new();
for symbol_node_id in neighbor_ids {
if let Ok(node) = graph.files.backend.get_node(snapshot, symbol_node_id) {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(node.data.clone()) {
let kind = match symbol_node.kind.as_str() {
"Function" => SymbolKind::Function,
"Method" => SymbolKind::Method,
"Class" => SymbolKind::Class,
"Interface" => SymbolKind::Interface,
"Enum" => SymbolKind::Enum,
"Module" => SymbolKind::Module,
"Union" => SymbolKind::Union,
"Namespace" => SymbolKind::Namespace,
"TypeAlias" => SymbolKind::TypeAlias,
"Unknown" => SymbolKind::Unknown,
_ => SymbolKind::Unknown,
};
let kind_normalized = symbol_node
.kind_normalized
.clone()
.unwrap_or_else(|| kind.normalized_key().to_string());
let fact = SymbolFact {
file_path: path_buf.clone(),
kind,
kind_normalized,
name: symbol_node.name.clone(),
fqn: symbol_node.fqn,
canonical_fqn: symbol_node.canonical_fqn,
display_fqn: symbol_node.display_fqn,
byte_start: symbol_node.byte_start,
byte_end: symbol_node.byte_end,
start_line: symbol_node.start_line,
start_col: symbol_node.start_col,
end_line: symbol_node.end_line,
end_col: symbol_node.end_col,
};
entries.push((symbol_node_id, fact, symbol_node.symbol_id));
}
}
}
entries.sort_by(|(_, a, _), (_, b, _)| {
a.start_line
.cmp(&b.start_line)
.then_with(|| a.start_col.cmp(&b.start_col))
.then_with(|| a.byte_start.cmp(&b.byte_start))
});
Ok(entries)
}
pub fn symbol_extents(
graph: &mut CodeGraph,
path: &str,
name: &str,
) -> Result<Vec<(i64, SymbolFact)>> {
let entries = symbol_nodes_in_file(graph, path)?;
let mut matches = Vec::new();
for (node_id, fact) in entries {
if fact.name.as_deref() == Some(name) {
matches.push((node_id, fact));
}
}
Ok(matches)
}
pub fn symbol_id_by_name(graph: &mut CodeGraph, path: &str, name: &str) -> Result<Option<i64>> {
let normalized_path = crate::graph::files::normalize_path_for_index(path);
let file_id = match graph.files.find_file_node(&normalized_path)? {
Some(id) => id,
None => return Ok(None),
};
let snapshot = SnapshotId::current();
let neighbor_ids = graph.files.backend.neighbors(
snapshot,
file_id.as_i64(),
NeighborQuery {
direction: BackendDirection::Outgoing,
edge_type: Some("DEFINES".to_string()),
},
)?;
for symbol_node_id in neighbor_ids {
if let Ok(node) = graph.files.backend.get_node(snapshot, symbol_node_id) {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(node.data) {
if symbol_node
.name
.as_ref()
.map(|n| n == name)
.unwrap_or(false)
{
return Ok(Some(symbol_node_id));
}
}
}
}
Ok(None)
}
pub fn find_by_symbol_id(graph: &mut CodeGraph, symbol_id: &str) -> Result<Option<SymbolNode>> {
let entity_ids = graph.calls.backend.entity_ids()?;
let snapshot = SnapshotId::current();
for entity_id in entity_ids {
if let Ok(node) = graph.calls.backend.get_node(snapshot, entity_id) {
if node.kind == "Symbol" {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(node.data.clone()) {
if symbol_node.symbol_id.as_deref() == Some(symbol_id) {
return Ok(Some(symbol_node));
}
}
}
}
}
Ok(None)
}
pub fn index_references(graph: &mut CodeGraph, path: &str, source: &[u8]) -> Result<usize> {
let _file_id = match graph.files.find_file_node(path)? {
Some(id) => id,
None => return Ok(0), };
let mut symbol_id_to_id: HashMap<String, i64> = HashMap::new();
let mut display_fqn_groups: HashMap<String, Vec<i64>> = HashMap::new();
let mut symbol_fqn_to_id: HashMap<String, i64> = HashMap::new();
let entity_ids = graph.files.backend.entity_ids()?;
let snapshot = SnapshotId::current();
for entity_id in entity_ids {
if let Ok(node) = graph.files.backend.get_node(snapshot, entity_id) {
if node.kind == "Symbol" {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(node.data) {
if let Some(symbol_id) = symbol_node.symbol_id {
symbol_id_to_id.insert(symbol_id, entity_id);
}
if let Some(ref display_fqn) = symbol_node.display_fqn {
if !display_fqn.is_empty() {
display_fqn_groups
.entry(display_fqn.clone())
.or_default()
.push(entity_id);
}
}
let fqn = symbol_node.fqn.or(symbol_node.name).unwrap_or_default();
if !fqn.is_empty() {
symbol_fqn_to_id.insert(fqn, entity_id);
}
}
}
}
}
for (display_fqn, symbol_ids) in display_fqn_groups {
if symbol_ids.len() > 1 {
graph.create_ambiguous_group(&display_fqn, &symbol_ids)?;
}
}
let count = graph.references.index_references_with_symbol_id(
path,
source,
&symbol_id_to_id,
&symbol_fqn_to_id,
)?;
populate_cross_file_refs(graph, path, source, &symbol_id_to_id, &symbol_fqn_to_id)?;
Ok(count)
}
fn populate_cross_file_refs(
graph: &mut CodeGraph,
path: &str,
source: &[u8],
symbol_id_to_id: &HashMap<String, i64>,
symbol_fqn_to_id: &HashMap<String, i64>,
) -> Result<()> {
use crate::ingest::{detect_language, Parser};
use std::path::PathBuf;
let path_buf = PathBuf::from(path);
let language = detect_language(&path_buf);
let _file_symbols = symbols_in_file(graph, path)?;
let mut entity_to_symbol_id: HashMap<i64, String> = HashMap::new();
for entity_id in graph.files.backend.entity_ids()? {
if let Ok(node) = graph
.files
.backend
.get_node(SnapshotId::current(), entity_id)
{
if node.kind == "Symbol" {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(node.data) {
if let Some(sid) = symbol_node.symbol_id {
entity_to_symbol_id.insert(entity_id, sid);
}
}
}
}
}
let mut all_symbol_facts: Vec<crate::ingest::SymbolFact> = Vec::new();
for entity_id in graph.files.backend.entity_ids()? {
if let Ok(node) = graph
.files
.backend
.get_node(SnapshotId::current(), entity_id)
{
if node.kind == "Symbol" {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(node.data.clone()) {
if let Some(name) = &symbol_node.name {
let file_path_str = node.file_path.as_deref().unwrap_or("");
let fqn = symbol_node
.fqn
.clone()
.or(symbol_node.name.clone())
.unwrap_or_default();
all_symbol_facts.push(crate::ingest::SymbolFact {
file_path: PathBuf::from(file_path_str),
kind: crate::ingest::SymbolKind::Unknown,
kind_normalized: symbol_node
.kind_normalized
.clone()
.unwrap_or(symbol_node.kind.clone()),
name: Some(name.clone()),
fqn: if fqn.is_empty() { None } else { Some(fqn) },
canonical_fqn: symbol_node.canonical_fqn.clone(),
display_fqn: symbol_node.display_fqn.clone(),
byte_start: symbol_node.byte_start,
byte_end: symbol_node.byte_end,
start_line: symbol_node.start_line,
start_col: symbol_node.start_col,
end_line: symbol_node.end_line,
end_col: symbol_node.end_col,
});
}
}
}
}
}
let references = match language {
Some(crate::ingest::Language::Rust) => {
let mut parser = Parser::new()?;
parser.extract_references(path_buf.clone(), source, &all_symbol_facts)
}
Some(crate::ingest::Language::C) => {
let mut parser = CParser::new()?;
parser.extract_references(path_buf.clone(), source, &all_symbol_facts)
}
Some(crate::ingest::Language::Cpp) => {
let mut parser = CppParser::new()?;
parser.extract_references(path_buf.clone(), source, &all_symbol_facts)
}
Some(crate::ingest::Language::Java) => {
let mut parser = JavaParser::new()?;
parser.extract_references(path_buf.clone(), source, &all_symbol_facts)
}
_ => Vec::new(),
};
for reference in &references {
let target_symbol_id = symbol_id_to_id
.get(&reference.referenced_symbol)
.or_else(|| symbol_fqn_to_id.get(&reference.referenced_symbol))
.and_then(|&entity_id| entity_to_symbol_id.get(&entity_id).cloned());
let Some(to_symbol_id) = target_symbol_id else {
continue;
};
let containing_symbol = all_symbol_facts.iter().find(|s| {
s.file_path == path_buf
&& s.byte_start <= reference.byte_start
&& s.byte_end >= reference.byte_end
});
let from_symbol_id = containing_symbol
.and_then(|s| s.fqn.clone())
.unwrap_or_else(|| format!("file:{}", path));
let cref = crate::graph::schema::CrossFileRef {
from_symbol_id,
to_symbol_id,
file_path: path.to_string(),
line_number: reference.start_line,
byte_start: reference.byte_start,
byte_end: reference.byte_end,
};
let _ = graph.side_tables.store_cross_file_ref(&cref);
}
Ok(())
}
pub fn references_to_symbol(graph: &mut CodeGraph, symbol_id: i64) -> Result<Vec<ReferenceFact>> {
graph.references.references_to_symbol(symbol_id)
}
pub fn cross_file_references_to(
graph: &CodeGraph,
symbol_fqn: &str,
) -> Result<Vec<crate::graph::schema::CrossFileRef>> {
graph.side_tables.get_references_to(symbol_fqn)
}
pub fn edge_endpoints(graph: &CodeGraph) -> Result<Vec<EdgeEndpoints>> {
let conn = graph.chunks.connect()?;
let mut stmt = conn
.prepare_cached("SELECT from_id, to_id FROM graph_edges ORDER BY id")
.map_err(|e| anyhow::anyhow!("Failed to prepare edge endpoint query: {}", e))?;
let endpoints = stmt
.query_map([], |row: &rusqlite::Row| {
Ok(EdgeEndpoints {
from_id: row.get(0)?,
to_id: row.get(1)?,
})
})
.map_err(|e| anyhow::anyhow!("Failed to query edge endpoints: {}", e))?
.collect::<Result<Vec<_>, _>>()
.map_err(|e| anyhow::anyhow!("Failed to collect edge endpoints: {}", e))?;
Ok(endpoints)
}
#[derive(Debug, Clone)]
pub struct SymbolQueryResult {
pub entity_id: i64,
pub name: String,
pub file_path: String,
pub kind: String,
pub byte_start: usize,
pub byte_end: usize,
}
impl CodeGraph {
pub fn get_entities_by_label(&self, label: &str) -> Result<Vec<i64>> {
let conn = self.chunks.connect()?;
let mut stmt = conn
.prepare_cached("SELECT DISTINCT entity_id FROM graph_labels WHERE label = ?1")
.map_err(|e| anyhow::anyhow!("Failed to prepare label query: {}", e))?;
let entity_ids = stmt
.query_map(params![label], |row: &rusqlite::Row| row.get(0))
.map_err(|e| anyhow::anyhow!("Failed to execute label query: {}", e))?
.collect::<Result<Vec<i64>, _>>()
.map_err(|e| anyhow::anyhow!("Failed to collect label results: {}", e))?;
Ok(entity_ids)
}
pub fn get_entities_by_labels(&self, labels: &[&str]) -> Result<Vec<i64>> {
if labels.is_empty() {
return Ok(Vec::new());
}
let conn = self.chunks.connect()?;
let placeholders = std::iter::repeat_n("?", labels.len())
.collect::<Vec<_>>()
.join(", ");
let query = format!(
"SELECT entity_id FROM graph_labels WHERE label IN ({})
GROUP BY entity_id HAVING COUNT(DISTINCT label) = ?",
placeholders
);
let label_params: Vec<String> = labels.iter().map(|s| s.to_string()).collect();
let count_param: i64 = labels.len() as i64;
let mut stmt = conn
.prepare_cached(&query)
.map_err(|e| anyhow::anyhow!("Failed to prepare multi-label query: {}", e))?;
let params: Vec<&dyn rusqlite::ToSql> = label_params
.iter()
.map(|s| s as &dyn rusqlite::ToSql)
.chain(std::iter::once(&count_param as &dyn rusqlite::ToSql))
.collect();
let entity_ids = stmt
.query_map(¶ms[..], |row: &rusqlite::Row| row.get(0))
.map_err(|e| anyhow::anyhow!("Failed to execute multi-label query: {}", e))?
.collect::<Result<Vec<i64>, _>>()
.map_err(|e| anyhow::anyhow!("Failed to collect multi-label results: {}", e))?;
Ok(entity_ids)
}
pub fn get_all_labels(&self) -> Result<Vec<String>> {
let conn = self.chunks.connect()?;
let mut stmt = conn
.prepare_cached("SELECT DISTINCT label FROM graph_labels ORDER BY label")
.map_err(|e| anyhow::anyhow!("Failed to prepare labels query: {}", e))?;
let labels = stmt
.query_map([], |row: &rusqlite::Row| row.get::<_, String>(0))
.map_err(|e| anyhow::anyhow!("Failed to execute labels query: {}", e))?
.collect::<Result<Vec<_>, _>>()
.map_err(|e| anyhow::anyhow!("Failed to collect labels: {}", e))?;
Ok(labels)
}
pub fn count_entities_by_label(&self, label: &str) -> Result<usize> {
let conn = self.chunks.connect()?;
let count: i64 = conn
.query_row(
"SELECT COUNT(DISTINCT entity_id) FROM graph_labels WHERE label = ?1",
params![label],
|row: &rusqlite::Row| row.get(0),
)
.map_err(|e| anyhow::anyhow!("Failed to count entities by label: {}", e))?;
Ok(count as usize)
}
pub fn get_symbols_by_label(&self, label: &str) -> Result<Vec<SymbolQueryResult>> {
let entity_ids = self.get_entities_by_label(label)?;
let mut results = Vec::new();
let snapshot = SnapshotId::current();
for entity_id in entity_ids {
if let Ok(node) = self.symbols.backend.get_node(snapshot, entity_id) {
let symbol_node: SymbolNode =
serde_json::from_value(node.data).unwrap_or_else(|_| SymbolNode {
symbol_id: None,
fqn: None,
canonical_fqn: None,
display_fqn: None,
name: None,
kind: "Unknown".to_string(),
kind_normalized: None,
byte_start: 0,
byte_end: 0,
start_line: 0,
start_col: 0,
end_line: 0,
end_col: 0,
});
results.push(SymbolQueryResult {
entity_id,
name: symbol_node.name.unwrap_or_else(|| "<unnamed>".to_string()),
file_path: node.file_path.unwrap_or_else(|| "?".to_string()),
kind: symbol_node.kind_normalized.unwrap_or(symbol_node.kind),
byte_start: symbol_node.byte_start,
byte_end: symbol_node.byte_end,
});
}
}
Ok(results)
}
pub fn get_symbols_by_labels(&self, labels: &[&str]) -> Result<Vec<SymbolQueryResult>> {
let entity_ids = self.get_entities_by_labels(labels)?;
let mut results = Vec::new();
let snapshot = SnapshotId::current();
for entity_id in entity_ids {
if let Ok(node) = self.symbols.backend.get_node(snapshot, entity_id) {
let symbol_node: SymbolNode =
serde_json::from_value(node.data).unwrap_or_else(|_| SymbolNode {
symbol_id: None,
fqn: None,
canonical_fqn: None,
display_fqn: None,
name: None,
kind: "Unknown".to_string(),
kind_normalized: None,
byte_start: 0,
byte_end: 0,
start_line: 0,
start_col: 0,
end_line: 0,
end_col: 0,
});
results.push(SymbolQueryResult {
entity_id,
name: symbol_node.name.unwrap_or_else(|| "<unnamed>".to_string()),
file_path: node.file_path.unwrap_or_else(|| "?".to_string()),
kind: symbol_node.kind_normalized.unwrap_or(symbol_node.kind),
byte_start: symbol_node.byte_start,
byte_end: symbol_node.byte_end,
});
}
}
Ok(results)
}
pub fn search_symbols_by_name(&self, name: &str) -> Result<Vec<SymbolQueryResult>> {
let conn = self.chunks.connect()?;
let mut stmt = conn
.prepare_cached(
"SELECT id, name, file_path, data
FROM graph_entities
WHERE name = ?1",
)
.map_err(|e| anyhow::anyhow!("Failed to prepare name search: {}", e))?;
let results = stmt
.query_map(params![name], |row| {
let entity_id: i64 = row.get(0)?;
let sym_name: String = row.get(1)?;
let file_path: Option<String> = row.get(2)?;
let data: Option<String> = row.get(3)?;
let symbol_node: SymbolNode = data
.and_then(|d| serde_json::from_value(serde_json::Value::String(d)).ok())
.unwrap_or(SymbolNode {
symbol_id: None,
fqn: None,
canonical_fqn: None,
display_fqn: None,
name: None,
kind: "Unknown".to_string(),
kind_normalized: None,
byte_start: 0,
byte_end: 0,
start_line: 0,
start_col: 0,
end_line: 0,
end_col: 0,
});
Ok(SymbolQueryResult {
entity_id,
name: sym_name,
file_path: file_path.unwrap_or_else(|| "?".to_string()),
kind: symbol_node.kind_normalized.unwrap_or(symbol_node.kind),
byte_start: symbol_node.byte_start,
byte_end: symbol_node.byte_end,
})
})
.map_err(|e| anyhow::anyhow!("Failed to execute name search: {}", e))?
.collect::<Result<Vec<_>, _>>()
.map_err(|e| anyhow::anyhow!("Failed to collect name search results: {}", e))?;
Ok(results)
}
}
pub fn get_ambiguous_candidates(
graph: &mut CodeGraph,
display_fqn: &str,
) -> Result<Vec<(i64, SymbolNode)>> {
let entity_ids = graph.calls.backend.entity_ids()?;
let snapshot = SnapshotId::current();
let mut candidates = Vec::new();
for entity_id in entity_ids {
if let Ok(node) = graph.calls.backend.get_node(snapshot, entity_id) {
if node.kind == "Symbol" {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(node.data.clone()) {
if symbol_node.display_fqn.as_deref() == Some(display_fqn) {
candidates.push((entity_id, symbol_node));
}
}
}
}
}
candidates.sort_by_key(|(id, _)| *id);
Ok(candidates)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CollisionField {
Fqn,
DisplayFqn,
CanonicalFqn,
}
impl CollisionField {
pub fn from_str(value: &str) -> Option<Self> {
match value {
"fqn" => Some(CollisionField::Fqn),
"display_fqn" => Some(CollisionField::DisplayFqn),
"canonical_fqn" => Some(CollisionField::CanonicalFqn),
_ => None,
}
}
pub fn as_str(&self) -> &'static str {
match self {
CollisionField::Fqn => "fqn",
CollisionField::DisplayFqn => "display_fqn",
CollisionField::CanonicalFqn => "canonical_fqn",
}
}
fn _json_path(&self) -> &'static str {
match self {
CollisionField::Fqn => "$.fqn",
CollisionField::DisplayFqn => "$.display_fqn",
CollisionField::CanonicalFqn => "$.canonical_fqn",
}
}
}
#[derive(Debug, Clone)]
pub struct CollisionCandidate {
pub entity_id: i64,
pub symbol_id: Option<String>,
pub canonical_fqn: Option<String>,
pub display_fqn: Option<String>,
pub name: Option<String>,
pub file_path: Option<String>,
}
#[derive(Debug, Clone)]
pub struct CollisionGroup {
pub field: String,
pub value: String,
pub count: usize,
pub candidates: Vec<CollisionCandidate>,
}
pub fn collision_groups(
graph: &mut CodeGraph,
field: CollisionField,
limit: usize,
) -> Result<Vec<CollisionGroup>> {
let entity_ids = graph.calls.backend.entity_ids()?;
let snapshot = SnapshotId::current();
let mut groups: std::collections::HashMap<String, Vec<(i64, SymbolNode, Option<String>)>> =
std::collections::HashMap::new();
for entity_id in entity_ids {
if let Ok(node) = graph.calls.backend.get_node(snapshot, entity_id) {
if node.kind == "Symbol" {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(node.data.clone()) {
let field_value = match field {
CollisionField::Fqn => symbol_node.fqn.clone(),
CollisionField::DisplayFqn => symbol_node.display_fqn.clone(),
CollisionField::CanonicalFqn => symbol_node.canonical_fqn.clone(),
};
if let Some(value) = field_value {
groups.entry(value).or_default().push((
entity_id,
symbol_node,
node.file_path.clone(),
));
}
}
}
}
}
let mut collision_groups: Vec<(String, Vec<(i64, SymbolNode, Option<String>)>)> = groups
.into_iter()
.filter(|(_, candidates)| candidates.len() > 1)
.collect();
collision_groups.sort_by(|a, b| {
let count_cmp = b.1.len().cmp(&a.1.len());
if count_cmp != std::cmp::Ordering::Equal {
count_cmp
} else {
a.0.cmp(&b.0)
}
});
collision_groups.truncate(limit);
let mut results = Vec::new();
for (value, candidates) in collision_groups {
let count = candidates.len();
let collision_candidates: Vec<CollisionCandidate> = candidates
.into_iter()
.map(|(entity_id, symbol_node, file_path)| {
let final_file_path = file_path.or_else(|| {
symbol_node.canonical_fqn.as_ref().and_then(|fqn| {
let parts: Vec<&str> = fqn.split("::").collect();
if parts.len() >= 3 {
Some(parts[1..parts.len() - 1].join("::"))
} else {
None
}
})
});
CollisionCandidate {
entity_id,
symbol_id: symbol_node.symbol_id,
canonical_fqn: symbol_node.canonical_fqn,
display_fqn: symbol_node.display_fqn,
name: symbol_node.name,
file_path: final_file_path,
}
})
.collect();
results.push(CollisionGroup {
field: field.as_str().to_string(),
value,
count,
candidates: collision_candidates,
});
}
Ok(results)
}
#[cfg(test)]
mod tests {
use crate::graph::query::{
collision_groups, find_by_symbol_id, get_ambiguous_candidates,
symbol_nodes_in_file_with_ids, symbols_in_file, CollisionField,
};
use crate::graph::schema::SymbolNode;
use sqlitegraph::{GraphBackend, SnapshotId};
#[test]
fn test_index_references_propagates_count() {
let temp_dir = tempfile::TempDir::new().unwrap();
let db_path = temp_dir.path().join("test.db");
let mut graph = crate::CodeGraph::open(&db_path).unwrap();
let test_file = temp_dir.path().join("test.rs");
std::fs::write(
&test_file,
r#"
fn foo() {}
fn bar() {
foo();
}
"#,
)
.unwrap();
let path_str = test_file.to_string_lossy().to_string();
let source = std::fs::read(&test_file).unwrap();
graph.index_file(&path_str, &source).unwrap();
let count = graph.index_references(&path_str, &source).unwrap();
assert!(count > 0, "Expected at least 1 reference, got {}", count);
}
#[test]
fn test_find_by_symbol_id_returns_none_for_nonexistent() {
let temp_dir =
std::env::temp_dir().join(format!("magellan_query_test_{}", std::process::id()));
std::fs::create_dir_all(&temp_dir).unwrap();
let db_path = temp_dir.join("test.db");
let mut graph = crate::CodeGraph::open(&db_path).unwrap();
let test_file = temp_dir.join("dummy.rs");
std::fs::write(&test_file, "fn dummy() {}").unwrap();
let path_str = test_file.to_string_lossy().to_string();
let source = std::fs::read(&test_file).unwrap();
graph.index_file(&path_str, &source).unwrap();
let result = find_by_symbol_id(&mut graph, "nonexistent12345678901234567890");
assert!(result.is_ok());
assert!(result.unwrap().is_none());
}
#[test]
fn test_find_by_symbol_id_returns_symbol_when_found() {
let temp_dir =
std::env::temp_dir().join(format!("magellan_query_test2_{}", std::process::id()));
std::fs::create_dir_all(&temp_dir).unwrap();
let db_path = temp_dir.join("test.db");
let mut graph = crate::CodeGraph::open(&db_path).unwrap();
let test_file = temp_dir.join("test.rs");
std::fs::write(
&test_file,
r#"
fn test_function() -> i32 {
42
}
"#,
)
.unwrap();
let path_str = test_file.to_string_lossy().to_string();
let source = std::fs::read(&test_file).unwrap();
graph.index_file(&path_str, &source).unwrap();
let symbols = symbols_in_file(&mut graph, &path_str).unwrap();
assert!(!symbols.is_empty());
let (_node_id, _fact, symbol_id) = symbol_nodes_in_file_with_ids(&mut graph, &path_str)
.unwrap()
.into_iter()
.find(|(_, fact, _)| fact.name.as_deref() == Some("test_function"))
.expect("test_function should exist");
if let Some(id) = symbol_id {
let result = find_by_symbol_id(&mut graph, &id).unwrap();
assert!(result.is_some());
let found = result.unwrap();
assert_eq!(found.name.as_deref(), Some("test_function"));
}
}
#[test]
fn test_get_ambiguous_candidates_empty_for_no_match() {
let temp_dir =
std::env::temp_dir().join(format!("magellan_query_test3_{}", std::process::id()));
std::fs::create_dir_all(&temp_dir).unwrap();
let db_path = temp_dir.join("test.db");
let mut graph = crate::CodeGraph::open(&db_path).unwrap();
let test_file = temp_dir.join("dummy.rs");
std::fs::write(&test_file, "fn dummy() {}").unwrap();
let path_str = test_file.to_string_lossy().to_string();
let source = std::fs::read(&test_file).unwrap();
graph.index_file(&path_str, &source).unwrap();
let result = get_ambiguous_candidates(&mut graph, "nonexistent::function").unwrap();
assert!(result.is_empty());
}
#[test]
fn test_get_ambiguous_candidates_single_result() {
let temp_dir =
std::env::temp_dir().join(format!("magellan_query_test4_{}", std::process::id()));
std::fs::create_dir_all(&temp_dir).unwrap();
let db_path = temp_dir.join("test.db");
let mut graph = crate::CodeGraph::open(&db_path).unwrap();
let test_file = temp_dir.join("test.rs");
std::fs::write(
&test_file,
r#"fn unique_function() {}
"#,
)
.unwrap();
let path_str = test_file.to_string_lossy().to_string();
let source = std::fs::read(&test_file).unwrap();
graph.index_file(&path_str, &source).unwrap();
let entity_ids = graph.files.backend.entity_ids().unwrap();
let mut found_display_fqn: Option<String> = None;
let snapshot = SnapshotId::current();
for entity_id in entity_ids {
if let Ok(node) = graph.files.backend.get_node(snapshot, entity_id) {
if node.kind == "Symbol" {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(node.data) {
if symbol_node.name.as_deref() == Some("unique_function") {
found_display_fqn = symbol_node.display_fqn.clone();
if found_display_fqn.is_none() {
return; }
break;
}
}
}
}
}
if found_display_fqn.is_none() {
return; }
let display_fqn = found_display_fqn.unwrap();
let result = get_ambiguous_candidates(&mut graph, &display_fqn).unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].1.name.as_deref(), Some("unique_function"));
}
#[test]
fn test_get_ambiguous_candidates_multiple_results() {
let temp_dir =
std::env::temp_dir().join(format!("magellan_query_test5_{}", std::process::id()));
std::fs::create_dir_all(&temp_dir).unwrap();
let db_path = temp_dir.join("test.db");
let mut graph = crate::CodeGraph::open(&db_path).unwrap();
let file1 = temp_dir.join("file1.rs");
std::fs::write(
&file1,
r#"fn common_name() {}
"#,
)
.unwrap();
let file2 = temp_dir.join("file2.rs");
std::fs::write(
&file2,
r#"fn common_name() {}
"#,
)
.unwrap();
let path1 = file1.to_string_lossy().to_string();
let path2 = file2.to_string_lossy().to_string();
let source1 = std::fs::read(&file1).unwrap();
let source2 = std::fs::read(&file2).unwrap();
graph.index_file(&path1, &source1).unwrap();
graph.index_file(&path2, &source2).unwrap();
let entity_ids = graph.files.backend.entity_ids().unwrap();
let mut common_display_fqn: Option<String> = None;
let snapshot = SnapshotId::current();
for entity_id in entity_ids {
if let Ok(node) = graph.files.backend.get_node(snapshot, entity_id) {
if node.kind == "Symbol" {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(node.data) {
if symbol_node.name.as_deref() == Some("common_name") {
common_display_fqn = symbol_node.display_fqn.clone();
if common_display_fqn.is_some() {
break;
}
}
}
}
}
}
if common_display_fqn.is_none() {
return; }
let display_fqn = common_display_fqn.unwrap();
let result = get_ambiguous_candidates(&mut graph, &display_fqn).unwrap();
assert!(
result.len() >= 2,
"Should find at least 2 symbols with common_name display_fqn"
);
}
#[test]
fn test_collision_groups_for_fqn() {
let temp_dir =
std::env::temp_dir().join(format!("magellan_query_test6_{}", std::process::id()));
std::fs::create_dir_all(&temp_dir).unwrap();
let db_path = temp_dir.join("test.db");
let mut graph = crate::CodeGraph::open(&db_path).unwrap();
let file1 = temp_dir.join("file1.rs");
std::fs::write(&file1, "fn collide() {}\n").unwrap();
let file2 = temp_dir.join("file2.rs");
std::fs::write(&file2, "fn collide() {}\n").unwrap();
let path1 = file1.to_string_lossy().to_string();
let path2 = file2.to_string_lossy().to_string();
let source1 = std::fs::read(&file1).unwrap();
let source2 = std::fs::read(&file2).unwrap();
graph.index_file(&path1, &source1).unwrap();
graph.index_file(&path2, &source2).unwrap();
let entity_ids = graph.files.backend.entity_ids().unwrap();
let snapshot = SnapshotId::current();
eprintln!("DEBUG: Total entities: {}", entity_ids.len());
for entity_id in entity_ids {
if let Ok(node) = graph.files.backend.get_node(snapshot, entity_id) {
if node.kind == "Symbol" {
if let Ok(symbol_node) = serde_json::from_value::<SymbolNode>(node.data.clone())
{
eprintln!(
"DEBUG: Symbol: name={:?}, canonical_fqn={:?}, display_fqn={:?}",
symbol_node.name, symbol_node.canonical_fqn, symbol_node.display_fqn
);
}
}
}
}
let groups = collision_groups(&mut graph, CollisionField::Fqn, 10).unwrap();
eprintln!("DEBUG: Found {} collision groups", groups.len());
for group in &groups {
eprintln!("DEBUG: Group: value={}, count={}", group.value, group.count);
}
let collide_group = groups
.iter()
.find(|group| group.value == "collide")
.expect("Expected collision group for 'collide'");
assert!(collide_group.count >= 2);
assert!(collide_group
.candidates
.iter()
.any(|c| c.symbol_id.is_some()));
assert!(collide_group
.candidates
.iter()
.all(|c| c.file_path.is_some()));
}
}