use anyhow::Result;
use blake3::Hasher;
use sha2::{Digest, Sha256};
use sqlitegraph::{
BackendDirection, EdgeSpec, GraphBackend, NeighborQuery, NodeId, NodeSpec, SnapshotId,
};
use std::sync::Arc;
use crate::detect_language;
use crate::graph::schema::SymbolNode;
use crate::ingest::SymbolFact;
/// Derives a short symbol identifier from a language, a fully-qualified name and a span id.
///
/// The three inputs are fed to SHA-256 in the order `language`, `fqn`, `span_id`, with a
/// single `:` byte between consecutive parts. The first 8 bytes of the digest are returned
/// as 16 lowercase hexadecimal characters.
pub fn generate_symbol_id(language: &str, fqn: &str, span_id: &str) -> String {
    let mut hasher = Sha256::new();
    for (position, part) in [language, fqn, span_id].iter().enumerate() {
        // Separator goes between parts only, never before the first one.
        if position > 0 {
            hasher.update(b":");
        }
        hasher.update(part.as_bytes());
    }
    let digest = hasher.finalize();

    // Keep only the leading 8 bytes -> 16 hex characters.
    digest
        .iter()
        .take(8)
        .map(|byte| format!("{:02x}", byte))
        .collect::<String>()
}
/// Derives a 32-character hexadecimal symbol identifier from span-independent inputs.
///
/// The hashed fields are, in order: `crate_name`, the `enclosing_items` joined with `::`,
/// `file_path`, `symbol_kind` and `symbol_name`, with a single `:` byte between consecutive
/// fields. The result is the first 32 hex characters (128 bits) of the BLAKE3 hash.
/// No byte offsets or line numbers take part in the hash.
pub fn _generate_symbol_id_v2(
    crate_name: &str,
    file_path: &str,
    enclosing_items: &[String],
    symbol_kind: &str,
    symbol_name: &str,
) -> String {
    let scope = enclosing_items.join("::");
    let ordered_fields = [
        crate_name,
        scope.as_str(),
        file_path,
        symbol_kind,
        symbol_name,
    ];

    let mut hasher = Hasher::new();
    for (position, field) in ordered_fields.iter().enumerate() {
        // Separator goes between fields only, never before the first one.
        if position > 0 {
            hasher.update(b":");
        }
        hasher.update(field.as_bytes());
    }

    let full_hex = hasher.finalize().to_hex().to_string();
    // Truncate the hex rendering to its first 32 characters.
    full_hex[..32].to_owned()
}
/// Derives a short span identifier from a file path and a byte range.
///
/// SHA-256 is fed `file_path`, then `byte_start` and `byte_end` as big-endian integers,
/// with a single `:` byte between consecutive parts. The first 8 bytes of the digest are
/// returned as 16 lowercase hexadecimal characters.
///
/// The offsets are widened to `u64` before hashing. `usize::to_be_bytes` produces a
/// target-dependent number of bytes (4 on 32-bit, 8 on 64-bit), which would make the same
/// span hash to different identifiers on different targets. With the fixed 8-byte encoding
/// the output on 64-bit targets is unchanged and all targets agree.
fn generate_span_id(file_path: &str, byte_start: usize, byte_end: usize) -> String {
    // Lossless widening: `usize` is at most 64 bits on every supported target.
    let start_bytes = (byte_start as u64).to_be_bytes();
    let end_bytes = (byte_end as u64).to_be_bytes();

    let mut hasher = Sha256::new();
    hasher.update(file_path.as_bytes());
    hasher.update(b":");
    hasher.update(start_bytes);
    hasher.update(b":");
    hasher.update(end_bytes);
    let digest = hasher.finalize();

    // Keep only the leading 8 bytes -> 16 hex characters.
    digest
        .iter()
        .take(8)
        .map(|byte| format!("{:02x}", byte))
        .collect::<String>()
}
/// Symbol-related graph operations, bundling a graph backend with a symbol lookup.
pub struct SymbolOps {
/// Graph backend handle; the methods on this type use it to insert nodes and edges,
/// query neighbors, and delete entities.
pub backend: Arc<dyn GraphBackend>,
/// Symbol lookup that is updated alongside the backend when symbol nodes are inserted
/// or removed.
pub lookup: super::symbol_lookup::SymbolLookup,
}
impl SymbolOps {
    /// Inserts a `Symbol` node for `fact` into the backend and registers it in the lookup.
    ///
    /// The symbol id is derived from the detected language of `fact.file_path`
    /// (`"unknown"` when detection yields nothing), the fact's FQN (empty string when
    /// absent) and a span id computed from the file path and byte range.
    ///
    /// The node's display name is the fact's name, or `<Kind at byte_start>` when the
    /// fact has no name.
    ///
    /// # Errors
    ///
    /// Returns an error if the symbol payload cannot be serialized to JSON or if the
    /// backend rejects the node insertion. The lookup is only updated after the backend
    /// insertion has succeeded.
    pub fn insert_symbol_node(&mut self, fact: &SymbolFact) -> Result<NodeId> {
        let language = detect_language(&fact.file_path)
            .map(|l| l.as_str().to_string())
            .unwrap_or_else(|| "unknown".to_string());

        let file_path_str = fact.file_path.to_string_lossy();
        let span_id = generate_span_id(&file_path_str, fact.byte_start, fact.byte_end);
        let fqn_for_id = fact.fqn.as_deref().unwrap_or("");
        let symbol_id = generate_symbol_id(&language, fqn_for_id, &span_id);

        let symbol_node = SymbolNode {
            // One copy is stored in the node payload; the original goes to the lookup below.
            symbol_id: Some(symbol_id.clone()),
            fqn: fact.fqn.clone(),
            canonical_fqn: fact.canonical_fqn.clone(),
            display_fqn: fact.display_fqn.clone(),
            name: fact.name.clone(),
            kind: format!("{:?}", fact.kind),
            kind_normalized: Some(fact.kind_normalized.clone()),
            byte_start: fact.byte_start,
            byte_end: fact.byte_end,
            start_line: fact.start_line,
            start_col: fact.start_col,
            end_line: fact.end_line,
            end_col: fact.end_col,
        };

        // Unnamed symbols get a synthetic placeholder name built from kind and offset.
        let name = fact
            .name
            .clone()
            .unwrap_or_else(|| format!("<{:?} at {}>", fact.kind, fact.byte_start));

        let node_spec = NodeSpec {
            kind: "Symbol".to_string(),
            name,
            file_path: Some(file_path_str.to_string()),
            data: serde_json::to_value(symbol_node)?,
        };

        let id = self.backend.insert_node(node_spec)?;
        let node_id = NodeId::from(id);
        self.lookup
            .insert_with_symbol_id(id, &file_path_str, fact, symbol_id);
        Ok(node_id)
    }

    /// Inserts a `DEFINES` edge from the file node `file_id` to the symbol node `symbol_id`.
    ///
    /// The edge carries an empty JSON object as its payload.
    ///
    /// # Errors
    ///
    /// Returns an error if the backend rejects the edge insertion.
    pub fn insert_defines_edge(&self, file_id: NodeId, symbol_id: NodeId) -> Result<()> {
        let edge_spec = EdgeSpec {
            from: file_id.as_i64(),
            to: symbol_id.as_i64(),
            edge_type: "DEFINES".to_string(),
            data: serde_json::json!({}),
        };
        self.backend.insert_edge(edge_spec)?;
        Ok(())
    }

    /// Deletes every entity reachable from `file_id` over an outgoing `DEFINES` edge and
    /// drops the matching lookup entries.
    ///
    /// # Errors
    ///
    /// Returns an error if the neighbor query or any entity deletion fails. Each lookup
    /// entry is removed immediately after its entity has been deleted, so if a deletion
    /// fails partway through, the lookup holds no entries for entities that are already
    /// gone from the backend.
    pub fn delete_file_symbols(&mut self, file_id: NodeId) -> Result<()> {
        let symbol_node_ids = self.backend.neighbors(
            SnapshotId::current(),
            file_id.as_i64(),
            NeighborQuery {
                direction: BackendDirection::Outgoing,
                edge_type: Some("DEFINES".to_string()),
            },
        )?;

        for symbol_node_id in symbol_node_ids {
            self.backend.delete_entity(symbol_node_id)?;
            // Keep the lookup in step with the backend even if a later delete errors out.
            self.lookup.remove(symbol_node_id);
        }
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::schema::SymbolNode;
    use sqlitegraph::GraphBackend;

    /// Span id literal shared by the `generate_symbol_id` tests.
    const SPAN: &str = "a1b2c3d4e5f6g7h8";

    /// Returns `true` when every character of `text` is an ASCII hex digit.
    fn is_hex(text: &str) -> bool {
        text.chars().all(|c| c.is_ascii_hexdigit())
    }

    // ---- generate_symbol_id ------------------------------------------------

    #[test]
    fn test_symbol_id_deterministic() {
        let first = generate_symbol_id("rust", "my_crate::main", SPAN);
        let second = generate_symbol_id("rust", "my_crate::main", SPAN);
        assert_eq!(first, second, "Same inputs should produce same symbol ID");
    }

    #[test]
    fn test_symbol_id_different_languages() {
        let from_rust = generate_symbol_id("rust", "my_crate::main", SPAN);
        let from_python = generate_symbol_id("python", "my_module.main", SPAN);
        assert_ne!(
            from_rust, from_python,
            "Different languages should produce different symbol IDs"
        );
    }

    #[test]
    fn test_symbol_id_different_fqn() {
        let for_main = generate_symbol_id("rust", "my_crate::main", SPAN);
        let for_foo = generate_symbol_id("rust", "my_crate::foo", SPAN);
        assert_ne!(
            for_main, for_foo,
            "Different FQNs should produce different symbol IDs"
        );
    }

    #[test]
    fn test_symbol_id_different_span() {
        let with_span_a = generate_symbol_id("rust", "my_crate::main", SPAN);
        let with_span_b = generate_symbol_id("rust", "my_crate::main", "b1b2c3d4e5f6g7h8");
        assert_ne!(
            with_span_a, with_span_b,
            "Different span IDs should produce different symbol IDs"
        );
    }

    #[test]
    fn test_symbol_id_format() {
        let symbol_id = generate_symbol_id("rust", "my_crate::main", SPAN);
        assert_eq!(symbol_id.len(), 16, "Symbol ID should be 16 characters");
        assert!(is_hex(&symbol_id), "Symbol ID should be hex");
    }

    // ---- generate_span_id --------------------------------------------------

    #[test]
    fn test_span_id_deterministic() {
        let first = generate_span_id("src/main.rs", 10, 20);
        let second = generate_span_id("src/main.rs", 10, 20);
        assert_eq!(first, second, "Same inputs should produce same span ID");
    }

    #[test]
    fn test_span_id_different_files() {
        let in_src = generate_span_id("src/main.rs", 10, 20);
        let in_lib = generate_span_id("lib/main.rs", 10, 20);
        assert_ne!(
            in_src, in_lib,
            "Different file paths should produce different span IDs"
        );
    }

    #[test]
    fn test_span_id_different_positions() {
        let leading = generate_span_id("test.rs", 0, 10);
        let trailing = generate_span_id("test.rs", 10, 20);
        assert_ne!(
            leading, trailing,
            "Different positions should produce different span IDs"
        );
    }

    #[test]
    fn test_span_id_format() {
        let span_id = generate_span_id("test.rs", 10, 20);
        assert_eq!(span_id.len(), 16, "Span ID should be 16 characters");
        assert!(is_hex(&span_id), "Span ID should be hex");
    }

    // ---- _generate_symbol_id_v2 --------------------------------------------

    #[test]
    fn test_generate_symbol_id_v2_deterministic() {
        let build = || {
            _generate_symbol_id_v2(
                "my_crate",
                "src/lib.rs",
                &["mod my_module".to_string()],
                "Function",
                "my_function",
            )
        };
        assert_eq!(build(), build(), "Same inputs should produce same SymbolId");
    }

    #[test]
    fn test_generate_symbol_id_v2_length() {
        let symbol_id =
            _generate_symbol_id_v2("my_crate", "src/lib.rs", &[], "Function", "my_function");
        assert_eq!(
            symbol_id.len(),
            32,
            "SymbolId should be 32 characters (128 bits)"
        );
        assert!(is_hex(&symbol_id), "Should be hex");
    }

    #[test]
    fn test_generate_symbol_id_v2_different_inputs() {
        let no_scope: &[String] = &[];
        let mod_scope = ["mod".to_string()];

        // Each entry differs from the first one in exactly one argument.
        let ids = [
            _generate_symbol_id_v2("crate_a", "src/lib.rs", no_scope, "Function", "foo"),
            _generate_symbol_id_v2("crate_b", "src/lib.rs", no_scope, "Function", "foo"),
            _generate_symbol_id_v2("crate_a", "src/main.rs", no_scope, "Function", "foo"),
            _generate_symbol_id_v2("crate_a", "src/lib.rs", &mod_scope, "Function", "foo"),
            _generate_symbol_id_v2("crate_a", "src/lib.rs", no_scope, "Method", "foo"),
            _generate_symbol_id_v2("crate_a", "src/lib.rs", no_scope, "Function", "bar"),
        ];

        // Inequality is symmetric, so checking each unordered pair once is sufficient.
        for (index, left) in ids.iter().enumerate() {
            for right in &ids[index + 1..] {
                assert_ne!(left, right, "Different inputs should produce different IDs");
            }
        }
    }

    #[test]
    fn test_generate_symbol_id_v2_no_span_dependency() {
        let enclosing_items = vec!["impl MyStruct".to_string()];
        let build = || {
            _generate_symbol_id_v2(
                "my_crate",
                "src/lib.rs",
                &enclosing_items,
                "Method",
                "my_method",
            )
        };
        assert_eq!(
            build(),
            build(),
            "Span-independent inputs should produce stable ID"
        );
    }

    #[test]
    fn test_generate_symbol_id_v2_field_order() {
        let build =
            || _generate_symbol_id_v2("crate", "file.rs", &["scope".to_string()], "kind", "name");
        assert_eq!(
            build(),
            build(),
            "Alphabetical field order should be deterministic"
        );
    }

    // ---- persistence through the graph -------------------------------------

    #[test]
    fn test_symbol_node_persists_fqn_fields() {
        let temp_dir = tempfile::TempDir::new().unwrap();
        let db_path = temp_dir.path().join("test.db");
        let mut graph = crate::CodeGraph::open(&db_path).unwrap();

        // Index a one-function file so there is exactly the symbol we search for.
        let test_file = temp_dir.path().join("test.rs");
        std::fs::write(&test_file, "fn persist_me() {}\n").unwrap();
        let path_str = test_file.to_string_lossy().to_string();
        let source = std::fs::read(&test_file).unwrap();
        graph.index_file(&path_str, &source).unwrap();

        let entity_ids = graph.files.backend.entity_ids().unwrap();
        let snapshot = SnapshotId::current();

        // Nodes that fail to load or to decode are skipped rather than failing the test.
        let persisted: Vec<SymbolNode> = entity_ids
            .into_iter()
            .filter_map(|entity_id| graph.files.backend.get_node(snapshot, entity_id).ok())
            .filter(|node| node.kind == "Symbol")
            .filter_map(|node| serde_json::from_value::<SymbolNode>(node.data).ok())
            .filter(|symbol| symbol.name.as_deref() == Some("persist_me"))
            .collect();

        for symbol in &persisted {
            let canonical = symbol.canonical_fqn.as_deref().unwrap_or("");
            let display = symbol.display_fqn.as_deref().unwrap_or("");
            assert!(!canonical.is_empty(), "canonical_fqn should be persisted");
            assert!(!display.is_empty(), "display_fqn should be persisted");
            assert!(canonical.contains("persist_me"));
            assert!(display.contains("persist_me"));
        }

        let found = !persisted.is_empty();
        assert!(found, "Expected to find symbol node for persist_me");
    }
}