use crate::graph::pdg::{
Edge as PDGEdge, EdgeMetadata as PDGEdgeMetadata, EdgeType as PDGEdgeType, Node as PDGNode,
NodeId, NodeType as PDGNodeType, ProgramDependenceGraph,
};
use crate::storage::edges::{EdgeMetadata as StorageEdgeMetadata, EdgeType as StorageEdgeType};
use crate::storage::nodes::{NodeRecord, NodeType as StorageNodeType};
use crate::storage::schema::Storage;
use rusqlite::{params, Result as SqliteResult};
use std::collections::HashMap;
use std::sync::Arc;
/// One row of `intel_nodes` as read by `load_pdg`.
///
/// The tuple positions follow the column order of the SELECT in `load_pdg`;
/// the trailing comment on each element names the column it carries.
type NodeDbRow = (
i64, // id
String, // file_path
String, // node_id
String, // symbol_name
String, // qualified_name
String, // language
String, // node_type (textual form, parsed with `StorageNodeType::from_str_name`)
Option<i32>, // complexity
String, // content_hash
Option<Vec<u8>>, // embedding
Option<i64>, // byte_range_start
Option<i64>, // byte_range_end
Option<i32>, // embedding_format
);
/// Errors produced while saving a program dependence graph to, or loading it
/// from, SQLite storage.
#[derive(Debug, thiserror::Error)]
pub enum PdgStoreError {
/// An error reported by rusqlite; created automatically by `?` via `From`.
#[error("SQLite error: {0}")]
Sqlite(#[from] rusqlite::Error),
/// While loading, an edge row referenced a database node id that was not
/// among the nodes loaded for the project. Carries that database id.
#[error("Node not found: {0}")]
NodeNotFound(i64),
/// While saving, an edge endpoint had no corresponding saved node.
/// Both fields carry in-memory graph indices, not database ids.
#[error("Edge refers to non-existent node: caller={caller}, callee={callee}")]
EdgeNodeMissing {
caller: i64,
callee: i64,
},
/// Graph data could not be turned into its stored form (missing node or
/// edge payload, or edge metadata that failed JSON encoding).
#[error("Serialization error: {0}")]
Serialization(String),
/// Stored data could not be turned back into graph data (unknown node or
/// edge type name, or edge metadata that failed JSON decoding).
#[error("Deserialization error: {0}")]
Deserialization(String),
}
/// Result alias whose error type is [`PdgStoreError`].
pub type Result<T> = std::result::Result<T, PdgStoreError>;
/// Translates an in-memory PDG node kind into the storage-layer node kind
/// of the same name.
fn convert_node_type(node_type: &PDGNodeType) -> StorageNodeType {
    type Stored = StorageNodeType;

    match *node_type {
        PDGNodeType::Module => Stored::Module,
        PDGNodeType::Class => Stored::Class,
        PDGNodeType::Method => Stored::Method,
        PDGNodeType::Function => Stored::Function,
        PDGNodeType::Variable => Stored::Variable,
        PDGNodeType::External => Stored::External,
    }
}
/// Translates a storage-layer node kind back into the in-memory PDG node
/// kind of the same name.
fn convert_storage_node_type(node_type: &StorageNodeType) -> PDGNodeType {
    type InMemory = PDGNodeType;

    match *node_type {
        StorageNodeType::Module => InMemory::Module,
        StorageNodeType::Class => InMemory::Class,
        StorageNodeType::Method => InMemory::Method,
        StorageNodeType::Function => InMemory::Function,
        StorageNodeType::Variable => InMemory::Variable,
        StorageNodeType::External => InMemory::External,
    }
}
/// Translates an in-memory PDG edge kind into the storage-layer edge kind
/// of the same name.
///
/// The mapping is one-to-one so that feeding the result to
/// `convert_storage_edge_type` yields the original edge kind again.
fn convert_edge_type(edge_type: &PDGEdgeType) -> StorageEdgeType {
    match edge_type {
        PDGEdgeType::Call => StorageEdgeType::Call,
        PDGEdgeType::DataDependency => StorageEdgeType::DataDependency,
        PDGEdgeType::Inheritance => StorageEdgeType::Inheritance,
        PDGEdgeType::Import => StorageEdgeType::Import,
        // The storage layer has its own Containment variant. Mapping this to
        // Call would make every containment edge come back as a call edge
        // after a save/load round trip.
        PDGEdgeType::Containment => StorageEdgeType::Containment,
    }
}
/// Translates a storage-layer edge kind back into the in-memory PDG edge
/// kind of the same name.
fn convert_storage_edge_type(edge_type: &StorageEdgeType) -> PDGEdgeType {
    type InMemory = PDGEdgeType;

    match *edge_type {
        StorageEdgeType::Containment => InMemory::Containment,
        StorageEdgeType::Import => InMemory::Import,
        StorageEdgeType::Inheritance => InMemory::Inheritance,
        StorageEdgeType::DataDependency => InMemory::DataDependency,
        StorageEdgeType::Call => InMemory::Call,
    }
}
fn convert_edge_metadata(metadata: &PDGEdgeMetadata) -> StorageEdgeMetadata {
StorageEdgeMetadata {
call_count: metadata.call_count,
variable_name: metadata.variable_name.clone(),
confidence: metadata.confidence,
}
}
fn convert_storage_edge_metadata(metadata: &StorageEdgeMetadata) -> PDGEdgeMetadata {
PDGEdgeMetadata {
call_count: metadata.call_count,
variable_name: metadata.variable_name.clone(),
confidence: metadata.confidence,
}
}
pub fn save_pdg(
storage: &mut Storage,
project_id: &str,
pdg: &ProgramDependenceGraph,
) -> Result<()> {
let tx = storage.conn_mut().transaction()?;
tx.execute(
"DELETE FROM intel_edges WHERE caller_id IN (SELECT id FROM intel_nodes WHERE project_id = ?1)",
params![project_id],
)?;
tx.execute(
"DELETE FROM intel_nodes WHERE project_id = ?1",
params![project_id],
)?;
let mut node_id_map: HashMap<NodeId, i64> = HashMap::new();
for node_idx in pdg.node_indices() {
let pdg_node = pdg
.get_node(node_idx)
.ok_or_else(|| PdgStoreError::Serialization("Missing node data".to_string()))?;
let record = NodeRecord {
id: None,
project_id: project_id.to_string(),
file_path: pdg_node.file_path.to_string(),
node_id: pdg_node.id.clone(),
symbol_name: pdg_node.name.clone(),
qualified_name: pdg_node
.id
.split(':')
.next_back()
.unwrap_or(&pdg_node.id)
.to_string(),
language: pdg_node.language.clone(),
node_type: convert_node_type(&pdg_node.node_type),
signature: None, complexity: Some(pdg_node.complexity as i32),
content_hash: blake3::hash(pdg_node.id.as_bytes()).to_hex().to_string(),
embedding: None, byte_range_start: Some(pdg_node.byte_range.0 as i64),
byte_range_end: Some(pdg_node.byte_range.1 as i64),
embedding_format: Some(0),
};
let db_id: i64 = tx.query_row(
"INSERT INTO intel_nodes (project_id, file_path, node_id, symbol_name, qualified_name, language, node_type, signature, complexity, content_hash, embedding, byte_range_start, byte_range_end, created_at, updated_at, embedding_format)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13, ?14, ?15, ?16)
RETURNING id",
params![
record.project_id,
record.file_path,
record.node_id,
record.symbol_name,
record.qualified_name,
record.language,
record.node_type.as_str(),
record.signature,
record.complexity,
record.content_hash,
record.embedding.as_deref(),
record.byte_range_start,
record.byte_range_end,
chrono::Utc::now().timestamp(),
chrono::Utc::now().timestamp(),
record.embedding_format,
],
|row| row.get(0),
)?;
node_id_map.insert(node_idx, db_id);
}
for edge_idx in pdg.edge_indices() {
let (source, target) = pdg
.edge_endpoints(edge_idx)
.ok_or_else(|| PdgStoreError::Serialization("Edge has no endpoints".to_string()))?;
let pdg_edge = pdg
.get_edge(edge_idx)
.ok_or_else(|| PdgStoreError::Serialization("Missing edge data".to_string()))?;
let caller_id =
*node_id_map
.get(&source)
.ok_or_else(|| PdgStoreError::EdgeNodeMissing {
caller: source.index() as i64,
callee: target.index() as i64,
})?;
let callee_id =
*node_id_map
.get(&target)
.ok_or_else(|| PdgStoreError::EdgeNodeMissing {
caller: source.index() as i64,
callee: target.index() as i64,
})?;
let metadata = convert_edge_metadata(&pdg_edge.metadata);
let metadata_json = serde_json::to_string(&metadata)
.map_err(|e| PdgStoreError::Serialization(e.to_string()))?;
tx.execute(
"INSERT INTO intel_edges (caller_id, callee_id, edge_type, metadata)
VALUES (?1, ?2, ?3, ?4)
ON CONFLICT DO UPDATE SET metadata = excluded.metadata",
params![
caller_id,
callee_id,
convert_edge_type(&pdg_edge.edge_type).as_str(),
metadata_json,
],
)?;
}
tx.commit()?;
Ok(())
}
/// Rebuilds the program dependence graph stored for `project_id`.
///
/// Nodes are read first and added to a fresh graph; edges are then read and
/// attached using a map from database row id to the graph's node id. Only
/// edges whose caller and callee both belong to `project_id` are loaded.
/// A project with no stored nodes yields an empty graph.
///
/// # Errors
///
/// * [`PdgStoreError::Sqlite`] if a query fails.
/// * [`PdgStoreError::Deserialization`] if a stored node type, edge type or
///   edge metadata JSON cannot be parsed.
/// * [`PdgStoreError::NodeNotFound`] if an edge row references a database id
///   that was not loaded as a node.
pub fn load_pdg(storage: &Storage, project_id: &str) -> Result<ProgramDependenceGraph> {
    let mut pdg = ProgramDependenceGraph::new();

    // ORDER BY id makes the order in which nodes are added to the graph (and
    // therefore the node ids the graph hands out) deterministic; without it
    // SQLite is free to return rows in any order. Presumably ids increase in
    // insertion order, in which case this also reproduces the order used by
    // save_pdg; verify against the schema.
    let mut nodes_stmt = storage.conn().prepare(
        "SELECT id, file_path, node_id, symbol_name, qualified_name, language, node_type, complexity, content_hash, embedding, byte_range_start, byte_range_end, embedding_format
         FROM intel_nodes WHERE project_id = ?1
         ORDER BY id",
    )?;
    let node_rows: Vec<NodeDbRow> = nodes_stmt
        .query_map(params![project_id], |row| {
            Ok((
                row.get::<_, i64>(0)?,
                row.get::<_, String>(1)?,
                row.get::<_, String>(2)?,
                row.get::<_, String>(3)?,
                row.get::<_, String>(4)?,
                row.get::<_, String>(5)?,
                row.get::<_, String>(6)?,
                row.get::<_, Option<i32>>(7)?,
                row.get::<_, String>(8)?,
                row.get::<_, Option<Vec<u8>>>(9)?,
                row.get::<_, Option<i64>>(10)?,
                row.get::<_, Option<i64>>(11)?,
                row.get::<_, Option<i32>>(12)?,
            ))
        })?
        .collect::<SqliteResult<Vec<_>>>()?;

    // Database row id -> node id assigned by the freshly built graph.
    let mut db_id_to_node_id: HashMap<i64, NodeId> = HashMap::with_capacity(node_rows.len());

    for (
        db_id,
        file_path,
        node_id_str,
        symbol_name,
        _qualified_name,
        language,
        node_type_str,
        complexity,
        _content_hash,
        _embedding_blob,
        start,
        end,
        _embedding_format,
    ) in node_rows
    {
        let node_type = StorageNodeType::from_str_name(&node_type_str).ok_or_else(|| {
            PdgStoreError::Deserialization(format!("Invalid node type: {}", node_type_str))
        })?;

        let pdg_node = PDGNode {
            id: node_id_str,
            node_type: convert_storage_node_type(&node_type),
            name: symbol_name,
            file_path: Arc::from(file_path),
            // NULL columns fall back to 0. The casts mirror the ones used by
            // save_pdg so stored values round-trip unchanged.
            byte_range: (start.unwrap_or(0) as usize, end.unwrap_or(0) as usize),
            complexity: complexity.unwrap_or(0) as u32,
            language,
        };

        let node_id = pdg.add_node(pdg_node);
        db_id_to_node_id.insert(db_id, node_id);
    }

    // Joining both endpoints against intel_nodes restricts the result to
    // edges that start and end inside this project.
    let mut edges_stmt = storage.conn().prepare(
        "SELECT e.caller_id, e.callee_id, e.edge_type, e.metadata
         FROM intel_edges e
         INNER JOIN intel_nodes n1 ON e.caller_id = n1.id
         INNER JOIN intel_nodes n2 ON e.callee_id = n2.id
         WHERE n1.project_id = ?1 AND n2.project_id = ?1",
    )?;
    let edge_rows: Vec<(i64, i64, String, Option<String>)> = edges_stmt
        .query_map(params![project_id], |row| {
            Ok((
                row.get::<_, i64>(0)?,
                row.get::<_, i64>(1)?,
                row.get::<_, String>(2)?,
                row.get::<_, Option<String>>(3)?,
            ))
        })?
        .collect::<SqliteResult<Vec<_>>>()?;

    for (caller_id, callee_id, edge_type_str, metadata_json) in edge_rows {
        let caller_node_id = *db_id_to_node_id
            .get(&caller_id)
            .ok_or_else(|| PdgStoreError::NodeNotFound(caller_id))?;
        let callee_node_id = *db_id_to_node_id
            .get(&callee_id)
            .ok_or_else(|| PdgStoreError::NodeNotFound(callee_id))?;

        let edge_type = StorageEdgeType::from_str_name(&edge_type_str).ok_or_else(|| {
            PdgStoreError::Deserialization(format!("Invalid edge type: {}", edge_type_str))
        })?;

        // A NULL metadata column is treated as metadata with every field unset.
        let metadata = match metadata_json.as_deref() {
            Some(json) => serde_json::from_str(json).map_err(|e| {
                PdgStoreError::Deserialization(format!("Invalid edge metadata: {}", e))
            })?,
            None => StorageEdgeMetadata {
                call_count: None,
                variable_name: None,
                confidence: None,
            },
        };

        let pdg_edge = PDGEdge {
            edge_type: convert_storage_edge_type(&edge_type),
            metadata: convert_storage_edge_metadata(&metadata),
        };
        pdg.add_edge(caller_node_id, callee_node_id, pdg_edge);
    }

    Ok(pdg)
}
/// Reports whether at least one node is stored for `project_id`.
///
/// # Errors
///
/// Returns the rusqlite error if the query fails.
pub fn pdg_exists(storage: &Storage, project_id: &str) -> SqliteResult<bool> {
    // EXISTS lets SQLite stop at the first matching row instead of counting
    // every node of the project just to compare the total with zero.
    let found: i64 = storage.conn().query_row(
        "SELECT EXISTS(SELECT 1 FROM intel_nodes WHERE project_id = ?1)",
        params![project_id],
        |row| row.get(0),
    )?;
    Ok(found != 0)
}
/// Removes everything stored for `project_id`: its edges, its nodes and its
/// indexed-file records.
///
/// The three deletes run in a single transaction, so a failure part-way
/// through leaves the stored project untouched rather than half-deleted.
///
/// # Errors
///
/// Returns the rusqlite error if the transaction cannot be started, a
/// statement fails, or the commit fails.
pub fn delete_pdg(storage: &mut Storage, project_id: &str) -> SqliteResult<()> {
    let tx = storage.conn_mut().transaction()?;

    // Edges must go first because they are selected through the node rows
    // deleted next. Both endpoints are checked so no edge is left pointing at
    // a node that is about to disappear.
    tx.execute(
        "DELETE FROM intel_edges WHERE
         caller_id IN (SELECT id FROM intel_nodes WHERE project_id = ?1) OR
         callee_id IN (SELECT id FROM intel_nodes WHERE project_id = ?1)",
        params![project_id],
    )?;
    tx.execute(
        "DELETE FROM intel_nodes WHERE project_id = ?1",
        params![project_id],
    )?;
    tx.execute(
        "DELETE FROM indexed_files WHERE project_id = ?1",
        params![project_id],
    )?;

    tx.commit()
}
/// Removes everything stored for one file of a project: edges that touch the
/// file's nodes from either side, the nodes themselves, and the file's
/// indexed-file record.
///
/// The three deletes run in a single transaction, so a failure part-way
/// through leaves the stored data for the file untouched rather than
/// half-deleted.
///
/// # Errors
///
/// Returns the rusqlite error if the transaction cannot be started, a
/// statement fails, or the commit fails.
pub fn delete_file_data(
    storage: &mut Storage,
    project_id: &str,
    file_path: &str,
) -> SqliteResult<()> {
    let tx = storage.conn_mut().transaction()?;

    // Edges must go first because they are selected through the node rows
    // deleted next.
    tx.execute(
        "DELETE FROM intel_edges WHERE
         caller_id IN (SELECT id FROM intel_nodes WHERE project_id = ?1 AND file_path = ?2) OR
         callee_id IN (SELECT id FROM intel_nodes WHERE project_id = ?1 AND file_path = ?2)",
        params![project_id, file_path],
    )?;
    tx.execute(
        "DELETE FROM intel_nodes WHERE project_id = ?1 AND file_path = ?2",
        params![project_id, file_path],
    )?;
    tx.execute(
        "DELETE FROM indexed_files WHERE project_id = ?1 AND file_path = ?2",
        params![project_id, file_path],
    )?;

    tx.commit()
}
/// Returns the indexed files recorded for `project_id` as a map from file
/// path to stored file hash.
///
/// # Errors
///
/// Returns the rusqlite error if the query cannot be prepared or a row
/// cannot be read.
pub fn get_indexed_files(
    storage: &Storage,
    project_id: &str,
) -> SqliteResult<HashMap<String, String>> {
    let mut select_files = storage
        .conn()
        .prepare("SELECT file_path, file_hash FROM indexed_files WHERE project_id = ?1")?;

    let path_hash_pairs = select_files.query_map(params![project_id], |row| {
        let path: String = row.get(0)?;
        let hash: String = row.get(1)?;
        Ok((path, hash))
    })?;

    // Collecting into a Result stops at the first row error, exactly like a
    // manual loop using `?` on each row.
    let files_by_path: SqliteResult<HashMap<String, String>> = path_hash_pairs.collect();
    files_by_path
}
/// Reports whether any indexed-file record exists for `project_id`.
///
/// This is a best-effort check: if the query fails for any reason the
/// function answers `false` instead of surfacing the error.
pub fn has_indexed_files(storage: &Storage, project_id: &str) -> bool {
    // `LIMIT 1` on a COUNT(*) aggregate only limits the single result row and
    // does not stop the scan; EXISTS actually stops at the first match.
    storage
        .conn()
        .query_row(
            "SELECT EXISTS(SELECT 1 FROM indexed_files WHERE project_id = ?1)",
            params![project_id],
            |row| row.get::<_, i64>(0),
        )
        .map(|found| found != 0)
        .unwrap_or(false)
}
pub fn update_indexed_file(
storage: &mut Storage,
project_id: &str,
file_path: &str,
hash: &str,
) -> SqliteResult<()> {
storage.conn().execute(
"INSERT INTO indexed_files (file_path, project_id, file_hash, last_indexed)
VALUES (?1, ?2, ?3, ?4)
ON CONFLICT(file_path) DO UPDATE SET file_hash = ?3, last_indexed = ?4",
params![file_path, project_id, hash, chrono::Utc::now().timestamp()],
)?;
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::storage::schema::Storage;
    use tempfile::NamedTempFile;

    /// Opens a storage backed by a fresh temporary file.
    ///
    /// The temp file handle is returned alongside the storage so the caller
    /// keeps it alive for as long as the storage is in use.
    fn open_temp_storage() -> (NamedTempFile, Storage) {
        let backing_file = NamedTempFile::new().unwrap();
        let storage = Storage::open(backing_file.path()).unwrap();
        (backing_file, storage)
    }

    /// Builds a Rust-language node with the given attributes.
    fn make_node(
        id: &str,
        name: &str,
        node_type: PDGNodeType,
        file: &str,
        byte_range: (usize, usize),
        complexity: u32,
    ) -> PDGNode {
        PDGNode {
            id: id.to_string(),
            node_type,
            name: name.to_string(),
            file_path: Arc::from(file),
            byte_range,
            complexity,
            language: "rust".to_string(),
        }
    }

    /// Builds an edge of `edge_type` carrying the given optional metadata
    /// and no confidence value.
    fn make_edge(
        edge_type: PDGEdgeType,
        call_count: Option<usize>,
        variable_name: Option<&str>,
    ) -> PDGEdge {
        PDGEdge {
            edge_type,
            metadata: PDGEdgeMetadata {
                call_count: call_count.map(|count| count as _),
                variable_name: variable_name.map(str::to_string),
                confidence: None,
            },
        }
    }

    /// Two functions in `test.rs` connected by a single call edge.
    fn create_test_pdg() -> ProgramDependenceGraph {
        let mut graph = ProgramDependenceGraph::new();
        let caller = graph.add_node(make_node(
            "func1",
            "func1",
            PDGNodeType::Function,
            "test.rs",
            (0, 100),
            5,
        ));
        let callee = graph.add_node(make_node(
            "func2",
            "func2",
            PDGNodeType::Function,
            "test.rs",
            (100, 200),
            3,
        ));
        graph.add_edge(caller, callee, make_edge(PDGEdgeType::Call, Some(5), None));
        graph
    }

    #[test]
    fn test_save_and_load_pdg() {
        let (_temp_file, mut storage) = open_temp_storage();
        let original = create_test_pdg();

        save_pdg(&mut storage, "test_project", &original).unwrap();
        assert!(pdg_exists(&storage, "test_project").unwrap());

        let loaded = load_pdg(&storage, "test_project").unwrap();
        assert_eq!(loaded.node_count(), 2);
        assert_eq!(loaded.edge_count(), 1);

        let func1 = loaded.find_by_symbol("func1").unwrap();
        let node1 = loaded.get_node(func1).unwrap();
        assert_eq!(node1.complexity, 5);
    }

    #[test]
    fn test_save_pdg_replaces_existing() {
        let (_temp_file, mut storage) = open_temp_storage();

        let first = create_test_pdg();
        save_pdg(&mut storage, "test_project", &first).unwrap();
        assert_eq!(load_pdg(&storage, "test_project").unwrap().node_count(), 2);

        // Saving a second, smaller graph must replace the first one entirely.
        let mut second = ProgramDependenceGraph::new();
        second.add_node(make_node(
            "new_func",
            "new_func",
            PDGNodeType::Function,
            "new.rs",
            (0, 50),
            1,
        ));
        save_pdg(&mut storage, "test_project", &second).unwrap();
        assert_eq!(load_pdg(&storage, "test_project").unwrap().node_count(), 1);
    }

    #[test]
    fn test_load_nonexistent_project() {
        let (_temp_file, storage) = open_temp_storage();

        let loaded = load_pdg(&storage, "nonexistent").unwrap();
        assert_eq!(loaded.node_count(), 0);
        assert_eq!(loaded.edge_count(), 0);
    }

    #[test]
    fn test_delete_pdg() {
        let (_temp_file, mut storage) = open_temp_storage();
        let graph = create_test_pdg();

        save_pdg(&mut storage, "test_project", &graph).unwrap();
        assert!(pdg_exists(&storage, "test_project").unwrap());

        delete_pdg(&mut storage, "test_project").unwrap();
        assert!(!pdg_exists(&storage, "test_project").unwrap());
    }

    #[test]
    fn test_convert_node_types() {
        // Each pair must convert to its partner in both directions.
        let pairs = [
            (PDGNodeType::Function, StorageNodeType::Function),
            (PDGNodeType::Class, StorageNodeType::Class),
            (PDGNodeType::Method, StorageNodeType::Method),
            (PDGNodeType::Variable, StorageNodeType::Variable),
            (PDGNodeType::Module, StorageNodeType::Module),
            (PDGNodeType::External, StorageNodeType::External),
        ];
        for (pdg_type, storage_type) in pairs.iter() {
            assert_eq!(&convert_node_type(pdg_type), storage_type);
            assert_eq!(&convert_storage_node_type(storage_type), pdg_type);
        }
    }

    #[test]
    fn test_convert_edge_types() {
        // Each pair must convert to its partner in both directions.
        let pairs = [
            (PDGEdgeType::Call, StorageEdgeType::Call),
            (PDGEdgeType::DataDependency, StorageEdgeType::DataDependency),
            (PDGEdgeType::Inheritance, StorageEdgeType::Inheritance),
            (PDGEdgeType::Import, StorageEdgeType::Import),
        ];
        for (pdg_type, storage_type) in pairs.iter() {
            assert_eq!(&convert_edge_type(pdg_type), storage_type);
            assert_eq!(&convert_storage_edge_type(storage_type), pdg_type);
        }
    }

    #[test]
    fn test_edge_metadata_conversion() {
        let pdg_meta = PDGEdgeMetadata {
            call_count: Some(42),
            variable_name: Some("x".to_string()),
            confidence: None,
        };

        let stored = convert_edge_metadata(&pdg_meta);
        assert_eq!(stored.call_count, Some(42));
        assert_eq!(stored.variable_name, Some("x".to_string()));

        let round_tripped = convert_storage_edge_metadata(&stored);
        assert_eq!(round_tripped.call_count, Some(42));
        assert_eq!(round_tripped.variable_name, Some("x".to_string()));
    }

    #[test]
    fn test_save_pdg_with_all_edge_types() {
        let (_temp_file, mut storage) = open_temp_storage();

        let mut graph = ProgramDependenceGraph::new();
        let child = graph.add_node(make_node(
            "child",
            "Child",
            PDGNodeType::Class,
            "test.rs",
            (0, 50),
            1,
        ));
        let parent = graph.add_node(make_node(
            "parent",
            "Parent",
            PDGNodeType::Class,
            "test.rs",
            (50, 100),
            1,
        ));
        let data_user = graph.add_node(make_node(
            "data_user",
            "data_user",
            PDGNodeType::Function,
            "test.rs",
            (100, 150),
            1,
        ));
        graph.add_edge(
            child,
            parent,
            make_edge(PDGEdgeType::Inheritance, None, None),
        );
        graph.add_edge(
            data_user,
            child,
            make_edge(PDGEdgeType::DataDependency, None, Some("child_instance")),
        );

        save_pdg(&mut storage, "test_project", &graph).unwrap();
        let loaded = load_pdg(&storage, "test_project").unwrap();
        assert_eq!(loaded.node_count(), 3);
        assert_eq!(loaded.edge_count(), 2);

        let child_id = loaded.find_by_symbol("child").unwrap();
        let parent_id = loaded.find_by_symbol("parent").unwrap();
        let data_user_id = loaded.find_by_symbol("data_user").unwrap();

        let child_neighbors = loaded.neighbors(child_id);
        assert!(child_neighbors.contains(&parent_id));

        let data_user_neighbors = loaded.neighbors(data_user_id);
        assert!(data_user_neighbors.contains(&child_id));
    }
}