pub mod algorithms;
pub mod ambiguity;
pub mod backend;
#[cfg(feature = "external-tools-cfg")]
pub mod external_tools;
mod ast_extractor;
mod ast_node;
mod ast_ops;
#[cfg(feature = "geometric-backend")]
pub mod geo_index;
#[cfg(feature = "geometric-backend")]
pub mod geometric_backend;
#[cfg(feature = "geometric-backend")]
pub mod geometric_calls;
#[cfg(feature = "bytecode-cfg")]
mod bytecode_cfg;
mod cache;
mod call_ops;
mod calls;
pub mod canonical_fqn;
pub mod cfg_edges_extract;
mod cfg_extractor;
mod cfg_ops;
mod count;
pub mod crate_name;
pub mod db_compat;
pub mod execution_log;
pub mod export;
mod files;
pub mod filter;
mod freshness;
mod imports; pub mod metrics;
mod module_resolver; mod ops;
pub mod query;
mod references;
pub mod scan;
pub mod schema;
pub mod side_tables;
mod symbol_index;
mod symbol_lookup;
mod symbols;
pub mod validation;
pub use ops::{index_file, DeleteResult, ReconcileOutcome};
pub use metrics::BackfillResult;
pub use ops::test_helpers;
pub use symbols::generate_symbol_id;
#[cfg(test)]
mod ast_tests;
#[cfg(test)]
mod tests;
use anyhow::Result;
use sqlitegraph::GraphBackend;
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use crate::generation::{ChunkStore, CodeChunk};
use crate::references::{CallFact, ReferenceFact};
pub use algorithms::{
CondensationGraph, CondensationResult, Cycle, CycleKind, CycleReport, DeadSymbol,
ExecutionPath, PathEnumerationResult, PathStatistics, ProgramSlice, SliceDirection,
SliceResult, SliceStatistics, Supernode, SymbolInfo,
};
pub use ast_extractor::{extract_ast_nodes, language_from_path, normalize_node_kind};
pub use ast_node::{is_structural_kind, AstNode, AstNodeWithText};
#[deprecated(since = "10.0.0", note = "Use cfg_edges_extract instead")]
pub use cfg_extractor::{BlockKind, CfgExtractor, TerminatorKind};
pub use cfg_ops::CfgOps;
#[cfg(feature = "bytecode-cfg")]
pub use bytecode_cfg::JavaBytecodeCfgExtractor;
pub use cache::CacheStats;
pub use db_compat::MAGELLAN_SCHEMA_VERSION;
pub use db_compat::{ensure_ast_schema, ensure_cfg_schema, ensure_coverage_schema, CFG_EDGE};
pub use execution_log::ExecutionLog;
pub use export::{ExportConfig, ExportFormat};
pub use freshness::{check_freshness, FreshnessStatus, STALE_THRESHOLD_SECS};
pub use metrics::MetricsOps;
pub use schema::{CallNode, CfgBlock, CfgEdge, CrossFileRef, FileNode, ReferenceNode, SymbolNode};
#[cfg(feature = "geometric-backend")]
pub use geometric_backend::GeometricBackend;
/// Aggregate entity counts for a code graph, as returned by `CodeGraph::get_stats`.
///
/// `Default`, `PartialEq` and `Eq` are derived so that snapshots can be
/// zero-initialised and compared directly (for example in assertions).
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct GraphStats {
    /// Number of symbols in the graph.
    pub symbol_count: usize,
    /// Number of files in the graph.
    pub file_count: usize,
    /// Number of CFG blocks in the graph.
    pub cfg_block_count: usize,
}
/// Callback type accepted by the scanning and backfill entry points to report progress.
///
/// The callback must be `Send + Sync`.
/// NOTE(review): the arguments are presumably `(current, total, path)` —
/// confirm against the `scan` module, which is what actually invokes it.
pub type ScanProgress = dyn Fn(usize, usize, &str) + Send + Sync;
/// Returns `true` only when `path` is exactly the SQLite in-memory
/// database name `:memory:`.
fn is_memory_db(path: &Path) -> bool {
    // `to_str` yields `None` for non-UTF-8 paths, which can never equal the
    // ASCII literal, so this matches an exact `OsStr` comparison.
    matches!(path.to_str(), Some(":memory:"))
}
/// Facade over the graph database: bundles the per-concern operation
/// structs (files, symbols, references, calls, imports, ...) together with
/// the side stores that `CodeGraph::open` opens against the same database path.
pub struct CodeGraph {
/// File node operations; also holds the backend handle returned by `backend()`.
files: files::FileOps,
/// Symbol node operations plus the in-memory symbol lookup index.
symbols: symbols::SymbolOps,
/// Reference node operations.
references: references::ReferenceOps,
/// Call node operations.
calls: call_ops::CallOps,
/// Import operations.
imports: imports::ImportOps,
/// Module resolver; `open` roots it at the parent directory of the database path.
module_resolver: module_resolver::ModuleResolver,
/// Code chunk store; `open` builds it on its own `rusqlite` connection.
chunks: ChunkStore,
/// Execution log store.
execution_log: execution_log::ExecutionLog,
/// Metrics store, also used for metrics backfill.
metrics: metrics::MetricsOps,
/// Cache of file nodes keyed by path string; `open` constructs it with `128`
/// (presumably the capacity — confirm in `cache`).
file_node_cache: cache::FileNodeCache,
/// CFG operations; `open` builds this around a separate `ChunkStore::new` on the same path.
pub cfg_ops: cfg_ops::CfgOps,
/// Side-table storage (e.g. entity labels) behind a trait object.
side_tables: Arc<dyn side_tables::SideTables>,
/// Path the graph was opened with; reused to open ad-hoc connections.
db_path: PathBuf,
}
impl CodeGraph {
/// Opens the code graph backed by the SQLite database at `db_path`.
///
/// The steps run in this order:
/// 1. `db_compat::preflight_sqlitegraph_compat` is run against the path.
/// 2. The `sqlitegraph` SQLite backend is opened. The `sqlite-backend`
///    feature is mandatory; the build fails without it.
/// 3. A short-lived connection issues the `journal_mode`, `synchronous`,
///    `cache_size` and `temp_store` PRAGMAs.
/// 4. The in-memory file index is rebuilt from the backend.
/// 5. `db_compat::ensure_magellan_meta` runs, then the side tables, chunk
///    store, execution log, metrics, AST, CFG and coverage schemas are
///    ensured.
/// 6. The module index and the symbol lookup index are built (best effort).
/// 7. If `file_metrics` has no rows but symbols exist, metrics are
///    backfilled (best effort).
///
/// # Errors
///
/// Returns an error if the preflight check, opening the backend or any
/// helper connection, any PRAGMA, the file index rebuild, or any
/// schema-ensuring step fails. Failures in steps 6 and 7 never fail the
/// call; symbol-index and backfill failures are reported on stderr.
pub fn open<P: AsRef<Path>>(db_path: P) -> Result<Self> {
    let db_path_buf = db_path.as_ref().to_path_buf();

    // Check compatibility before anything else touches the database.
    db_compat::preflight_sqlitegraph_compat(&db_path_buf)
        .map_err(|e| anyhow::anyhow!(e.to_string()))?;

    #[cfg(feature = "sqlite-backend")]
    let backend: Arc<dyn GraphBackend> = {
        use sqlitegraph::{SqliteGraph, SqliteGraphBackend};
        let sqlite_graph = SqliteGraph::open(&db_path_buf)?;
        eprintln!("Using SQLite backend: {:?}", db_path_buf);
        Arc::new(SqliteGraphBackend::from_graph(sqlite_graph))
    };
    #[cfg(not(feature = "sqlite-backend"))]
    compile_error!("'sqlite-backend' feature must be enabled");

    #[cfg(feature = "sqlite-backend")]
    {
        // NOTE(review): per the SQLite PRAGMA documentation, WAL journal mode
        // is persistent in the database file, whereas `synchronous`,
        // `cache_size` and `temp_store` apply to the connection that sets
        // them. `pragma_conn` is dropped at the end of this block, so those
        // three are not expected to affect the backend's own connection —
        // confirm whether that is intended.
        let pragma_conn = rusqlite::Connection::open(&db_path_buf).map_err(|e| {
            anyhow::anyhow!("Failed to open connection for PRAGMA config: {}", e)
        })?;
        // `PRAGMA journal_mode = ...` returns the resulting mode as a row.
        let journal_mode = pragma_conn
            .query_row("PRAGMA journal_mode = WAL", [], |row| {
                let mode: String = row.get(0)?;
                Ok(mode)
            })
            .map_err(|e| anyhow::anyhow!("Failed to set WAL mode: {}", e))?;
        // The resulting mode is only checked (debug builds) for on-disk databases.
        if !is_memory_db(&db_path_buf) {
            debug_assert_eq!(journal_mode, "wal", "WAL mode should be enabled");
        }
        pragma_conn
            .execute("PRAGMA synchronous = NORMAL", [])
            .map_err(|e| anyhow::anyhow!("Failed to set synchronous: {}", e))?;
        pragma_conn
            .execute("PRAGMA cache_size = -64000", [])
            .map_err(|e| anyhow::anyhow!("Failed to set cache_size: {}", e))?;
        pragma_conn
            .execute("PRAGMA temp_store = MEMORY", [])
            .map_err(|e| anyhow::anyhow!("Failed to set temp_store: {}", e))?;
    }

    // The file index starts empty and is populated from the backend.
    let file_index = HashMap::new();
    let mut files = files::FileOps {
        backend: Arc::clone(&backend),
        file_index,
    };
    files.rebuild_file_index()?;

    // NOTE(review): every store below opens its own connection from the
    // path. For `:memory:` each `Connection::open` would presumably yield an
    // independent database — confirm that in-memory use is supported here.
    let (side_tables, chunks, execution_log, metrics, needs_backfill) = {
        db_compat::ensure_magellan_meta(&db_path_buf)
            .map_err(|e| anyhow::anyhow!(e.to_string()))?;

        let side_tables: Arc<dyn side_tables::SideTables> = Arc::new(
            side_tables::sqlite_impl::SqliteSideTables::open(&db_path_buf)?,
        );

        let shared_conn = rusqlite::Connection::open(&db_path_buf).map_err(|e| {
            anyhow::anyhow!("Failed to open shared connection for ChunkStore: {}", e)
        })?;
        let chunks = ChunkStore::with_connection(shared_conn);
        chunks.ensure_schema()?;

        let execution_log = execution_log::ExecutionLog::new(&db_path_buf);
        execution_log.ensure_schema()?;

        let metrics = metrics::MetricsOps::new(&db_path_buf);
        metrics.ensure_schema()?;

        // Each schema helper gets a scoped connection that is closed as
        // soon as the helper returns.
        {
            let ast_conn = rusqlite::Connection::open(&db_path_buf).map_err(|e| {
                anyhow::anyhow!("Failed to open connection for AST schema: {}", e)
            })?;
            db_compat::ensure_ast_schema(&ast_conn)
                .map_err(|e| anyhow::anyhow!(e.to_string()))?;
        }
        {
            let cfg_conn = rusqlite::Connection::open(&db_path_buf).map_err(|e| {
                anyhow::anyhow!("Failed to open connection for CFG schema: {}", e)
            })?;
            db_compat::ensure_cfg_schema(&cfg_conn)
                .map_err(|e| anyhow::anyhow!(e.to_string()))?;
        }
        {
            let cov_conn = rusqlite::Connection::open(&db_path_buf).map_err(|e| {
                anyhow::anyhow!("Failed to open connection for coverage schema: {}", e)
            })?;
            db_compat::ensure_coverage_schema(&cov_conn, &db_path_buf)
                .map_err(|e| anyhow::anyhow!(e.to_string()))?;
        }

        // Backfill is needed when symbols exist but no file metrics rows do.
        // A failing count query is treated as a count of zero.
        let needs_backfill = {
            let check_conn = rusqlite::Connection::open(&db_path_buf).map_err(|e| {
                anyhow::anyhow!("Failed to open connection for backfill check: {}", e)
            })?;
            let metric_count: i64 = check_conn
                .query_row("SELECT COUNT(*) FROM file_metrics", [], |row| row.get(0))
                .unwrap_or(0);
            let symbol_count: i64 = check_conn
                .query_row(
                    "SELECT COUNT(*) FROM graph_entities WHERE kind = 'Symbol'",
                    [],
                    |row| row.get(0),
                )
                .unwrap_or(0);
            metric_count == 0 && symbol_count > 0
        };

        (side_tables, chunks, execution_log, metrics, needs_backfill)
    };

    let file_node_cache = cache::FileNodeCache::new(128);

    // The module resolver is rooted at the directory holding the database.
    let project_root = db_path_buf
        .parent()
        .unwrap_or_else(|| Path::new("."))
        .to_path_buf();
    let module_resolver =
        module_resolver::ModuleResolver::new(Arc::clone(&backend), project_root);

    let mut graph = Self {
        files,
        symbols: symbols::SymbolOps {
            backend: Arc::clone(&backend),
            lookup: symbol_lookup::SymbolLookup::new(),
        },
        references: references::ReferenceOps {
            backend: Arc::clone(&backend),
        },
        calls: call_ops::CallOps {
            backend: Arc::clone(&backend),
        },
        imports: imports::ImportOps {
            backend: Arc::clone(&backend),
        },
        module_resolver,
        chunks,
        execution_log,
        metrics,
        file_node_cache,
        cfg_ops: cfg_ops::CfgOps::new(ChunkStore::new(&db_path_buf)),
        side_tables,
        db_path: db_path_buf,
    };

    // Best effort: the outcome of building the module index is ignored.
    let _ = graph.module_resolver.build_module_index();

    if let Err(e) = graph.symbols.lookup.rebuild_from_backend(&*backend) {
        eprintln!("Warning: Failed to build symbol lookup index: {}", e);
    }

    if needs_backfill {
        // Best effort as well: a failed backfill must not prevent opening
        // the graph, but it is reported instead of being dropped silently.
        if let Err(e) = graph.backfill_metrics(None) {
            eprintln!("Warning: Failed to backfill metrics: {}", e);
        }
    }

    Ok(graph)
}
/// Indexes `source` as the contents of the file `path`; delegates to `ops::index_file`.
///
/// The returned count is whatever `ops::index_file` reports
/// (presumably the number of symbols indexed — TODO confirm).
pub fn index_file(&mut self, path: &str, source: &[u8]) -> Result<usize> {
ops::index_file(self, path, source)
}
/// Deletes the file `path` from the graph; delegates to `ops::delete_file`.
pub fn delete_file(&mut self, path: &str) -> Result<DeleteResult> {
ops::delete_file(self, path)
}
/// Deletes the facts recorded for the file `path`; delegates to `ops::delete_file_facts`.
///
/// NOTE(review): how this differs from `delete_file` is defined in `ops` — confirm there.
pub fn delete_file_facts(&mut self, path: &str) -> Result<DeleteResult> {
ops::delete_file_facts(self, path)
}
/// Returns the symbol facts for the file `path`; delegates to `query::symbols_in_file`.
pub fn symbols_in_file(&mut self, path: &str) -> Result<Vec<crate::ingest::SymbolFact>> {
query::symbols_in_file(self, path)
}
/// Returns the symbol facts for the file `path`, with an optional `kind`;
/// delegates to `query::symbols_in_file_with_kind`.
///
/// NOTE(review): `None` presumably means "no kind filter" — confirm in `query`.
pub fn symbols_in_file_with_kind(
&mut self,
path: &str,
kind: Option<crate::ingest::SymbolKind>,
) -> Result<Vec<crate::ingest::SymbolFact>> {
query::symbols_in_file_with_kind(self, path, kind)
}
/// Returns `(i64, SymbolFact)` pairs for the file `path`;
/// delegates to `query::symbol_nodes_in_file`.
///
/// NOTE(review): the `i64` is presumably the symbol's node id — confirm in `query`.
pub fn symbol_nodes_in_file(
&mut self,
path: &str,
) -> Result<Vec<(i64, crate::ingest::SymbolFact)>> {
query::symbol_nodes_in_file(self, path)
}
/// Looks up the id of the symbol `name` in the file `path`, if any;
/// delegates to `query::symbol_id_by_name`.
pub fn symbol_id_by_name(&mut self, path: &str, name: &str) -> Result<Option<i64>> {
query::symbol_id_by_name(self, path, name)
}
/// Indexes references found in `source` for the file `path`;
/// delegates to `query::index_references`.
///
/// The returned count is whatever `query::index_references` reports
/// (presumably the number of references indexed — TODO confirm).
pub fn index_references(&mut self, path: &str, source: &[u8]) -> Result<usize> {
query::index_references(self, path, source)
}
/// Returns the reference facts for the symbol with id `symbol_id`;
/// delegates to `query::references_to_symbol`.
pub fn references_to_symbol(&mut self, symbol_id: i64) -> Result<Vec<ReferenceFact>> {
query::references_to_symbol(self, symbol_id)
}
/// Returns `(i64, SymbolFact)` pairs for symbols called `name` in the file `path`;
/// delegates to `query::symbol_extents`.
///
/// NOTE(review): the `i64` is presumably the symbol's node id — confirm in `query`.
pub fn symbol_extents(
&mut self,
path: &str,
name: &str,
) -> Result<Vec<(i64, crate::ingest::SymbolFact)>> {
query::symbol_extents(self, path, name)
}
/// Indexes calls found in `source` for the file `path`; delegates to `calls::index_calls`.
///
/// The returned count is whatever `calls::index_calls` reports
/// (presumably the number of calls indexed — TODO confirm).
pub fn index_calls(&mut self, path: &str, source: &[u8]) -> Result<usize> {
calls::index_calls(self, path, source)
}
/// Returns call facts for the symbol `name` in the file `path`;
/// delegates to `calls::calls_from_symbol`.
///
/// NOTE(review): presumably the outgoing calls made by that symbol — confirm in `calls`.
pub fn calls_from_symbol(&mut self, path: &str, name: &str) -> Result<Vec<CallFact>> {
calls::calls_from_symbol(self, path, name)
}
/// Returns call facts for the symbol `name` in the file `path`;
/// delegates to `calls::callers_of_symbol`.
///
/// NOTE(review): presumably the incoming calls that target that symbol — confirm in `calls`.
pub fn callers_of_symbol(&mut self, path: &str, name: &str) -> Result<Vec<CallFact>> {
calls::callers_of_symbol(self, path, name)
}
/// Returns the number of files in the graph; delegates to `count::count_files`.
pub fn count_files(&self) -> Result<usize> {
count::count_files(self)
}
/// Returns the number of symbols in the graph; delegates to `count::count_symbols`.
pub fn count_symbols(&self) -> Result<usize> {
count::count_symbols(self)
}
/// Returns the number of references in the graph; delegates to `count::count_references`.
pub fn count_references(&self) -> Result<usize> {
count::count_references(self)
}
/// Returns the number of calls in the graph; delegates to `count::count_calls`.
pub fn count_calls(&self) -> Result<usize> {
count::count_calls(self)
}
/// Returns the number of CFG blocks.
///
/// Currently this always returns `Ok(0)` without consulting the database.
/// NOTE(review): this looks like a placeholder — confirm whether a real
/// count is intended here.
pub fn count_cfg_blocks(&self) -> Result<usize> {
Ok(0)
}
/// Reports whether all three coverage tables (`cfg_block_coverage`,
/// `cfg_edge_coverage`, `cfg_coverage_meta`) exist in the database.
///
/// A fresh connection is opened on the graph's database path for the check.
/// A failing lookup for a table is treated the same as the table being
/// absent, so it yields `Ok(false)` rather than an error.
///
/// # Errors
///
/// Returns an error only if the connection cannot be opened.
pub fn check_coverage_schema(&self) -> Result<bool> {
    const REQUIRED_TABLES: [&str; 3] =
        ["cfg_block_coverage", "cfg_edge_coverage", "cfg_coverage_meta"];

    let conn = rusqlite::Connection::open(&self.db_path).map_err(|e| {
        anyhow::anyhow!("Failed to open connection for coverage schema check: {}", e)
    })?;

    // `all` stops at the first missing table, like an early return would.
    let every_table_present = REQUIRED_TABLES.iter().all(|table| {
        let matches: i64 = conn
            .query_row(
                "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name=?1",
                [*table],
                |row| row.get(0),
            )
            .unwrap_or(0);
        matches != 0
    });

    Ok(every_table_present)
}
/// Collects the symbol, file and CFG block counts into a [`GraphStats`].
///
/// # Errors
///
/// Returns the first error produced by any of the underlying counters.
pub fn get_stats(&self) -> Result<GraphStats> {
    let symbol_count = self.count_symbols()?;
    let file_count = self.count_files()?;
    // Use the dedicated counter instead of repeating its value as a literal,
    // so the stats cannot drift from `count_cfg_blocks`.
    let cfg_block_count = self.count_cfg_blocks()?;
    Ok(GraphStats {
        symbol_count,
        file_count,
        cfg_block_count,
    })
}
/// Reconciles the graph with the file at `path`, stored under the key `path_key`;
/// delegates to `ops::reconcile_file_path`.
///
/// NOTE(review): the source is presumably read from `path` by `ops` — confirm there.
pub fn reconcile_file_path(&mut self, path: &Path, path_key: &str) -> Result<ReconcileOutcome> {
ops::reconcile_file_path(self, path, path_key)
}
/// Same as `reconcile_file_path`, but the caller supplies the file contents as `source`;
/// delegates to `ops::reconcile_file_path_with_source`.
pub fn reconcile_file_path_with_source(
&mut self,
path: &Path,
path_key: &str,
source: &[u8],
) -> Result<ReconcileOutcome> {
ops::reconcile_file_path_with_source(self, path, path_key, source)
}
/// Scans `dir_path` into the graph, optionally reporting through `progress`;
/// delegates to `scan::scan_directory`.
///
/// The returned count is whatever `scan::scan_directory` reports
/// (presumably the number of files indexed — TODO confirm).
pub fn scan_directory(
&mut self,
dir_path: &Path,
progress: Option<&ScanProgress>,
) -> Result<usize> {
scan::scan_directory(self, dir_path, progress)
}
/// Async variant of directory scanning.
///
/// Builds a `filter::FileFilter` for `dir_path` with two empty pattern
/// lists, awaits `scan::scan_directory_async`, and returns the `indexed`
/// field of its result.
///
/// # Errors
///
/// Returns an error if the filter cannot be built or the scan fails.
pub async fn scan_directory_async(
&mut self,
dir_path: &Path,
progress: Option<&ScanProgress>,
) -> Result<usize> {
// NOTE(review): the two empty slices are presumably include/exclude patterns — confirm in `filter`.
let filter = filter::FileFilter::new(dir_path, &[], &[])?;
let result = scan::scan_directory_async(self, dir_path, &filter, progress).await?;
Ok(result.indexed)
}
/// Backfills metrics, optionally reporting through `progress`;
/// delegates to `MetricsOps::backfill_all_metrics` on the graph's metrics store.
pub fn backfill_metrics(
&mut self,
progress: Option<&ScanProgress>,
) -> Result<metrics::BackfillResult> {
self.metrics.backfill_all_metrics(progress)
}
/// Exports the graph as a JSON string; delegates to `export::export_json`.
pub fn export_json(&mut self) -> Result<String> {
export::export_json(self)
}
/// Fetches the file node for `path`, consulting the file node cache first.
///
/// On a cache miss the node is loaded through the file operations and, if
/// found, stored in the cache before being returned. A missing file is
/// returned as `Ok(None)` and is not cached.
pub fn get_file_node(&mut self, path: &str) -> Result<Option<FileNode>> {
    // The cache is keyed by owned `String`; build the key a single time.
    let cache_key = path.to_string();

    if let Some(cached) = self.file_node_cache.get(&cache_key) {
        return Ok(Some(cached.clone()));
    }

    match self.files.get_file_node(path)? {
        Some(loaded) => {
            self.file_node_cache.put(cache_key, loaded.clone());
            Ok(Some(loaded))
        }
        None => Ok(None),
    }
}
/// Returns all file nodes as a map keyed by `String`; delegates to `FileOps::all_file_nodes`.
///
/// NOTE(review): the key is presumably the file path — confirm in `files`.
pub fn all_file_nodes(&mut self) -> Result<std::collections::HashMap<String, FileNode>> {
self.files.all_file_nodes()
}
/// Variant of `all_file_nodes` that only needs `&self`;
/// delegates to `FileOps::all_file_nodes_readonly`.
pub fn all_file_nodes_readonly(&self) -> Result<std::collections::HashMap<String, FileNode>> {
self.files.all_file_nodes_readonly()
}
/// Returns the stored code chunks for `file_path`;
/// delegates to `ChunkStore::get_chunks_for_file`.
pub fn get_code_chunks(&self, file_path: &str) -> Result<Vec<CodeChunk>> {
self.chunks.get_chunks_for_file(file_path)
}
/// Returns the stored code chunks for the symbol `symbol_name` in `file_path`;
/// delegates to `ChunkStore::get_chunks_for_symbol`.
pub fn get_code_chunks_for_symbol(
&self,
file_path: &str,
symbol_name: &str,
) -> Result<Vec<CodeChunk>> {
self.chunks.get_chunks_for_symbol(file_path, symbol_name)
}
/// Looks up the code chunk for the byte span `byte_start..byte_end` of `file_path`, if any;
/// delegates to `ChunkStore::get_chunk_by_span`.
///
/// NOTE(review): whether the span must match a stored chunk exactly, and
/// whether `byte_end` is exclusive, is decided by `ChunkStore` — confirm there.
pub fn get_code_chunk_by_span(
&self,
file_path: &str,
byte_start: usize,
byte_end: usize,
) -> Result<Option<CodeChunk>> {
self.chunks
.get_chunk_by_span(file_path, byte_start, byte_end)
}
/// Stores `chunks`; delegates to `ChunkStore::store_chunks`.
///
/// NOTE(review): the returned `i64` values are presumably the ids of the
/// stored rows — confirm in `ChunkStore`.
pub fn store_code_chunks(&self, chunks: &[CodeChunk]) -> Result<Vec<i64>> {
self.chunks.store_chunks(chunks)
}
/// Returns the number of stored code chunks; delegates to `ChunkStore::count_chunks`.
pub fn count_chunks(&self) -> Result<usize> {
self.chunks.count_chunks()
}
/// Returns a reference to the graph's execution log.
pub fn execution_log(&self) -> &execution_log::ExecutionLog {
&self.execution_log
}
/// Runs graph validation and always returns a report.
///
/// If `validation::validate_graph` itself fails, the error is folded into a
/// failed report carrying one `VALIDATION_ERROR` entry and no warnings.
pub fn validate_graph(&mut self) -> validation::ValidationReport {
    match validation::validate_graph(self) {
        Ok(report) => report,
        Err(e) => {
            let failure = validation::ValidationError::new(
                "VALIDATION_ERROR".to_string(),
                format!("Validation failed with error: {}", e),
            );
            validation::ValidationReport {
                passed: false,
                errors: vec![failure],
                warnings: Vec::new(),
            }
        }
    }
}
/// Returns the statistics reported by the file node cache.
pub fn cache_stats(&self) -> CacheStats {
self.file_node_cache.stats()
}
/// Invalidates the file node cache entry keyed by `path`.
pub fn invalidate_cache(&mut self, path: &str) {
self.file_node_cache.invalidate(&path.to_string());
}
/// Clears the file node cache.
pub fn clear_cache(&mut self) {
self.file_node_cache.clear();
}
/// Backend handle exposed for benchmarks; the same handle that `backend()` returns.
#[doc(hidden)]
pub fn __backend_for_benchmarks(&self) -> &std::sync::Arc<dyn sqlitegraph::GraphBackend> {
    self.backend()
}
/// Backend handle exposed for the watcher; the same handle that `backend()` returns.
#[doc(hidden)]
pub fn __backend_for_watcher(&self) -> &std::sync::Arc<dyn sqlitegraph::GraphBackend> {
    self.backend()
}
/// Returns the shared graph backend handle, as held by the file operations.
#[doc(hidden)]
pub fn backend(&self) -> &std::sync::Arc<dyn sqlitegraph::GraphBackend> {
&self.files.backend
}
/// Loads the node `entity_id` from the backend and decodes it as a [`SymbolNode`].
///
/// Returns `None` when the backend lookup fails, when the node's kind is
/// not `"Symbol"`, or when its data does not deserialize into a `SymbolNode`.
pub fn get_symbol_by_entity_id(&self, entity_id: i64) -> Option<SymbolNode> {
    use sqlitegraph::SnapshotId;

    let node = self
        .files
        .backend
        .get_node(SnapshotId::current(), entity_id)
        .ok()?;

    if node.kind == "Symbol" {
        serde_json::from_value(node.data).ok()
    } else {
        None
    }
}
/// Attaches `label` to the entity `entity_id`; delegates to the side tables.
pub fn add_label(&self, entity_id: i64, label: &str) -> Result<()> {
self.side_tables.add_label(entity_id, label)
}
/// Returns the labels recorded for the entity `entity_id`; delegates to the side tables.
pub fn get_labels_for_entity(&self, entity_id: i64) -> Result<Vec<String>> {
self.side_tables.get_labels_for_entity(entity_id)
}
}