pub mod config;
pub mod db;
pub mod extraction;
pub mod graph;
pub mod mcp;
pub mod types;
use anyhow::{anyhow, Context, Result};
use config::{load_config, save_config, CodeGraphConfig};
use db::Database;
use extraction::{detect_language, extract_from_source, should_include_file};
use graph::{GraphTraverser, Subgraph};
use sha2::{Digest, Sha256};
use std::fs;
use std::path::{Path, PathBuf};
use types::{FileRecord, GraphStats, IndexResult, Node, NodeEdge, SearchOptions, SearchResult};
/// Name of the per-project directory that `CodeGraph::init` creates under the
/// project root and that holds the database file.
pub const CODEGRAPH_DIR: &str = ".codegraph";
/// File name of the database stored inside the [`CODEGRAPH_DIR`] directory.
pub const DATABASE_FILE: &str = "codegraph.db";
/// Handle to one CodeGraph project: the project root together with its
/// configuration and its database.
///
/// Obtained through `CodeGraph::init` (new project) or `CodeGraph::open`
/// (existing project).
pub struct CodeGraph {
// Project root directory; the `.codegraph` directory lives directly under it.
root: PathBuf,
// Configuration written by `init` or loaded by `open`; consulted when
// deciding which scanned files to include.
config: CodeGraphConfig,
// Database located at `<root>/.codegraph/codegraph.db`.
db: Database,
}
impl CodeGraph {
    /// Creates a new CodeGraph project rooted at `root`.
    ///
    /// The root is canonicalized when possible (the path is used as given
    /// otherwise). The `.codegraph` directory is created, the configuration
    /// returned by `CodeGraphConfig::default_for_root(".")` is saved, and the
    /// database is initialized inside that directory.
    ///
    /// # Errors
    ///
    /// Fails if the `.codegraph` directory already exists under `root`, or if
    /// creating the directory, saving the configuration or initializing the
    /// database fails.
    pub fn init(root: impl AsRef<Path>) -> Result<Self> {
        let root = root
            .as_ref()
            .canonicalize()
            .unwrap_or_else(|_| root.as_ref().to_path_buf());
        let dir = root.join(CODEGRAPH_DIR);
        if dir.exists() {
            return Err(anyhow!(
                "CodeGraph already initialized in {}",
                root.display()
            ));
        }
        fs::create_dir_all(&dir).with_context(|| format!("creating {}", dir.display()))?;
        let config = CodeGraphConfig::default_for_root(".");
        save_config(&root, &config)?;
        let db = Database::initialize(dir.join(DATABASE_FILE))?;
        Ok(Self { root, config, db })
    }

    /// Opens the project that contains `root`.
    ///
    /// The project root is located with [`find_nearest_codegraph_root`], so
    /// `root` may be any path at or below an initialized directory.
    ///
    /// # Errors
    ///
    /// Fails if no initialized directory is found, or if loading the
    /// configuration or opening the database fails.
    pub fn open(root: impl AsRef<Path>) -> Result<Self> {
        let root = find_nearest_codegraph_root(root.as_ref())
            .ok_or_else(|| anyhow!("CodeGraph not initialized in {}", root.as_ref().display()))?;
        let config = load_config(&root)?;
        let db = Database::open(root.join(CODEGRAPH_DIR).join(DATABASE_FILE))?;
        Ok(Self { root, config, db })
    }

    /// Returns the project root directory.
    pub fn root(&self) -> &Path {
        &self.root
    }

    /// Rebuilds the whole index from the files currently on disk.
    ///
    /// The database is cleared and every file returned by the directory scan
    /// is read, language-detected, extracted and stored. Files that cannot be
    /// read (or whose metadata cannot be fetched) are counted in
    /// `files_errored` and described in `errors`; files whose language is
    /// unknown are counted in `files_skipped`. After all files are stored,
    /// references are resolved by name and `edges_created` is set to the
    /// database's edge count.
    ///
    /// # Errors
    ///
    /// Fails if the directory scan or any database operation fails.
    /// Per-file I/O problems do not abort the run; they make
    /// `IndexResult::success` false instead.
    pub fn index_all(&mut self) -> Result<IndexResult> {
        let start = std::time::Instant::now();
        // Scan before clearing: a failed walk must not wipe the existing index.
        let files = self.scan_files()?;
        self.db.clear_all()?;
        let mut result = IndexResult::default();
        for path in files {
            let full = self.root.join(&path);
            let content = match fs::read_to_string(&full) {
                Ok(content) => content,
                Err(err) => {
                    result.files_errored += 1;
                    result.errors.push(format!("{}: {}", path.display(), err));
                    continue;
                }
            };
            let lang = detect_language(&path, &content);
            if lang.is_unknown() {
                result.files_skipped += 1;
                continue;
            }
            // Treat a metadata failure like a read failure: record it for this
            // file and keep indexing the rest instead of aborting the run.
            let metadata = match fs::metadata(&full) {
                Ok(metadata) => metadata,
                Err(err) => {
                    result.files_errored += 1;
                    result.errors.push(format!("{}: {}", path.display(), err));
                    continue;
                }
            };
            let extraction = extract_from_source(&path, &content, lang);
            let hash = content_hash(&content);
            self.db.insert_file(&FileRecord {
                // Stored paths always use forward slashes.
                path: path.to_string_lossy().replace('\\', "/"),
                content_hash: hash,
                language: lang,
                size: metadata.len(),
                modified_at: metadata
                    .modified()
                    .ok()
                    .and_then(system_time_ms)
                    .unwrap_or_default(),
                indexed_at: now_ms(),
                node_count: extraction.nodes.len() as i64,
            })?;
            self.db.insert_nodes(&extraction.nodes)?;
            self.db.insert_edges(&extraction.edges)?;
            self.db
                .insert_unresolved_refs(&extraction.unresolved_references)?;
            result.files_indexed += 1;
            result.nodes_created += extraction.nodes.len() as i64;
        }
        self.db.resolve_references_by_name()?;
        // The edge total is read back from the database after reference
        // resolution, so no per-file edge tally is kept above.
        result.edges_created = self.db.edge_count()?;
        result.success = result.files_errored == 0;
        result.duration_ms = start.elapsed().as_millis() as i64;
        Ok(result)
    }

    /// Brings the index up to date; currently this is a full [`Self::index_all`].
    pub fn sync(&mut self) -> Result<IndexResult> {
        self.index_all()
    }

    /// Returns the statistics reported by the database.
    pub fn stats(&self) -> Result<GraphStats> {
        self.db.stats()
    }

    /// Searches indexed nodes for `query`; forwards to `Database::search_nodes`.
    pub fn search_nodes(&self, query: &str, options: SearchOptions) -> Result<Vec<SearchResult>> {
        self.db.search_nodes(query, options)
    }

    /// Looks up a node by id; forwards to `Database::get_node`.
    pub fn get_node(&self, id: &str) -> Result<Option<Node>> {
        self.db.get_node(id)
    }

    /// Forwards to `GraphTraverser::get_callers` for `node_id` with `max_depth`.
    pub fn get_callers(&self, node_id: &str, max_depth: usize) -> Result<Vec<NodeEdge>> {
        GraphTraverser::new(&self.db).get_callers(node_id, max_depth)
    }

    /// Forwards to `GraphTraverser::get_callees` for `node_id` with `max_depth`.
    pub fn get_callees(&self, node_id: &str, max_depth: usize) -> Result<Vec<NodeEdge>> {
        GraphTraverser::new(&self.db).get_callees(node_id, max_depth)
    }

    /// Forwards to `GraphTraverser::get_impact_radius` for `node_id` with `max_depth`.
    pub fn get_impact_radius(&self, node_id: &str, max_depth: usize) -> Result<Subgraph> {
        GraphTraverser::new(&self.db).get_impact_radius(node_id, max_depth)
    }

    /// Forwards to `Database::get_file_dependents` for `file_path`.
    pub fn get_file_dependents(&self, file_path: &str) -> Result<Vec<String>> {
        self.db.get_file_dependents(file_path)
    }

    /// Returns every file record stored in the database.
    pub fn get_all_files(&self) -> Result<Vec<FileRecord>> {
        self.db.get_all_files()
    }

    /// Builds a Markdown summary of the nodes matching `task`.
    ///
    /// The output starts with a `## Context: <task>` heading followed by one
    /// bullet per search result (at most `max_nodes` are requested) giving the
    /// node's kind, name, file and start line, plus its signature when present.
    /// With `include_code`, each bullet is followed by a fenced code block
    /// containing the node's source; nodes whose source cannot be read are
    /// listed without a code block.
    ///
    /// # Errors
    ///
    /// Fails only if the underlying node search fails.
    pub fn build_context(&self, task: &str, max_nodes: i64, include_code: bool) -> Result<String> {
        let results = self.search_nodes(
            task,
            SearchOptions {
                limit: max_nodes,
                ..Default::default()
            },
        )?;
        let mut out = format!("## Context: {task}\n\n");
        for result in results {
            let n = result.node;
            out.push_str(&format!(
                "- `{}` `{}` at `{}:{}`",
                n.kind, n.name, n.file_path, n.start_line
            ));
            if let Some(sig) = n.signature.as_deref() {
                // Keep each bullet on a single line.
                out.push_str(&format!(" — `{}`", sig.replace('\n', " ")));
            }
            out.push('\n');
            if !include_code {
                continue;
            }
            // Best effort: an unreadable source file only drops the snippet.
            if let Ok(code) = self.read_node_source(&n) {
                out.push_str("\n```");
                out.push_str(n.language.as_str());
                out.push('\n');
                out.push_str(&code);
                if !code.ends_with('\n') {
                    out.push('\n');
                }
                out.push_str("```\n\n");
            }
        }
        Ok(out)
    }

    /// Reads the source lines covered by `node` from its file on disk.
    ///
    /// `start_line`/`end_line` are treated as 1-based and inclusive; an
    /// `end_line` smaller than `start_line` is treated as `start_line`. Lines
    /// past the end of the file are ignored, so an out-of-range node yields a
    /// shorter (possibly empty) string rather than an error. The selected lines
    /// are joined with `\n` and carry no trailing newline.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be read as UTF-8 text.
    pub fn read_node_source(&self, node: &Node) -> Result<String> {
        let full = self.root.join(&node.file_path);
        let text =
            fs::read_to_string(&full).with_context(|| format!("reading {}", full.display()))?;
        let start = (node.start_line.saturating_sub(1)) as usize;
        let end = (node.end_line.max(node.start_line)) as usize;
        // `skip`/`take` clamp to the file length on their own and cannot panic,
        // unlike slicing a collected `Vec` with a possibly inverted range.
        let selected: Vec<&str> = text
            .lines()
            .skip(start)
            .take(end.saturating_sub(start))
            .collect();
        Ok(selected.join("\n"))
    }

    /// Consumes the handle; its fields are dropped when this call returns.
    pub fn close(self) {}

    /// Collects the root-relative paths of all files eligible for indexing.
    ///
    /// Walks the project root honoring gitignore, global gitignore and
    /// git-exclude rules. Anything under a `.codegraph` path component is
    /// skipped, and the remaining files are kept only if
    /// `should_include_file` accepts them for the current configuration.
    /// The result is sorted.
    ///
    /// # Errors
    ///
    /// Fails on the first error reported by the directory walker.
    fn scan_files(&self) -> Result<Vec<PathBuf>> {
        let mut out = Vec::new();
        // `hidden(false)` switches off the walker's hidden-file filter, so
        // dot-files and dot-directories are visited as well.
        let walker = ignore::WalkBuilder::new(&self.root)
            .hidden(false)
            .git_ignore(true)
            .git_global(true)
            .git_exclude(true)
            .build();
        for entry in walker {
            let entry = entry?;
            let is_file = entry.file_type().map(|ft| ft.is_file()).unwrap_or(false);
            if !is_file {
                continue;
            }
            let rel = entry
                .path()
                .strip_prefix(&self.root)
                .unwrap_or(entry.path())
                .to_path_buf();
            // Never index CodeGraph's own data directory.
            if rel.components().any(|c| c.as_os_str() == CODEGRAPH_DIR) {
                continue;
            }
            if should_include_file(&rel, &self.config) {
                out.push(rel);
            }
        }
        out.sort_unstable();
        Ok(out)
    }
}
/// Reports whether `root` directly contains the CodeGraph database file,
/// i.e. whether `<root>/.codegraph/codegraph.db` exists.
pub fn is_initialized(root: impl AsRef<Path>) -> bool {
    let mut db_path = root.as_ref().to_path_buf();
    db_path.push(CODEGRAPH_DIR);
    db_path.push(DATABASE_FILE);
    db_path.exists()
}
/// Finds the closest initialized CodeGraph root at or above `start`.
///
/// `start` is canonicalized when possible (otherwise used as given). If it
/// names a file, the search begins at the containing directory. Each directory
/// from there up to the top of the path is checked with [`is_initialized`];
/// the first match is returned, or `None` if no ancestor qualifies.
pub fn find_nearest_codegraph_root(start: impl AsRef<Path>) -> Option<PathBuf> {
    let start = start.as_ref();
    let mut origin = match start.canonicalize() {
        Ok(resolved) => resolved,
        Err(_) => start.to_path_buf(),
    };
    if origin.is_file() {
        origin.pop();
    }
    // `ancestors` yields the path itself first, then each successive parent.
    origin
        .ancestors()
        .find(|dir| is_initialized(dir))
        .map(Path::to_path_buf)
}
/// Returns the SHA-256 digest of `content`'s bytes, formatted as lowercase hex.
fn content_hash(content: &str) -> String {
    let digest = Sha256::digest(content.as_bytes());
    format!("{digest:x}")
}
/// Returns the current system time in milliseconds since the Unix epoch,
/// or `0` when `system_time_ms` cannot convert it.
fn now_ms() -> i64 {
    let now = std::time::SystemTime::now();
    match system_time_ms(now) {
        Some(millis) => millis,
        None => i64::default(),
    }
}
/// Converts `t` to whole milliseconds since the Unix epoch.
///
/// Returns `None` when `t` lies before the epoch, or when the millisecond
/// count does not fit in an `i64` (a plain `as` cast would silently wrap such
/// a value into a bogus, possibly negative, timestamp).
fn system_time_ms(t: std::time::SystemTime) -> Option<i64> {
    let since_epoch = t.duration_since(std::time::UNIX_EPOCH).ok()?;
    // `as_millis` yields a `u128`; convert with a range check, not truncation.
    i64::try_from(since_epoch.as_millis()).ok()
}