use std::path::PathBuf;
use std::time::Instant;
use anyhow::Result;
use infigraph_core::extract::extract_file;
use infigraph_core::graph::{CozoStore, GraphQuery, GraphStore};
use infigraph_languages::bundled_registry;
/// Benchmark driver that loads the same extracted code graph into two stores
/// (`GraphStore`, labelled "Kuzu" in the output, and `CozoStore`, labelled
/// "CozoDB") and reports timings, count agreement, and on-disk size on stderr.
///
/// The directory to index is the first command-line argument; when absent,
/// `tests/fixtures/python-simple` is used.
///
/// # Errors
///
/// Returns an error when the target directory does not exist, when any store
/// or filesystem call propagated with `?` fails, or when at least one of the
/// verified counts differs between the two stores.
fn main() -> Result<()> {
// First CLI argument selects the directory; otherwise fall back to the fixture.
let target_dir = std::env::args().nth(1)
.map(PathBuf::from)
.unwrap_or_else(|| PathBuf::from("tests/fixtures/python-simple"));
if !target_dir.exists() {
anyhow::bail!("dir not found: {}", target_dir.display());
}
let registry = bundled_registry()?;
let mut files = Vec::new();
collect_files(&target_dir, &mut files);
// Sort so files are processed in a stable order regardless of directory listing order.
files.sort();
eprintln!("Found {} files in {}", files.len(), target_dir.display());
// --- Extraction phase: timed as a whole, shared by both stores. ---
let t0 = Instant::now();
let mut extractions = Vec::new();
let mut skipped = 0usize;
for path in &files {
// Use forward slashes in the path string handed to the registry and extractor.
let rel_path = path.to_string_lossy().replace('\\', "/");
// Unreadable files, files with no matching language pack, and files whose
// extraction fails are counted as skipped instead of aborting the run.
let source = match std::fs::read(path) {
Ok(s) => s,
Err(_) => { skipped += 1; continue; }
};
let pack = match registry.for_file_with_content(&rel_path, &source) {
Some(p) => p,
None => { skipped += 1; continue; }
};
match extract_file(&rel_path, &source, pack) {
Ok(e) => extractions.push(e),
Err(_) => { skipped += 1; }
}
}
let extract_ms = t0.elapsed().as_millis();
let total_symbols: usize = extractions.iter().map(|e| e.symbols.len()).sum();
let total_relations: usize = extractions.iter().map(|e| e.relations.len()).sum();
let total_stmts: usize = extractions.iter().map(|e| e.statements.len()).sum();
eprintln!("Extracted {} files ({} skipped) in {}ms: {} symbols, {} relations, {} statements",
extractions.len(), skipped, extract_ms, total_symbols, total_relations, total_stmts);
// Start from an empty scratch directory under the system temp dir so that
// leftovers from a previous run cannot influence the measurements.
let tmp = std::env::temp_dir().join("cozo_vs_kuzu_bench");
if tmp.exists() { std::fs::remove_dir_all(&tmp)?; }
std::fs::create_dir_all(&tmp)?;
let kuzu_path = tmp.join("graph");
let cozo_path = tmp.join("graph.cozo");
// --- Kuzu: open, bulk write, stats (each timed separately). ---
eprintln!("\n=== Kuzu ===");
let t0 = Instant::now();
let kuzu = GraphStore::open(&kuzu_path)?;
let kuzu_open_ms = t0.elapsed().as_millis();
let t0 = Instant::now();
// A single connection is created once and reused for every upsert; creating
// it is part of the measured write time.
let conn = kuzu.connection()?;
for extraction in &extractions {
kuzu.upsert_file_conn(&conn, extraction)?;
}
let kuzu_write_ms = t0.elapsed().as_millis();
let t0 = Instant::now();
let kuzu_stats = kuzu.stats()?;
let kuzu_stats_ms = t0.elapsed().as_millis();
eprintln!("open={}ms write={}ms stats={}ms", kuzu_open_ms, kuzu_write_ms, kuzu_stats_ms);
eprintln!("{} symbols, {} calls, {} modules, {} files",
kuzu_stats.symbols, kuzu_stats.calls, kuzu_stats.modules, kuzu_stats.files);
// --- CozoDB: open, bulk write, counts (each timed separately). ---
eprintln!("\n=== CozoDB ===");
let t0 = Instant::now();
let cozo = CozoStore::open(&cozo_path)?;
let cozo_open_ms = t0.elapsed().as_millis();
let t0 = Instant::now();
for extraction in &extractions {
cozo.upsert_file_batch(extraction)?;
}
// The refresh call is included in the measured CozoDB write time.
cozo.refresh_materialized()?;
let cozo_write_ms = t0.elapsed().as_millis();
// Only `relation_counts()` is inside the timed window; the `stats()` call
// below runs after the timer has been read.
// NOTE(review): the "Stats" row therefore compares `kuzu.stats()` against
// `cozo.relation_counts()` — confirm this asymmetry is intended.
let t0 = Instant::now();
let cozo_counts = cozo.relation_counts()?;
let cozo_counts_ms = t0.elapsed().as_millis();
let cozo_stats = cozo.stats()?;
eprintln!("open={}ms write={}ms counts={}ms", cozo_open_ms, cozo_write_ms, cozo_counts_ms);
// The CozoDB "calls" figure is read from the relation counts and shown as 0
// when that key is absent.
eprintln!("{} symbols, {} calls, {} modules, {} files",
cozo_stats.symbols, cozo_counts.get("calls").copied().unwrap_or(0),
cozo_stats.modules, cozo_stats.files);
// --- Side-by-side write-path summary table. ---
eprintln!("\n=== Write Performance ===");
eprintln!("{:<15} {:>8} {:>8} {:>8}", "", "Kuzu", "CozoDB", "Speedup");
eprintln!("{}", "-".repeat(45));
print_perf("DB open", kuzu_open_ms, cozo_open_ms);
print_perf("Write all", kuzu_write_ms, cozo_write_ms);
print_perf("Stats", kuzu_stats_ms, cozo_counts_ms);
let kuzu_total = kuzu_open_ms + kuzu_write_ms + kuzu_stats_ms;
let cozo_total = cozo_open_ms + cozo_write_ms + cozo_counts_ms;
print_perf("TOTAL", kuzu_total, cozo_total);
// --- Count verification: every mismatch increments `failures`. ---
// NOTE(review): "calls" is printed for both stores above but is not part of
// this verification list — confirm whether that omission is deliberate.
eprintln!("\n=== Count Verification ===");
let mut failures = 0u32;
for (label, kuzu_n, cozo_n) in [
("symbols", kuzu_stats.symbols, cozo_stats.symbols),
("modules", kuzu_stats.modules, cozo_stats.modules),
("files", kuzu_stats.files, cozo_stats.files),
("contains", kuzu_stats.contains, cozo_stats.contains),
("inherits", kuzu_stats.inherits, cozo_stats.inherits),
] {
let ok = kuzu_n == cozo_n;
if !ok { failures += 1; }
eprintln!("{:<15} Kuzu={:<6} CozoDB={:<6} {}", label, kuzu_n, cozo_n, if ok { "✅" } else { "❌" });
}
// --- Query benchmark setup. ---
// The sample file is the first extraction's file and the sample symbol is the
// first symbol CozoDB returns for it. Either falls back to "" when missing,
// and an empty sample id skips the whole query section.
let kq = GraphQuery::new(&conn);
let sample_file = extractions.first().map(|e| e.file.as_str()).unwrap_or("");
let sample_syms = cozo.symbols_in_file(sample_file)?;
let sample_id = sample_syms.first().map(|s| s.id.as_str()).unwrap_or("");
if !sample_id.is_empty() {
// Only the last `::`-separated segment of the id is shown in the heading.
eprintln!("\n=== Query Performance (10 iterations, sample={}) ===", sample_id.rsplit("::").next().unwrap_or(sample_id));
let iters: u128 = 10;
// Each entry is (label, Kuzu closure, CozoDB closure). The closures discard
// the query result and only propagate errors.
let queries: Vec<(&str, Box<dyn Fn() -> Result<()>>, Box<dyn Fn() -> Result<()>>)> = vec![
("symbols_in_file",
Box::new(|| { kq.symbols_in_file(sample_file)?; Ok(()) }),
Box::new(|| { cozo.symbols_in_file(sample_file)?; Ok(()) })),
("callers_of",
Box::new(|| { kq.callers_of(sample_id)?; Ok(()) }),
Box::new(|| { cozo.callers_of(sample_id)?; Ok(()) })),
("callees_of",
Box::new(|| { kq.callees_of(sample_id)?; Ok(()) }),
Box::new(|| { cozo.callees_of(sample_id)?; Ok(()) })),
("find_symbol_by_id",
Box::new(|| { kq.find_symbol_by_id(sample_id)?; Ok(()) }),
Box::new(|| { cozo.find_symbol_by_id(sample_id)?; Ok(()) })),
("get_api_surface",
Box::new(|| { kq.get_api_surface()?; Ok(()) }),
Box::new(|| { cozo.get_api_surface()?; Ok(()) })),
("get_test_coverage",
Box::new(|| { kq.get_test_coverage()?; Ok(()) }),
Box::new(|| { cozo.get_test_coverage()?; Ok(()) })),
("transitive_impact",
Box::new(|| { kq.transitive_impact(sample_id, 3)?; Ok(()) }),
Box::new(|| { cozo.transitive_impact(sample_id, 3)?; Ok(()) })),
("find_all_references",
Box::new(|| { kq.find_all_references(sample_id)?; Ok(()) }),
Box::new(|| { cozo.find_all_references(sample_id)?; Ok(()) })),
("get_file_deps",
Box::new(|| { kq.get_file_deps(sample_file)?; Ok(()) }),
Box::new(|| { cozo.get_file_deps(sample_file)?; Ok(()) })),
("symbols_in_range",
Box::new(|| { kq.symbols_in_range(sample_file, 1, 50)?; Ok(()) }),
Box::new(|| { cozo.symbols_in_range(sample_file, 1, 50)?; Ok(()) })),
("branches_of",
Box::new(|| { kq.branches_of(sample_id)?; Ok(()) }),
Box::new(|| { cozo.branches_of(sample_id)?; Ok(()) })),
("get_type_hierarchy",
Box::new(|| { kq.get_type_hierarchy(sample_id, 3)?; Ok(()) }),
Box::new(|| { cozo.get_type_hierarchy(sample_id, 3)?; Ok(()) })),
("generate_test_context",
Box::new(|| { kq.generate_test_context(None, 10)?; Ok(()) }),
Box::new(|| { cozo.generate_test_context(None, 10)?; Ok(()) })),
("stats",
Box::new(|| { kuzu.stats()?; Ok(()) }),
Box::new(|| { cozo.stats()?; Ok(()) })),
];
for (name, kuzu_fn, cozo_fn) in &queries {
// Average wall-clock microseconds per call over `iters` runs (integer division).
let t0 = Instant::now();
for _ in 0..iters { kuzu_fn()?; }
let kuzu_us = t0.elapsed().as_micros() / iters;
let t0 = Instant::now();
for _ in 0..iters { cozo_fn()?; }
let cozo_us = t0.elapsed().as_micros() / iters;
// Relative difference against Kuzu: positive means CozoDB took longer.
// Reported as 0% when the Kuzu average rounds down to 0µs.
let pct = if kuzu_us > 0 { (cozo_us as f64 - kuzu_us as f64) / kuzu_us as f64 * 100.0 } else { 0.0 };
eprintln!("{:<22} Kuzu={:>6}µs CozoDB={:>6}µs {:+.0}%", name, kuzu_us, cozo_us, pct);
}
}
// --- Incremental update: re-upsert the first extraction into the populated stores. ---
// This path uses `cozo.upsert_file`, whereas the bulk load above used
// `upsert_file_batch`.
if let Some(ext) = extractions.first() {
eprintln!("\n=== Incremental Update (re-upsert 1 file into existing DB) ===");
let iters: u128 = 5;
let t0 = Instant::now();
for _ in 0..iters { kuzu.upsert_file_conn(&conn, ext)?; }
let kuzu_inc = t0.elapsed().as_micros() / iters;
let t0 = Instant::now();
for _ in 0..iters { cozo.upsert_file(ext)?; }
let cozo_inc = t0.elapsed().as_micros() / iters;
let pct = if kuzu_inc > 0 { (cozo_inc as f64 - kuzu_inc as f64) / kuzu_inc as f64 * 100.0 } else { 0.0 };
eprintln!("upsert_file Kuzu={:>6}µs CozoDB={:>6}µs {:+.0}%", kuzu_inc, cozo_inc, pct);
}
// --- On-disk size. ---
// The Kuzu path is summed as a directory tree; the CozoDB path is read as a
// single file's metadata and reported as 0 when that lookup fails.
let kuzu_size = dir_size(&kuzu_path);
let cozo_size = std::fs::metadata(&cozo_path).map(|m| m.len()).unwrap_or(0);
eprintln!("\n=== DB Size ===");
eprintln!("Kuzu: {} KB", kuzu_size / 1024);
eprintln!("CozoDB: {} KB", cozo_size / 1024);
eprintln!("\n=== Result ===");
if failures == 0 {
eprintln!("ALL CHECKS PASSED ✅");
} else {
eprintln!("{failures} FAILURES ❌");
}
// Clean up the scratch directory before turning count mismatches into an error.
// NOTE(review): `kuzu`, `conn`, `kq` and `cozo` are still in scope here — confirm
// that removing the directory underneath them is acceptable on every platform.
std::fs::remove_dir_all(&tmp)?;
if failures > 0 {
anyhow::bail!("{failures} comparison failures");
}
Ok(())
}
/// Recursively appends every regular file found under `dir` to `out`.
///
/// Sub-directories are not descended into when their name starts with `.` or
/// is one of `node_modules`, `target`, or `__pycache__`. Files are never
/// filtered by name, so dot-files are collected.
///
/// The walk is best-effort: a directory that cannot be listed, or an entry
/// that cannot be read, is skipped silently instead of aborting the traversal.
/// Results are appended in directory-listing order; callers that need a stable
/// order should sort `out` themselves.
///
/// `dir` is taken as a borrowed `Path` so both `&PathBuf` and `&Path` callers
/// work without an intermediate allocation.
fn collect_files(dir: &std::path::Path, out: &mut Vec<PathBuf>) {
    let entries = match std::fs::read_dir(dir) {
        Ok(entries) => entries,
        // Unreadable or missing directory: contribute nothing.
        Err(_) => return,
    };
    // `flatten` drops entries whose metadata could not be read.
    for entry in entries.flatten() {
        let path = entry.path();
        if path.is_dir() {
            let name = path.file_name().unwrap_or_default().to_string_lossy();
            let is_ignored = name.starts_with('.')
                || matches!(&*name, "node_modules" | "target" | "__pycache__");
            if is_ignored {
                continue;
            }
            collect_files(&path, out);
        } else if path.is_file() {
            out.push(path);
        }
    }
}
/// Writes one row of the write-performance table to stderr.
///
/// The last column is the ratio `kuzu_ms / cozo_ms`, so a value above `1.0`
/// means the CozoDB measurement was the smaller one. A `cozo_ms` of zero is
/// reported as an infinite ratio rather than dividing by zero.
fn print_perf(label: &str, kuzu_ms: u128, cozo_ms: u128) {
    let ratio = match cozo_ms {
        0 => f64::INFINITY,
        denominator => kuzu_ms as f64 / denominator as f64,
    };
    eprintln!("{:<15} {:>6}ms {:>6}ms {:.1}x", label, kuzu_ms, cozo_ms, ratio);
}
/// Returns the total size in bytes of all regular files under `path`,
/// descending into sub-directories.
///
/// The result is best-effort: if `path` cannot be listed (it does not exist,
/// is not a directory, or is unreadable) the function returns `0`, and any
/// entry whose metadata cannot be read contributes `0` to the sum.
///
/// `path` is taken as a borrowed `Path` so both `&PathBuf` and `&Path` callers
/// work without an intermediate allocation.
fn dir_size(path: &std::path::Path) -> u64 {
    let entries = match std::fs::read_dir(path) {
        Ok(entries) => entries,
        Err(_) => return 0,
    };
    entries
        .flatten()
        .map(|entry| {
            let child = entry.path();
            if child.is_file() {
                child.metadata().map(|m| m.len()).unwrap_or(0)
            } else if child.is_dir() {
                dir_size(&child)
            } else {
                // Neither a regular file nor a directory: nothing to count.
                0
            }
        })
        .sum()
}