#![cfg(feature = "code-graph")]
use std::path::{Path, PathBuf};
use std::time::Instant;
use heliosdb_nano::code_graph::CodeIndexOptions;
use heliosdb_nano::{EmbeddedDatabase, Value};
/// Recursively gathers every `.rs` file under `root` as `(path, contents)` pairs.
///
/// * Entries (files or directories) whose name starts with `.` or equals `target` /
///   `node_modules` are skipped. `root` itself is never subjected to this filter.
/// * Directories that cannot be listed and files that `read_to_string` cannot load
///   are silently left out — the walk is best-effort.
/// * Paths are stored relative to `root`; if stripping the prefix fails the full
///   path is kept instead.
///
/// The result is sorted by path. `read_dir` yields entries in a platform- and
/// filesystem-dependent order, so without the sort the corpus order would differ
/// between machines and runs.
fn collect_rust_corpus(root: &Path) -> Vec<(PathBuf, String)> {
    /// Depth-first walk of `dir`, appending every matching file to `out`.
    fn walk(dir: &Path, root: &Path, out: &mut Vec<(PathBuf, String)>) {
        // A directory we cannot list simply contributes nothing.
        let entries = match std::fs::read_dir(dir) {
            Ok(entries) => entries,
            Err(_) => return,
        };
        // `flatten` drops individual entries that failed to be read.
        for entry in entries.flatten() {
            let path = entry.path();
            // Names that are not valid UTF-8 are treated as "" and thus never skipped.
            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
            if name.starts_with('.') || name == "target" || name == "node_modules" {
                continue;
            }
            if path.is_dir() {
                walk(&path, root, out);
            } else if path.extension().and_then(|e| e.to_str()) == Some("rs") {
                if let Ok(content) = std::fs::read_to_string(&path) {
                    let rel = path.strip_prefix(root).unwrap_or(&path).to_path_buf();
                    out.push((rel, content));
                }
            }
        }
    }

    let mut out = Vec::new();
    walk(root, root, &mut out);
    // Paths are unique, so an unstable sort gives a fully deterministic order.
    out.sort_unstable_by(|(a, _), (b, _)| a.cmp(b));
    out
}
/// Creates the `src` table in `db` and inserts one row per corpus entry.
///
/// Each row holds the file path (converted lossily to UTF-8), the literal language
/// tag `'rust'`, and the file contents.
///
/// # Panics
///
/// Panics if the table cannot be created or if any insert fails. A silently
/// half-populated table would make every later measurement run against a smaller
/// corpus than the one reported, so failures are surfaced immediately.
fn populate_db(db: &EmbeddedDatabase, corpus: &[(PathBuf, String)]) {
    db.execute("CREATE TABLE src (path TEXT PRIMARY KEY, lang TEXT, content TEXT)")
        .unwrap();
    for (path, content) in corpus {
        let inserted = db.execute_params_returning(
            "INSERT INTO src VALUES ($1, 'rust', $2)",
            &[
                Value::String(path.to_string_lossy().into_owned()),
                Value::String(content.clone()),
            ],
        );
        // Only success matters here; the returned value itself is not used.
        if let Err(err) = inserted {
            panic!("failed to insert {} into src: {:?}", path.display(), err);
        }
    }
}
/// Runs a single `code_index` pass over `corpus` and reports its timings.
///
/// A new in-memory database is created and filled via `populate_db` before the
/// clock starts, so only the `code_index` call itself is timed. `parallelism` and
/// `chunk_size` are copied onto the index options for the `src` table.
///
/// Returns `(wall-clock ms around code_index, stats.parse_elapsed_ms,
/// stats.write_elapsed_ms, stats.files_parsed)`.
///
/// # Panics
///
/// Panics if the database cannot be created or if `code_index` returns an error.
fn measure(
    corpus: &[(PathBuf, String)],
    parallelism: usize,
    chunk_size: Option<usize>,
) -> (u128, u64, u64, u64) {
    // Setup — deliberately outside the timed region.
    let database = EmbeddedDatabase::new_in_memory().expect("in-memory db");
    populate_db(&database, corpus);

    let mut options = CodeIndexOptions::for_table("src");
    options.chunk_size = chunk_size;
    options.parallelism = Some(parallelism);

    // Timed region: the indexing call only.
    let clock = Instant::now();
    let report = database.code_index(options).expect("code_index");
    let wall_ms = clock.elapsed().as_millis();

    (
        wall_ms,
        report.parse_elapsed_ms,
        report.write_elapsed_ms,
        report.files_parsed,
    )
}
/// Field benchmark over this crate's own `src/` tree.
///
/// Indexes the corpus three times — serially, in parallel, and in parallel with a
/// chunk size of 256 — printing the timings of each run, then asserts:
///
/// * all three runs parsed the same number of files,
/// * with at least 4 workers, the parallel parse phase is at least 1.5× faster
///   than the serial one,
/// * the chunked parse phase takes no more than the larger of twice the unchunked
///   parallel parse time and 5000 ms.
///
/// Marked `#[ignore]`, so it only runs when explicitly requested.
#[test]
#[ignore]
fn field_benchmark_nano_src_tree() {
    let manifest_dir = std::env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR");
    let corpus = collect_rust_corpus(&Path::new(&manifest_dir).join("src"));
    let file_count = corpus.len();

    println!("\n== Field benchmark: Nano src/ tree ==");
    println!("Corpus: {} Rust files", file_count);
    assert!(file_count >= 100, "expected ≥100 files in src/, got {}", file_count);

    // Fall back to a single core when the platform cannot report parallelism,
    // and never use more than 8 workers.
    let core_count = std::thread::available_parallelism().map_or(1, |n| n.get());
    let workers = std::cmp::min(core_count, 8);
    println!("Available cores: {} — using {} workers for parallel run", core_count, workers);

    // Run 1: serial baseline.
    let (serial_total, serial_parse, serial_write, serial_files) = measure(&corpus, 1, None);
    println!(
        "\nSerial (parallelism=1, no chunking):\n  total {} ms, parse {} ms, write {} ms, files parsed {}",
        serial_total, serial_parse, serial_write, serial_files
    );

    // Run 2: parallel, unchunked.
    let (par_total, par_parse, par_write, par_files) = measure(&corpus, workers, None);
    println!(
        "\nParallel (parallelism={}, no chunking):\n  total {} ms, parse {} ms, write {} ms, files parsed {}",
        workers, par_total, par_parse, par_write, par_files
    );

    // Run 3: parallel with chunking.
    let (chunk_total, chunk_parse, chunk_write, chunk_files) =
        measure(&corpus, workers, Some(256));
    println!(
        "\nParallel + chunked (parallelism={}, chunk_size=256):\n  total {} ms, parse {} ms, write {} ms, files parsed {}",
        workers, chunk_total, chunk_parse, chunk_write, chunk_files
    );

    // Denominators are clamped to 1 ms so a sub-millisecond parse cannot divide by zero.
    let baseline_ms = serial_parse as f64;
    let parse_speedup = baseline_ms / par_parse.max(1) as f64;
    let parse_speedup_chunked = baseline_ms / chunk_parse.max(1) as f64;
    println!(
        "\nParse-phase speedup: {:.2}× (parallel) / {:.2}× (chunked)",
        parse_speedup, parse_speedup_chunked
    );

    let utilisation = parse_speedup / workers as f64;
    println!("Effective utilisation (parse_speedup / workers): {:.2}", utilisation);

    assert_eq!(serial_files, par_files, "serial and parallel parsed different file counts");
    assert_eq!(serial_files, chunk_files, "serial and chunked parsed different file counts");

    // The speedup floor is only enforced when at least 4 workers are in use.
    assert!(
        workers < 4 || parse_speedup >= 1.5,
        "parse_speedup {:.2}× is below the 1.5× regression floor (parallelism={})",
        parse_speedup, workers
    );

    // The 5 s floor keeps very short parse phases from failing on timing noise.
    let chunked_budget_ms = par_parse.saturating_mul(2).max(5_000);
    assert!(
        chunk_parse <= chunked_budget_ms,
        "chunked parse {}ms is more than 2× unchunked parse {}ms — overhead is too high",
        chunk_parse, par_parse
    );
}