use anyhow::Result;
use colored::Colorize;
use serde::Deserialize;
use std::collections::BTreeSet;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::time::Instant;
use crate::chunker::{count_tokens, generate_outline, should_index};
use crate::indexer;
use crate::query::{build_task_context, query_index};
use crate::store::{count_stats, index_staleness, open_db};
/// One row of the read-reduction table: the token cost of a full file
/// compared with the token cost of its generated outline.
struct ReadRow {
/// Repository-relative path of the measured file (forward slashes).
path: String,
/// Number of lines in the file content.
lines: usize,
/// Token count of the full file content.
raw_tokens: usize,
/// Token count of the outline generated for the file.
outline_tokens: usize,
/// Percentage of tokens saved by the outline relative to `raw_tokens`.
saved_pct: f64,
}
/// A hard-coded targeted-workflow scenario: read one symbol out of one file.
struct WorkflowCase {
/// Human-readable task name shown in the results table.
label: &'static str,
/// Repository-relative path of the file containing the symbol.
path: &'static str,
/// Name of the symbol whose chunk(s) should be extracted.
symbol: &'static str,
}
/// Measured result of one [`WorkflowCase`].
struct WorkflowRow {
/// Task name copied from the case.
label: &'static str,
/// Repository-relative path of the measured file.
path: String,
/// Symbol that was looked up.
symbol: &'static str,
/// Token count of the whole file (the baseline).
raw_tokens: usize,
/// Token count of the file outline.
outline_tokens: usize,
/// Token count of the chunk(s) matching the symbol.
target_tokens: usize,
/// `outline_tokens + target_tokens`.
total_tokens: usize,
/// Percentage saved by `total_tokens` relative to `raw_tokens`.
saved_pct: f64,
/// Whether the extracted chunk text mentions the symbol (case-insensitive).
quality_ok: bool,
}
/// A labelled semantic-search query together with the files it should surface.
struct QueryCase {
/// Short name shown in the results table.
label: String,
/// Natural-language query passed to the index.
query: String,
/// Paths that count as a correct hit for this query.
expected_paths: Vec<String>,
}
/// Measured result of one [`QueryCase`].
struct QueryRow {
/// Case label.
label: String,
/// Query text that was executed.
query: String,
/// Sum of the token counts of all returned results.
tokens: usize,
/// Wall-clock time of the query call in milliseconds.
latency_ms: u128,
/// Distinct result file paths in the order they were first returned.
top_files: Vec<String>,
/// Whether the first returned file is one of the expected paths.
hit_top1: bool,
/// Whether any of the first three returned files is an expected path.
hit_top3: bool,
}
/// A context-building benchmark case, either built in or loaded from a cases file.
struct ContextBenchCase {
/// Short name shown in the results tables.
label: String,
/// Task description handed to the context builders.
task: String,
/// Paths accepted as a correct answer; the first entry is read as the
/// "vanilla" full-file baseline.
expected_paths: Vec<String>,
}
/// Result of one benchmark arm ("vanilla", "tokenix", "codegraph" or "rtk")
/// for one context case. `None` fields are rendered as "n/a".
struct ContextArmRow {
/// Label of the case this row belongs to.
label: String,
/// Name of the arm that produced the row.
arm: &'static str,
/// Token count of the arm's output, when it was measured.
tokens: Option<usize>,
/// Elapsed milliseconds for the arm, when it was measured.
latency_ms: Option<u128>,
/// Whether the arm's output passed its quality check, when it was measured.
quality_ok: Option<bool>,
/// Short description of how the arm was run.
note: &'static str,
}
/// Top-level shape of the benchmark cases file (parsed as TOML).
#[derive(Deserialize)]
struct BenchCases {
/// Context cases; defaults to an empty list when the key is absent.
#[serde(default)]
context: Vec<BenchContextCase>,
}
/// One context case as written in the cases file.
#[derive(Deserialize)]
struct BenchContextCase {
/// Short name of the case.
label: String,
/// Task description for the context builders.
task: String,
/// Primary path expected in the produced context.
expected_path: String,
/// Additional paths that also count as correct; defaults to empty.
#[serde(default)]
acceptable_paths: Vec<String>,
}
/// Runs every benchmark section against `repo_root` and prints the results.
///
/// * `refresh_index` - when true, `indexer::index_repo` is run first.
/// * `query_budget` - token budget passed to each semantic query.
/// * `codegraph_path` - optional CodeGraph checkout; enables the CodeGraph
///   arm and the final comparison table.
/// * `cases_path` - optional cases file replacing the built-in context cases.
///
/// # Errors
/// Propagates any error returned by indexing, case loading or a measurement step.
pub fn run_benchmark(
repo_root: &Path,
refresh_index: bool,
query_budget: usize,
codegraph_path: Option<&Path>,
cases_path: Option<&Path>,
) -> Result<()> {
println!();
println!("{}", "=== tokenix real benchmark ===".bold());
println!(
"{}",
"Measures token reduction and retrieval quality using the actual index/search code."
.dimmed()
);
println!();
if refresh_index {
println!("{}", "Preparing fresh-enough index...".yellow());
let start = Instant::now();
// Indexer progress messages are echoed dimmed as they arrive.
let (_result, stats) = indexer::index_repo(repo_root, false, |msg| {
println!(" {}", msg.dimmed());
})?;
println!(
" indexed metadata ready in {:.1}s - {} files - {} chunks - {} stored tokens",
start.elapsed().as_secs_f64(),
stats.files,
stats.chunks,
format_num(stats.total_tokens)
);
println!();
} else if index_needs_refresh(repo_root) {
// Only warn; the benchmark still runs on whatever index exists.
println!(
"{}",
"Index is stale or missing; benchmark will use available metadata only. Pass --refresh-index to re-embed."
.yellow()
);
println!();
}
let rtk_available = check_rtk();
let context_cases = load_context_bench_cases(cases_path)?;
// Sections 1-5: each is measured and printed in turn.
let read_rows = measure_read_reduction(repo_root)?;
print_read_reduction(&read_rows);
let workflow_rows = measure_targeted_workflows(repo_root)?;
print_targeted_workflows(&workflow_rows);
let query_rows =
measure_semantic_quality(repo_root, query_budget, rtk_available, &context_cases)?;
print_semantic_quality(&query_rows, query_budget);
let context_rows = measure_context_homologation(repo_root, codegraph_path, &context_cases)?;
print_context_homologation(&context_rows);
let cmd_rows = measure_command_compression(repo_root, rtk_available)?;
print_command_compression(&cmd_rows);
print_verdict(&read_rows, &workflow_rows, &query_rows, &cmd_rows);
// Final section: CodeGraph comparison when a checkout was given,
// otherwise the internal graph statistics.
if let Some(path) = codegraph_path {
print_codegraph_comparison(repo_root, path, &workflow_rows, &query_rows, &context_rows)?;
} else {
print_internal_graph_stats(repo_root)?;
}
Ok(())
}
/// Returns the built-in context benchmark cases, used when no cases file is
/// given or the file defines no context cases.
fn default_context_bench_cases() -> Vec<ContextBenchCase> {
    // (label, task, expected path)
    let defaults: [(&str, &str, &str); 4] = [
        (
            "Hook fail-open",
            "how does hook fail open when index is stale or missing",
            "src/hook.rs",
        ),
        (
            "Chunking",
            "how are rust files chunked into symbols and outlines",
            "src/chunker.rs",
        ),
        (
            "Database repo",
            "postgres transaction pool user repository pagination",
            "benchmark/samples/database_client.ts",
        ),
        (
            "Compression",
            "how does cargo output compression keep errors",
            "src/compress.rs",
        ),
    ];
    defaults
        .iter()
        .map(|&(label, task, expected)| ContextBenchCase {
            label: label.to_owned(),
            task: task.to_owned(),
            expected_paths: vec![expected.to_owned()],
        })
        .collect()
}
/// Loads context benchmark cases from a TOML file, falling back to the
/// built-in defaults when no path is given or the file defines no cases.
///
/// For each case, `expected_path` and `acceptable_paths` are merged into one
/// de-duplicated list. The primary `expected_path` always stays first: the
/// "vanilla" arm reads the first entry as the baseline file, so sorting the
/// list (as was done previously) could silently swap the baseline to an
/// alternate path.
///
/// # Errors
/// Returns an error when the file cannot be read or is not valid TOML for
/// [`BenchCases`].
fn load_context_bench_cases(cases_path: Option<&Path>) -> Result<Vec<ContextBenchCase>> {
    let Some(path) = cases_path else {
        return Ok(default_context_bench_cases());
    };
    let raw = std::fs::read_to_string(path)?;
    let cases: BenchCases = toml::from_str(&raw)?;
    if cases.context.is_empty() {
        return Ok(default_context_bench_cases());
    }
    Ok(cases
        .context
        .into_iter()
        .map(|case| {
            let mut expected_paths: Vec<String> =
                Vec::with_capacity(1 + case.acceptable_paths.len());
            // Order-preserving de-duplication; the lists are tiny, so a
            // linear `contains` check is fine.
            for candidate in std::iter::once(case.expected_path).chain(case.acceptable_paths) {
                if !expected_paths.contains(&candidate) {
                    expected_paths.push(candidate);
                }
            }
            ContextBenchCase {
                label: case.label,
                task: case.task,
                expected_paths,
            }
        })
        .collect())
}
/// Measures every context case across the comparison arms.
///
/// Per case, rows are pushed in this order: "vanilla" (reads the first
/// expected file in full), "tokenix" (`build_task_context`), "codegraph"
/// (only when `codegraph_path` is given) and a placeholder "rtk" row.
/// Failures inside an arm are swallowed and show up as empty output.
fn measure_context_homologation(
    repo_root: &Path,
    codegraph_path: Option<&Path>,
    cases: &[ContextBenchCase],
) -> Result<Vec<ContextArmRow>> {
    let mut rows = Vec::new();
    for case in cases {
        // Arm 1: read the primary expected file in full.
        let primary = match case.expected_paths.first() {
            Some(path) => path.as_str(),
            None => "",
        };
        let expected_file = repo_root.join(primary);
        let timer = Instant::now();
        let full_file = std::fs::read_to_string(&expected_file).unwrap_or_default();
        rows.push(ContextArmRow {
            label: case.label.clone(),
            arm: "vanilla",
            tokens: Some(count_tokens(&full_file)),
            latency_ms: Some(timer.elapsed().as_millis()),
            quality_ok: Some(!full_file.is_empty()),
            note: "full expected file",
        });

        // Arm 2: tokenix task context.
        let timer = Instant::now();
        let context = build_task_context(repo_root, &case.task, 1200, 2).unwrap_or_default();
        rows.push(ContextArmRow {
            label: case.label.clone(),
            arm: "tokenix",
            tokens: Some(count_tokens(&context)),
            latency_ms: Some(timer.elapsed().as_millis()),
            quality_ok: Some(matches_expected_path(&context, &case.expected_paths)),
            note: "context --budget 1200",
        });

        // Arm 3: external CodeGraph CLI, only when a checkout was supplied.
        if let Some(root) = codegraph_path {
            let timer = Instant::now();
            let output = run_codegraph_context(root, repo_root, &case.task).unwrap_or_default();
            // Empty output means "not measured", so all three cells are None.
            let (tokens, latency_ms, quality_ok) = if output.is_empty() {
                (None, None, None)
            } else {
                let tokens = count_tokens(&output);
                let latency = timer.elapsed().as_millis();
                let hit = matches_expected_path(&output, &case.expected_paths);
                (Some(tokens), Some(latency), Some(hit))
            };
            rows.push(ContextArmRow {
                label: case.label.clone(),
                arm: "codegraph",
                tokens,
                latency_ms,
                quality_ok,
                note: "context --max-nodes 12 --max-code 4",
            });
        }

        // Arm 4: RTK is listed for completeness but never measured here.
        rows.push(ContextArmRow {
            label: case.label.clone(),
            arm: "rtk",
            tokens: None,
            latency_ms: None,
            quality_ok: None,
            note: "not a code-context tool",
        });
    }
    Ok(rows)
}
/// Returns true when `output` contains at least one of `expected_paths`
/// as a substring.
fn matches_expected_path(output: &str, expected_paths: &[String]) -> bool {
    for candidate in expected_paths {
        if output.contains(candidate.as_str()) {
            return true;
        }
    }
    false
}
/// Runs the CodeGraph CLI's `context` command for `task` against `repo_root`.
///
/// Returns an empty string when the built CLI (`dist/bin/codegraph.js` under
/// `root`) does not exist or the process exits unsuccessfully.
///
/// # Errors
/// Returns an error only when the `node` process cannot be run.
fn run_codegraph_context(root: &Path, repo_root: &Path, task: &str) -> Result<String> {
    let cli = root.join("dist/bin/codegraph.js");
    if !cli.exists() {
        return Ok(String::new());
    }
    let mut command = std::process::Command::new("node");
    command
        .arg(&cli)
        .arg("context")
        .arg(task)
        .arg("--path")
        .arg(repo_root)
        .arg("--max-nodes")
        .arg("12")
        .arg("--max-code")
        .arg("4");
    let out = command.output()?;
    if out.status.success() {
        Ok(String::from_utf8_lossy(&out.stdout).into_owned())
    } else {
        Ok(String::new())
    }
}
fn print_context_homologation(rows: &[ContextArmRow]) {
println!(
"{}",
"4. Context Homologation: Vanilla vs tokenix vs CodeGraph vs RTK".bold()
);
println!(
" {:<18} {:<10} {:>9} {:>8} {:>7} Note",
"Case", "Arm", "Tokens", "ms", "OK"
);
println!(" {}", "-".repeat(86).dimmed());
for row in rows {
let tokens = row
.tokens
.map(|t| format_num(t as i64))
.unwrap_or_else(|| "n/a".to_string());
let latency = row
.latency_ms
.map(|ms| ms.to_string())
.unwrap_or_else(|| "n/a".to_string());
let ok = row
.quality_ok
.map(|ok| if ok { "yes".green() } else { "no".red() }.to_string())
.unwrap_or_else(|| "n/a".to_string());
println!(
" {:<18} {:<10} {:>9} {:>8} {:>7} {}",
truncate(&row.label, 18),
row.arm,
tokens,
latency,
ok,
row.note.dimmed()
);
}
println!(" {}", "-".repeat(86).dimmed());
for arm in ["vanilla", "tokenix", "codegraph"] {
let arm_rows: Vec<&ContextArmRow> = rows
.iter()
.filter(|row| row.arm == arm && row.tokens.is_some())
.collect();
if arm_rows.is_empty() {
continue;
}
let token_sum: usize = arm_rows.iter().filter_map(|row| row.tokens).sum();
let hits = arm_rows
.iter()
.filter(|row| row.quality_ok == Some(true))
.count();
println!(
" {:<18} {:<10} {:>9} {:>8} {:>7}",
"TOTAL",
arm,
format_num(token_sum as i64),
"",
format!("{hits}/{}", arm_rows.len())
);
}
println!();
}
/// Reports whether the `rtk` binary can be launched with `--version`.
///
/// Only a launch failure counts as "unavailable"; the exit status is not
/// inspected.
fn check_rtk() -> bool {
    let binary = if cfg!(windows) { "rtk.exe" } else { "rtk" };
    let probe = std::process::Command::new(binary).arg("--version").output();
    match probe {
        Ok(_) => true,
        Err(_) => false,
    }
}
/// Token counts for one sampled command output under each compressor.
struct CmdRow {
/// Command line the sample output belongs to.
cmd: &'static str,
/// Token count of the uncompressed output.
vanilla: usize,
/// Token count after `compress_bash_output`.
tokenix: usize,
/// Token count after the RTK filter; `None` when RTK was not run or failed.
rtk: Option<usize>,
}
/// Compresses four sample command outputs and records token counts for the
/// raw text, the tokenix-compressed text and, when `rtk` is true, the
/// RTK-filtered text.
fn measure_command_compression(repo_root: &Path, rtk: bool) -> Result<Vec<CmdRow>> {
    let samples: [(&'static str, String); 4] = [
        ("git status", sample_git_status(repo_root)),
        ("git log -n 5", sample_git_log(repo_root)),
        ("cargo check", sample_cargo_check()),
        ("ls -R", sample_file_listing(repo_root)),
    ];
    let rows = IntoIterator::into_iter(samples)
        .map(|(cmd, raw)| {
            let vanilla = count_tokens(&raw);
            let compressed = crate::compress::compress_bash_output(cmd, &raw);
            let tokenix = count_tokens(&compressed);
            // RTK is only consulted when it was detected up front.
            let rtk_tokens = if rtk {
                rtk_pipe_filter(cmd, &raw).map(|filtered| count_tokens(&filtered))
            } else {
                None
            };
            CmdRow {
                cmd,
                vanilla,
                tokenix,
                rtk: rtk_tokens,
            }
        })
        .collect();
    Ok(rows)
}
/// Pipes `input` through `rtk pipe -f <filter>` and returns the filtered text.
///
/// The filter name is chosen from `cmd`; unknown commands return `None`
/// without spawning anything. `None` is also returned when the process
/// cannot be spawned, its stdin cannot be written, or it exits unsuccessfully.
fn rtk_pipe_filter(cmd: &str, input: &str) -> Option<String> {
    let filter = match cmd {
        "git status" => "git-status",
        "git log -n 5" => "git-log",
        "cargo check" => "cargo",
        "ls -R" => "find",
        _ => return None,
    };
    let exe = if cfg!(windows) { "rtk.exe" } else { "rtk" };
    let mut child = Command::new(exe)
        .args(["pipe", "-f", filter])
        .stdin(Stdio::piped())
        .stdout(Stdio::piped())
        .stderr(Stdio::null())
        .spawn()
        .ok()?;
    // Take ownership of stdin so the handle is dropped (closing the pipe)
    // as soon as the input has been written.
    let fed = match child.stdin.take() {
        Some(mut stdin) => stdin.write_all(input.as_bytes()).is_ok(),
        None => false,
    };
    if !fed {
        // Reap the child instead of returning with it still un-waited.
        let _ = child.kill();
        let _ = child.wait();
        return None;
    }
    let out = child.wait_with_output().ok()?;
    out.status
        .success()
        .then(|| String::from_utf8_lossy(&out.stdout).to_string())
}
/// Returns the output of `git status --short` run in `repo_root`, or a fixed
/// sample when git cannot be run or prints nothing but whitespace.
fn sample_git_status(repo_root: &Path) -> String {
    const FALLBACK: &str = " M src/query.rs\n M src/benchmark.rs\n?? .cgcignore\n";
    let captured = std::process::Command::new("git")
        .args(["status", "--short"])
        .current_dir(repo_root)
        .output();
    match captured {
        Ok(out) => {
            let text = String::from_utf8_lossy(&out.stdout).to_string();
            if text.trim().is_empty() {
                FALLBACK.to_string()
            } else {
                text
            }
        }
        Err(_) => FALLBACK.to_string(),
    }
}
/// Returns the output of `git log -n 5 --oneline` run in `repo_root`, or five
/// fixed sample lines when git cannot be run or prints nothing but whitespace.
fn sample_git_log(repo_root: &Path) -> String {
    let fallback = || {
        [
            "2f9d8a1 improve semantic ranking",
            "8a73c55 add symbol graph traversal",
            "19db3de lower cpu indexing",
            "cd01b91 add codex memory hook",
            "a1f73cc release benchmark docs",
        ]
        .join("\n")
    };
    let captured = std::process::Command::new("git")
        .args(["log", "-n", "5", "--oneline"])
        .current_dir(repo_root)
        .output();
    match captured {
        Ok(out) => {
            let text = String::from_utf8_lossy(&out.stdout).to_string();
            if text.trim().is_empty() {
                fallback()
            } else {
                text
            }
        }
        Err(_) => fallback(),
    }
}
/// Returns a fixed, synthetic `cargo check` transcript (one warning, one
/// error and the final summary line) joined with newlines.
fn sample_cargo_check() -> String {
    const TRANSCRIPT: [&str; 12] = [
        " Checking tokenix v0.1.0 (/path/to/tokenix)",
        "warning: unused variable: `candidate`",
        " --> src/query.rs:214:9",
        " |",
        "214 | let candidate = score_path(path);",
        " | ^^^^^^^^^ help: if this is intentional, prefix it with an underscore: `_candidate`",
        "error[E0425]: cannot find value `MAX_INDEX_AGE_SECS` in this scope",
        " --> src/hook.rs:88:42",
        " |",
        "88 | if index_staleness(repo_root, MAX_INDEX_AGE_SECS).stale {",
        " | ^^^^^^^^^^^^^^^^^^ not found in this scope",
        "error: could not compile `tokenix` due to 1 previous error; 1 warning emitted",
    ];
    let mut out = String::new();
    for (idx, line) in TRANSCRIPT.iter().enumerate() {
        if idx > 0 {
            out.push('\n');
        }
        out.push_str(line);
    }
    out
}
/// Builds a newline-separated listing of the benchmark files, relative to
/// `repo_root`. Falls back to a fixed six-file listing when collection fails
/// or yields nothing.
fn sample_file_listing(repo_root: &Path) -> String {
    let listing = match collect_benchmark_files(repo_root) {
        Ok(files) => files
            .iter()
            .map(|file| rel_path(repo_root, file))
            .collect::<Vec<_>>()
            .join("\n"),
        Err(_) => String::new(),
    };
    if !listing.trim().is_empty() {
        return listing;
    }
    [
        "src/main.rs",
        "src/query.rs",
        "src/hook.rs",
        "src/chunker.rs",
        "src/compress.rs",
        "benchmark/samples/database_client.ts",
    ]
    .join("\n")
}
/// Prints section 5: per-command token counts for vanilla, tokenix and RTK
/// output, the percentage tokenix saved, and whether tokenix is at least as
/// small as RTK.
fn print_command_compression(rows: &[CmdRow]) {
    println!("{}", "5. Command Output Compression".bold());
    println!(
        " {:<20} {:>10} {:>10} {:>10} {:>8} {:>9}",
        "Command", "Vanilla", "tokenix", "RTK", "Saved", "vs RTK"
    );
    println!(" {}", "-".repeat(75).dimmed());
    for row in rows {
        let (rtk_cell, verdict) = match row.rtk {
            Some(rtk) => (
                format_num(rtk as i64),
                if row.tokenix <= rtk { "ok" } else { "behind" },
            ),
            None => ("n/a".to_string(), "n/a"),
        };
        println!(
            " {:<20} {:>10} {:>10} {:>10} {:>7.1}% {:>9}",
            row.cmd,
            format_num(row.vanilla as i64),
            format_num(row.tokenix as i64),
            rtk_cell,
            saved_pct(row.vanilla, row.tokenix),
            verdict
        );
    }
    println!();
}
fn print_internal_graph_stats(repo_root: &Path) -> Result<()> {
let conn = crate::store::open_db(repo_root, false)?.unwrap();
let node_count: i64 = conn.query_row("SELECT COUNT(*) FROM graph_nodes", [], |r| r.get(0))?;
let edge_count: i64 = conn.query_row("SELECT COUNT(*) FROM graph_edges", [], |r| r.get(0))?;
println!("{}", "5. Internal Symbol Graph (CodeGraph baseline)".bold());
println!(" Nodes (symbols): {}", format_num(node_count).green());
println!(" Edges (relations): {}", format_num(edge_count).green());
println!(
" - tokenix uses this graph to boost RAG results by resolving callee/caller proximity."
);
println!(" - CodeGraph MCP servers typically offer similar structural context.");
println!();
Ok(())
}
/// Returns the `stale` flag reported by `index_staleness` for `repo_root`.
fn index_needs_refresh(repo_root: &Path) -> bool {
    let staleness = index_staleness(repo_root);
    staleness.stale
}
/// Collects every indexable file under `src` and `benchmark/samples`
/// (directories that do not exist are skipped) and returns them sorted.
fn collect_benchmark_files(repo_root: &Path) -> Result<Vec<PathBuf>> {
    let mut found = Vec::new();
    let roots = ["src", "benchmark/samples"]
        .iter()
        .map(|dir| repo_root.join(dir))
        .filter(|root| root.exists());
    for root in roots {
        collect_files_rec(&root, &mut found)?;
    }
    found.sort();
    Ok(found)
}
/// Recursively walks `dir`, appending every non-directory entry accepted by
/// `should_index` to `files`.
///
/// # Errors
/// Stops at the first directory or entry that cannot be read.
fn collect_files_rec(dir: &Path, files: &mut Vec<PathBuf>) -> Result<()> {
    let entries = std::fs::read_dir(dir)?;
    for item in entries {
        let candidate = item?.path();
        if candidate.is_dir() {
            collect_files_rec(&candidate, files)?;
            continue;
        }
        if should_index(&candidate) {
            files.push(candidate);
        }
    }
    Ok(())
}
/// Measures, for every benchmark file with at least 200 lines, how many
/// tokens its outline costs compared with the full file.
///
/// # Errors
/// Fails when file collection fails or a collected file cannot be read as
/// UTF-8 text.
fn measure_read_reduction(repo_root: &Path) -> Result<Vec<ReadRow>> {
    // Files shorter than this are not considered "large" and are skipped.
    const MIN_LINES: usize = 200;
    let files = collect_benchmark_files(repo_root)?;
    let mut rows = Vec::new();
    for file in &files {
        let text = std::fs::read_to_string(file)?;
        let line_count = text.lines().count();
        if line_count >= MIN_LINES {
            let rel = rel_path(repo_root, file);
            let outline = generate_outline(&text, &rel);
            let raw_tokens = count_tokens(&text);
            let outline_tokens = count_tokens(&outline);
            let saved = saved_pct(raw_tokens, outline_tokens);
            rows.push(ReadRow {
                path: rel,
                lines: line_count,
                raw_tokens,
                outline_tokens,
                saved_pct: saved,
            });
        }
    }
    Ok(rows)
}
fn measure_targeted_workflows(repo_root: &Path) -> Result<Vec<WorkflowRow>> {
let cases = [
WorkflowCase {
label: "Hook fail-open logic",
path: "src/hook.rs",
symbol: "run_hook",
},
WorkflowCase {
label: "Chunking algorithm",
path: "src/chunker.rs",
symbol: "chunk_rust",
},
WorkflowCase {
label: "SQLite vector search",
path: "src/store.rs",
symbol: "search_similar",
},
WorkflowCase {
label: "Rust service workflow",
path: "benchmark/samples/user_service.rs",
symbol: "UserService",
},
WorkflowCase {
label: "TS repository workflow",
path: "benchmark/samples/database_client.ts",
symbol: "UserRepository",
},
WorkflowCase {
label: "Go auth middleware",
path: "benchmark/samples/api_handler.go",
symbol: "authMiddleware",
},
];
let mut rows = Vec::new();
for case in cases {
let path = repo_root.join(case.path);
if !path.exists() {
continue;
}
let content = std::fs::read_to_string(&path)?;
let outline = generate_outline(&content, case.path);
let target = symbol_content(case.path, &content, case.symbol);
let raw_tokens = count_tokens(&content);
let outline_tokens = count_tokens(&outline);
let target_tokens = count_tokens(&target);
let total_tokens = outline_tokens + target_tokens;
rows.push(WorkflowRow {
label: case.label,
path: case.path.to_string(),
symbol: case.symbol,
raw_tokens,
outline_tokens,
target_tokens,
total_tokens,
saved_pct: saved_pct(raw_tokens, total_tokens),
quality_ok: target.to_lowercase().contains(&case.symbol.to_lowercase()),
});
}
Ok(rows)
}
/// Chunks `content` and returns, joined by newlines, the content of every
/// chunk whose symbol name contains `symbol` (case-insensitive).
fn symbol_content(path: &str, content: &str, symbol: &str) -> String {
    let wanted = symbol.to_lowercase();
    let mut pieces = Vec::new();
    for chunk in crate::chunker::chunk_file(path, content) {
        if chunk.symbol.to_lowercase().contains(&wanted) {
            pieces.push(chunk.content);
        }
    }
    pieces.join("\n")
}
fn measure_semantic_quality(
repo_root: &Path,
query_budget: usize,
_rtk: bool,
context_cases: &[ContextBenchCase],
) -> Result<Vec<QueryRow>> {
let cases = if context_cases.is_empty() {
default_query_cases()
} else {
context_cases
.iter()
.map(|case| QueryCase {
label: case.label.clone(),
query: case.task.clone(),
expected_paths: case.expected_paths.clone(),
})
.collect()
};
let mut rows = Vec::new();
for case in cases {
let start = Instant::now();
let results =
query_index(repo_root, &case.query, query_budget, 20, None)?.unwrap_or_default();
let latency_ms = start.elapsed().as_millis();
let tokens: usize = results.iter().map(|r| r.token_count).sum();
let top_files = unique_files(results.iter().map(|r| r.path.as_str()));
let hit_top1 = top_files
.first()
.map(|p| path_expected(p, &case.expected_paths))
.unwrap_or(false);
let hit_top3 = top_files
.iter()
.take(3)
.any(|p| path_expected(p, &case.expected_paths));
rows.push(QueryRow {
label: case.label,
query: case.query,
tokens,
latency_ms,
top_files,
hit_top1,
hit_top3,
});
}
Ok(rows)
}
/// Returns the built-in labelled queries with their expected files.
fn default_query_cases() -> Vec<QueryCase> {
    let case = |label: &str, query: &str, expected: &[&str]| QueryCase {
        label: label.to_string(),
        query: query.to_string(),
        expected_paths: expected.iter().map(|path| path.to_string()).collect(),
    };
    vec![
        case(
            "Hook behavior",
            "how does hook fail open when index is stale or missing",
            &["src/hook.rs"],
        ),
        case(
            "Chunking",
            "how are rust files chunked into symbols and outlines",
            &["src/chunker.rs"],
        ),
        case(
            "Vector search",
            "how is cosine similarity search implemented in sqlite",
            &["src/store.rs"],
        ),
        case(
            "Savings analytics",
            "how are token savings calculated from hook log",
            &["src/gain.rs", "src/store.rs"],
        ),
        case(
            "Output compression",
            "how does cargo output compression keep errors",
            &["src/compress.rs"],
        ),
        case(
            "Authentication sample",
            "jwt validation refresh token revocation role dependency",
            &["benchmark/samples/auth_middleware.py"],
        ),
        case(
            "Database sample",
            "postgres transaction pool user repository pagination",
            &["benchmark/samples/database_client.ts"],
        ),
    ]
}
/// Returns the distinct paths from `paths`, keeping the order in which each
/// path first appears.
fn unique_files<'a>(paths: impl Iterator<Item = &'a str>) -> Vec<String> {
    let mut seen: BTreeSet<&'a str> = BTreeSet::new();
    paths
        .filter(|path| seen.insert(*path))
        .map(String::from)
        .collect()
}
/// Returns true when `path` is exactly equal to one of the `expected` paths.
fn path_expected(path: &str, expected: &[String]) -> bool {
    for candidate in expected {
        if candidate.as_str() == path {
            return true;
        }
    }
    false
}
fn print_read_reduction(rows: &[ReadRow]) {
println!("{}", "1. Read Interception: Gross Token Reduction".bold());
if rows.is_empty() {
println!(
" {}",
"No default large-file cases found in this repository.".dimmed()
);
println!();
return;
}
println!(
" {:<42} {:>6} {:>10} {:>10} {:>8}",
"File", "Lines", "Raw", "Outline", "Saved"
);
println!(" {}", "-".repeat(83).dimmed());
let mut raw_total = 0usize;
let mut outline_total = 0usize;
for row in rows {
raw_total += row.raw_tokens;
outline_total += row.outline_tokens;
println!(
" {:<42} {:>6} {:>10} {:>10} {:>7.1}%",
truncate(&row.path, 42),
row.lines,
format_num(row.raw_tokens as i64),
format_num(row.outline_tokens as i64),
row.saved_pct
);
}
println!(" {}", "-".repeat(83).dimmed());
println!(
" {:<42} {:>6} {:>10} {:>10} {:>7.1}%",
"TOTAL",
rows.len(),
format_num(raw_total as i64),
format_num(outline_total as i64),
saved_pct(raw_total, outline_total)
);
println!();
}
fn print_targeted_workflows(rows: &[WorkflowRow]) {
println!("{}", "2. Targeted Workflow: Outline + Symbol Read".bold());
println!("{}", " Baseline is reading the full file once. tokenix cost is outline plus the target symbol chunk.".dimmed());
if rows.is_empty() {
println!(
" {}",
"No default symbol workflow cases found in this repository.".dimmed()
);
println!();
return;
}
println!(
" {:<24} {:>9} {:>9} {:>9} {:>9} {:>8} {:>5}",
"Task", "Raw", "Outline", "Target", "Total", "Saved", "OK"
);
println!(" {}", "-".repeat(85).dimmed());
let mut raw_total = 0usize;
let mut tokenix_total = 0usize;
let mut ok_total = 0usize;
for row in rows {
raw_total += row.raw_tokens;
tokenix_total += row.total_tokens;
if row.quality_ok {
ok_total += 1;
}
println!(
" {:<24} {:>9} {:>9} {:>9} {:>9} {:>7.1}% {:>5}",
truncate(row.label, 24),
format_num(row.raw_tokens as i64),
format_num(row.outline_tokens as i64),
format_num(row.target_tokens as i64),
format_num(row.total_tokens as i64),
row.saved_pct,
if row.quality_ok {
"yes".green()
} else {
"no".red()
}
);
println!(
" {} -> --symbol {}",
truncate(&row.path, 54).dimmed(),
row.symbol.dimmed()
);
}
println!(" {}", "-".repeat(85).dimmed());
println!(
" {:<24} {:>9} {:>9} {:>9} {:>9} {:>7.1}% {:>5}",
"TOTAL",
format_num(raw_total as i64),
"",
"",
format_num(tokenix_total as i64),
saved_pct(raw_total, tokenix_total),
format!("{}/{}", ok_total, rows.len())
);
println!();
}
fn print_semantic_quality(rows: &[QueryRow], query_budget: usize) {
println!("{}", "3. Semantic Search Quality".bold());
println!(
" Budget: {} tokens/query. Hit@1 means the first returned file is expected; Hit@3 allows the first three files.",
format_num(query_budget as i64)
);
println!(
" {:<22} {:>8} {:>8} {:>7} {:>7} Top files",
"Case", "Tokens", "ms", "Hit@1", "Hit@3"
);
println!(" {}", "-".repeat(104).dimmed());
let mut hit1 = 0usize;
let mut hit3 = 0usize;
for row in rows {
if row.hit_top1 {
hit1 += 1;
}
if row.hit_top3 {
hit3 += 1;
}
let top = row
.top_files
.iter()
.take(3)
.map(|p| truncate(p, 28))
.collect::<Vec<_>>()
.join(", ");
println!(
" {:<22} {:>8} {:>8} {:>7} {:>7} {}",
truncate(&row.label, 22),
format_num(row.tokens as i64),
row.latency_ms,
yes_no(row.hit_top1),
yes_no(row.hit_top3),
top
);
println!(" {}", row.query.dimmed());
}
println!(" {}", "-".repeat(104).dimmed());
println!(
" {:<22} {:>8} {:>8} {:>7} {:>7}",
"TOTAL",
"",
"",
format!("{}/{}", hit1, rows.len()),
format!("{}/{}", hit3, rows.len())
);
println!();
}
/// Prints the summary: aggregate savings for reads, targeted workflows and
/// command compression, plus the semantic-search Hit@3 count. The read and
/// workflow lines show "n/a" when their row sets are empty.
fn print_verdict(
    read_rows: &[ReadRow],
    workflow_rows: &[WorkflowRow],
    query_rows: &[QueryRow],
    cmd_rows: &[CmdRow],
) {
    println!("{}", "Verdict".bold());

    if read_rows.is_empty() {
        println!(" Read-only exploration: n/a for this benchmark case set.");
    } else {
        let raw: usize = read_rows.iter().map(|r| r.raw_tokens).sum();
        let outline: usize = read_rows.iter().map(|r| r.outline_tokens).sum();
        println!(
            " Read-only exploration saved {:.1}% ({}) tokens on large files (Vanilla baseline).",
            saved_pct(raw, outline),
            format_num(raw.saturating_sub(outline) as i64)
        );
    }

    if workflow_rows.is_empty() {
        println!(" Targeted workflows: n/a for this benchmark case set.");
    } else {
        let raw: usize = workflow_rows.iter().map(|r| r.raw_tokens).sum();
        let tokenix: usize = workflow_rows.iter().map(|r| r.total_tokens).sum();
        println!(
            " Targeted workflows saved {:.1}% ({}) tokens vs reading full files.",
            saved_pct(raw, tokenix),
            format_num(raw.saturating_sub(tokenix) as i64)
        );
    }

    let cmd_vanilla: usize = cmd_rows.iter().map(|r| r.vanilla).sum();
    let cmd_tokenix: usize = cmd_rows.iter().map(|r| r.tokenix).sum();
    println!(
        " Command output compression saved {:.1}% ({}) tokens on common tools.",
        saved_pct(cmd_vanilla, cmd_tokenix),
        format_num(cmd_vanilla.saturating_sub(cmd_tokenix) as i64)
    );

    let hit3 = query_rows.iter().filter(|r| r.hit_top3).count();
    println!(
        " Semantic search found an expected file in the top 3 for {}/{} labeled queries.",
        hit3,
        query_rows.len()
    );
    println!();
}
fn print_codegraph_comparison(
repo_root: &Path,
codegraph_path: &Path,
workflow_rows: &[WorkflowRow],
_query_rows: &[QueryRow],
context_rows: &[ContextArmRow],
) -> Result<()> {
let flow_raw: usize = workflow_rows.iter().map(|r| r.raw_tokens).sum();
let flow_tokenix: usize = workflow_rows.iter().map(|r| r.total_tokens).sum();
let readme = read_readme(codegraph_path);
let codegraph_cli = [codegraph_path.join("dist/bin/codegraph.js")]
.into_iter()
.find(|path| path.exists());
let tokenix_stats = open_db(repo_root, false)?
.as_ref()
.and_then(|conn| count_stats(conn).ok());
let codegraph_stats = codegraph_cli
.as_ref()
.and_then(|cli| codegraph_status(cli, repo_root).ok())
.flatten();
println!("{}", "6. CodeGraph Comparison".bold());
if codegraph_cli.is_some() {
println!(" Status: {}", "local CLI available".green());
} else {
println!(
" Status: {}",
"local CLI not built; static repo signals only".yellow()
);
}
println!(" {}", "-".repeat(91).dimmed());
println!(
" {:<28} {:<28} {:<28}",
"Capability", "tokenix (measured)", "CodeGraph (measured/est)"
);
println!(" {}", "-".repeat(91).dimmed());
let avg_tokens = |arm: &str| {
let rows: Vec<&ContextArmRow> = context_rows
.iter()
.filter(|row| row.arm == arm && row.tokens.is_some())
.collect();
if rows.is_empty() {
"n/a".to_string()
} else {
format_num(
(rows.iter().filter_map(|row| row.tokens).sum::<usize>() / rows.len()) as i64,
)
}
};
let avg_latency = |arm: &str| {
let rows: Vec<&ContextArmRow> = context_rows
.iter()
.filter(|row| row.arm == arm && row.latency_ms.is_some())
.collect();
if rows.is_empty() {
"n/a".to_string()
} else {
format!(
"{}ms",
rows.iter().filter_map(|row| row.latency_ms).sum::<u128>() / rows.len() as u128
)
}
};
println!(
" {:<28} {:<28} {:<28}",
"Context Tokens (avg)",
avg_tokens("tokenix"),
avg_tokens("codegraph")
);
println!(
" {:<28} {:<28} {:<28}",
"Token reduction (workflow)",
if workflow_rows.is_empty() {
"n/a".to_string()
} else {
format!("{:.1}%", saved_pct(flow_raw, flow_tokenix))
},
"not applicable (graph-based)"
);
println!(
" {:<28} {:<28} {:<28}",
"Context quality",
quality_summary(context_rows, "tokenix"),
quality_summary(context_rows, "codegraph")
);
println!(
" {:<28} {:<28} {:<28}",
"Search Latency",
avg_latency("tokenix"),
avg_latency("codegraph")
);
println!(
" {:<28} {:<28} {:<28}",
"Indexed files",
tokenix_stats
.as_ref()
.map(|stats| format_num(stats.files))
.unwrap_or_else(|| "n/a".to_string()),
codegraph_stats
.as_ref()
.map(|stats| format_num(stats.files))
.unwrap_or_else(|| "n/a".to_string())
);
println!(
" {:<28} {:<28} {:<28}",
"Index shape",
tokenix_stats
.as_ref()
.map(|stats| format!("{} chunks", format_num(stats.chunks)))
.unwrap_or_else(|| "n/a".to_string()),
codegraph_stats
.as_ref()
.map(|stats| {
format!(
"{} nodes / {} edges",
format_num(stats.nodes),
format_num(stats.edges)
)
})
.unwrap_or_else(|| "n/a".to_string())
);
println!(
" {:<28} {:<28} {:<28}",
"MCP/context feature",
"native CLI context",
yes_no_plain(contains_any(&readme, &["mcp", "context", "graph"]))
);
println!();
Ok(())
}
fn quality_summary(rows: &[ContextArmRow], arm: &str) -> String {
let measured: Vec<&ContextArmRow> = rows
.iter()
.filter(|row| row.arm == arm && row.quality_ok.is_some())
.collect();
if measured.is_empty() {
return "n/a".to_string();
}
let ok = measured
.iter()
.filter(|row| row.quality_ok == Some(true))
.count();
format!("{}/{}", ok, measured.len())
}
/// Counts parsed from the CodeGraph CLI's `status` output; a count that
/// cannot be parsed is stored as 0.
struct CodeGraphStatus {
/// Value following the "Files:" label.
files: i64,
/// Value following the "Nodes:" label.
nodes: i64,
/// Value following the "Edges:" label.
edges: i64,
}
/// Runs `node <cli> status <repo_root>` and parses the file, node and edge
/// counts from its stdout.
///
/// Returns `Ok(None)` when the command exits unsuccessfully. A count whose
/// label is missing or unparsable becomes 0.
///
/// # Errors
/// Returns an error when the `node` process cannot be run.
fn codegraph_status(cli: &Path, repo_root: &Path) -> Result<Option<CodeGraphStatus>> {
    let mut command = Command::new("node");
    command.arg(cli).arg("status").arg(repo_root);
    let out = command.output()?;
    if !out.status.success() {
        return Ok(None);
    }
    let report = String::from_utf8_lossy(&out.stdout);
    let count_for = |label: &str| parse_status_count(&report, label).unwrap_or(0);
    Ok(Some(CodeGraphStatus {
        files: count_for("Files:"),
        nodes: count_for("Nodes:"),
        edges: count_for("Edges:"),
    }))
}
/// Extracts the integer that follows `label` on the first line of `stdout`
/// containing that label.
///
/// Only the first whitespace-separated token after the label is considered,
/// and thousands separators (`,`) are removed before parsing. Returns `None`
/// when no line contains the label or the token is not a valid integer.
fn parse_status_count(stdout: &str, label: &str) -> Option<i64> {
    let (_, tail) = stdout.lines().find_map(|line| line.split_once(label))?;
    let token = tail.split_whitespace().next()?;
    token.replace(',', "").parse().ok()
}
/// Returns the ASCII-lowercased content of the first readable README under
/// `root` (trying `README.md`, `readme.md`, then `README`), or an empty
/// string when none can be read.
fn read_readme(root: &Path) -> String {
    ["README.md", "readme.md", "README"]
        .iter()
        .find_map(|name| std::fs::read_to_string(root.join(name)).ok())
        .map(|content| content.to_ascii_lowercase())
        .unwrap_or_default()
}
/// Returns true when `haystack` contains at least one of `needles`.
fn contains_any(haystack: &str, needles: &[&str]) -> bool {
    for needle in needles {
        if haystack.contains(*needle) {
            return true;
        }
    }
    false
}
/// Maps a boolean to the uncoloured table labels "claimed" / "not found".
fn yes_no_plain(value: bool) -> &'static str {
    match value {
        true => "claimed",
        false => "not found",
    }
}
/// Renders `path` relative to `repo_root` with forward slashes.
///
/// When `path` is not under `repo_root` it is rendered unchanged (apart from
/// the backslash-to-slash replacement).
fn rel_path(repo_root: &Path, path: &Path) -> String {
    let relative = match path.strip_prefix(repo_root) {
        Ok(stripped) => stripped,
        Err(_) => path,
    };
    relative.to_string_lossy().replace('\\', "/")
}
/// Returns the percentage by which `after` is smaller than `before`.
///
/// Yields 0.0 when `before` is zero, and a negative value when `after` is
/// larger than `before`.
fn saved_pct(before: usize, after: usize) -> f64 {
    if before == 0 {
        return 0.0;
    }
    let remaining = after as f64 / before as f64;
    (1.0 - remaining) * 100.0
}
/// Shortens `s` to fit a table column of `max` characters.
///
/// Strings of at most `max` characters are returned unchanged; longer ones
/// keep their first `max - 1` characters followed by `~`. Lengths are counted
/// in `char`s, not bytes.
fn truncate(s: &str, max: usize) -> String {
    let total = s.chars().count();
    if total > max {
        let mut shortened: String = s.chars().take(max.saturating_sub(1)).collect();
        shortened.push('~');
        shortened
    } else {
        s.to_string()
    }
}
/// Maps a boolean to a coloured table cell: green "yes" or red "no".
fn yes_no(value: bool) -> colored::ColoredString {
    match value {
        true => "yes".green(),
        false => "no".red(),
    }
}
/// Formats `n` with `,` as the thousands separator (e.g. `1234567` becomes
/// `"1,234,567"`).
///
/// Negative values keep a leading `-`, and separators are only placed between
/// digits. The previous implementation grouped the sign along with the
/// digits and produced strings such as `"-,123"`.
fn format_num(n: i64) -> String {
    // `unsigned_abs` avoids overflow for `i64::MIN`.
    let digits = n.unsigned_abs().to_string();
    let mut out = String::with_capacity(digits.len() + digits.len() / 3 + 1);
    if n < 0 {
        out.push('-');
    }
    for (idx, ch) in digits.chars().enumerate() {
        // Insert a separator whenever the digits still to come form whole
        // groups of three.
        if idx > 0 && (digits.len() - idx) % 3 == 0 {
            out.push(',');
        }
        out.push(ch);
    }
    out
}