use std::path::PathBuf;
use std::time::Instant;
use crate::commands::compress_md::{compress_text, Mode as MdMode};
use crate::config::Config;
use crate::filter;
use crate::json_util;
// Input-token price used for the cost-savings projection, in dollars per
// million tokens (quoted in print_human as Claude Sonnet input pricing).
const INPUT_COST_PER_MTOK: f64 = 3.0;
// Minimum quality_score (fraction of preserved signal terms / keywords)
// for a scenario to count as a quality pass.
const QUALITY_PASS_THRESHOLD: f64 = 0.50;
/// Measured outcome of one benchmark scenario.
///
/// Token counts throughout this module are estimated with the crude
/// bytes / 4 heuristic, not a real tokenizer.
#[derive(Clone)]
pub struct ScenarioResult {
// Scenario name, e.g. "cargo_build_noisy".
pub name: String,
// Category key: "bash_output", "markdown", "wrap_summarize", "wrap_crosscall".
pub category: String,
// Estimated tokens of the uncompressed input (bytes / 4).
pub baseline_tokens: usize,
// Estimated tokens of the compressed output (bytes / 4).
pub compressed_tokens: usize,
// Percentage saved, 0.0 when compression did not shrink the input.
pub reduction_pct: f64,
// Median per-call latency in microseconds; 0 means "not measured".
pub latency_us: u64,
// 0.0..=1.0 fraction of required keywords / signal terms preserved.
pub quality_score: f64,
// quality_score >= QUALITY_PASS_THRESHOLD.
pub quality_pass: bool,
// Tokens saved across calls; only populated by the wrap runner.
pub context_saved_tokens: usize,
// Number of timed iterations; 0 marks a skipped scenario.
pub iterations: usize,
}
/// Aggregated benchmark results across all scenarios, built by build_report
/// and rendered by print_human / to_json.
pub struct BenchmarkReport {
// Per-scenario results in run order.
pub results: Vec<ScenarioResult>,
// Sum of baseline token estimates over all scenarios.
pub total_baseline_tokens: usize,
// Sum of compressed token estimates over all scenarios.
pub total_compressed_tokens: usize,
// Overall token-weighted reduction percentage.
pub total_reduction_pct: f64,
// Token-weighted reduction for the "bash_output" category.
pub bash_reduction_pct: f64,
// Token-weighted reduction for the "markdown" category.
pub md_reduction_pct: f64,
// Token-weighted reduction for categories prefixed "wrap".
pub wrap_reduction_pct: f64,
// Mean of per-scenario median latencies (nonzero ones only).
pub avg_latency_us: u64,
// 95th percentile of per-scenario median latencies.
pub p95_latency_us: u64,
// Assumed proportional to total_reduction_pct (see build_report).
pub estimated_cost_savings_pct: f64,
// Scenarios whose quality_pass flag is set.
pub quality_pass_count: usize,
// Scenarios that ran (iterations > 0) but failed quality.
pub quality_fail_count: usize,
// Scenarios skipped entirely (iterations == 0).
pub quality_skip_count: usize,
}
/// How a scenario's content is compressed during the benchmark.
enum ScenarioKind {
// In-process line filter (filter::compress) with a command hint.
Filter { hint: String },
// In-process markdown compression (compress_text, Ultra mode).
Markdown,
// Spawn the installed `squeez wrap` binary `calls` times per iteration.
Wrap { calls: usize },
}
/// How quality_score judges the compressed output.
enum QualityMode {
// Required keywords must survive AND a fraction of automatically
// extracted diagnostic terms must be preserved.
Signal,
// Pass/fail purely on the required keywords (score 1.0 or 0.0).
Keywords,
}
/// One benchmark case: an input payload plus how to compress and score it.
struct Scenario {
// Display/name-filter key.
name: String,
// Category key used for grouping and weighted per-category stats.
category: String,
// Compression pipeline to exercise.
kind: ScenarioKind,
// Raw input text (fixture file contents or synthetic generator output).
content: String,
// Keywords that must survive compression; any miss scores 0.0.
required_keywords: Vec<String>,
// Scoring strategy for this scenario.
quality_mode: QualityMode,
}
/// Synthesize a noisy `cargo build` transcript: 80 download pairs, one
/// compile line, 30 unused-variable warnings, then two hard errors with
/// their abort/footer lines. The errors are the signal compression must keep.
fn make_cargo_build() -> String {
    const CRATES: [&str; 5] = ["serde", "tokio", "hyper", "reqwest", "clap"];
    let mut buf = String::new();
    for idx in 0..80 {
        buf += &format!(
            " Downloading crates.io index\n Downloading {} v{}.{}.{}\n",
            CRATES[idx % 5],
            idx / 10,
            idx % 10,
            0
        );
    }
    buf += " Compiling squeez v0.2.1\n";
    // Warnings are emitted for source lines 100..130.
    for line_no in 100..130 {
        buf += &format!(
            "warning: unused variable `x` --> src/lib.rs:{}:{}\n |\n{}| let x = 42;\n | ^ help: consider using `_x`\n",
            line_no, 5, line_no
        );
    }
    buf += "error[E0432]: unresolved import `crate::missing`\n --> src/main.rs:3:5\n |\n3 | use crate::missing;\n | ^^^^^^^^^^^^^^^ no `missing` in the root\n\n";
    buf += "error[E0308]: mismatched types\n --> src/filter.rs:42:10\n |\n42| return \"hello\";\n | ^^^^^^^ expected usize, found &str\n\n";
    buf += "error: aborting due to 2 previous errors\n";
    buf += "For more information about this error, try `rustc --explain E0432`.\n";
    buf += "error: could not compile `squeez` due to 2 previous errors\n";
    buf
}
/// Synthesize TypeScript compiler output: info chatter surrounding three
/// errors plus one warning, closed by the tsc summary table.
fn make_tsc_errors() -> String {
    let mut log = String::new();
    // 40 lines of harmless checker chatter before the diagnostics.
    for n in 0..40 {
        log.push_str(&format!("info: checking src/components/Component{}.tsx\n", n));
    }
    let diagnostics = [
        "src/components/Button.tsx(12,5): error TS2345: Argument of type 'string' is not assignable to parameter of type 'number'.\n",
        "src/components/Modal.tsx(34,9): error TS2304: Cannot find name 'useEffect'.\n",
        "src/api/client.ts(88,3): error TS2339: Property 'data' does not exist on type 'Response'.\n",
        "src/utils/format.ts(5,10): warning TS6133: 'unused' is declared but its value is never read.\n",
    ];
    for d in diagnostics.iter() {
        log.push_str(d);
    }
    // Trailing progress noise, then the error summary table.
    for n in 1..=20 {
        log.push_str(&format!("info: processed module {}/20\n", n));
    }
    log.push_str("Found 3 errors in 3 files.\n\nErrors Files\n 1 src/components/Button.tsx:12\n 1 src/components/Modal.tsx:34\n 1 src/api/client.ts:88\n");
    log
}
/// Synthesize 250 timestamped app-log lines cycling through DEBUG/INFO/WARN/
/// ERROR levels and seven canned messages, capped by two hard ERROR lines
/// that compression must preserve.
fn make_verbose_log() -> String {
    let levels = ["DEBUG", "DEBUG", "DEBUG", "INFO", "INFO", "WARN", "ERROR"];
    let msgs = [
        "request received method=GET path=/api/health",
        "database pool: 4/10 connections active",
        "cache hit key=user:12345 ttl=3540s",
        "processed request latency=12ms status=200",
        "scheduled job starting name=cleanup_old_sessions",
        "slow query detected duration=1250ms table=events",
        "upstream timeout after 30s url=https://api.external.com/webhook",
    ];
    // Collect the rotating body directly from an iterator; timestamps are
    // derived from the loop counter (seconds + a fake millisecond field).
    let mut log: String = (0..250)
        .map(|i| {
            format!(
                "2026-04-07T{:02}:{:02}:{:02}.{:03}Z [{}] {}\n",
                i / 3600,
                (i / 60) % 60,
                i % 60,
                i * 3 % 1000,
                levels[i % levels.len()],
                msgs[i % msgs.len()]
            )
        })
        .collect();
    log.push_str("2026-04-07T01:00:00.000Z [ERROR] OOM kill signal received — pod squeez-worker-7f9b restarting\n");
    log.push_str("2026-04-07T01:00:01.000Z [ERROR] connection to Redis lost — retrying in 5s\n");
    log
}
/// Synthesize 300 identical heartbeat lines followed by ten one-off lines;
/// dedup-style compression must drop the repetition but keep every unique line.
fn make_repetitive_output() -> String {
    let mut text = "2026-04-07 00:00:00 [TRACE] heartbeat ping to cluster-node-a\n".repeat(300);
    let tail = [
        "unique: deployment completed successfully version=1.2.3",
        "unique: rollout status: 5/5 pods updated",
        "unique: health check passed for all replicas",
        "unique: CDN cache invalidated region=us-east-1",
        "unique: metrics flushed to prometheus endpoint",
        "unique: alert rules reloaded count=42",
        "unique: backup snapshot created id=snap-0xdeadbeef",
        "unique: audit log entry recorded user=deploy-bot",
        "unique: TLS certificate renewed expiry=2027-04-07",
        "unique: session count=1234 active connections",
    ];
    for line in tail.iter() {
        text.push_str(line);
        text.push('\n');
    }
    text
}
/// Synthesize a `kubectl get pods -A`-style table: a header plus 60 rows
/// cycling namespaces, app names, and statuses (mostly Running with a few
/// CrashLoopBackOff/Error/Pending entries).
fn make_kubectl_pods() -> String {
    let namespaces = ["default", "kube-system", "monitoring", "ingress-nginx", "cert-manager"];
    let statuses = ["Running", "Running", "Running", "Running", "CrashLoopBackOff", "Error", "Pending"];
    let apps = ["api-server", "worker", "scheduler", "prometheus", "grafana", "redis", "postgres", "nginx"];
    let mut table = String::from("NAMESPACE NAME READY STATUS RESTARTS AGE\n");
    for i in 0..60 {
        let status = statuses[i % statuses.len()];
        // Pod names get pseudo-random hex suffixes derived from the index.
        let pod = format!("{}-{:x}-{:x}", apps[i % apps.len()], i * 0x1a2b, i * 0x3c4d);
        let ready = match status {
            "Running" => "1/1",
            _ => "0/1",
        };
        let age = format!("{}d", i / 5 + 1);
        table.push_str(&format!(
            "{:<16}{:<42}{:<8}{:<19}{:<11}{}\n",
            namespaces[i % namespaces.len()], pod, ready, status, i % 5, age
        ));
    }
    table
}
/// Locate the benchmark fixture directory.
///
/// Resolution order:
/// 1. `SQUEEZ_BENCH_FIXTURES` environment variable (explicit override).
/// 2. `../../bench/fixtures` relative to the running executable (the
///    `target/<profile>/` development layout), if it exists.
/// 3. The CWD-relative `bench/fixtures` path, returned unconditionally.
///
/// Fix: the original ended with `if candidate.is_dir() { return candidate; }`
/// followed by returning the exact same `PathBuf` — a dead duplicate branch.
/// Callers already tolerate a missing directory (fixture loads are `Option`),
/// so the final fallback is returned directly.
fn fixtures_dir() -> PathBuf {
    if let Ok(dir) = std::env::var("SQUEEZ_BENCH_FIXTURES") {
        return PathBuf::from(dir);
    }
    if let Ok(exe) = std::env::current_exe() {
        if let Some(parent) = exe.parent() {
            let candidate = parent.join("../../bench/fixtures");
            // Canonicalize for tidy display; keep the raw path if that fails.
            let candidate = candidate.canonicalize().unwrap_or(candidate);
            if candidate.is_dir() {
                return candidate;
            }
        }
    }
    PathBuf::from("bench/fixtures")
}
/// Locate a runnable `squeez` binary for the wrap scenarios: the current
/// executable if it is itself named "squeez", otherwise the installed copy
/// under `~/.claude/squeez/bin/squeez`. Returns None when neither exists.
fn find_binary() -> Option<PathBuf> {
    let current = std::env::current_exe().ok()?;
    let is_squeez = current.file_name().map(|n| n == "squeez").unwrap_or(false);
    if is_squeez {
        return Some(current);
    }
    let installed = PathBuf::from(format!(
        "{}/.claude/squeez/bin/squeez",
        crate::session::home_dir()
    ));
    if installed.exists() {
        Some(installed)
    } else {
        None
    }
}
/// Assemble the full scenario list: fixture-backed filter scenarios,
/// synthetic generators, markdown fixtures, and — only when a squeez binary
/// is available — subprocess-driven `wrap` scenarios. Fixture files that
/// are missing or unreadable are skipped silently so the benchmark degrades
/// gracefully on a partial checkout.
// NOTE(review): `&PathBuf` parameter — `&Path` would be the idiomatic
// signature and is deref-compatible with existing callers.
fn build_scenarios(fixtures: &PathBuf) -> Vec<Scenario> {
let mut s: Vec<Scenario> = Vec::new();
// Reads a fixture by file name; None when absent/unreadable.
let load = |name: &str| -> Option<String> {
std::fs::read_to_string(fixtures.join(name)).ok()
};
// Shorthand for a fixture-backed "bash_output" scenario scored in Signal
// mode: name, fixture file, filter hint, and required keywords.
macro_rules! f {
($name:literal, $fixture:literal, $hint:literal, [$($kw:literal),*]) => {
if let Some(content) = load($fixture) {
s.push(Scenario {
name: $name.to_string(),
category: "bash_output".to_string(),
kind: ScenarioKind::Filter { hint: $hint.to_string() },
content,
required_keywords: vec![$($kw.to_string()),*],
quality_mode: QualityMode::Signal,
});
}
};
}
// git log is scored in Keywords mode (pass/fail only, no signal terms),
// so it cannot use the macro above.
if let Some(content) = load("git_log_200.txt") {
s.push(Scenario {
name: "git_log_200".to_string(),
category: "bash_output".to_string(),
kind: ScenarioKind::Filter { hint: "git log".to_string() },
content,
required_keywords: vec![],
quality_mode: QualityMode::Keywords,
});
}
// Fixture-backed filter scenarios.
f!("git_diff", "git_diff.txt", "git diff", ["---", "+++"]);
f!("git_status", "git_status.txt", "git status", []);
f!("docker_logs", "docker_logs.txt", "docker", []);
f!("npm_install", "npm_install.txt", "npm", ["added"]);
f!("ps_aux", "ps_aux.txt", "ps", []);
f!("find_deep", "find_deep.txt", "find", []);
f!("ls_la", "ls_la.txt", "ls", ["total"]);
f!("env_dump", "env_dump.txt", "env", ["PATH"]);
f!("git_copilot", "git_copilot_session.txt", "git", []);
// Synthetic scenarios — always available, no fixture needed.
s.push(Scenario {
name: "cargo_build_noisy".to_string(),
category: "bash_output".to_string(),
kind: ScenarioKind::Filter { hint: "cargo build".to_string() },
content: make_cargo_build(),
required_keywords: vec!["error".to_string()],
quality_mode: QualityMode::Signal,
});
s.push(Scenario {
name: "tsc_errors".to_string(),
category: "bash_output".to_string(),
kind: ScenarioKind::Filter { hint: "tsc".to_string() },
content: make_tsc_errors(),
required_keywords: vec!["error TS".to_string(), "Found".to_string()],
quality_mode: QualityMode::Signal,
});
s.push(Scenario {
name: "verbose_app_log".to_string(),
category: "bash_output".to_string(),
kind: ScenarioKind::Filter { hint: "docker logs".to_string() },
content: make_verbose_log(),
required_keywords: vec!["ERROR".to_string()],
quality_mode: QualityMode::Signal,
});
s.push(Scenario {
name: "repetitive_output".to_string(),
category: "bash_output".to_string(),
kind: ScenarioKind::Filter { hint: "generic".to_string() },
content: make_repetitive_output(),
required_keywords: vec!["unique".to_string()],
quality_mode: QualityMode::Signal,
});
s.push(Scenario {
name: "kubectl_pods".to_string(),
category: "bash_output".to_string(),
kind: ScenarioKind::Filter { hint: "kubectl get pods".to_string() },
content: make_kubectl_pods(),
required_keywords: vec!["Running".to_string(), "NAME".to_string()],
quality_mode: QualityMode::Signal,
});
// Markdown compression scenarios.
if let Some(content) = load("mdcompress_claude_md.txt") {
s.push(Scenario {
name: "md_claude_md".to_string(),
category: "markdown".to_string(),
kind: ScenarioKind::Markdown,
content,
required_keywords: vec![],
quality_mode: QualityMode::Signal,
});
}
if let Some(content) = load("mdcompress_prose.txt") {
s.push(Scenario {
name: "md_prose".to_string(),
category: "markdown".to_string(),
kind: ScenarioKind::Markdown,
content,
required_keywords: vec![],
quality_mode: QualityMode::Signal,
});
}
// Wrap scenarios spawn the squeez binary, so only register them when one
// can be found; otherwise they would always be skipped.
if find_binary().is_some() {
if let Some(content) = load("summarize_huge.txt") {
s.push(Scenario {
name: "summarize_huge".to_string(),
category: "wrap_summarize".to_string(),
kind: ScenarioKind::Wrap { calls: 1 },
content,
required_keywords: vec!["squeez:summary".to_string()],
quality_mode: QualityMode::Keywords,
});
}
if let Some(content) = load("context_crosscall_1.txt") {
s.push(Scenario {
name: "crosscall_redundancy_3x".to_string(),
category: "wrap_crosscall".to_string(),
kind: ScenarioKind::Wrap { calls: 3 },
content,
required_keywords: vec!["squeez: identical to".to_string()],
quality_mode: QualityMode::Keywords,
});
}
}
s
}
/// Score compressed output against the original on a 0.0..=1.0 scale.
///
/// Empty output always scores 0.0, as does any missing (non-empty) required
/// keyword. Keywords mode then scores a flat 1.0; Signal mode additionally
/// measures the fraction of diagnostic terms from the baseline that survive
/// in the compressed text (case-insensitively), defaulting to 1.0 when the
/// baseline yields no terms.
fn quality_score(baseline: &str, compressed: &str, required: &[String], mode: &QualityMode) -> f64 {
    if compressed.is_empty() {
        return 0.0;
    }
    // A missing required keyword is an automatic failure in either mode.
    let keywords_ok = required
        .iter()
        .filter(|kw| !kw.is_empty())
        .all(|kw| compressed.contains(kw.as_str()));
    if !keywords_ok {
        return 0.0;
    }
    match mode {
        QualityMode::Keywords => 1.0,
        QualityMode::Signal => {
            let signal = extract_signal_terms(baseline);
            if signal.is_empty() {
                return 1.0;
            }
            let lowered = compressed.to_ascii_lowercase();
            let preserved = signal
                .iter()
                .filter(|term| lowered.contains(term.as_str()) || compressed.contains(term.as_str()))
                .count();
            preserved as f64 / signal.len() as f64
        }
    }
}
/// Pull "signal" terms out of raw tool output: for each line containing a
/// whole-word diagnostic keyword (error/warning/failed/...), collect its
/// words of length >= 4 (lowercased, punctuation-trimmed), excluding pure
/// numbers and a small stopword list. Bare path lines (a single token
/// starting with "./" or "/") are ignored. Order of the returned terms is
/// unspecified (HashSet iteration).
fn extract_signal_terms(text: &str) -> Vec<String> {
    const STOPWORDS: [&str; 9] = ["the", "and", "for", "this", "that", "with", "from", "into", "was"];
    const DIAGNOSTIC: [&str; 6] = ["error", "warning", "failed", "fatal", "panic", "exception"];
    // Whole-word match: neighbors on both sides must be non-alphanumeric.
    let word_hit = |haystack: &str, kw: &str| -> bool {
        match haystack.find(kw) {
            Some(pos) => {
                let bytes = haystack.as_bytes();
                let left_ok = pos == 0 || !bytes[pos - 1].is_ascii_alphanumeric();
                let right_ok = pos + kw.len() >= bytes.len()
                    || !bytes[pos + kw.len()].is_ascii_alphanumeric();
                left_ok && right_ok
            }
            None => false,
        }
    };
    let mut terms = std::collections::HashSet::new();
    for line in text.lines() {
        let trimmed = line.trim();
        // Skip bare file-path listings.
        if !trimmed.contains(' ') && (trimmed.starts_with("./") || trimmed.starts_with('/')) {
            continue;
        }
        let lowered = line.to_ascii_lowercase();
        if !DIAGNOSTIC.iter().any(|kw| word_hit(&lowered, kw)) {
            continue;
        }
        for raw in line.split_whitespace() {
            // Strip surrounding punctuation but keep path/locator chars.
            let token = raw.trim_matches(|c: char| {
                !c.is_alphanumeric() && c != '/' && c != '.' && c != '_' && c != ':' && c != '['
            });
            if token.len() < 4 {
                continue;
            }
            let key = token.to_ascii_lowercase();
            if key.chars().all(|c| c.is_ascii_digit()) || STOPWORDS.contains(&key.as_str()) {
                continue;
            }
            terms.insert(key);
        }
    }
    terms.into_iter().collect()
}
/// Benchmark one filter scenario: run filter::compress `iterations` times
/// over the scenario content, report the median latency, the token estimate
/// of the final output, and its quality score. Adaptive intensity and the
/// header line are disabled so runs are deterministic and comparable.
fn run_filter(scenario: &Scenario, hint: &str, iterations: usize) -> ScenarioResult {
    let config = Config {
        adaptive_intensity: false,
        show_header: false,
        ..Config::default()
    };
    let input_lines: Vec<String> = scenario.content.lines().map(str::to_string).collect();
    // Tokens estimated as bytes / 4 throughout the benchmark.
    let baseline_tokens = scenario.content.len() / 4;
    let mut timings: Vec<u64> = Vec::with_capacity(iterations);
    let mut output = String::new();
    for _ in 0..iterations {
        let started = Instant::now();
        // compress consumes its input, so clone the line buffer per run.
        let filtered = filter::compress(hint, input_lines.clone(), &config);
        timings.push(started.elapsed().as_micros() as u64);
        output = filtered.join("\n");
    }
    timings.sort_unstable();
    let latency_us = timings[timings.len() / 2];
    let compressed_tokens = output.len() / 4;
    let quality = quality_score(
        &scenario.content,
        &output,
        &scenario.required_keywords,
        &scenario.quality_mode,
    );
    ScenarioResult {
        name: scenario.name.clone(),
        category: scenario.category.clone(),
        baseline_tokens,
        compressed_tokens,
        reduction_pct: reduction_pct(baseline_tokens, compressed_tokens),
        latency_us,
        quality_score: quality,
        quality_pass: quality >= QUALITY_PASS_THRESHOLD,
        context_saved_tokens: 0,
        iterations,
    }
}
/// Benchmark one markdown scenario: compress the content with compress_text
/// in Ultra mode `iterations` times, reporting median latency, output token
/// estimate, and quality score of the final run.
fn run_markdown(scenario: &Scenario, iterations: usize) -> ScenarioResult {
    // Tokens estimated as bytes / 4 throughout the benchmark.
    let baseline_tokens = scenario.content.len() / 4;
    let mut timings: Vec<u64> = Vec::with_capacity(iterations);
    let mut output = String::new();
    for _ in 0..iterations {
        let started = Instant::now();
        let result = compress_text(&scenario.content, MdMode::Ultra);
        timings.push(started.elapsed().as_micros() as u64);
        output = result.output;
    }
    timings.sort_unstable();
    let latency_us = timings[timings.len() / 2];
    let compressed_tokens = output.len() / 4;
    let quality = quality_score(
        &scenario.content,
        &output,
        &scenario.required_keywords,
        &scenario.quality_mode,
    );
    ScenarioResult {
        name: scenario.name.clone(),
        category: scenario.category.clone(),
        baseline_tokens,
        compressed_tokens,
        reduction_pct: reduction_pct(baseline_tokens, compressed_tokens),
        latency_us,
        quality_score: quality,
        quality_pass: quality >= QUALITY_PASS_THRESHOLD,
        context_saved_tokens: 0,
        iterations,
    }
}
/// Benchmark one `wrap` scenario by spawning the squeez binary `calls` times
/// per iteration with an isolated SQUEEZ_DIR state directory, so cross-call
/// dedup starts fresh each iteration. Returns a skipped result
/// (iterations: 0) when no binary is found.
fn run_wrap(scenario: &Scenario, calls: usize, iterations: usize) -> ScenarioResult {
let binary = match find_binary() {
Some(b) => b,
None => {
// No binary: report a zeroed, quality-failing result marked skipped.
return ScenarioResult {
name: scenario.name.clone(),
category: scenario.category.clone(),
baseline_tokens: scenario.content.len() / 4,
compressed_tokens: scenario.content.len() / 4,
reduction_pct: 0.0,
latency_us: 0,
quality_score: 0.0,
quality_pass: false,
context_saved_tokens: 0,
iterations: 0,
};
}
};
let baseline_tokens = scenario.content.len() / 4;
// Scratch area keyed by PID; removed before returning.
let tmp_dir = std::env::temp_dir().join(format!("squeez_bench_{}", std::process::id()));
let _ = std::fs::create_dir_all(&tmp_dir);
let fixture_file = tmp_dir.join("input.txt");
// NOTE(review): squeez_dir/sessions/memory below are created but the loop
// uses the per-iteration state_N directories instead — this one looks
// unused; confirm before removing.
let squeez_dir = tmp_dir.join("squeez_state");
let _ = std::fs::create_dir_all(&squeez_dir);
let _ = std::fs::create_dir_all(squeez_dir.join("sessions"));
let _ = std::fs::create_dir_all(squeez_dir.join("memory"));
if std::fs::write(&fixture_file, &scenario.content).is_err() {
// Could not stage the input file: bail out with a neutral result.
// NOTE(review): this path sets quality_pass=true with iterations=0,
// so build_report counts it as both a pass and a skip — confirm intended.
let _ = std::fs::remove_dir_all(&tmp_dir);
return ScenarioResult {
name: scenario.name.clone(),
category: scenario.category.clone(),
baseline_tokens,
compressed_tokens: baseline_tokens,
reduction_pct: 0.0,
latency_us: 0,
quality_score: 1.0,
quality_pass: true,
context_saved_tokens: 0,
iterations: 0,
};
}
let mut all_latencies_us: Vec<u64> = Vec::new();
let mut last_output_all_calls = String::new();
let mut total_compressed_tokens_per_run: Vec<usize> = Vec::new();
for _iter in 0..iterations {
// Fresh state dir per iteration so dedup across calls is measured from
// a cold start every time.
let iter_state_dir = tmp_dir.join(format!("state_{}", _iter));
let _ = std::fs::create_dir_all(iter_state_dir.join("sessions"));
let _ = std::fs::create_dir_all(iter_state_dir.join("memory"));
let mut run_total_compressed = 0usize;
let mut iter_output = String::new();
let t_run_start = Instant::now();
for call_idx in 0..calls {
// Multi-call scenarios prefer numbered cross-call fixtures when
// present; otherwise every call reuses the staged input file.
let input_file = if calls > 1 {
let alt = format!("context_crosscall_{}.txt", call_idx + 1);
let alt_path = fixtures_dir().join(&alt);
if alt_path.exists() { alt_path } else { fixture_file.clone() }
} else {
fixture_file.clone()
};
let t0 = Instant::now();
let output = std::process::Command::new(&binary)
.arg("wrap")
.arg(format!("cat {}", input_file.display()))
.env("SQUEEZ_DIR", &iter_state_dir)
.output();
let elapsed = t0.elapsed().as_micros() as u64;
all_latencies_us.push(elapsed);
if let Ok(out) = output {
let s = String::from_utf8_lossy(&out.stdout).to_string();
run_total_compressed += s.len() / 4;
// Only the last call's output is scored for quality.
if call_idx + 1 == calls {
iter_output = s;
}
}
}
// t_run_start is currently unused (per-call timings are recorded above);
// kept to silence the unused-variable warning.
let _ = t_run_start; total_compressed_tokens_per_run.push(run_total_compressed);
last_output_all_calls = iter_output;
let _ = std::fs::remove_dir_all(&iter_state_dir);
}
let _ = std::fs::remove_dir_all(&tmp_dir);
all_latencies_us.sort_unstable();
// Median per-call subprocess latency across all iterations.
let median_us = if all_latencies_us.is_empty() {
0
} else {
all_latencies_us[all_latencies_us.len() / 2]
};
// Baseline assumes every call would otherwise resend the full content.
let baseline_total = baseline_tokens * calls;
let avg_compressed: usize = if total_compressed_tokens_per_run.is_empty() {
baseline_total
} else {
total_compressed_tokens_per_run.iter().sum::<usize>() / total_compressed_tokens_per_run.len()
};
let reduction = reduction_pct(baseline_total, avg_compressed);
let qscore = quality_score(
&scenario.content,
&last_output_all_calls,
&scenario.required_keywords,
&scenario.quality_mode,
);
ScenarioResult {
name: scenario.name.clone(),
category: scenario.category.clone(),
baseline_tokens: baseline_total,
compressed_tokens: avg_compressed,
reduction_pct: reduction,
latency_us: median_us,
quality_score: qscore,
quality_pass: qscore >= QUALITY_PASS_THRESHOLD,
context_saved_tokens: if baseline_total > avg_compressed {
baseline_total - avg_compressed
} else {
0
},
iterations,
}
}
/// Dispatch a scenario to the runner matching its kind.
fn run_scenario(scenario: &Scenario, iterations: usize) -> ScenarioResult {
match &scenario.kind {
ScenarioKind::Filter { hint } => run_filter(scenario, hint, iterations),
ScenarioKind::Markdown => run_markdown(scenario, iterations),
ScenarioKind::Wrap { calls } => run_wrap(scenario, *calls, iterations),
}
}
/// Percentage of tokens saved going from `before` to `after`.
/// Clamped at 0.0 when nothing was saved (or when `before` is 0).
fn reduction_pct(before: usize, after: usize) -> f64 {
    match before {
        0 => 0.0,
        total => (before.saturating_sub(after) as f64 / total as f64) * 100.0,
    }
}
/// Token-weighted reduction over all results whose category starts with
/// `category_prefix` (so "wrap" covers wrap_summarize and wrap_crosscall).
/// Returns 0.0 when no result matches.
fn weighted_avg_reduction(results: &[ScenarioResult], category_prefix: &str) -> f64 {
    let mut total_baseline = 0usize;
    let mut total_compressed = 0usize;
    let mut matched = false;
    for r in results.iter().filter(|r| r.category.starts_with(category_prefix)) {
        matched = true;
        total_baseline += r.baseline_tokens;
        total_compressed += r.compressed_tokens;
    }
    if !matched {
        return 0.0;
    }
    reduction_pct(total_baseline, total_compressed)
}
/// Aggregate per-scenario results into the summary report.
fn build_report(results: Vec<ScenarioResult>) -> BenchmarkReport {
let total_baseline: usize = results.iter().map(|r| r.baseline_tokens).sum();
let total_compressed: usize = results.iter().map(|r| r.compressed_tokens).sum();
let total_reduction = reduction_pct(total_baseline, total_compressed);
// Category breakdowns are token-weighted, not simple per-scenario means.
let bash_reduction = weighted_avg_reduction(&results, "bash_output");
let md_reduction = weighted_avg_reduction(&results, "markdown");
// "wrap" prefix covers both wrap_summarize and wrap_crosscall.
let wrap_reduction = weighted_avg_reduction(&results, "wrap");
// Latency stats run over per-scenario medians; zero (unmeasured/skipped)
// entries are excluded.
let mut all_latencies: Vec<u64> = results.iter().filter(|r| r.latency_us > 0).map(|r| r.latency_us).collect();
all_latencies.sort_unstable();
let avg_latency_us = if all_latencies.is_empty() {
0
} else {
all_latencies.iter().sum::<u64>() / all_latencies.len() as u64
};
let p95_latency_us = if all_latencies.is_empty() {
0
} else {
let idx = (all_latencies.len() as f64 * 0.95) as usize;
all_latencies[idx.min(all_latencies.len() - 1)]
};
// Cost savings are modeled as directly proportional to token reduction.
let cost_savings = total_reduction;
// NOTE(review): pass counting does not exclude iterations == 0, and
// run_wrap's write-failure path returns quality_pass=true with
// iterations=0 — such a result is counted as both a pass and a skip.
// Confirm whether that double-count is intended.
let quality_pass = results.iter().filter(|r| r.quality_pass).count();
let quality_fail = results.iter().filter(|r| !r.quality_pass && r.iterations > 0).count();
let quality_skip = results.iter().filter(|r| r.iterations == 0).count();
BenchmarkReport {
results,
total_baseline_tokens: total_baseline,
total_compressed_tokens: total_compressed,
total_reduction_pct: total_reduction,
bash_reduction_pct: bash_reduction,
md_reduction_pct: md_reduction,
wrap_reduction_pct: wrap_reduction,
avg_latency_us,
p95_latency_us,
estimated_cost_savings_pct: cost_savings,
quality_pass_count: quality_pass,
quality_fail_count: quality_fail,
quality_skip_count: quality_skip,
}
}
/// Render the report as a human-readable table plus summary sections on
/// stdout (scenario table, category breakdown, cost projection, latency,
/// quality, and interpretation notes).
pub fn print_human(report: &BenchmarkReport) {
println!();
println!("╔══════════════════════════════════════════════════════════════════════════════╗");
println!("║ squeez benchmark — token reduction & quality report ║");
println!("╚══════════════════════════════════════════════════════════════════════════════╝");
println!();
println!("{:<32} {:>8} {:>8} {:>10} {:>8} {:>7} {}", "SCENARIO", "BEFORE", "AFTER", "REDUCTION", "LATENCY", "QUALITY", "STATUS");
println!("{}", "─".repeat(84));
// Scenarios arrive grouped by category; emit a category banner whenever
// the category changes.
let mut last_cat = String::new();
for r in &report.results {
// iterations == 0 marks a scenario that never ran.
if r.iterations == 0 {
println!("{:<32} [skipped — binary not found]", r.name);
continue;
}
if r.category != last_cat {
println!();
println!(" ▸ {}", r.category.replace('_', " ").to_uppercase());
last_cat = r.category.clone();
}
let status = if r.quality_pass { "✅" } else { "❌ quality" };
let latency_str = format_latency(r.latency_us);
println!(
" {:<30} {:>6}tk {:>6}tk {:>8.1}% {:>8} {:>5.0}% {}",
r.name,
r.baseline_tokens,
r.compressed_tokens,
r.reduction_pct,
latency_str,
r.quality_score * 100.0,
status
);
}
println!();
println!("{}", "═".repeat(84));
println!();
println!("SUMMARY");
println!(" Total token reduction {:>7.1}% ({} tk → {} tk)",
report.total_reduction_pct,
report.total_baseline_tokens,
report.total_compressed_tokens,
);
println!();
println!(" ├─ Bash output {:>7.1}% (filter pipeline)", report.bash_reduction_pct);
println!(" ├─ Markdown/context {:>7.1}% (compress-md)", report.md_reduction_pct);
println!(" └─ Wrap/cross-call {:>7.1}% (context engine + dedup)", report.wrap_reduction_pct);
println!();
println!("ESTIMATED COST SAVINGS (Claude Sonnet 4.6 · $3.00/MTok input)");
let baseline_cost_per_mtok = INPUT_COST_PER_MTOK;
let savings_frac = report.estimated_cost_savings_pct / 100.0;
// Project monthly savings for three call volumes, assuming ~2k context
// tokens per call over a 30-day month.
for calls_per_day in [100u64, 1_000, 10_000] {
let avg_context_tokens_per_call = 2_000.0f64;
let monthly_tokens = calls_per_day as f64 * avg_context_tokens_per_call * 30.0;
let baseline_cost = monthly_tokens / 1_000_000.0 * baseline_cost_per_mtok;
let saved = baseline_cost * savings_frac;
println!(" {:>6} calls/day → ${:.2}/month baseline → ${:.2} saved/month ({:.1}%)",
format_num(calls_per_day), baseline_cost, saved, report.estimated_cost_savings_pct);
}
println!();
println!("LATENCY (compression overhead, filter mode)");
println!(" avg p50 {:>8}", format_latency(report.avg_latency_us));
println!(" p95 {:>8}", format_latency(report.p95_latency_us));
println!();
println!("QUALITY (≥{:.0}% of key terms preserved)", QUALITY_PASS_THRESHOLD * 100.0);
let total_scored = report.quality_pass_count + report.quality_fail_count;
println!(" passed {}/{}", report.quality_pass_count, total_scored);
if report.quality_fail_count > 0 {
println!(" FAILED {}/{}", report.quality_fail_count, total_scored);
println!();
// List every failing scenario with its score.
for r in report.results.iter().filter(|r| !r.quality_pass && r.iterations > 0) {
println!(" ⚠ {} quality={:.0}%", r.name, r.quality_score * 100.0);
}
}
if report.quality_skip_count > 0 {
println!(" skipped {} (binary not found)", report.quality_skip_count);
}
println!();
println!("INTERPRETATION");
println!(" Best gains: high-volume/noisy outputs (ps aux, logs, repetitive lines)");
println!(" Moderate: structured diffs and markdown prose");
println!(" Trade-off: ultra-mode truncates aggressively — use --no-squeez for deep diffs");
println!(" Recommendation: keep adaptive_intensity=true for maximum context budget savings");
println!();
}
/// Human-format a microsecond latency: " n/a" for 0 (unmeasured), then
/// µs / ms / s with scale-appropriate precision.
fn format_latency(us: u64) -> String {
    match us {
        0 => " n/a".to_string(),
        1..=999 => format!("{}µs", us),
        1_000..=999_999 => format!("{:.1}ms", us as f64 / 1_000.0),
        _ => format!("{:.2}s", us as f64 / 1_000_000.0),
    }
}
/// Format an integer with comma thousands separators (e.g. 1234567 → "1,234,567").
fn format_num(n: u64) -> String {
    let digits: Vec<char> = n.to_string().chars().collect();
    let len = digits.len();
    let mut grouped = String::with_capacity(len + len / 3);
    for (i, c) in digits.iter().enumerate() {
        // A separator goes before every group of three counted from the right.
        if i > 0 && (len - i) % 3 == 0 {
            grouped.push(',');
        }
        grouped.push(*c);
    }
    grouped
}
/// Serialize the report as pretty-printed JSON (hand-rolled — no serde
/// dependency). Strings pass through json_util::escape_str; floats are
/// rounded to fixed precision, so this is a lossy snapshot of the report.
pub fn to_json(report: &BenchmarkReport) -> String {
let mut out = String::new();
out.push_str("{\n");
// Bump schema_version if any field name or meaning changes.
out.push_str(" \"schema_version\": 1,\n");
out.push_str(&format!(" \"squeez_version\": \"{}\",\n", env!("CARGO_PKG_VERSION")));
out.push_str(&format!(" \"total_baseline_tokens\": {},\n", report.total_baseline_tokens));
out.push_str(&format!(" \"total_compressed_tokens\": {},\n", report.total_compressed_tokens));
out.push_str(&format!(" \"total_reduction_pct\": {:.2},\n", report.total_reduction_pct));
out.push_str(&format!(" \"bash_reduction_pct\": {:.2},\n", report.bash_reduction_pct));
out.push_str(&format!(" \"md_reduction_pct\": {:.2},\n", report.md_reduction_pct));
out.push_str(&format!(" \"wrap_reduction_pct\": {:.2},\n", report.wrap_reduction_pct));
out.push_str(&format!(" \"estimated_cost_savings_pct\": {:.2},\n", report.estimated_cost_savings_pct));
out.push_str(&format!(" \"avg_latency_us\": {},\n", report.avg_latency_us));
out.push_str(&format!(" \"p95_latency_us\": {},\n", report.p95_latency_us));
out.push_str(&format!(" \"quality_pass_count\": {},\n", report.quality_pass_count));
out.push_str(&format!(" \"quality_fail_count\": {},\n", report.quality_fail_count));
out.push_str(&format!(" \"quality_skip_count\": {},\n", report.quality_skip_count));
out.push_str(" \"scenarios\": [\n");
for (i, r) in report.results.iter().enumerate() {
// No trailing comma after the last array element.
let comma = if i + 1 < report.results.len() { "," } else { "" };
out.push_str(" {\n");
out.push_str(&format!(" \"name\": \"{}\",\n", json_util::escape_str(&r.name)));
out.push_str(&format!(" \"category\": \"{}\",\n", json_util::escape_str(&r.category)));
out.push_str(&format!(" \"baseline_tokens\": {},\n", r.baseline_tokens));
out.push_str(&format!(" \"compressed_tokens\": {},\n", r.compressed_tokens));
out.push_str(&format!(" \"reduction_pct\": {:.2},\n", r.reduction_pct));
out.push_str(&format!(" \"latency_us\": {},\n", r.latency_us));
out.push_str(&format!(" \"quality_score\": {:.4},\n", r.quality_score));
out.push_str(&format!(" \"quality_pass\": {},\n", r.quality_pass));
out.push_str(&format!(" \"context_saved_tokens\": {},\n", r.context_saved_tokens));
out.push_str(&format!(" \"iterations\": {}\n", r.iterations));
out.push_str(&format!(" }}{}\n", comma));
}
out.push_str(" ]\n");
out.push('}');
out
}
/// Entry point for `squeez benchmark`. Parses flags, runs the (optionally
/// filtered) scenario set, and emits the report. Progress goes to stderr;
/// stdout carries either the human report or (with --json) the JSON report.
/// Exit code: 0 on success, 1 when any scenario fails quality (or no
/// scenario matched the filter), 2 on an unknown flag.
pub fn run(args: &[String]) -> i32 {
let mut json_mode = false;
let mut output_file: Option<String> = None;
let mut scenario_filter: Option<String> = None;
let mut iterations: usize = 3;
let mut list_only = false;
// Manual index-based parse so value flags can consume the next argument.
let mut i = 0;
while i < args.len() {
match args[i].as_str() {
"--json" => json_mode = true,
"--list" => list_only = true,
"--output" | "-o" => {
i += 1;
output_file = args.get(i).cloned();
}
"--scenario" | "-s" => {
i += 1;
scenario_filter = args.get(i).cloned();
}
"--iterations" | "-n" => {
i += 1;
// Unparseable values silently fall back to the default of 3.
if let Some(v) = args.get(i) {
iterations = v.parse().unwrap_or(3);
}
}
"-h" | "--help" => {
print_help();
return 0;
}
other => {
eprintln!("squeez benchmark: unknown flag '{}'", other);
return 2;
}
}
i += 1;
}
let fixtures = fixtures_dir();
let all_scenarios = build_scenarios(&fixtures);
// --list prints names and exits without running anything.
if list_only {
println!("Available scenarios ({}):", all_scenarios.len());
for s in &all_scenarios {
println!(" {:32} [{}]", s.name, s.category);
}
return 0;
}
// --scenario matches on substring of either the name or the category.
let to_run: Vec<&Scenario> = if let Some(ref filter) = scenario_filter {
all_scenarios
.iter()
.filter(|s| s.name.contains(filter.as_str()) || s.category.contains(filter.as_str()))
.collect()
} else {
all_scenarios.iter().collect()
};
if to_run.is_empty() {
eprintln!("squeez benchmark: no scenarios matched '{}'", scenario_filter.as_deref().unwrap_or(""));
return 1;
}
eprintln!(
"squeez benchmark: running {} scenario(s) × {} iteration(s) ...",
to_run.len(),
iterations
);
eprintln!(" fixtures dir: {}", fixtures.display());
eprintln!();
// Run scenarios sequentially, echoing one progress line each.
let results: Vec<ScenarioResult> = to_run
.iter()
.map(|s| {
eprint!(" {:32} ... ", s.name);
let r = run_scenario(s, iterations);
if r.iterations == 0 {
eprintln!("skipped");
} else {
eprintln!("{:.1}% reduction quality={:.0}%", r.reduction_pct, r.quality_score * 100.0);
}
r
})
.collect();
let report = build_report(results);
let json = to_json(&report);
// --output writes the JSON file in addition to whichever stdout format
// was selected; a write failure only warns.
if let Some(ref path) = output_file {
match std::fs::write(path, &json) {
Ok(_) => eprintln!(" JSON report → {}", path),
Err(e) => eprintln!(" warn: could not write {}: {}", path, e),
}
}
if json_mode {
println!("{}", json);
} else {
print_human(&report);
}
if report.quality_fail_count > 0 { 1 } else { 0 }
}
/// Print the `squeez benchmark` usage text to stderr, keeping stdout free
/// for report output.
fn print_help() {
    let lines = [
        "squeez benchmark — measure token reduction, cost savings, latency, quality",
        "",
        "USAGE",
        " squeez benchmark [OPTIONS]",
        "",
        "OPTIONS",
        " --list List all available scenarios",
        " --scenario, -s <name> Run only scenarios whose name/category contains <name>",
        " --iterations, -n <n> Iterations per scenario (default: 3)",
        " --json Print JSON report to stdout",
        " --output, -o <file> Write JSON report to <file>",
        " --help, -h Show this help",
        "",
        "ENVIRONMENT",
        " SQUEEZ_BENCH_FIXTURES Override fixture directory path",
        "",
        "EXAMPLES",
        " squeez benchmark",
        " squeez benchmark --scenario git",
        " squeez benchmark --json --output bench/report.json",
        " squeez benchmark -n 5 --scenario bash_output",
    ];
    for line in lines.iter() {
        eprintln!("{}", line);
    }
}