use clap::Parser;
use rand::RngExt as _;
use serde::{Deserialize, Serialize};
use std::fs::{self, File};
use std::io::{BufWriter, Result, Write};
use std::path::Path;
use std::process::Command;
use std::sync::{Arc, Mutex};
use std::thread;
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use tempfile::NamedTempFile;
// Binary benchmarked when no BINARY_PATH is given on the command line.
const DEFAULT_BINARY: &str = "./target/release/sk";
// Default for the `-n/--num-items` options of `generate` and `run`.
const DEFAULT_NUM_ITEMS: u64 = 1_000_000;
// Default for the `-q/--query` option of `run`.
const DEFAULT_QUERY: &str = "test";
// Prompt passed to the benchmarked binary via `--prompt`; `run_once` looks for
// it (followed by the query) in the captured pane to detect that the UI is up.
const BENCH_PROMPT: &str = "BENCH> ";
// Upper bound, in seconds, for each of the two wait phases that precede the
// measurement loop in `run_once`.
const PRE_MEASUREMENT_TIMEOUT_S: f64 = 15.0;
// Default for `-s/--stable-secs`: how long the counters must stay unchanged
// before a run is considered complete.
const REQUIRED_STABLE_S: f64 = 5.0;
// Hard upper bound, in seconds, for the measurement loop of a single run.
const MAX_WAIT_S: f64 = 120.0;
// Sleep between two consecutive polls of the tmux pane, in milliseconds.
const CHECK_INTERVAL_MS: u64 = 1;
// Vocabulary used by `generate_test_data`: each generated line is a random
// sequence of these words joined with `/`.
const WORDS: &[&str] = &[
"home",
"usr",
"etc",
"var",
"opt",
"tmp",
"dev",
"proc",
"sys",
"lib",
"bin",
"sbin",
"boot",
"mnt",
"media",
"src",
"test",
"config",
"data",
"logs",
"cache",
"backup",
"docs",
"images",
"videos",
"audio",
"downloads",
"uploads",
"temp",
"shared",
];
// Top-level command line of the benchmark tool.
// NOTE: plain `//` comments are used on purpose; `///` doc comments on clap
// derive items become part of the generated help text.
#[derive(Parser, Debug)]
#[command(
name = "bench",
about = "Benchmark skim ingestion + matching rate in interactive mode",
long_about = "Measures how fast skim can ingest items and display matched results \
by running sk inside a tmux session and polling the status line.",
ignore_errors(true)
)]
struct Args {
// The selected subcommand (`generate`, `run` or `plot`).
#[command(subcommand)]
command: Subcommand,
}
// Subcommands understood by the tool.
#[derive(clap::Subcommand, Debug)]
enum Subcommand {
// Write a test data file (aliases: `gen`, `g`).
#[command(alias = "gen", alias = "g")]
Generate(GenerateArgs),
// Run the benchmark (alias: `r`).
#[command(alias = "r")]
Run(RunArgs),
// Render a PNG chart from previously recorded JSON results.
Plot(PlotArgs),
}
// Options of the `plot` subcommand.
#[derive(clap::Args, Debug)]
struct PlotArgs {
// JSON file to read benchmark entries from.
#[arg(short = 'i', long, default_value = "/tmp/bench.json", value_name = "FILE")]
input: String,
// Path of the image file to produce.
#[arg(short = 'o', long, default_value = "bench.png", value_name = "FILE")]
output: String,
// Image width in pixels.
#[arg(long, default_value_t = 1600u32, value_name = "PX")]
width: u32,
// Image height in pixels.
#[arg(long, default_value_t = 1200u32, value_name = "PX")]
height: u32,
}
// Options of the `generate` subcommand.
#[derive(clap::Args, Debug)]
struct GenerateArgs {
// Destination file for the generated lines (mandatory).
#[arg(short = 'f', long, value_name = "FILE", required = true)]
file: String,
// Number of lines to generate.
#[arg(short = 'n', long, default_value_t = DEFAULT_NUM_ITEMS, value_name = "NUM")]
num_items: u64,
}
// Options of the `run` subcommand.
#[derive(clap::Args, Debug)]
struct RunArgs {
// Binaries to benchmark. The default is a single empty string, which `main`
// replaces with `DEFAULT_BINARY`.
#[arg(value_name = "BINARY_PATH", default_value = "")]
binaries: Vec<String>,
// Number of items to generate when no input file is supplied.
#[arg(short = 'n', long, default_value_t = DEFAULT_NUM_ITEMS, value_name = "NUM")]
num_items: u64,
// Query typed into the finder once it has been launched.
#[arg(short = 'q', long, default_value = DEFAULT_QUERY)]
query: String,
// Number of runs per binary.
#[arg(short = 'r', long, default_value_t = 1u32, value_name = "RUNS")]
runs: u32,
// Number of warm-up runs (how they are used is outside this excerpt).
#[arg(short = 'w', long, default_value_t = 1u32, value_name = "N")]
warmup: u32,
// Existing input file to use instead of generating test data.
#[arg(short = 'f', long, value_name = "FILE")]
file: Option<String>,
// JSON output switch (presumably selects `print_json` — confirm in `main`).
#[arg(short = 'j', long)]
json: bool,
// Profiling with `perf record`. The flag may be given without a value, in
// which case the stored string is empty.
#[arg(
short = 'p',
long,
num_args = 0..=1,
default_missing_value = "",
value_name = "FILE"
)]
perf: Option<String>,
// Tracing with `strace`. The flag may be given without a value, in which
// case the stored string is empty.
#[arg(
short = 't',
long,
num_args = 0..=1,
default_missing_value = "",
value_name = "FILE"
)]
strace: Option<String>,
// Seconds the matched/total counters must stay unchanged before a run is
// considered complete.
#[arg(short = 's', long, default_value_t = REQUIRED_STABLE_S, value_name = "SECS")]
stable_secs: f64,
// Everything after `--` on the command line.
#[arg(last = true)]
extra_args: Vec<String>,
}
/// Writes `num_items` random, path-like lines to `output_file`.
///
/// Each line consists of 2 to 10 words picked at random from `WORDS`, joined
/// with `/`, followed by `_<index>` where the index counts from 1.
///
/// # Errors
///
/// Returns any I/O error raised while creating, writing or flushing the file.
fn generate_test_data(output_file: &str, num_items: u64) -> std::io::Result<()> {
    let mut writer = BufWriter::new(File::create(output_file)?);
    let mut rng = rand::rng();
    for i in 1..=num_items {
        let depth = rng.random_range(2..=10usize);
        let parts: Vec<&str> = (0..depth)
            .map(|_| WORDS[rng.random_range(0..WORDS.len())])
            .collect();
        writeln!(writer, "{}_{}", parts.join("/"), i)?;
    }
    // Flush explicitly: dropping a `BufWriter` also flushes, but silently
    // discards any error, which could leave a truncated data file unnoticed.
    writer.flush()
}
/// Highest resource usage observed so far for a monitored process.
struct ResourcePeak {
    /// Largest `rss` value reported by `ps`.
    peak_mem_kb: u64,
    /// Largest `%cpu` value reported by `ps`.
    peak_cpu: f64,
}

/// Background sampler that polls `ps` for one process and remembers the peaks.
struct ResourceMonitor {
    /// Peaks shared between the sampling thread and the owner.
    stats: Arc<Mutex<ResourcePeak>>,
    /// Handle of the sampling thread.
    handle: thread::JoinHandle<()>,
}

impl ResourceMonitor {
    /// Spawns a thread that samples `pid` with `ps` every 50 ms.
    ///
    /// The thread stops when `ps` cannot be run or prints nothing for `pid`.
    fn start(pid: u32) -> Self {
        let stats = Arc::new(Mutex::new(ResourcePeak {
            peak_mem_kb: 0,
            peak_cpu: 0.0,
        }));
        let shared = Arc::clone(&stats);
        let pid_arg = pid.to_string();
        let handle = thread::spawn(move || {
            loop {
                let Ok(sample) = Command::new("ps")
                    .args(["-p", pid_arg.as_str(), "-o", "rss=,%cpu="])
                    .output()
                else {
                    break;
                };
                let text = String::from_utf8_lossy(&sample.stdout);
                let row = text.trim();
                if row.is_empty() {
                    // No output for this pid: stop sampling.
                    break;
                }
                let mut fields = row.split_whitespace();
                let rss = fields.next().and_then(|f| f.parse::<u64>().ok());
                let cpu = fields.next().and_then(|f| f.parse::<f64>().ok());
                // A sample only counts when both columns parsed.
                if let (Some(mem), Some(cpu)) = (rss, cpu) {
                    let mut peak = shared.lock().unwrap();
                    peak.peak_mem_kb = peak.peak_mem_kb.max(mem);
                    peak.peak_cpu = peak.peak_cpu.max(cpu);
                }
                thread::sleep(Duration::from_millis(50));
            }
        });
        ResourceMonitor { stats, handle }
    }

    /// Waits for the sampling thread to finish and returns `(peak memory, peak CPU)`.
    ///
    /// A component is `None` when no value greater than zero was recorded.
    fn join(self) -> (Option<u64>, Option<f64>) {
        let ResourceMonitor { stats, handle } = self;
        let _ = handle.join();
        let peak = stats.lock().unwrap();
        let mem = Some(peak.peak_mem_kb).filter(|&kb| kb > 0);
        let cpu = Some(peak.peak_cpu).filter(|&load| load > 0.0);
        (mem, cpu)
    }
}
/// Outcome of a single benchmark run, as produced by `run_once`.
struct RunResult {
/// Seconds from the start of the measurement to the last observed change of
/// the counters (or to the end of the loop when they never changed).
elapsed_s: f64,
/// `total_count / elapsed_s`, or `0.0` when either is zero.
rate: f64,
/// Last matched count read from the pane.
matched: u64,
/// Last total count read from the pane.
total_count: u64,
/// Peak memory reported by the resource monitor, if any.
peak_mem_kb: Option<u64>,
/// Peak CPU percentage reported by the resource monitor, if any.
peak_cpu: Option<f64>,
/// `true` when the counters stayed stable for the required time.
completed: bool,
/// Path handed to `perf record -o`, if profiling was requested.
perf_file: Option<String>,
/// Path handed to `strace -o`, if tracing was requested.
strace_file: Option<String>,
/// Seconds until the UI was first seen (prompt and/or counters), if observed.
startup_s: Option<f64>,
}
/// Looks up the pid of the benchmarked process among the children of `pane_pid`.
///
/// Runs `pgrep -P <pane_pid> -f <binary_path>` up to 400 times, sleeping 5 ms
/// after each unsuccessful attempt, and returns the first pid printed.
/// Returns `0` when no pid could be found.
fn find_sk_pid(pane_pid: u32, binary_path: &str) -> u32 {
    let parent = pane_pid.to_string();
    for _attempt in 0..400 {
        let found = Command::new("pgrep")
            .args(["-P", parent.as_str(), "-f", binary_path])
            .output()
            .ok()
            .and_then(|out| {
                String::from_utf8_lossy(&out.stdout)
                    .trim()
                    .lines()
                    .next()
                    .and_then(|line| line.trim().parse::<u32>().ok())
            });
        if let Some(pid) = found {
            return pid;
        }
        thread::sleep(Duration::from_millis(5));
    }
    0
}
/// Reports whether `/proc/<pid>` exists.
fn process_alive(pid: u32) -> bool {
    let proc_entry = format!("/proc/{pid}");
    Path::new(&proc_entry).exists()
}
/// Handle to a dedicated tmux server used to host the benchmarked sessions.
struct TmuxServer {
/// Socket name passed to every `tmux -L` invocation.
socket: String,
/// Path of the file that `capture` asks tmux to save pane contents into.
capture_buf: String,
}
impl TmuxServer {
    /// Returns a `tmux` command pre-configured with this server's socket.
    fn tmux(&self) -> Command {
        let mut cmd = Command::new("tmux");
        cmd.args(["-L", self.socket.as_str()]);
        cmd
    }

    /// Starts a tmux server on a socket named after the current process id.
    ///
    /// A temporary file is created only to obtain a unique path for the
    /// capture buffer; the file itself is released again before the server is
    /// started. Failure to launch tmux is ignored here.
    fn start() -> Self {
        let socket = format!("skim_bench_{}", std::process::id());
        let scratch = NamedTempFile::new().expect("failed to capture temp file");
        let capture_buf = scratch.path().to_string_lossy().into_owned();
        drop(scratch);

        let mut launch = Command::new("tmux");
        launch
            .args(["-L", &socket, "start-server"])
            .env_clear()
            .envs(env_vars());
        let _ = launch.output();

        Self { socket, capture_buf }
    }

    /// Captures the pane of `session_name` and returns its text.
    ///
    /// The pane is copied into a tmux buffer, the buffer is saved to
    /// `capture_buf`, and that file is read back. Errors of the two tmux
    /// invocations are ignored; only reading the file can fail.
    fn capture(&self, session_name: &str) -> Result<String> {
        let buf_name = format!("status-{}", session_name);
        let _ = self
            .tmux()
            .args(["capture-pane", "-b", &buf_name, "-t", session_name])
            .output();
        let _ = self
            .tmux()
            .args(["save-buffer", "-b", &buf_name, &self.capture_buf])
            .output();
        fs::read_to_string(&self.capture_buf)
    }

    /// Creates a detached session called `name` with the filtered environment.
    ///
    /// Only a failure to spawn tmux is reported; its exit status is ignored.
    fn new_session(&self, name: &str) -> Result<()> {
        self.tmux()
            .args(["new-session", "-s", name, "-d"])
            .env_clear()
            .envs(env_vars())
            .status()
            .map(|_| ())
    }

    /// Sends `keys` to `session_name`; the exit status of tmux is ignored.
    fn send_keys(&self, session_name: &str, keys: &str) -> Result<()> {
        self.tmux()
            .args(["send-keys", "-t", session_name, keys])
            .status()
            .map(|_| ())
    }

    /// Returns the pid of the first pane of `session_name`, or `0` when the
    /// output of `list-panes` cannot be parsed.
    fn pane_pid(&self, session_name: &str) -> Result<u32> {
        let listing = self
            .tmux()
            .args(["list-panes", "-t", session_name, "-F", "#{pane_pid}"])
            .output()?;
        let text = String::from_utf8_lossy(&listing.stdout);
        let pid = text
            .trim()
            .lines()
            .next()
            .and_then(|line| line.trim().parse::<u32>().ok())
            .unwrap_or(0u32);
        Ok(pid)
    }

    /// Kills `session_name`; the exit status of tmux is ignored.
    fn kill_session(&self, session_name: &str) -> Result<()> {
        self.tmux()
            .args(["kill-session", "-t", session_name])
            .status()
            .map(|_| ())
    }
}
impl Drop for TmuxServer {
fn drop(&mut self) {
let _ = Command::new("tmux").args(["-L", &self.socket, "kill-server"]).output();
}
}
/// Performs one benchmark run of `binary_path` inside a fresh tmux session.
///
/// The input file is piped into the binary (optionally wrapped in `perf record`
/// and/or `strace`), `query` is typed, and the pane is polled for a
/// `<matched>/<total>` counter until it stays unchanged for `stable_secs`, the
/// process disappears, or `MAX_WAIT_S` elapses.
///
/// # Errors
///
/// Returns an error when tmux cannot be spawned for creating the session,
/// sending keys or listing panes.
#[allow(clippy::too_many_arguments)]
fn run_once(
binary_path: &str,
query: &str,
tmp_file: &str,
extra_args: &[String],
run_index: u32,
session_suffix: &str,
perf_output: Option<&str>,
strace_output: Option<&str>,
tmux_server: &TmuxServer,
stable_secs: f64,
) -> Result<RunResult> {
// Session name is unique per process, binary suffix and run index.
let session_name = format!("skim_bench_{}_{}_{}", std::process::id(), session_suffix, run_index);
tmux_server.new_session(&session_name)?;
let extra_str = extra_args.join(" ");
// Optional wrappers placed in front of the binary on the command line.
let perf_prefix = match perf_output {
Some(path) => format!("perf record -o {} -- ", path),
None => String::new(),
};
let strace_prefix = match strace_output {
Some(path) => format!("strace -C -ttt -o {} -- ", path),
None => String::new(),
};
// NOTE(review): paths and extra arguments are interpolated unquoted, so
// values containing spaces or shell metacharacters will not survive.
let cmd_str = format!(
"cat {} | {}{}{} --prompt '{}' {}",
tmp_file, perf_prefix, strace_prefix, binary_path, BENCH_PROMPT, extra_str
);
// Phase 1: wait until the pane shows any non-blank content (or time out).
{
let phase_start = Instant::now();
loop {
thread::sleep(Duration::from_millis(CHECK_INTERVAL_MS));
if phase_start.elapsed().as_secs_f64() >= PRE_MEASUREMENT_TIMEOUT_S {
break;
}
if tmux_server.capture(&session_name).is_ok_and(|c| !c.trim().is_empty()) {
break;
}
}
}
// Type the command; Enter is sent separately further below.
tmux_server.send_keys(&session_name, &cmd_str)?;
// Phase 2: wait until the typed command is visible in the pane (or time out).
{
let cmd_marker = "--prompt '";
let phase_start = Instant::now();
loop {
thread::sleep(Duration::from_millis(CHECK_INTERVAL_MS));
if phase_start.elapsed().as_secs_f64() >= PRE_MEASUREMENT_TIMEOUT_S {
break;
}
if tmux_server.capture(&session_name).is_ok_and(|c| c.contains(cmd_marker)) {
break;
}
}
}
let pane_pid: u32 = tmux_server.pane_pid(&session_name)?;
let binary_path_owned = binary_path.to_owned();
// Filled in by the background thread below once the process has been found.
let monitor_cell: Arc<Mutex<Option<ResourceMonitor>>> = Arc::new(Mutex::new(None));
let sk_pid_cell: Arc<Mutex<u32>> = Arc::new(Mutex::new(0));
{
let monitor_cell = Arc::clone(&monitor_cell);
let sk_pid_cell = Arc::clone(&sk_pid_cell);
// Detached thread: locate the benchmarked process and start monitoring it
// without delaying the key presses that follow.
thread::spawn(move || {
let pid = find_sk_pid(pane_pid, &binary_path_owned);
*sk_pid_cell.lock().unwrap() = pid;
if pid > 0 {
*monitor_cell.lock().unwrap() = Some(ResourceMonitor::start(pid));
}
});
}
// The first `<digits>/<digits>` occurrence in the pane is read as matched/total.
let re = regex::Regex::new(r"(\d+)/(\d+)").expect("valid regex");
let prompt_with_query = format!("{}{}", BENCH_PROMPT, query);
// Launch the command, then type the query.
tmux_server.send_keys(&session_name, "Enter")?;
if !query.is_empty() {
tmux_server.send_keys(&session_name, query)?;
}
// All reported times are measured from this instant.
let start = Instant::now();
let mut completed = false;
let mut matched_count: u64 = 0;
let mut total_count: u64 = 0;
// `u64::MAX` sentinels guarantee that the first reading counts as a change.
let mut prev_matched: u64 = u64::MAX;
let mut prev_total: u64 = u64::MAX;
let mut stable_since: Option<Instant> = None;
let mut last_change_elapsed: Option<Duration> = None;
let loop_start = Instant::now();
let mut startup_prompt_s: Option<f64> = None;
let mut startup_status_s: Option<f64> = None;
// Measurement loop.
loop {
thread::sleep(Duration::from_millis(CHECK_INTERVAL_MS));
if loop_start.elapsed().as_secs_f64() >= MAX_WAIT_S {
break;
}
// Stop early when the process was found but is no longer running.
let sk_pid = *sk_pid_cell.lock().unwrap();
if sk_pid > 0 && !process_alive(sk_pid) {
break;
}
let content = match tmux_server.capture(&session_name) {
Ok(c) => c,
Err(_) => continue,
};
// First time the prompt followed by the query is visible.
if startup_prompt_s.is_none() && content.contains(&prompt_with_query) {
startup_prompt_s = Some(start.elapsed().as_secs_f64());
}
if let Some(caps) = re.captures(&content) {
let mc: u64 = caps[1].parse().unwrap_or(0);
let total: u64 = caps[2].parse().unwrap_or(0);
// First time a counter is visible at all.
if startup_status_s.is_none() {
startup_status_s = Some(start.elapsed().as_secs_f64());
}
if total > 0 {
total_count = total;
matched_count = mc;
if mc != prev_matched || total != prev_total {
// Counters moved: restart the stability window.
prev_matched = mc;
prev_total = total;
stable_since = Some(Instant::now());
last_change_elapsed = Some(start.elapsed());
} else if stable_since.is_some_and(|t| t.elapsed().as_secs_f64() >= stable_secs) {
completed = true;
break;
}
}
}
}
// Report the time of the last counter change, so the stability wait itself
// is not counted; fall back to the total elapsed time if nothing changed.
let elapsed_s = last_change_elapsed.unwrap_or_else(|| start.elapsed()).as_secs_f64();
// Send Escape, then give the process a moment before checking on it.
tmux_server.send_keys(&session_name, "Escape")?;
thread::sleep(Duration::from_millis(100));
// Wait (up to 15 s) until no child of the pane matches `perf record`.
if perf_output.is_some() && pane_pid > 0 {
let perf_wait = Instant::now();
loop {
if perf_wait.elapsed().as_secs_f64() >= 15.0 {
eprintln!("Warning: perf record did not exit within 15 s; perf data may be incomplete.");
break;
}
let still_running = Command::new("pgrep")
.args(["-P", &pane_pid.to_string(), "-f", "perf record"])
.output()
.map(|o| o.status.success())
.unwrap_or(false);
if !still_running {
break;
}
thread::sleep(Duration::from_millis(100));
}
}
// Same wait for `strace`.
if strace_output.is_some() && pane_pid > 0 {
let strace_wait = Instant::now();
loop {
if strace_wait.elapsed().as_secs_f64() >= 15.0 {
eprintln!("Warning: strace did not exit within 15 s; trace data may be incomplete.");
break;
}
let still_running = Command::new("pgrep")
.args(["-P", &pane_pid.to_string(), "-f", "strace"])
.output()
.map(|o| o.status.success())
.unwrap_or(false);
if !still_running {
break;
}
thread::sleep(Duration::from_millis(100));
}
}
// Collect the peaks; `join` blocks until the sampling thread has stopped.
let monitor = monitor_cell.lock().unwrap().take();
let (peak_mem_kb, peak_cpu) = monitor.map(ResourceMonitor::join).unwrap_or((None, None));
let rate = if elapsed_s > 0.0 && total_count > 0 {
total_count as f64 / elapsed_s
} else {
0.0
};
let _ = tmux_server.kill_session(&session_name);
Ok(RunResult {
elapsed_s,
rate,
matched: matched_count,
total_count,
peak_mem_kb,
peak_cpu,
completed,
perf_file: perf_output.map(str::to_owned),
strace_file: strace_output.map(str::to_owned),
// Startup is the later of "prompt visible" and "counter visible" when
// both were seen, otherwise whichever one was seen.
startup_s: match (startup_prompt_s, startup_status_s) {
(Some(a), Some(b)) => Some(a.max(b)),
(Some(a), None) => Some(a),
(None, Some(b)) => Some(b),
(None, None) => None,
},
})
}
/// Returns the environment handed to tmux.
///
/// The current environment is copied without `HISTFILE` and without any
/// variable whose name starts with `FZF` or `SKIM`; a `SHELL=/bin/sh` entry is
/// appended at the end.
fn env_vars() -> Vec<(String, String)> {
    let mut kept: Vec<(String, String)> = Vec::new();
    for (key, value) in std::env::vars() {
        let excluded = key == "HISTFILE" || key.starts_with("FZF") || key.starts_with("SKIM");
        if !excluded {
            kept.push((key, value));
        }
    }
    kept.push((String::from("SHELL"), String::from("/bin/sh")));
    kept
}
/// Statistics aggregated over all runs of one binary (see `aggregate`).
///
/// Every `Option` field is `None` when no run contributed a value.
struct AggResult {
/// Number of runs that completed.
completed: usize,
/// Total number of runs, completed or not.
runs: usize,
/// Elapsed time in seconds over completed runs.
avg_time: Option<f64>,
min_time: Option<f64>,
max_time: Option<f64>,
/// Items per second over completed runs.
avg_rate: Option<f64>,
min_rate: Option<f64>,
max_rate: Option<f64>,
/// Average matched count over completed runs.
avg_matched: Option<f64>,
/// Average total item count over completed runs.
avg_total_count: Option<f64>,
/// Startup time in seconds over all runs that reported one.
avg_startup_s: Option<f64>,
min_startup_s: Option<f64>,
max_startup_s: Option<f64>,
/// Smallest and largest matched count over completed runs.
min_matched: Option<f64>,
max_matched: Option<f64>,
/// Peak memory (same unit as `RunResult::peak_mem_kb`) over completed runs.
avg_mem: Option<f64>,
min_mem: Option<f64>,
max_mem: Option<f64>,
/// Peak CPU percentage over completed runs.
avg_cpu: Option<f64>,
min_cpu: Option<f64>,
max_cpu: Option<f64>,
}
/// Arithmetic mean of `vals`, or `None` for an empty slice.
fn avg(vals: &[f64]) -> Option<f64> {
    match vals.len() {
        0 => None,
        count => Some(vals.iter().sum::<f64>() / count as f64),
    }
}
/// Aggregates the results of several runs of one binary.
///
/// Time, rate, matched/total counts, memory and CPU statistics are computed
/// over completed runs only. Startup statistics use every run that reported a
/// startup time, completed or not.
fn aggregate(results: &[RunResult]) -> AggResult {
    let done: Vec<&RunResult> = results.iter().filter(|r| r.completed).collect();
    let times: Vec<f64> = done.iter().map(|r| r.elapsed_s).collect();
    let rates: Vec<f64> = done.iter().map(|r| r.rate).collect();
    let matched: Vec<f64> = done.iter().map(|r| r.matched as f64).collect();
    let totals: Vec<f64> = done.iter().map(|r| r.total_count as f64).collect();
    let mems: Vec<f64> = done.iter().filter_map(|r| r.peak_mem_kb.map(|v| v as f64)).collect();
    let cpus: Vec<f64> = done.iter().filter_map(|r| r.peak_cpu).collect();
    let startup: Vec<f64> = results.iter().filter_map(|r| r.startup_s).collect();

    // Smallest / largest element of a sample, `None` when the sample is empty.
    let lowest = |vals: &[f64]| vals.iter().copied().reduce(f64::min);
    let highest = |vals: &[f64]| vals.iter().copied().reduce(f64::max);

    AggResult {
        completed: done.len(),
        runs: results.len(),
        avg_time: avg(&times),
        min_time: lowest(&times),
        max_time: highest(&times),
        avg_rate: avg(&rates),
        min_rate: lowest(&rates),
        max_rate: highest(&rates),
        avg_matched: avg(&matched),
        min_matched: lowest(&matched),
        max_matched: highest(&matched),
        avg_total_count: avg(&totals),
        avg_mem: avg(&mems),
        min_mem: lowest(&mems),
        max_mem: highest(&mems),
        avg_cpu: avg(&cpus),
        min_cpu: lowest(&cpus),
        max_cpu: highest(&cpus),
        avg_startup_s: avg(&startup),
        min_startup_s: lowest(&startup),
        max_startup_s: highest(&startup),
    }
}
/// Builds a human-readable label for a benchmarked binary.
///
/// Binaries located under `<current dir>/target` are labelled `<name> HEAD`
/// without being executed. Any other binary is run with `--version`; the
/// first word of the first output line (stdout, or stderr when stdout is
/// blank), after removing a leading executable name, becomes the version. If
/// that fails the version is `unknown`.
fn binary_display_name(binary_path: &str) -> String {
    let path = Path::new(binary_path);
    let exe_name = match path.file_name().and_then(|n| n.to_str()) {
        Some(name) => name.to_owned(),
        None => binary_path.to_owned(),
    };

    let in_target = match std::env::current_dir() {
        Ok(cwd) => path.starts_with(cwd.join("target")),
        Err(_) => false,
    };
    if in_target {
        return format!("{} HEAD", exe_name);
    }

    let detected: Option<String> = match Command::new(binary_path).arg("--version").output() {
        Ok(out) => {
            let stdout = String::from_utf8_lossy(&out.stdout);
            let text = if stdout.trim().is_empty() {
                String::from_utf8_lossy(&out.stderr)
            } else {
                stdout
            };
            let word = text.lines().next().map(str::trim).and_then(|line| {
                // Drop a leading "<exe name>" so that "sk 1.2.3" yields "1.2.3".
                let rest = match line.strip_prefix(exe_name.as_str()) {
                    Some(stripped) => stripped.trim_start(),
                    None => line,
                };
                rest.split_whitespace().next().map(str::to_owned)
            });
            word
        }
        Err(_) => None,
    };
    let version = detected.unwrap_or_else(|| "unknown".into());
    format!("{} {}", exe_name, version)
}
/// Formats the relative change of `value` against `baseline` as a percentage.
///
/// The result has one decimal and a leading `+` for non-negative changes.
/// An empty string is returned when either input is missing or the baseline
/// is zero.
fn pct(baseline: Option<f64>, value: Option<f64>) -> String {
    let (Some(base), Some(current)) = (baseline, value) else {
        return String::new();
    };
    if base == 0.0 {
        return String::new();
    }
    let change = (current - base) / base.abs() * 100.0;
    let sign = if change >= 0.0 { "+" } else { "" };
    format!("{}{:.1}%", sign, change)
}
/// Renders `value` with `fmt`, or `N/A` when there is no value.
fn fmt_opt(value: Option<f64>, fmt: impl Fn(f64) -> String) -> String {
    match value {
        Some(v) => fmt(v),
        None => "N/A".into(),
    }
}
/// Prints the aggregated results of one binary in human-readable form.
///
/// * `binary_label` – label shown in the section header.
/// * `agg` – statistics to print.
/// * `baseline` – statistics to compare against, if any.
/// * `is_baseline` – `true` when `agg` is itself the baseline; the header is
///   then tagged `[baseline]` and no comparison is printed.
///
/// Memory, CPU and startup lines are printed only when data is available.
fn print_human(binary_label: &str, agg: &AggResult, baseline: Option<&AggResult>, is_baseline: bool) {
    let tag = if is_baseline { " [baseline]" } else { "" };
    println!("\n=== Results: {}{} ===", binary_label, tag);
    println!("Completed runs: {} / {}", agg.completed, agg.runs);

    // Suffix with the relative change against the baseline, empty when there
    // is nothing to compare against.
    let cmp = |val: Option<f64>, base_val: Option<f64>| -> String {
        if baseline.is_none() || is_baseline {
            String::new()
        } else {
            format!(" {}", pct(base_val, val))
        }
    };

    println!(
        "Average items matched: {} (min: {}, max: {}) / {}{}",
        fmt_opt(agg.avg_matched, |v| format!("{:.0}", v)),
        fmt_opt(agg.min_matched, |v| format!("{:.0}", v)),
        fmt_opt(agg.max_matched, |v| format!("{:.0}", v)),
        fmt_opt(agg.avg_total_count, |v| format!("{:.0}", v)),
        cmp(agg.avg_matched, baseline.and_then(|b| b.avg_matched)),
    );

    // The time delta goes through `pct` like every other metric, so a zero
    // baseline yields no delta instead of "inf%"/"NaN%". Unlike `cmp`, no
    // separator is emitted when there is no delta to show.
    let time_cmp = if is_baseline {
        String::new()
    } else {
        let delta = pct(baseline.and_then(|b| b.avg_time), agg.avg_time);
        if delta.is_empty() {
            delta
        } else {
            format!(" {}", delta)
        }
    };
    println!(
        "Average time: {} (min: {}, max: {}){}",
        fmt_opt(agg.avg_time, |v| format!("{:.3}s", v)),
        fmt_opt(agg.min_time, |v| format!("{:.3}s", v)),
        fmt_opt(agg.max_time, |v| format!("{:.3}s", v)),
        time_cmp,
    );

    println!(
        "Average items/second: {} (min: {}, max: {}){}",
        fmt_opt(agg.avg_rate, |v| format!("{:.0}", v)),
        fmt_opt(agg.min_rate, |v| format!("{:.0}", v)),
        fmt_opt(agg.max_rate, |v| format!("{:.0}", v)),
        cmp(agg.avg_rate, baseline.and_then(|b| b.avg_rate)),
    );

    if agg.avg_mem.is_some() {
        // Memory values are divided by 1024 and printed as MB.
        let mb = |kb: Option<f64>| fmt_opt(kb, |v| format!("{:.1} MB", v / 1024.0));
        println!(
            "Average peak memory usage: {} (min: {}, max: {}){}",
            mb(agg.avg_mem),
            mb(agg.min_mem),
            mb(agg.max_mem),
            cmp(agg.avg_mem, baseline.and_then(|b| b.avg_mem)),
        );
    }

    if agg.avg_cpu.is_some() {
        println!(
            "Average peak CPU usage: {} (min: {}, max: {}){}",
            fmt_opt(agg.avg_cpu, |v| format!("{:.1}%", v)),
            fmt_opt(agg.min_cpu, |v| format!("{:.1}%", v)),
            fmt_opt(agg.max_cpu, |v| format!("{:.1}%", v)),
            cmp(agg.avg_cpu, baseline.and_then(|b| b.avg_cpu)),
        );
    }

    if agg.avg_startup_s.is_some() {
        println!(
            "Startup time (UI ready): {} (min: {}, max: {}){}",
            fmt_opt(agg.avg_startup_s, |v| format!("{:.3}s", v)),
            fmt_opt(agg.min_startup_s, |v| format!("{:.3}s", v)),
            fmt_opt(agg.max_startup_s, |v| format!("{:.3}s", v)),
            cmp(agg.avg_startup_s, baseline.and_then(|b| b.avg_startup_s)),
        );
    }
}
/// Shortens `binary` to its file name when it is longer than 40 bytes.
///
/// Shorter values, and long values without a usable file name, are returned
/// unchanged.
fn shorten_binary(binary: &str) -> String {
    if binary.len() <= 40 {
        return binary.to_owned();
    }
    match Path::new(binary).file_name().and_then(|name| name.to_str()) {
        Some(name) => name.to_owned(),
        None => binary.to_owned(),
    }
}
/// Pads `s` with spaces to `width` characters.
///
/// Padding goes on the left when `right_align` is set, otherwise on the
/// right. Width is counted in `char`s and longer strings are not truncated.
fn pad_cell(s: &str, width: usize, right_align: bool) -> String {
    if right_align {
        format!("{s:>width$}")
    } else {
        format!("{s:<width$}")
    }
}
/// Prints a Markdown table comparing the aggregated results of all binaries.
///
/// The first entry is the baseline. When more than one binary is listed, each
/// metric is followed by a delta column holding the relative change against
/// the baseline (`—` for the baseline row itself). Memory, CPU and startup
/// columns appear only when at least one binary has data for them.
///
/// Nothing is printed when `aggregates` is empty.
fn print_markdown_table(display_names: &[String], aggregates: &[AggResult]) {
    // Without a baseline there is nothing to tabulate (indexing `[0]` here
    // used to panic on empty input).
    let Some(baseline) = aggregates.first() else {
        return;
    };
    let multi = display_names.len() > 1;
    let has_mem = aggregates.iter().any(|a| a.avg_mem.is_some());
    let has_cpu = aggregates.iter().any(|a| a.avg_cpu.is_some());
    let has_startup = aggregates.iter().any(|a| a.avg_startup_s.is_some());

    // Metric columns in display order: (enabled, header, delta header).
    let metric_headers: [(bool, &str, &str); 5] = [
        (true, "Avg time", "Δ time"),
        (true, "Avg rate", "Δ rate"),
        (has_mem, "Avg mem (MB)", "Δ mem"),
        (has_cpu, "Avg CPU (%)", "Δ CPU"),
        (has_startup, "Startup (s)", "Δ startup"),
    ];

    // Column definitions: (header, right-aligned).
    let mut col_defs: Vec<(&str, bool)> = vec![("Binary", false), ("Runs", true), ("Matched", true)];
    for (enabled, title, delta_title) in metric_headers {
        if !enabled {
            continue;
        }
        col_defs.push((title, true));
        if multi {
            col_defs.push((delta_title, true));
        }
    }

    let mut rows: Vec<Vec<String>> = Vec::with_capacity(aggregates.len());
    for (i, (display_name, agg)) in display_names.iter().zip(aggregates).enumerate() {
        let short = shorten_binary(display_name);
        let name = if i == 0 && multi {
            format!("**{}** *(baseline)*", short)
        } else {
            short
        };
        let mut row: Vec<String> = vec![
            name,
            format!("{}/{}", agg.completed, agg.runs),
            fmt_opt(agg.avg_matched, |v| format!("{:.0}", v)),
        ];

        // Same order as `metric_headers`:
        // (enabled, rendered value, baseline value, this row's value).
        let metrics: [(bool, String, Option<f64>, Option<f64>); 5] = [
            (
                true,
                fmt_opt(agg.avg_time, |v| format!("{:.3}s", v)),
                baseline.avg_time,
                agg.avg_time,
            ),
            (
                true,
                fmt_opt(agg.avg_rate, |v| format!("{:.0}", v)),
                baseline.avg_rate,
                agg.avg_rate,
            ),
            (
                has_mem,
                fmt_opt(agg.avg_mem, |v| format!("{:.1}", v / 1024.0)),
                baseline.avg_mem,
                agg.avg_mem,
            ),
            (
                has_cpu,
                fmt_opt(agg.avg_cpu, |v| format!("{:.1}%", v)),
                baseline.avg_cpu,
                agg.avg_cpu,
            ),
            (
                has_startup,
                fmt_opt(agg.avg_startup_s, |v| format!("{:.3}s", v)),
                baseline.avg_startup_s,
                agg.avg_startup_s,
            ),
        ];
        for (enabled, cell, base, value) in metrics {
            if !enabled {
                continue;
            }
            row.push(cell);
            if multi {
                row.push(if i == 0 { "—".into() } else { pct(base, value) });
            }
        }
        rows.push(row);
    }

    // Each column is as wide as its widest cell, header included.
    let mut widths: Vec<usize> = col_defs.iter().map(|(h, _)| h.chars().count()).collect();
    for row in &rows {
        for (width, cell) in widths.iter_mut().zip(row) {
            *width = (*width).max(cell.chars().count());
        }
    }

    let render_row = |cells: &[String]| -> String {
        let padded: Vec<String> = cells
            .iter()
            .zip(&col_defs)
            .zip(&widths)
            .map(|((cell, &(_, right)), &w)| pad_cell(cell, w, right))
            .collect();
        format!("| {} |", padded.join(" | "))
    };

    let headers: Vec<String> = col_defs.iter().map(|(h, _)| h.to_string()).collect();
    println!("{}", render_row(&headers));

    // Alignment row: the colon goes on the side the column is aligned to.
    let seps: Vec<String> = col_defs
        .iter()
        .zip(&widths)
        .map(|(&(_, right), &w)| {
            let dashes = "-".repeat(w.saturating_sub(1));
            if right {
                format!("{}:", dashes)
            } else {
                format!(":{}", dashes)
            }
        })
        .collect();
    println!("| {} |", seps.join(" | "));

    for row in &rows {
        println!("{}", render_row(row));
    }
}
/// Average / minimum / maximum of one metric as written to the JSON output.
#[derive(Serialize)]
struct JsonMinMaxAvg {
avg: Option<f64>,
min: Option<f64>,
max: Option<f64>,
}
/// JSON record describing the aggregated results of one binary
/// (filled in by `build_json_entry`).
#[derive(Serialize)]
struct JsonEntry {
/// The `binary` string given to `build_json_entry`.
binary: String,
/// Human-readable label of the binary.
display_name: String,
/// The `runs` value given to `build_json_entry`.
runs: u32,
/// Number of runs that completed.
completed_runs: usize,
items_matched: JsonMinMaxAvg,
/// Average total item count.
items_total: Option<f64>,
time_s: JsonMinMaxAvg,
items_per_second: JsonMinMaxAvg,
peak_memory_kb: JsonMinMaxAvg,
peak_cpu: JsonMinMaxAvg,
startup_s: JsonMinMaxAvg,
}
/// Converts the aggregated statistics of one binary into its JSON record.
///
/// `runs` is stored as given; every other number is copied from `agg`.
fn build_json_entry(binary: &str, display_name: &str, agg: &AggResult, runs: u32) -> JsonEntry {
    // Small constructor to keep the field list below readable.
    let stats = |avg: Option<f64>, min: Option<f64>, max: Option<f64>| JsonMinMaxAvg { avg, min, max };

    let items_matched = stats(agg.avg_matched, agg.min_matched, agg.max_matched);
    let time_s = stats(agg.avg_time, agg.min_time, agg.max_time);
    let items_per_second = stats(agg.avg_rate, agg.min_rate, agg.max_rate);
    let peak_memory_kb = stats(agg.avg_mem, agg.min_mem, agg.max_mem);
    let peak_cpu = stats(agg.avg_cpu, agg.min_cpu, agg.max_cpu);
    let startup_s = stats(agg.avg_startup_s, agg.min_startup_s, agg.max_startup_s);

    JsonEntry {
        binary: binary.to_string(),
        display_name: display_name.to_string(),
        runs,
        completed_runs: agg.completed,
        items_matched,
        items_total: agg.avg_total_count,
        time_s,
        items_per_second,
        peak_memory_kb,
        peak_cpu,
        startup_s,
    }
}
/// Prints the results of all binaries as one line of JSON.
///
/// A single binary is printed as a bare object; any other number of binaries
/// is printed as an array.
fn print_json(binaries: &[String], display_names: &[String], aggregates: &[AggResult], runs: u32) {
    let mut entries: Vec<JsonEntry> = Vec::new();
    for ((binary, label), agg) in binaries.iter().zip(display_names).zip(aggregates) {
        entries.push(build_json_entry(binary, label, agg, runs));
    }
    let rendered = match entries.as_slice() {
        [only] => serde_json::to_string(only),
        _ => serde_json::to_string(&entries),
    };
    println!("{}", rendered.unwrap());
}
/// Shared implementation of `perf_path_for` and `strace_path_for`.
///
/// Returns `explicit` when it is non-empty. Otherwise builds
/// `<kind>-<binary file name>-<unix seconds>.<extension>`, where spaces in the
/// file name are replaced with `_` and `sk` is used when the path has no
/// usable file name.
fn timestamped_output_path(kind: &str, extension: &str, binary: &str, explicit: &str) -> String {
    if !explicit.is_empty() {
        return explicit.to_owned();
    }
    // A clock set before the Unix epoch yields a timestamp of 0.
    let ts = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    let base = Path::new(binary)
        .file_name()
        .and_then(|n| n.to_str())
        .map(|s| s.replace(' ', "_"))
        .filter(|s| !s.is_empty())
        .unwrap_or_else(|| "sk".into());
    format!("{}-{}-{}.{}", kind, base, ts, extension)
}

/// Output path for `perf record`: `explicit` if non-empty, otherwise
/// `perf-<binary name>-<unix seconds>.data`.
fn perf_path_for(binary: &str, explicit: &str) -> String {
    timestamped_output_path("perf", "data", binary, explicit)
}

/// Output path for `strace`: `explicit` if non-empty, otherwise
/// `strace-<binary name>-<unix seconds>.out`.
fn strace_path_for(binary: &str, explicit: &str) -> String {
    timestamped_output_path("strace", "out", binary, explicit)
}
/// Average / minimum / maximum of one metric as read back for plotting.
#[derive(Deserialize)]
struct PlotMinMaxAvg {
avg: Option<f64>,
min: Option<f64>,
max: Option<f64>,
}
/// The subset of a JSON result record that the `plot` subcommand reads.
#[derive(Deserialize)]
struct PlotEntry {
/// Series name; entries with the same name form one curve.
display_name: String,
/// Item count, used as the x coordinate.
items_total: Option<f64>,
time_s: PlotMinMaxAvg,
peak_memory_kb: PlotMinMaxAvg,
peak_cpu: PlotMinMaxAvg,
startup_s: PlotMinMaxAvg,
}
/// One x position of a plotted band: the min–max range and its average.
struct BandPoint {
x: f64,
min: f64,
avg: f64,
max: f64,
}
/// A named curve made of band points (built by `collect_bands`).
struct BandSeries {
/// Legend caption of the series.
name: String,
/// Points of the series.
points: Vec<BandPoint>,
}
/// Reads benchmark entries from the JSON file at `path`.
///
/// The file may hold several concatenated JSON documents. Each document may
/// be a single entry object or an array of entries. Documents and entries that
/// fail to parse are skipped silently.
///
/// # Errors
///
/// Returns an error only when the file itself cannot be read.
fn load_plot_entries(path: &str) -> std::io::Result<Vec<PlotEntry>> {
    let text = fs::read_to_string(path)?;
    let documents = serde_json::Deserializer::from_str(&text).into_iter::<serde_json::Value>();

    let mut parsed: Vec<PlotEntry> = Vec::new();
    for item in documents {
        let Ok(document) = item else {
            continue;
        };
        match document {
            serde_json::Value::Array(values) => {
                for value in values {
                    if let Ok(entry) = serde_json::from_value(value) {
                        parsed.push(entry);
                    }
                }
            }
            object @ serde_json::Value::Object(_) => {
                if let Ok(entry) = serde_json::from_value(object) {
                    parsed.push(entry);
                }
            }
            _ => {}
        }
    }
    Ok(parsed)
}
/// Groups entries into one band series per display name.
///
/// `extract` yields `(min, avg, max)` for an entry, or `None` to skip it.
/// Entries without a positive `items_total` are skipped as well. Within a
/// series, points are keyed by the item count truncated to an integer, so a
/// later entry with the same key replaces an earlier one, and points come out
/// ordered by that key. Series are returned sorted by name.
fn collect_bands<F>(entries: &[PlotEntry], extract: F) -> Vec<BandSeries>
where
    F: Fn(&PlotEntry) -> Option<(f64, f64, f64)>,
{
    use std::collections::BTreeMap;

    // An ordered outer map yields the series already sorted by name.
    let mut by_name: BTreeMap<String, BTreeMap<u64, BandPoint>> = BTreeMap::new();
    for entry in entries {
        let Some(x) = entry.items_total.filter(|&count| count > 0.0) else {
            continue;
        };
        let Some((min, avg, max)) = extract(entry) else {
            continue;
        };
        by_name
            .entry(entry.display_name.clone())
            .or_default()
            .insert(x as u64, BandPoint { x, min, avg, max });
    }

    let mut series = Vec::with_capacity(by_name.len());
    for (name, points) in by_name {
        series.push(BandSeries {
            name,
            points: points.into_values().collect(),
        });
    }
    series
}
/// Implements the `plot` subcommand.
///
/// Reads benchmark entries from `args.input` and draws a 2×2 multiplot
/// (total time, peak CPU, peak memory, startup time) against the item count.
/// The output goes to `args.output` through gnuplot's `pngcairo` terminal.
///
/// # Errors
///
/// Fails when the input cannot be read, contains no valid entry or no
/// positive item count, or when gnuplot cannot be started.
fn cmd_plot(args: &PlotArgs) -> std::result::Result<(), Box<dyn std::error::Error>> {
use gnuplot::{
AlignType::*, AutoOption::Fix, AxesCommon, BorderLocation2D::*, Caption, Color, Figure, FillAlpha,
LabelOption::TextColor, LegendOption::Placement, LineWidth,
};
// Colours: BG = image/legend background, SURFACE = borders and grid,
// TEXT = labels and tics, PALETTE = one colour per series (cycled).
const BG: &str = "#1e1e2e"; const SURFACE: &str = "#31324c"; const TEXT: &str = "#cdd6f4"; const PALETTE: &[&str] = &[
"#89b4fa", "#f38ba8", "#a6e3a1", "#f9e2af", "#cba6f7", "#94e2d5", "#fab387", ];
let entries = load_plot_entries(&args.input).map_err(|e| format!("cannot read '{}': {}", args.input, e))?;
if entries.is_empty() {
return Err(format!("no valid benchmark entries found in '{}'", args.input).into());
}
// One set of band series per chart; entries lacking any of min/avg/max for
// a metric are left out of that chart.
let time_bands = collect_bands(&entries, |e| Some((e.time_s.min?, e.time_s.avg?, e.time_s.max?)));
let cpu_bands = collect_bands(&entries, |e| Some((e.peak_cpu.min?, e.peak_cpu.avg?, e.peak_cpu.max?)));
// Memory values are divided by 1024 and plotted as MB.
let mem_bands = collect_bands(&entries, |e| {
Some((
e.peak_memory_kb.min? / 1024.0,
e.peak_memory_kb.avg? / 1024.0,
e.peak_memory_kb.max? / 1024.0,
))
});
let startup_bands = collect_bands(&entries, |e| {
Some((e.startup_s.min?, e.startup_s.avg?, e.startup_s.max?))
});
// Shared x range: all positive item counts, with a factor-of-two margin on
// each side.
let all_x: Vec<f64> = entries
.iter()
.filter_map(|e| e.items_total)
.filter(|&v| v > 0.0)
.collect();
if all_x.is_empty() {
return Err("no item-count data".into());
}
let x_min = all_x.iter().copied().fold(f64::INFINITY, f64::min);
let x_max = all_x.iter().copied().fold(f64::NEG_INFINITY, f64::max);
let x_lo = x_min * 0.5;
let x_hi = x_max * 2.0;
// Smallest `min` and largest `max` over all points of all series.
// For an empty set of series this yields (+inf, -inf).
let y_extent = |bands: &[BandSeries]| -> (f64, f64) {
let mn = bands
.iter()
.flat_map(|s| s.points.iter())
.map(|p| p.min)
.fold(f64::INFINITY, f64::min);
let mx = bands
.iter()
.flat_map(|s| s.points.iter())
.map(|p| p.max)
.fold(f64::NEG_INFINITY, f64::max);
(mn, mx)
};
// Draws every series as a translucent min–max band plus a line through the
// averages. Series with fewer than two points are skipped.
let draw_bands = |axes: &mut gnuplot::Axes2D, bands: &[BandSeries]| {
for (band, &color) in bands.iter().zip(PALETTE.iter().cycle()) {
if band.points.len() < 2 {
continue;
}
let xs: Vec<f64> = band.points.iter().map(|p| p.x).collect();
let ys_lo: Vec<f64> = band.points.iter().map(|p| p.min).collect();
let ys_hi: Vec<f64> = band.points.iter().map(|p| p.max).collect();
// Empty caption for the band; the average line below carries the name.
axes.fill_between(
xs.iter().copied(),
ys_lo.iter().copied(),
ys_hi.iter().copied(),
&[Color(color.into()), FillAlpha(0.22), Caption("")],
);
let ys_avg: Vec<f64> = band.points.iter().map(|p| p.avg).collect();
axes.lines(
xs.iter().copied(),
ys_avg.iter().copied(),
&[Color(color.into()), LineWidth(2.0), Caption(band.name.as_str())],
);
}
};
// Raw gnuplot commands that apply the colour theme before plotting.
let pre = format!(
"set border lc rgb '{surf}'\n\
set tics textcolor rgb '{text}'\n\
set xlabel textcolor rgb '{text}'\n\
set ylabel textcolor rgb '{text}'\n\
set title textcolor rgb '{text}'\n\
set grid lc rgb '{surf}'\n\
set key opaque fc rgb '{bg}'\n\
set key box lc rgb '{surf}'\n\
set key textcolor rgb '{text}'",
bg = BG,
surf = SURFACE,
text = TEXT,
);
let mut fg = Figure::new();
fg.set_terminal(
&format!("pngcairo size {},{} background '{}'", args.width, args.height, BG),
&args.output,
);
fg.set_pre_commands(&pre);
fg.set_multiplot_layout(2, 2).set_title("Benchmark Results");
let lbl = &[TextColor(TEXT)];
// Chart 1: total time, log-log axes. The lower bound is clamped to a
// positive value.
{
let (mn, mx) = y_extent(&time_bands);
let y_lo = (mn * 0.5).max(1e-9);
let y_hi = mx * 2.0;
let axes = fg.axes2d();
axes.set_title("Total Time", lbl)
.set_x_label("Items", lbl)
.set_y_label("Time (s)", lbl)
.set_border(true, &[Bottom, Left, Top, Right], &[Color(SURFACE)])
.set_x_log(Some(10.0))
.set_y_log(Some(10.0))
.set_x_range(Fix(x_lo), Fix(x_hi))
.set_y_range(Fix(y_lo), Fix(y_hi))
.set_x_grid(true)
.set_y_grid(true)
.set_legend(
gnuplot::Coordinate::Graph(0.02),
gnuplot::Coordinate::Graph(0.98),
&[Placement(AlignLeft, AlignTop)],
lbl,
);
draw_bands(axes, &time_bands);
}
// Chart 2: peak CPU, linear y axis from 0 to at least 100.
{
let (_, mx) = y_extent(&cpu_bands);
let y_hi = (mx * 1.25).max(100.0);
let axes = fg.axes2d();
axes.set_title("Peak CPU", lbl)
.set_x_label("Items", lbl)
.set_y_label("CPU (%)", lbl)
.set_border(true, &[Bottom, Left, Top, Right], &[Color(SURFACE)])
.set_x_log(Some(10.0))
.set_x_range(Fix(x_lo), Fix(x_hi))
.set_y_range(Fix(0.0), Fix(y_hi))
.set_x_grid(true)
.set_y_grid(true)
.set_legend(
gnuplot::Coordinate::Graph(0.02),
gnuplot::Coordinate::Graph(0.98),
&[Placement(AlignLeft, AlignTop)],
lbl,
);
draw_bands(axes, &cpu_bands);
}
// Chart 3: peak memory in MB, linear y axis from 0 to at least 1.
{
let (_, mx) = y_extent(&mem_bands);
let y_hi = (mx * 1.25).max(1.0);
let axes = fg.axes2d();
axes.set_title("Peak Memory", lbl)
.set_x_label("Items", lbl)
.set_y_label("Memory (MB)", lbl)
.set_border(true, &[Bottom, Left, Top, Right], &[Color(SURFACE)])
.set_x_log(Some(10.0))
.set_x_range(Fix(x_lo), Fix(x_hi))
.set_y_range(Fix(0.0), Fix(y_hi))
.set_x_grid(true)
.set_y_grid(true)
.set_legend(
gnuplot::Coordinate::Graph(0.02),
gnuplot::Coordinate::Graph(0.98),
&[Placement(AlignLeft, AlignTop)],
lbl,
);
draw_bands(axes, &mem_bands);
}
// Chart 4: startup time, linear y axis from 0 to at least 0.01 s.
{
let (_, mx) = y_extent(&startup_bands);
let y_hi = (mx * 1.25).max(0.01);
let axes = fg.axes2d();
axes.set_title("Startup Time", lbl)
.set_x_label("Items", lbl)
.set_y_label("Time (s)", lbl)
.set_border(true, &[Bottom, Left, Top, Right], &[Color(SURFACE)])
.set_x_log(Some(10.0))
.set_x_range(Fix(x_lo), Fix(x_hi))
.set_y_range(Fix(0.0), Fix(y_hi))
.set_x_grid(true)
.set_y_grid(true)
.set_legend(
gnuplot::Coordinate::Graph(0.02),
gnuplot::Coordinate::Graph(0.98),
&[Placement(AlignLeft, AlignTop)],
lbl,
);
draw_bands(axes, &startup_bands);
}
// Hand the figure to gnuplot, then close the figure.
fg.show_and_keep_running().map_err(|e| {
format!("gnuplot not found or failed to start: {e}\nMake sure gnuplot is installed and available in PATH.")
})?;
fg.close();
eprintln!("Plot written to '{}'", args.output);
Ok(())
}
fn main() -> Result<()> {
let raw: Vec<String> = std::env::args().filter(|a| a != "--bench").collect();
let args = Args::parse_from(raw);
let run_args = match args.command {
Subcommand::Generate(ref g) => {
eprintln!("Generating {} items to {} ...", g.num_items, g.file);
generate_test_data(&g.file, g.num_items).expect("failed to write test data");
eprintln!("Generated {} items successfully", g.num_items);
return Ok(());
}
Subcommand::Plot(ref p) => {
if let Err(e) = cmd_plot(p) {
eprintln!("Error: {}", e);
std::process::exit(1);
}
return Ok(());
}
Subcommand::Run(ref r) => r,
};
let mut binaries = run_args.binaries.clone();
if binaries.is_empty() || (binaries.len() == 1 && binaries[0].is_empty()) {
binaries = vec![DEFAULT_BINARY.to_owned()];
}
for binary in &mut binaries {
match which::which(&*binary) {
Ok(resolved) => *binary = resolved.to_string_lossy().into_owned(),
Err(e) => {
eprintln!("Error: cannot resolve binary '{}': {}", binary, e);
std::process::exit(1);
}
}
}
let display_names: Vec<String> = binaries.iter().map(|b| binary_display_name(b)).collect();
let (tmp_file_path, _tmp_file_handle, num_items) = if let Some(ref path) = run_args.file {
if !Path::new(path).is_file() {
eprintln!("Error: Input file '{}' not found", path);
std::process::exit(1);
}
let count = fs::read_to_string(path)
.expect("failed to read input file")
.lines()
.count() as u64;
eprintln!("Using input file with {} items", count);
(path.clone(), None::<NamedTempFile>, count)
} else {
let tmp = NamedTempFile::new().expect("failed to create temp input file");
let path = tmp.path().to_string_lossy().into_owned();
eprintln!("Generating test data...");
generate_test_data(&path, run_args.num_items).expect("failed to generate test data");
(path, Some(tmp), run_args.num_items)
};
let query = &run_args.query;
let runs = run_args.runs;
let warmup = run_args.warmup;
let extra_args = &run_args.extra_args;
let record_perf = run_args.perf.is_some();
let perf_explicit = run_args.perf.as_deref().unwrap_or("");
let record_strace = run_args.strace.is_some();
let strace_explicit = run_args.strace.as_deref().unwrap_or("");
eprintln!("=== Skim Ingestion + Matching Benchmark ===");
eprintln!(
"Binaries: {} | Items: {} | Query: '{}' | Warmup: {} | Runs: {} (per binary)",
display_names.join(", "),
num_items,
query,
warmup,
runs,
);
if run_args.file.is_some() {
eprintln!("Input file: {}", tmp_file_path);
}
if !extra_args.is_empty() {
eprintln!(
"Extra args: {}",
extra_args
.iter()
.map(|arg| String::from_utf8(shell_quote::Sh::quote_vec(arg)).unwrap())
.collect::<Vec<_>>()
.join(" ")
);
}
if record_perf {
eprintln!("Perf recording: enabled (final measured run only)");
}
if record_strace {
eprintln!("Strace recording: enabled (final measured run only)");
}
let tmux_server = TmuxServer::start();
if warmup > 0 {
eprintln!("\n=== Warmup ({} run(s) per binary) ===", warmup);
for (bi, binary) in binaries.iter().enumerate() {
for wu in 1..=warmup {
eprintln!(" Warmup {}/{} — {} ...", wu, warmup, display_names[bi]);
let _ = run_once(
binary,
query,
&tmp_file_path,
extra_args,
wu,
&format!("warmup_b{}", bi),
None,
None,
&tmux_server,
run_args.stable_secs,
)?;
}
}
}
let mut all_results: Vec<Vec<RunResult>> = (0..binaries.len()).map(|_| Vec::new()).collect();
let perf_files: Vec<Option<String>> = if record_perf {
binaries
.iter()
.enumerate()
.map(|(bi, binary)| {
let explicit = if binaries.len() == 1 { perf_explicit } else { "" };
let _ = bi;
Some(perf_path_for(binary, explicit))
})
.collect()
} else {
vec![None; binaries.len()]
};
let strace_files: Vec<Option<String>> = if record_strace {
binaries
.iter()
.map(|binary| {
let explicit = if binaries.len() == 1 { strace_explicit } else { "" };
Some(strace_path_for(binary, explicit))
})
.collect()
} else {
vec![None; binaries.len()]
};
for run_num in 1..=runs {
for (bi, binary) in binaries.iter().enumerate() {
if runs > 1 || binaries.len() > 1 {
eprintln!(
"\n=== Run {}/{} — binary {}/{}: {} ===",
run_num,
runs,
bi + 1,
binaries.len(),
display_names[bi]
);
}
let this_perf = if run_num == runs {
perf_files[bi].as_deref()
} else {
None
};
let this_strace = if run_num == runs {
strace_files[bi].as_deref()
} else {
None
};
let result = run_once(
binary,
query,
&tmp_file_path,
extra_args,
run_num,
&format!("b{}", bi),
this_perf,
this_strace,
&tmux_server,
run_args.stable_secs,
)?;
if runs > 1 || binaries.len() > 1 {
eprintln!("Status: {}", if result.completed { "COMPLETED" } else { "TIMEOUT" });
eprintln!("Items matched: {} / {}", result.matched, result.total_count);
eprintln!("Total time: {:.3}s", result.elapsed_s);
eprintln!("Items/second: {:.0}", result.rate);
if let Some(kb) = result.peak_mem_kb {
eprintln!("Peak memory usage: {:.1} MB", kb as f64 / 1024.0);
}
if let Some(cpu) = result.peak_cpu {
eprintln!("Peak CPU usage: {:.1}%", cpu);
}
if let Some(s) = result.startup_s {
eprintln!("Startup time (UI ready): {:.3}s", s);
}
if let Some(ref pf) = result.perf_file {
eprintln!("Perf data: {}", pf);
}
if let Some(ref sf) = result.strace_file {
eprintln!("Strace output: {}", sf);
}
}
all_results[bi].push(result);
}
}
let aggregates: Vec<AggResult> = all_results.iter().map(|r| aggregate(r)).collect();
if run_args.json {
print_json(&binaries, &display_names, &aggregates, runs);
} else {
let baseline_agg = &aggregates[0];
for (i, (display_name, agg)) in display_names.iter().zip(&aggregates).enumerate() {
print_human(
display_name,
agg,
if binaries.len() > 1 { Some(baseline_agg) } else { None },
i == 0,
);
}
if binaries.len() > 1 {
println!("\n## Comparison Summary (vs baseline: `{}`)\n", display_names[0]);
} else {
println!("\n## Results Summary\n");
}
print_markdown_table(&display_names, &aggregates);
}
if record_perf {
eprintln!("\n=== Perf recording output ===");
for (display_name, path) in display_names.iter().zip(&perf_files) {
if let Some(p) = path {
if Path::new(p).is_file() {
eprintln!(" [{}] perf data: {}", display_name, p);
} else {
eprintln!(" [{}] perf data not found (perf may have failed)", display_name);
}
}
}
}
if record_strace {
eprintln!("\n=== Strace output ===");
for (display_name, path) in display_names.iter().zip(&strace_files) {
if let Some(p) = path {
if Path::new(p).is_file() {
eprintln!(" [{}] strace output: {}", display_name, p);
} else {
eprintln!(" [{}] strace output not found (strace may have failed)", display_name);
}
}
}
}
Ok(())
}