use serde::Serialize;
/// Token-savings estimate for a single tool response, as returned by `estimate`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct SavingsRow {
/// Token count attributed to the baseline; `estimate` sets it to
/// `actual_tokens` scaled by a per-tool multiplier (saturating on overflow).
pub baseline_tokens: u64,
/// Token count of the actual response; `estimate` derives it from the
/// response size in bytes via `bytes_to_tokens`.
pub actual_tokens: u64,
/// `baseline_tokens - actual_tokens`, computed with a saturating subtraction
/// so it never underflows.
pub est_tokens_saved: u64,
/// Label naming the baseline that was applied, e.g. `"full_file_read"`,
/// `"no_baseline"`, or `"unclassified"` for tool names `estimate` does not list.
pub baseline: &'static str,
}
/// Converts a byte count to a token count at a fixed ratio of four bytes per token.
///
/// Integer division discards the remainder, so inputs smaller than four bytes
/// yield zero tokens.
fn bytes_to_tokens(bytes: u64) -> u64 {
    const BYTES_PER_TOKEN: u64 = 4;
    bytes / BYTES_PER_TOKEN
}
/// Multiplier `estimate` applies to the response token count to obtain the
/// baseline for the grep-style tools (`search_symbols`, `find_references`,
/// `find_callers`, `find_implementations`).
const GREP_READ_MULTIPLIER: u64 = 3;
/// Multiplier `estimate` applies to the response token count to obtain the
/// baseline for the `dependents` tool.
const DEPENDENTS_READ_MULTIPLIER: u64 = 2;
/// Estimates the tokens a tool response saved relative to a per-tool baseline.
///
/// The response size is converted to tokens with `bytes_to_tokens`; the
/// baseline is that token count scaled by a multiplier chosen from `tool`.
/// Tools in the `"no_baseline"` group, and any name not listed here (labelled
/// `"unclassified"`), use a multiplier of one and therefore report zero savings.
///
/// # Parameters
/// * `tool` - tool name used to select the multiplier and the baseline label.
/// * `_corpus_bytes` - accepted for the signature but not used in the computation.
/// * `resp_bytes` - size of the tool's response in bytes.
///
/// # Returns
/// A [`SavingsRow`] holding the baseline tokens, the actual tokens, their
/// saturating difference, and the baseline label.
pub fn estimate(tool: &str, _corpus_bytes: u64, resp_bytes: u64) -> SavingsRow {
    let actual_tokens = bytes_to_tokens(resp_bytes);

    // Each arm yields (multiplier applied to the response tokens, baseline label).
    let (multiplier, label): (u64, &'static str) = match tool {
        "outline" => (5, "full_file_read"),
        "search_symbols" => (GREP_READ_MULTIPLIER, "grep_plus_read_top_hits"),
        "find_references" | "find_callers" | "find_implementations" => {
            (GREP_READ_MULTIPLIER, "grep_top_hits")
        }
        "dependents" => (DEPENDENTS_READ_MULTIPLIER, "grep_imports_top_hits"),
        "hot_files" => (3, "git_log_per_file"),
        "symbol_history" => (4, "per_commit_outline_diff"),
        // A multiplier of one leaves the baseline equal to the response itself.
        "workspace_grep"
        | "call_graph"
        | "memory_get"
        | "memory_put"
        | "memory_list"
        | "memory_search"
        | "memory_delete"
        | "search_documents"
        | "telemetry_summary"
        | "rescan"
        | "cache_stats"
        | "cache_gc"
        | "cache_clear"
        | "status"
        | "repo_info"
        | "list_files"
        | "working_tree_status"
        | "recent_changes"
        | "commits_touching"
        | "find_commits_by_path"
        | "diff_file"
        | "diff_outline"
        | "blame_file"
        | "blame_symbol"
        | "web_scrape"
        | "web_crawl"
        | "web_map" => (1, "no_baseline"),
        _ => (1, "unclassified"),
    };

    // Saturating arithmetic: a huge response clamps at u64::MAX instead of wrapping.
    let baseline_tokens = actual_tokens.saturating_mul(multiplier);
    let est_tokens_saved = baseline_tokens.saturating_sub(actual_tokens);

    SavingsRow {
        baseline_tokens,
        actual_tokens,
        est_tokens_saved,
        baseline: label,
    }
}
#[cfg(test)]
mod tests {
    // Unit tests pinning the per-tool multipliers, baseline labels, and the
    // arithmetic edge cases of `estimate`. All byte counts convert to tokens
    // at four bytes per token.
    use super::*;

    // 400 bytes -> 100 tokens; `outline` claims a 5x baseline.
    #[test]
    fn outline_baseline_is_5x_response() {
        let s = estimate("outline", 1_000_000, 400);
        assert_eq!(s.actual_tokens, 100);
        assert_eq!(s.baseline_tokens, 500);
        assert_eq!(s.est_tokens_saved, 400);
        assert_eq!(s.baseline, "full_file_read");
    }

    // The corpus size argument must not influence the estimate.
    #[test]
    fn search_symbols_savings_independent_of_corpus() {
        let big = estimate("search_symbols", 1_000_000, 400);
        let empty = estimate("search_symbols", 0, 400);
        assert_eq!(big.est_tokens_saved, empty.est_tokens_saved);
        assert_eq!(big.actual_tokens, 100);
        assert_eq!(big.baseline_tokens, 300);
        assert_eq!(big.est_tokens_saved, 200);
        assert_eq!(big.baseline, "grep_plus_read_top_hits");
    }

    // Renamed from `find_references_grep_baseline_floors_at_zero_for_empty_corpus`:
    // the assertions show the baseline stays at 3x the response (150 tokens)
    // even when the corpus is empty, so nothing "floors at zero" here.
    #[test]
    fn find_references_baseline_is_3x_response_even_for_empty_corpus() {
        let s = estimate("find_references", 0, 200);
        assert_eq!(s.actual_tokens, 50);
        assert_eq!(s.baseline_tokens, 150);
        assert_eq!(s.est_tokens_saved, 100);
        assert_eq!(s.baseline, "grep_top_hits");
    }

    // `find_callers` and `find_implementations` share the `find_references` baseline.
    #[test]
    fn find_callers_and_implementations_share_grep_baseline() {
        for tool in ["find_callers", "find_implementations"] {
            let s = estimate(tool, 0, 200);
            assert_eq!(s.actual_tokens, 50, "{tool} actual tokens");
            assert_eq!(s.baseline_tokens, 150, "{tool} baseline tokens");
            assert_eq!(s.est_tokens_saved, 100, "{tool} savings");
            assert_eq!(s.baseline, "grep_top_hits", "{tool} label");
        }
    }

    #[test]
    fn dependents_baseline_is_2x_response() {
        let s = estimate("dependents", 1_000_000, 400);
        assert_eq!(s.actual_tokens, 100);
        assert_eq!(s.baseline_tokens, 200);
        assert_eq!(s.est_tokens_saved, 100);
        assert_eq!(s.baseline, "grep_imports_top_hits");
    }

    #[test]
    fn hot_files_baseline_is_3x_response() {
        let s = estimate("hot_files", 1_000_000, 400);
        assert_eq!(s.actual_tokens, 100);
        assert_eq!(s.baseline_tokens, 300);
        assert_eq!(s.est_tokens_saved, 200);
        assert_eq!(s.baseline, "git_log_per_file");
    }

    #[test]
    fn symbol_history_baseline_is_4x_response() {
        let s = estimate("symbol_history", 1_000_000, 400);
        assert_eq!(s.actual_tokens, 100);
        assert_eq!(s.baseline_tokens, 400);
        assert_eq!(s.est_tokens_saved, 300);
        assert_eq!(s.baseline, "per_commit_outline_diff");
    }

    #[test]
    fn grep_savings_scale_with_response_not_corpus() {
        let small = estimate("search_symbols", 1_000_000, 400);
        let large = estimate("search_symbols", 1_000_000, 4_000);
        assert!(
            large.est_tokens_saved > small.est_tokens_saved,
            "bigger response must yield bigger savings: {} !> {}",
            large.est_tokens_saved,
            small.est_tokens_saved
        );
        assert_eq!(large.est_tokens_saved, 2_000);
    }

    // Every tool in the `no_baseline` group must report zero savings.
    #[test]
    fn no_baseline_tools_claim_zero_savings() {
        for tool in [
            "workspace_grep",
            "call_graph",
            "memory_get",
            "memory_put",
            "memory_list",
            "memory_search",
            "memory_delete",
            "search_documents",
            "telemetry_summary",
            "rescan",
            "cache_stats",
            "cache_gc",
            "cache_clear",
            "status",
            "repo_info",
            "list_files",
            "working_tree_status",
            "recent_changes",
            "commits_touching",
            "find_commits_by_path",
            "diff_file",
            "diff_outline",
            "blame_file",
            "blame_symbol",
            "web_scrape",
            "web_crawl",
            "web_map",
        ] {
            let s = estimate(tool, 1_000_000, 500);
            assert_eq!(s.actual_tokens, 125, "{tool} actual tokens");
            assert_eq!(s.baseline_tokens, 125, "{tool} baseline must equal the response");
            assert_eq!(s.est_tokens_saved, 0, "{tool} must not claim savings");
            assert_eq!(s.baseline, "no_baseline", "{tool} must label no_baseline");
        }
    }

    #[test]
    fn unknown_tool_is_unclassified() {
        let s = estimate("not_a_real_tool", 1_000_000, 100);
        assert_eq!(s.baseline, "unclassified");
        assert_eq!(s.est_tokens_saved, 0);
    }

    // A maximal response would overflow when multiplied; the baseline must
    // clamp at u64::MAX rather than wrap around.
    #[test]
    fn baseline_saturates_instead_of_overflowing() {
        let s = estimate("outline", 0, u64::MAX);
        assert_eq!(s.actual_tokens, u64::MAX / 4);
        assert_eq!(s.baseline_tokens, u64::MAX);
        assert_eq!(s.est_tokens_saved, u64::MAX - u64::MAX / 4);
    }

    // Responses under four bytes round down to zero tokens, so nothing is saved.
    #[test]
    fn sub_token_response_rounds_down_to_zero() {
        for bytes in [0, 3] {
            let s = estimate("outline", 1_000_000, bytes);
            assert_eq!(s.actual_tokens, 0);
            assert_eq!(s.baseline_tokens, 0);
            assert_eq!(s.est_tokens_saved, 0);
            assert_eq!(s.baseline, "full_file_read");
        }
    }
}