use serde::Serialize;
/// A single token-savings estimate comparing a tool response against a modelled baseline.
///
/// The field descriptions below reflect how `estimate_from_text` in this file fills the row.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct SavingsRow {
/// Modelled token cost of the baseline approach: the response token count scaled by a
/// per-tool multiplier.
pub baseline_tokens: u64,
/// Token count of the actual response text.
pub actual_tokens: u64,
/// `baseline_tokens - actual_tokens`, saturating at zero.
pub est_tokens_saved: u64,
/// Static label naming the baseline model, e.g. `"full_file_read"`, `"no_baseline"` or
/// `"unclassified"`.
pub baseline: &'static str,
}
/// Converts a byte count into an approximate token count.
///
/// Uses truncating integer division by four, so any remainder is discarded
/// (7 bytes become 1 token, 3 bytes become 0 tokens).
pub(super) fn bytes_to_tokens(bytes: u64) -> u64 {
    const BYTES_PER_TOKEN: u64 = 4;
    bytes / BYTES_PER_TOKEN
}
/// Returns the token count of `text`, as reported by `count_tokens` in the sibling
/// `tokens` module.
fn tokens_for_text(text: &str) -> u64 {
    use super::tokens::count_tokens;

    count_tokens(text)
}
/// Multiplier for `outline`; its baseline is labelled `full_file_read`.
const OUTLINE_READ_MULTIPLIER: u64 = 5;
/// Multiplier for `search_symbols`, `find_references`, `find_callers` and
/// `find_implementations`; their baselines are labelled `grep_plus_read_top_hits` or
/// `grep_top_hits`.
const GREP_READ_MULTIPLIER: u64 = 3;
/// Multiplier for `dependents`; its baseline is labelled `grep_imports_top_hits`.
const DEPENDENTS_READ_MULTIPLIER: u64 = 2;
/// Multiplier for `hot_files`; its baseline is labelled `git_log_per_file`.
const HOT_FILES_LOG_MULTIPLIER: u64 = 3;
/// Multiplier for `symbol_history`; its baseline is labelled `per_commit_outline_diff`.
const SYMBOL_HISTORY_DIFF_MULTIPLIER: u64 = 4;
/// Multiplier for `search_documents`; its baseline is labelled `full_document_read`.
const DOCUMENT_READ_MULTIPLIER: u64 = 5;
/// Multiplier for `list_files`; its baseline is labelled `find_plus_filter`.
const LIST_FILES_READ_MULTIPLIER: u64 = 2;
/// Multiplier for `web_scrape`, `web_crawl` and `web_map`; their baseline is labelled
/// `manual_browse_paste`.
const WEB_INGEST_MULTIPLIER: u64 = 3;
/// Multiplier for tools that claim no savings: the baseline equals the response itself.
const NO_SAVINGS_MULTIPLIER: u64 = 1;

/// Estimates how many tokens a tool response saved compared with a modelled baseline.
///
/// The response is tokenised with `tokens_for_text`. The baseline is that count scaled by
/// a per-tool multiplier (saturating on overflow), and the savings are
/// `baseline - actual`, saturating at zero.
///
/// # Parameters
/// - `tool`: tool name that selects the multiplier and the baseline label. Tools listed
///   without a modelled baseline get the label `"no_baseline"`; names not listed at all get
///   `"unclassified"`. Both report zero savings.
/// - `_corpus_bytes`: currently unused; the estimate depends only on the response text.
/// - `resp_text`: response body whose token count becomes `actual_tokens`.
///
/// # Returns
/// A [`SavingsRow`] holding the baseline tokens, the actual tokens, the estimated savings
/// and the baseline label.
pub fn estimate_from_text(tool: &str, _corpus_bytes: u64, resp_text: &str) -> SavingsRow {
    let actual = tokens_for_text(resp_text);

    // Each arm yields (multiplier, label); the multiplication happens once below so every
    // tool goes through the same saturating arithmetic.
    let (multiplier, baseline_name): (u64, &'static str) = match tool {
        "outline" => (OUTLINE_READ_MULTIPLIER, "full_file_read"),
        "search_symbols" => (GREP_READ_MULTIPLIER, "grep_plus_read_top_hits"),
        "find_references" | "find_callers" | "find_implementations" => {
            (GREP_READ_MULTIPLIER, "grep_top_hits")
        }
        "dependents" => (DEPENDENTS_READ_MULTIPLIER, "grep_imports_top_hits"),
        "hot_files" => (HOT_FILES_LOG_MULTIPLIER, "git_log_per_file"),
        "symbol_history" => (SYMBOL_HISTORY_DIFF_MULTIPLIER, "per_commit_outline_diff"),
        "search_documents" => (DOCUMENT_READ_MULTIPLIER, "full_document_read"),
        "list_files" => (LIST_FILES_READ_MULTIPLIER, "find_plus_filter"),
        "web_scrape" | "web_crawl" | "web_map" => (WEB_INGEST_MULTIPLIER, "manual_browse_paste"),
        "workspace_grep"
        | "call_graph"
        | "memory_get"
        | "memory_put"
        | "memory_list"
        | "memory_search"
        | "memory_delete"
        | "telemetry_summary"
        | "rescan"
        | "cache_stats"
        | "cache_gc"
        | "cache_clear"
        | "status"
        | "repo_info"
        | "working_tree_status"
        | "recent_changes"
        | "commits_touching"
        | "find_commits_by_path"
        | "diff_file"
        | "diff_outline"
        | "blame_file"
        | "blame_symbol" => (NO_SAVINGS_MULTIPLIER, "no_baseline"),
        _ => (NO_SAVINGS_MULTIPLIER, "unclassified"),
    };

    let baseline = actual.saturating_mul(multiplier);
    SavingsRow {
        baseline_tokens: baseline,
        actual_tokens: actual,
        est_tokens_saved: baseline.saturating_sub(actual),
        baseline: baseline_name,
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for [`estimate_from_text`].
    //!
    //! Exact token counts are asserted only under `#[cfg(not(feature = "documents"))]`,
    //! where these tests expect `bytes / 4` tokens (presumably a different tokenizer is used
    //! when that feature is on — confirm against the `tokens` module). Every other assertion
    //! is relative to `actual_tokens`, so it holds for any tokenizer.
    use super::*;

    /// Asserts that `s` follows the "`multiplier` x response" model under `expected_baseline`.
    fn assert_multiplier_model(s: &SavingsRow, multiplier: u64, expected_baseline: &str) {
        assert_eq!(s.baseline, expected_baseline);
        assert_eq!(s.baseline_tokens, s.actual_tokens.saturating_mul(multiplier));
        assert_eq!(s.est_tokens_saved, s.baseline_tokens.saturating_sub(s.actual_tokens));
    }

    /// Asserts that `s` follows the grep model (`GREP_READ_MULTIPLIER` x response).
    fn assert_grep_model(s: &SavingsRow, expected_baseline: &str) {
        assert_multiplier_model(s, GREP_READ_MULTIPLIER, expected_baseline);
    }

    #[test]
    fn outline_baseline_is_5x_response() {
        let s = estimate_from_text("outline", 1_000_000, &"a".repeat(400));
        assert_eq!(s.baseline_tokens, s.actual_tokens.saturating_mul(5));
        assert_eq!(s.baseline, "full_file_read");
        #[cfg(not(feature = "documents"))]
        {
            assert_eq!(s.actual_tokens, 100);
            assert_eq!(s.baseline_tokens, 500);
            assert_eq!(s.est_tokens_saved, 400);
        }
    }

    #[test]
    fn search_symbols_savings_independent_of_corpus() {
        let text = "a".repeat(400);
        let big = estimate_from_text("search_symbols", 1_000_000, &text);
        let empty = estimate_from_text("search_symbols", 0, &text);
        assert_eq!(big.est_tokens_saved, empty.est_tokens_saved);
        assert_grep_model(&big, "grep_plus_read_top_hits");
        #[cfg(not(feature = "documents"))]
        {
            assert_eq!(big.actual_tokens, 100);
            assert_eq!(big.baseline_tokens, 300);
            assert_eq!(big.est_tokens_saved, 200);
        }
    }

    // Previously named `..._floors_at_zero_for_empty_corpus`, but nothing floors at zero
    // here: with a zero-byte corpus the estimate still follows the 3x response model.
    #[test]
    fn find_references_uses_grep_model_for_empty_corpus() {
        let s = estimate_from_text("find_references", 0, &"a".repeat(200));
        assert_grep_model(&s, "grep_top_hits");
        #[cfg(not(feature = "documents"))]
        {
            assert_eq!(s.actual_tokens, 50);
            assert_eq!(s.baseline_tokens, 150);
            assert_eq!(s.est_tokens_saved, 100);
        }
    }

    #[test]
    fn find_callers_and_implementations_use_grep_model() {
        for tool in ["find_callers", "find_implementations"] {
            let s = estimate_from_text(tool, 1_000_000, &"a".repeat(400));
            assert_grep_model(&s, "grep_top_hits");
        }
    }

    #[test]
    fn grep_savings_scale_with_response_not_corpus() {
        let small = estimate_from_text("search_symbols", 1_000_000, &"a".repeat(400));
        let large = estimate_from_text("search_symbols", 1_000_000, &"a".repeat(4_000));
        assert!(
            large.est_tokens_saved > small.est_tokens_saved,
            "bigger response must yield bigger savings: {} !> {}",
            large.est_tokens_saved,
            small.est_tokens_saved
        );
        #[cfg(not(feature = "documents"))]
        assert_eq!(large.est_tokens_saved, 2_000);
    }

    #[test]
    fn dependents_models_grep_imports_top_hits() {
        let s = estimate_from_text("dependents", 1_000_000, &"a".repeat(400));
        assert_multiplier_model(&s, DEPENDENTS_READ_MULTIPLIER, "grep_imports_top_hits");
    }

    #[test]
    fn hot_files_models_git_log_per_file_at_3x() {
        let s = estimate_from_text("hot_files", 1_000_000, &"a".repeat(400));
        assert_multiplier_model(&s, 3, "git_log_per_file");
    }

    #[test]
    fn symbol_history_models_per_commit_outline_diff_at_4x() {
        let s = estimate_from_text("symbol_history", 1_000_000, &"a".repeat(400));
        assert_multiplier_model(&s, 4, "per_commit_outline_diff");
    }

    #[test]
    fn no_baseline_tools_claim_zero_savings() {
        // Covers every tool name that `estimate_from_text` maps to "no_baseline".
        for tool in [
            "memory_get",
            "memory_put",
            "memory_list",
            "memory_search",
            "memory_delete",
            "telemetry_summary",
            "rescan",
            "cache_stats",
            "cache_gc",
            "cache_clear",
            "status",
            "repo_info",
            "working_tree_status",
            "recent_changes",
            "commits_touching",
            "find_commits_by_path",
            "diff_file",
            "diff_outline",
            "blame_file",
            "blame_symbol",
            "workspace_grep",
            "call_graph",
        ] {
            let s = estimate_from_text(tool, 1_000_000, &"a".repeat(500));
            assert_eq!(s.est_tokens_saved, 0, "{tool} must not claim savings");
            assert_eq!(s.baseline, "no_baseline", "{tool} must label no_baseline");
            assert_eq!(s.baseline_tokens, s.actual_tokens, "{tool} baseline equals actual");
        }
    }

    #[test]
    fn search_documents_models_full_document_read_at_5x() {
        let s = estimate_from_text("search_documents", 1_000_000, &"a".repeat(400));
        assert_multiplier_model(&s, 5, "full_document_read");
        #[cfg(not(feature = "documents"))]
        {
            assert_eq!(s.actual_tokens, 100);
            assert_eq!(s.baseline_tokens, 500);
            assert_eq!(s.est_tokens_saved, 400);
        }
    }

    #[test]
    fn list_files_models_find_plus_filter_at_2x() {
        let s = estimate_from_text("list_files", 1_000_000, &"a".repeat(400));
        assert_multiplier_model(&s, 2, "find_plus_filter");
        #[cfg(not(feature = "documents"))]
        {
            assert_eq!(s.actual_tokens, 100);
            assert_eq!(s.baseline_tokens, 200);
            assert_eq!(s.est_tokens_saved, 100);
        }
    }

    #[test]
    fn web_ingest_models_manual_browse_paste_at_3x() {
        for tool in ["web_scrape", "web_crawl", "web_map"] {
            let s = estimate_from_text(tool, 1_000_000, &"a".repeat(400));
            assert_eq!(s.baseline, "manual_browse_paste", "{tool} baseline name");
            assert_eq!(
                s.baseline_tokens,
                s.actual_tokens.saturating_mul(3),
                "{tool} multiplier"
            );
            assert_eq!(
                s.est_tokens_saved,
                s.baseline_tokens.saturating_sub(s.actual_tokens),
                "{tool} savings"
            );
            #[cfg(not(feature = "documents"))]
            {
                assert_eq!(s.actual_tokens, 100, "{tool} actual");
                assert_eq!(s.baseline_tokens, 300, "{tool} baseline");
                assert_eq!(s.est_tokens_saved, 200, "{tool} saved");
            }
        }
    }

    #[test]
    fn unknown_tool_is_unclassified() {
        let s = estimate_from_text("not_a_real_tool", 1_000_000, &"a".repeat(100));
        assert_eq!(s.baseline, "unclassified");
        assert_eq!(s.est_tokens_saved, 0);
        assert_eq!(s.baseline_tokens, s.actual_tokens);
    }

    #[cfg(not(feature = "documents"))]
    #[test]
    fn estimate_from_text_is_bytes_over_four_under_heuristic() {
        let s = estimate_from_text("outline", 0, &"x".repeat(800));
        assert_eq!(s.actual_tokens, 200);
        assert_eq!(s.baseline_tokens, 1_000);
        assert_eq!(s.est_tokens_saved, 800);
    }
}