use ahash::AHashMap;
use serde::{Deserialize, Serialize};
use super::safety;
/// Number of `Read` calls on one target at which `detect_waste` reports a
/// `redundant_read` finding.
const READ_REPEAT_THRESHOLD: u32 = 2;

/// Number of query-tool calls on one target at which `detect_waste` reports a
/// `repeated_query` finding.
const QUERY_REPEAT_THRESHOLD: u32 = 2;

/// A single `Read` of at least this many bytes (32 KiB) is reported as an
/// `oversized_read`.
const LARGE_READ_BYTES: u64 = 32 * 1024;

/// Upper bound on the number of findings kept in a report.
const MAX_FINDINGS: usize = 200;

/// Tool names that `detect_waste` treats as queries (matched exactly,
/// case-sensitively).
const QUERY_TOOLS: &[&str] = &[
    "Grep",
    "workspace_grep",
    "search_symbols",
    "find_references",
    "grep",
];
/// One recorded tool invocation, deserialized from a JSON object.
///
/// `tool` is required; `target` and `bytes` fall back to their `Default`
/// values (empty string and `0`) when absent from the input.
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
pub struct ToolCall {
    /// Name of the tool that was invoked (e.g. `"Read"`, `"Grep"`).
    pub tool: String,

    /// What the tool was pointed at; used as the grouping key when looking
    /// for repeated calls.
    #[serde(default)]
    pub target: String,

    /// Byte count attributed to this call; feeds the oversized-read check and
    /// the waste estimates.
    #[serde(default)]
    pub bytes: u64,
}
/// A single wasteful pattern found in a sequence of tool calls.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct WasteFinding {
    /// Category of the finding. `detect_waste` emits `"oversized_read"`,
    /// `"redundant_read"` and `"repeated_query"`.
    pub kind: String,

    /// Target of the call(s) that triggered the finding.
    pub target: String,

    /// Number of calls this finding covers.
    pub count: u32,

    /// Estimated number of bytes wasted by the pattern.
    pub estimated_waste_bytes: u64,
}
/// Result of a waste analysis over a sequence of tool calls.
#[derive(Debug, Clone, PartialEq, Eq, Serialize)]
pub struct WasteReport {
    /// The reported findings.
    pub findings: Vec<WasteFinding>,

    /// Saturating sum of `estimated_waste_bytes` over the findings.
    pub total_estimated_waste_bytes: u64,

    /// Whether some findings were dropped to respect a size cap.
    pub truncated: bool,
}
/// Running tally for calls that share one target.
#[derive(Default)]
struct RepeatAccumulator {
    /// How many calls have been observed (saturating).
    count: u32,
    /// Sum of the bytes of every call except the first (saturating).
    waste_after_first: u64,
}

impl RepeatAccumulator {
    /// Records one more call of `bytes` bytes.
    ///
    /// The first call is considered necessary, so only the second and later
    /// calls add to `waste_after_first`.
    fn observe(&mut self, bytes: u64) {
        // Decide before bumping the counter: anything after the first
        // observation is a repeat.
        let is_repeat = self.count != 0;
        self.count = self.count.saturating_add(1);
        if is_repeat {
            self.waste_after_first = self.waste_after_first.saturating_add(bytes);
        }
    }
}
/// Scans `calls` for wasteful patterns and summarizes them in a report.
///
/// Three kinds of findings are produced:
///
/// * `oversized_read` — one per `Read` call whose `bytes` is at least
///   `LARGE_READ_BYTES`.
/// * `redundant_read` — one per target that was `Read` at least
///   `READ_REPEAT_THRESHOLD` times; the waste is the bytes of every read
///   after the first.
/// * `repeated_query` — one per target queried at least
///   `QUERY_REPEAT_THRESHOLD` times by any tool in `QUERY_TOOLS`; the waste
///   is the bytes of every query after the first.
///
/// Findings whose target is flagged by `safety::contains_credential` are
/// removed. The rest are sorted by kind, then target. The total is summed
/// over all remaining findings *before* the list is capped at
/// `MAX_FINDINGS`, so it may exceed the sum of the findings returned.
pub fn detect_waste(calls: &[ToolCall]) -> WasteReport {
    let mut read_stats: AHashMap<&str, RepeatAccumulator> = AHashMap::new();
    let mut query_stats: AHashMap<&str, RepeatAccumulator> = AHashMap::new();
    let mut findings: Vec<WasteFinding> = Vec::new();

    for call in calls {
        let key = call.target.as_str();
        match call.tool.as_str() {
            "Read" => {
                read_stats.entry(key).or_default().observe(call.bytes);
                // Oversized reads are reported per call, in call order.
                if call.bytes >= LARGE_READ_BYTES {
                    findings.push(WasteFinding {
                        kind: "oversized_read".to_string(),
                        target: key.to_string(),
                        count: 1,
                        estimated_waste_bytes: call.bytes,
                    });
                }
            }
            tool if QUERY_TOOLS.contains(&tool) => {
                query_stats.entry(key).or_default().observe(call.bytes);
            }
            // Any other tool is not analyzed.
            _ => {}
        }
    }

    findings.extend(
        read_stats
            .into_iter()
            .filter(|(_, acc)| acc.count >= READ_REPEAT_THRESHOLD)
            .map(|(target, acc)| WasteFinding {
                kind: "redundant_read".to_string(),
                target: target.to_string(),
                count: acc.count,
                estimated_waste_bytes: acc.waste_after_first,
            }),
    );
    findings.extend(
        query_stats
            .into_iter()
            .filter(|(_, acc)| acc.count >= QUERY_REPEAT_THRESHOLD)
            .map(|(target, acc)| WasteFinding {
                kind: "repeated_query".to_string(),
                target: target.to_string(),
                count: acc.count,
                estimated_waste_bytes: acc.waste_after_first,
            }),
    );

    // Never echo a target that looks like it carries a credential.
    findings.retain(|f| !safety::contains_credential(&f.target));

    // Map iteration order is arbitrary; sorting makes the output stable.
    // The sort itself must stay stable so that several oversized reads of the
    // same target keep their call order.
    findings.sort_by(|a, b| {
        (a.kind.as_str(), a.target.as_str()).cmp(&(b.kind.as_str(), b.target.as_str()))
    });

    // Total first, cap second: the total reflects every surviving finding.
    let total_estimated_waste_bytes = findings
        .iter()
        .map(|f| f.estimated_waste_bytes)
        .fold(0u64, u64::saturating_add);

    let truncated = findings.len() > MAX_FINDINGS;
    // `truncate` is a no-op when the list is already within the cap.
    findings.truncate(MAX_FINDINGS);

    WasteReport {
        findings,
        total_estimated_waste_bytes,
        truncated,
    }
}
/// Parses newline-delimited JSON into tool calls, one object per line.
///
/// Each line is trimmed first. Blank lines and lines that do not deserialize
/// into a [`ToolCall`] are skipped silently, so the result may contain fewer
/// entries than `input` has lines.
pub fn parse_calls(input: &str) -> Vec<ToolCall> {
    input
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        // Best-effort: a line that fails to parse is dropped, not reported.
        .filter_map(|line| serde_json::from_str::<ToolCall>(line).ok())
        .collect()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a call for an arbitrary tool.
    fn query(tool: &str, target: &str, bytes: u64) -> ToolCall {
        ToolCall {
            tool: tool.to_owned(),
            target: target.to_owned(),
            bytes,
        }
    }

    /// Builds a `Read` call.
    fn read(target: &str, bytes: u64) -> ToolCall {
        query("Read", target, bytes)
    }

    /// Three reads of one file: one finding, waste is the last two reads.
    #[test]
    fn redundant_read_fires_at_two_reads_summing_bytes_after_first() {
        let calls = vec![read("src/main.rs", 100); 3];

        let report = detect_waste(&calls);

        assert_eq!(
            report.findings,
            vec![WasteFinding {
                kind: "redundant_read".to_string(),
                target: "src/main.rs".to_string(),
                count: 3,
                estimated_waste_bytes: 200,
            }]
        );
        assert_eq!(report.total_estimated_waste_bytes, 200);
        assert!(!report.truncated);
    }

    #[test]
    fn single_read_yields_no_redundant_finding() {
        let report = detect_waste(&[read("src/lib.rs", 4096)]);

        assert!(report.findings.is_empty());
        assert_eq!(report.total_estimated_waste_bytes, 0);
    }

    /// Only the second query's bytes count as waste.
    #[test]
    fn repeated_query_fires_for_two_identical_workspace_grep_targets() {
        let calls = vec![
            query("workspace_grep", "fn detect_waste", 50),
            query("workspace_grep", "fn detect_waste", 70),
        ];

        let report = detect_waste(&calls);

        assert_eq!(
            report.findings,
            vec![WasteFinding {
                kind: "repeated_query".to_string(),
                target: "fn detect_waste".to_string(),
                count: 2,
                estimated_waste_bytes: 70,
            }]
        );
    }

    /// The threshold is inclusive.
    #[test]
    fn oversized_read_fires_at_threshold_not_below() {
        let at = detect_waste(&[read("big.rs", LARGE_READ_BYTES)]);
        assert_eq!(at.findings.len(), 1);
        let finding = &at.findings[0];
        assert_eq!(finding.kind, "oversized_read");
        assert_eq!(finding.count, 1);
        assert_eq!(finding.estimated_waste_bytes, LARGE_READ_BYTES);

        let below = detect_waste(&[read("small.rs", LARGE_READ_BYTES - 1)]);
        assert!(below.findings.is_empty());
    }

    /// Each oversized read is reported on its own, next to the redundancy.
    #[test]
    fn oversized_read_coexists_with_redundant_read() {
        let calls = vec![read("huge.rs", LARGE_READ_BYTES); 2];

        let report = detect_waste(&calls);

        let kinds: Vec<&str> = report.findings.iter().map(|f| f.kind.as_str()).collect();
        assert_eq!(
            kinds,
            vec!["oversized_read", "oversized_read", "redundant_read"]
        );
    }

    #[test]
    fn drops_finding_whose_target_carries_github_pat() {
        let secret = format!("password=ghp_{}", "a".repeat(36));
        let calls = vec![query("Grep", &secret, 10); 2];

        let report = detect_waste(&calls);

        assert!(
            report.findings.is_empty(),
            "credential-bearing finding must be dropped, got: {:?}",
            report.findings
        );
        assert_eq!(report.total_estimated_waste_bytes, 0);
    }

    #[test]
    fn drops_finding_whose_target_carries_aws_key() {
        let secret = "search AKIAIOSFODNN7EXAMPLE here";
        let calls = vec![query("search_symbols", secret, 10); 2];

        let report = detect_waste(&calls);

        assert!(
            report.findings.is_empty(),
            "AWS-key finding must be dropped, got: {:?}",
            report.findings
        );
    }

    /// Output order is by kind, then target, regardless of input order.
    #[test]
    fn findings_are_deterministically_ordered() {
        let calls = vec![
            read("zzz.rs", LARGE_READ_BYTES),
            read("aaa.rs", 5),
            read("aaa.rs", 5),
            query("Grep", "qqq", 1),
            query("Grep", "qqq", 1),
            query("Grep", "aaa", 1),
            query("Grep", "aaa", 1),
            read("mmm.rs", 5),
            read("mmm.rs", 5),
        ];

        let report = detect_waste(&calls);

        let shape: Vec<(&str, &str)> = report
            .findings
            .iter()
            .map(|f| (f.kind.as_str(), f.target.as_str()))
            .collect();
        let expected = vec![
            ("oversized_read", "zzz.rs"),
            ("redundant_read", "aaa.rs"),
            ("redundant_read", "mmm.rs"),
            ("repeated_query", "aaa"),
            ("repeated_query", "qqq"),
        ];
        assert_eq!(shape, expected);
    }

    /// The total covers every finding, including those cut by the cap.
    #[test]
    fn max_findings_cap_sets_truncated_and_keeps_full_waste_total() {
        let total = MAX_FINDINGS + 50;
        let calls: Vec<ToolCall> = (0..total)
            .map(|n| read(&format!("file_{n:05}.rs"), LARGE_READ_BYTES))
            .collect();

        let report = detect_waste(&calls);

        assert_eq!(report.findings.len(), MAX_FINDINGS);
        assert!(report.truncated);

        let expected_total = (total as u64) * LARGE_READ_BYTES;
        assert_eq!(report.total_estimated_waste_bytes, expected_total);

        let surviving: u64 = report
            .findings
            .iter()
            .map(|f| f.estimated_waste_bytes)
            .sum();
        assert!(report.total_estimated_waste_bytes > surviving);
    }

    #[test]
    fn empty_input_yields_empty_report() {
        let report = detect_waste(&[]);

        assert!(report.findings.is_empty());
        assert_eq!(report.total_estimated_waste_bytes, 0);
        assert!(!report.truncated);
    }

    #[test]
    fn parse_calls_skips_malformed_and_tool_less_lines() {
        let input = concat!(
            // Valid call.
            "{\"tool\":\"Read\",\"target\":\"a.rs\",\"bytes\":10}\n",
            // Not JSON.
            "not json at all\n",
            // Missing the required `tool` field.
            "{\"target\":\"b.rs\",\"bytes\":20}\n",
            // Empty and whitespace-only lines.
            "\n",
            " \n",
            // Valid call with `bytes` omitted.
            "{\"tool\":\"Grep\",\"target\":\"q\"}\n",
        );

        let calls = parse_calls(input);

        let expected = vec![query("Read", "a.rs", 10), query("Grep", "q", 0)];
        assert_eq!(calls, expected);
    }
}