/// Fraction of a bucket's addressable input tokens that
/// [`BucketStat::recoverable_input_tokens`] reports as recoverable.
pub const RECOVERABLE_FRACTION: f64 = 0.5;
/// Category a navigation turn is filed under; each variant is named after a tool.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NavBucket {
    /// Bucket labelled with the `Read` tool.
    Read,
    /// Bucket labelled with the `Grep` tool.
    Grep,
    /// Bucket labelled with the `Glob` tool.
    Glob,
}

impl NavBucket {
    /// Lowercase identifier of the bucket: `"read"`, `"grep"` or `"glob"`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            NavBucket::Glob => "glob",
            NavBucket::Grep => "grep",
            NavBucket::Read => "read",
        }
    }

    /// Capitalised tool name of the bucket: `"Read"`, `"Grep"` or `"Glob"`.
    pub fn tool_name(&self) -> &'static str {
        match *self {
            NavBucket::Glob => "Glob",
            NavBucket::Grep => "Grep",
            NavBucket::Read => "Read",
        }
    }

    /// Slash-separated suggestion text associated with the bucket.
    ///
    /// NOTE(review): presumably these are the alternative commands to
    /// recommend instead of the tool — confirm with the code that renders it.
    pub fn suggestion(&self) -> &'static str {
        match *self {
            NavBucket::Glob => "files / search",
            NavBucket::Grep => "search / callers / callees / impact",
            NavBucket::Read => "outline / read / node",
        }
    }
}
/// Tool names a turn may use and still be classified by `classify_nav`;
/// a turn containing any other tool name is rejected there.
const NAV_TOOLS: [&str; 3] = ["Read", "Grep", "Glob"];
/// Aggregated counters for a single [`NavBucket`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BucketStat {
    /// Bucket these counters belong to.
    pub bucket: NavBucket,
    /// Number of turns counted in this bucket.
    pub turns: u64,
    /// Input tokens accumulated over the counted turns.
    pub addressable_input_tokens: u64,
}

impl BucketStat {
    /// Returns `addressable_input_tokens` scaled by [`RECOVERABLE_FRACTION`].
    ///
    /// The product is computed in `f64` and cast back to `u64`, so any
    /// fractional part is truncated toward zero.
    pub fn recoverable_input_tokens(&self) -> u64 {
        let addressable = self.addressable_input_tokens as f64;
        let scaled = addressable * RECOVERABLE_FRACTION;
        scaled as u64
    }
}
/// Per-bucket statistics together with the overall number of turns.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DiscoverReport {
    /// Number of turns covered by the report, classified or not.
    pub total_turns: u64,
    /// Statistics for each bucket present in the report.
    pub buckets: Vec<BucketStat>,
}

impl DiscoverReport {
    /// Adds up one `u64` metric over all buckets, clamping at `u64::MAX`.
    ///
    /// Per-bucket token counts are accumulated with `saturating_add`, so a
    /// bucket may already sit at `u64::MAX`. A plain `sum()` across buckets
    /// would then panic in debug builds and wrap around in release builds;
    /// saturating keeps the totals consistent with the per-bucket values.
    fn saturating_total(&self, metric: impl Fn(&BucketStat) -> u64) -> u64 {
        self.buckets
            .iter()
            .map(metric)
            .fold(0, u64::saturating_add)
    }

    /// Total number of turns across all buckets (saturating at `u64::MAX`).
    pub fn total_replaceable_turns(&self) -> u64 {
        self.saturating_total(|stat| stat.turns)
    }

    /// Total addressable input tokens across all buckets (saturating at
    /// `u64::MAX`).
    pub fn total_addressable_input_tokens(&self) -> u64 {
        self.saturating_total(|stat| stat.addressable_input_tokens)
    }

    /// Sum of every bucket's [`BucketStat::recoverable_input_tokens`]
    /// (saturating at `u64::MAX`).
    pub fn total_recoverable_input_tokens(&self) -> u64 {
        self.saturating_total(BucketStat::recoverable_input_tokens)
    }
}
/// Splits a comma-separated list of tool names into trimmed, non-empty entries.
///
/// Entries that are empty or consist only of whitespace are dropped; the
/// returned slices borrow from `tool_names` and keep their original order.
fn split_tools(tool_names: &str) -> Vec<&str> {
    let mut names = Vec::new();
    for piece in tool_names.split(',') {
        let name = piece.trim();
        if name.is_empty() {
            continue;
        }
        names.push(name);
    }
    names
}
/// Picks the bucket for a turn from the tool names it used.
///
/// Returns `None` when `tools` is empty or contains any name that is not in
/// `NAV_TOOLS`. Otherwise `Grep` takes precedence over `Glob`, which takes
/// precedence over `Read`.
fn classify_nav(tools: &[&str]) -> Option<NavBucket> {
    if tools.is_empty() {
        return None;
    }

    let mut saw_grep = false;
    let mut saw_glob = false;
    for &tool in tools {
        // A single non-navigation tool disqualifies the whole turn.
        if !NAV_TOOLS.contains(&tool) {
            return None;
        }
        match tool {
            "Grep" => saw_grep = true,
            "Glob" => saw_glob = true,
            _ => {}
        }
    }

    let bucket = if saw_grep {
        NavBucket::Grep
    } else if saw_glob {
        NavBucket::Glob
    } else {
        NavBucket::Read
    };
    Some(bucket)
}
pub fn analyze(turns: &[(String, u64)]) -> DiscoverReport {
let mut read = BucketStat {
bucket: NavBucket::Read,
turns: 0,
addressable_input_tokens: 0,
};
let mut grep = BucketStat {
bucket: NavBucket::Grep,
turns: 0,
addressable_input_tokens: 0,
};
let mut glob = BucketStat {
bucket: NavBucket::Glob,
turns: 0,
addressable_input_tokens: 0,
};
for (tool_names, input_tokens) in turns {
let tools = split_tools(tool_names);
if let Some(bucket) = classify_nav(&tools) {
let stat = match bucket {
NavBucket::Read => &mut read,
NavBucket::Grep => &mut grep,
NavBucket::Glob => &mut glob,
};
stat.turns += 1;
stat.addressable_input_tokens =
stat.addressable_input_tokens.saturating_add(*input_tokens);
}
}
let mut buckets: Vec<BucketStat> = [read, grep, glob]
.into_iter()
.filter(|b| b.turns > 0)
.collect();
buckets.sort_by(|a, b| {
b.addressable_input_tokens
.cmp(&a.addressable_input_tokens)
.then_with(|| a.bucket.as_str().cmp(b.bucket.as_str()))
});
DiscoverReport {
total_turns: turns.len() as u64,
buckets,
}
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    /// Builds one `(tool_names, input_tokens)` turn for `analyze`.
    fn turn(tool_names: &str, input_tokens: u64) -> (String, u64) {
        (String::from(tool_names), input_tokens)
    }

    /// Bucket kinds of a report in the order the report lists them.
    fn kinds(report: &DiscoverReport) -> Vec<NavBucket> {
        report.buckets.iter().map(|stat| stat.bucket).collect()
    }

    #[test]
    fn empty_input_is_empty_report() {
        let report = analyze(&[]);
        assert!(report.buckets.is_empty());
        assert_eq!(report.total_turns, 0);
        assert_eq!(report.total_recoverable_input_tokens(), 0);
    }

    #[test]
    fn pure_read_turn_is_read_bucket() {
        let report = analyze(&[turn("Read", 1000)]);
        let expected = BucketStat {
            bucket: NavBucket::Read,
            turns: 1,
            addressable_input_tokens: 1000,
        };
        assert_eq!(report.buckets, vec![expected]);
        assert_eq!(report.buckets[0].recoverable_input_tokens(), 500);
    }

    #[test]
    fn grep_wins_over_glob_and_read_when_mixed_navigation() {
        let report = analyze(&[turn("Read,Grep,Glob", 2000)]);
        assert_eq!(kinds(&report), vec![NavBucket::Grep]);
        assert_eq!(report.buckets[0].turns, 1);
    }

    #[test]
    fn glob_wins_over_read() {
        let report = analyze(&[turn("Read,Glob", 800)]);
        assert_eq!(kinds(&report), vec![NavBucket::Glob]);
    }

    #[test]
    fn turn_with_edit_is_not_replaceable() {
        let mixed_turns = [turn("Read,Edit", 5000), turn("Grep,Write", 5000)];
        let report = analyze(&mixed_turns);
        assert!(report.buckets.is_empty());
        assert_eq!(report.total_replaceable_turns(), 0);
        assert_eq!(report.total_addressable_input_tokens(), 0);
    }

    #[test]
    fn bash_only_turn_is_not_counted() {
        let report = analyze(&[turn("Bash", 3000)]);
        assert!(report.buckets.is_empty());
    }

    #[test]
    fn empty_tool_names_conversation_turn_excluded() {
        let report = analyze(&[turn("", 1234)]);
        assert_eq!(report.total_turns, 1);
        assert!(report.buckets.is_empty());
    }

    #[test]
    fn buckets_ranked_by_addressable_tokens_descending() {
        let report = analyze(&[
            turn("Read", 100),
            turn("Read", 100),
            turn("Grep", 5000),
            turn("Glob", 900),
        ]);
        assert_eq!(
            kinds(&report),
            vec![NavBucket::Grep, NavBucket::Glob, NavBucket::Read]
        );
        let read = &report.buckets[2];
        assert_eq!(read.turns, 2);
        assert_eq!(read.addressable_input_tokens, 200);
    }

    #[test]
    fn estimate_is_non_negative_and_monotonic() {
        let small = analyze(&[turn("Read", 1000)]);
        let large = analyze(&[turn("Read", 1000), turn("Grep", 4000)]);

        let small_recoverable = small.total_recoverable_input_tokens();
        let large_recoverable = large.total_recoverable_input_tokens();

        assert!(large_recoverable >= small_recoverable);
        assert!(large_recoverable <= large.total_addressable_input_tokens());
        assert!(small_recoverable <= small.total_addressable_input_tokens());
    }

    #[test]
    fn whitespace_in_tool_names_is_tolerated() {
        let report = analyze(&[turn(" Read , Grep ", 1200)]);
        assert_eq!(kinds(&report), vec![NavBucket::Grep]);
        assert_eq!(report.buckets[0].turns, 1);
    }
}