use std::collections::BTreeMap;
use anyhow::Result;
use crate::state::Workspace;
use crate::telemetry::{self, RunTelemetry};
/// Returns `num / den` as a floating-point fraction.
///
/// A zero denominator yields `0.0` rather than performing the division.
fn rate(num: usize, den: usize) -> f64 {
    match den {
        0 => 0.0,
        total => num as f64 / total as f64,
    }
}
/// Reports whether `state` spells `done`, compared without regard to ASCII case.
fn is_done(state: &str) -> bool {
    "done".eq_ignore_ascii_case(state)
}
/// Per-worker tallies accumulated by `summarize`.
///
/// Only runs that carry a non-empty worker name contribute to these counters.
#[derive(Debug, Default, Clone)]
pub struct WorkerTrust {
    /// Number of runs attributed to this worker.
    pub runs: usize,
    /// Runs whose evaluation state was `done` (ASCII case-insensitive).
    pub done: usize,
    /// Runs whose evaluation state was `partial` (ASCII case-insensitive).
    pub partial: usize,
    /// Runs whose evaluation state was `failed` (ASCII case-insensitive).
    pub failed: usize,
    /// Runs whose result status was exactly `no-result`.
    pub no_result: usize,
    /// Runs that carried a user override.
    pub overrides: usize,
    /// Sum of the wall-clock seconds reported by this worker's runs.
    pub wall_seconds: u64,
}
impl WorkerTrust {
    /// Fraction of this worker's runs that were counted as done.
    ///
    /// Delegates to `rate`, so a worker with zero runs yields `0.0`.
    pub fn done_rate(&self) -> f64 {
        let Self { done, runs, .. } = self;
        rate(*done, *runs)
    }
}
/// Per-task history accumulated by `summarize`, one entry per task id.
#[derive(Debug, Default, Clone)]
pub struct TaskTrust {
    /// Number of runs recorded for this task.
    pub attempts: usize,
    /// Whether any run of this task evaluated as `done`.
    pub reached_done: bool,
    /// Whether the first recorded run of this task evaluated as `done`.
    pub first_pass: bool,
    /// Sum of the wall-clock seconds across all runs of this task.
    pub wall_seconds: u64,
    /// Evaluation state of the most recently processed run.
    pub last_state: String,
    /// Distinct non-empty worker names seen for this task, in first-seen order.
    pub workers: Vec<String>,
}
/// Aggregated view over a slice of run telemetry, as produced by `summarize`.
pub struct TrustReport {
    /// Number of telemetry records that were summarized.
    pub total_runs: usize,
    /// Per-task history, keyed by task id.
    pub tasks: BTreeMap<String, TaskTrust>,
    /// Per-worker tallies, keyed by worker name (runs without a worker name are not included).
    pub workers: BTreeMap<String, WorkerTrust>,
    /// Sum of wall-clock seconds over every summarized run.
    pub total_wall_seconds: u64,
}
impl TrustReport {
    /// Number of distinct task ids in the report.
    pub fn task_count(&self) -> usize {
        let Self { tasks, .. } = self;
        tasks.len()
    }

    /// Number of tasks whose first recorded run was already done.
    pub fn first_pass_done(&self) -> usize {
        self.tasks
            .values()
            .map(|task| usize::from(task.first_pass))
            .sum()
    }

    /// Number of tasks that reached done, but not on their first recorded run.
    pub fn retried_done(&self) -> usize {
        let mut count = 0;
        for task in self.tasks.values() {
            if matches!((task.reached_done, task.first_pass), (true, false)) {
                count += 1;
            }
        }
        count
    }

    /// Number of tasks with no done run on record.
    pub fn unresolved(&self) -> usize {
        let resolved = self.tasks.values().filter(|task| task.reached_done).count();
        self.tasks.len() - resolved
    }
}
/// Folds a slice of run telemetry into a [`TrustReport`].
///
/// Runs are processed in slice order, which is what defines a task's "first"
/// attempt and its `last_state`. Every run contributes to its task entry and to
/// the overall wall-clock total; only runs with a non-empty worker name
/// contribute to the per-worker tallies.
pub fn summarize(runs: &[RunTelemetry]) -> TrustReport {
    let mut report = TrustReport {
        total_runs: runs.len(),
        tasks: BTreeMap::new(),
        workers: BTreeMap::new(),
        total_wall_seconds: 0,
    };

    for run in runs {
        let done = is_done(&run.eval_state);
        let has_worker = !run.worker.is_empty();
        report.total_wall_seconds += run.wall_seconds;

        // Task-level bookkeeping.
        let task = report.tasks.entry(run.task_id.clone()).or_default();
        task.attempts += 1;
        task.wall_seconds += run.wall_seconds;
        task.last_state = run.eval_state.clone();
        if has_worker && !task.workers.contains(&run.worker) {
            task.workers.push(run.worker.clone());
        }
        if done {
            // `attempts` was just incremented, so 1 means this is the first run seen.
            task.first_pass |= task.attempts == 1;
            task.reached_done = true;
        }

        // Worker-level bookkeeping is skipped for runs without a worker name.
        if !has_worker {
            continue;
        }
        let stats = report.workers.entry(run.worker.clone()).or_default();
        stats.runs += 1;
        stats.wall_seconds += run.wall_seconds;

        // Pick the counter matching the evaluation state; other states count
        // toward `runs` only.
        let bucket = if done {
            Some(&mut stats.done)
        } else if run.eval_state.eq_ignore_ascii_case("partial") {
            Some(&mut stats.partial)
        } else if run.eval_state.eq_ignore_ascii_case("failed") {
            Some(&mut stats.failed)
        } else {
            None
        };
        if let Some(counter) = bucket {
            *counter += 1;
        }

        stats.no_result += usize::from(run.result_status == "no-result");
        stats.overrides += usize::from(run.user_override.is_some());
    }

    report
}
/// Formats a duration in seconds as a short label.
///
/// Durations of an hour or more render as `<h>h<mm>m` (seconds are dropped);
/// anything shorter renders as `<m>m<ss>s`.
fn humanize(secs: u64) -> String {
    let (h, rest) = (secs / 3600, secs % 3600);
    let m = rest / 60;
    match h {
        0 => format!("{m}m{:02}s", rest % 60),
        _ => format!("{h}h{m:02}m"),
    }
}
/// Renders a [`TrustReport`] as human-readable text.
///
/// The output has a header (naming `intent` when one is given), three
/// task-level summary lines, a per-worker table, and, only when at least one
/// task needed more than one attempt, a "Needed multiple attempts" section
/// sorted by attempt count (descending) and then task id. When `intent` is
/// `None`, that last section ends with a note that the figures are cumulative
/// across intents.
pub fn render(report: &TrustReport, intent: Option<&str>) -> String {
    let task_total = report.task_count();
    let first_pass = report.first_pass_done();
    let total_wall = humanize(report.total_wall_seconds);

    let mut out = if let Some(id) = intent {
        format!(
            "Trust report — intent {} — {} runs across {} tasks, {} total worker wall\n",
            id, report.total_runs, task_total, total_wall,
        )
    } else {
        format!(
            "Trust report — {} runs across {} tasks, {} total worker wall\n",
            report.total_runs, task_total, total_wall,
        )
    };

    out += &format!(
        " first-pass Done: {}/{} ({:.0}%)\n",
        first_pass,
        task_total,
        rate(first_pass, task_total) * 100.0,
    );
    out += &format!(
        " Done after retry: {}/{}\n",
        report.retried_done(),
        task_total,
    );
    out += &format!(
        " no Done in record: {}/{}\n",
        report.unresolved(),
        task_total,
    );

    out += "\nBy worker:\n";
    out.extend(report.workers.iter().map(|(name, stats)| {
        format!(
            " {:<14} {:>3} runs done {:>3.0}% (P:{} F:{} no-result:{}) wall {} overrides {}\n",
            name,
            stats.runs,
            stats.done_rate() * 100.0,
            stats.partial,
            stats.failed,
            stats.no_result,
            humanize(stats.wall_seconds),
            stats.overrides,
        )
    }));

    let mut retried: Vec<(&String, &TaskTrust)> = report
        .tasks
        .iter()
        .filter(|(_, task)| task.attempts > 1)
        .collect();
    if retried.is_empty() {
        return out;
    }
    // Most-retried tasks first; ties are broken by task id.
    retried.sort_by_key(|&(id, task)| (std::cmp::Reverse(task.attempts), id));

    out += "\nNeeded multiple attempts:\n";
    for (id, task) in retried {
        let outcome = match task.reached_done {
            true => "done".to_string(),
            false => format!("still {}", task.last_state.to_lowercase()),
        };
        out += &format!(
            " {:<10} {} attempts \u{2192} {} ({})\n",
            id,
            task.attempts,
            outcome,
            task.workers.join(", "),
        );
    }
    if intent.is_none() {
        out += "\n (cumulative across intents; a task id reused across intents folds together)\n";
    }
    out
}
/// Narrows `runs` to those belonging to the `active` intent, when possible.
///
/// Returns the selected runs plus a flag saying whether narrowing happened.
/// If `active` is `None`, empty, or matches no run, every run is returned and
/// the flag is `false`.
fn scope_runs(runs: &[RunTelemetry], active: Option<&str>) -> (Vec<RunTelemetry>, bool) {
    let matching: Vec<RunTelemetry> = match active {
        Some(id) if !id.is_empty() => runs
            .iter()
            .filter(|run| run.intent_id == id)
            .cloned()
            .collect(),
        _ => Vec::new(),
    };

    if matching.is_empty() {
        (runs.to_vec(), false)
    } else {
        (matching, true)
    }
}
/// Builds the rendered trust report for a workspace.
///
/// Reads the workspace's run telemetry; with no runs, a placeholder message is
/// returned. Otherwise the runs are narrowed to the queue's intent id when the
/// queue loads and at least one run matches it, and the header names that
/// intent only in the narrowed case. A queue that fails to load is treated as
/// "no active intent" rather than as an error.
pub fn report(ws: &Workspace) -> Result<String> {
    let all_runs = telemetry::read_runs(ws);
    if all_runs.is_empty() {
        return Ok("No run telemetry yet. The trust report fills in as runs accrue.\n".to_string());
    }

    let active_intent = match ws.load_queue() {
        Ok(queue) => Some(queue.intent_id),
        Err(_) => None,
    };
    let (selected, is_scoped) = scope_runs(&all_runs, active_intent.as_deref());
    let label = if is_scoped {
        active_intent.as_deref()
    } else {
        None
    };

    let summary = summarize(&selected);
    Ok(render(&summary, label))
}
/// One finding produced by `mine`: what was observed plus a suggested follow-up.
#[derive(Clone, Debug)]
pub struct MinedObservation {
    /// Description of the pattern found in the telemetry, including its numbers.
    pub detail: String,
    /// Suggested action for addressing the pattern.
    pub suggestion: String,
}
/// Minimum number of runs a worker needs before `mine` evaluates its no-result share.
const MIN_WORKER_RUNS: usize = 6;

/// No-result share (as a fraction of a worker's runs) at or above which `mine` flags the worker.
const NO_RESULT_FLOOR: f64 = 0.10;

/// Minimum number of tasks that reached Done for a kind before `mine` evaluates its retry average.
const MIN_KIND_TASKS: usize = 3;

/// Average attempts-to-Done at or above which `mine` flags a kind.
const RETRY_AVG_FLOOR: f64 = 2.5;
/// Scans run telemetry for patterns worth surfacing.
///
/// Two checks are made, and their findings are returned in this order:
///
/// 1. Workers with at least `MIN_WORKER_RUNS` runs whose share of `no-result`
///    runs is at least `NO_RESULT_FLOOR`.
/// 2. Task kinds with at least `MIN_KIND_TASKS` tasks that reached Done and
///    whose average attempt count is at least `RETRY_AVG_FLOOR`. A task's kind
///    is the first non-empty kind seen among its runs; tasks that never reached
///    Done, or have no kind, are left out.
///
/// Within each check, findings follow the sorted order of worker names and
/// kind names respectively.
pub fn mine(runs: &[RunTelemetry]) -> Vec<MinedObservation> {
    let report = summarize(runs);

    // Check 1: workers that frequently produce no parseable result.
    let mut out: Vec<MinedObservation> = report
        .workers
        .iter()
        .filter(|(_, w)| w.runs >= MIN_WORKER_RUNS)
        .filter_map(|(worker, w)| {
            let share = w.no_result as f64 / w.runs as f64;
            (share >= NO_RESULT_FLOOR).then(|| MinedObservation {
                detail: format!(
                    "{worker}: {}/{} runs produced no parseable result ({:.0}%)",
                    w.no_result,
                    w.runs,
                    share * 100.0
                ),
                suggestion:
                    "check this worker's packet/output contract or model — no-result runs burn a \
                     whole attempt"
                        .to_string(),
            })
        })
        .collect();

    // Map each task id to the first non-empty kind recorded for it.
    let mut first_kind: BTreeMap<&str, &str> = BTreeMap::new();
    for run in runs.iter().filter(|run| !run.kind.is_empty()) {
        first_kind
            .entry(run.task_id.as_str())
            .or_insert(run.kind.as_str());
    }

    // Per kind: (total attempts, number of tasks), over tasks that reached Done.
    let mut per_kind: BTreeMap<&str, (usize, usize)> = BTreeMap::new();
    for (id, task) in report.tasks.iter().filter(|(_, task)| task.reached_done) {
        if let Some(kind) = first_kind.get(id.as_str()) {
            let (attempts, count) = per_kind.entry(*kind).or_default();
            *attempts += task.attempts;
            *count += 1;
        }
    }

    // Check 2: kinds that typically need several attempts to land.
    out.extend(
        per_kind
            .into_iter()
            .filter(|(_, (_, n))| *n >= MIN_KIND_TASKS)
            .filter_map(|(kind, (sum, n))| {
                let avg = sum as f64 / n as f64;
                (avg >= RETRY_AVG_FLOOR).then(|| MinedObservation {
                    detail: format!(
                        "kind '{kind}': {n} tasks averaged {avg:.1} attempts to reach Done"
                    ),
                    suggestion:
                        "consider a skill or sharper acceptance for this kind — it rarely lands \
                         first-pass"
                            .to_string(),
                })
            }),
    );

    out
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a telemetry record with the given task, worker, result status,
    /// evaluation state and wall time; every other field gets a fixed filler value.
    fn rec(task: &str, worker: &str, status: &str, eval: &str, wall: u64) -> RunTelemetry {
        RunTelemetry {
            ts: String::new(),
            task_id: task.into(),
            intent_id: String::new(),
            kind: "implementation".into(),
            risk: "low".into(),
            worker: worker.into(),
            chosen_reason: String::new(),
            result_status: status.into(),
            eval_state: eval.into(),
            wall_seconds: wall,
            user_override: None,
            skills: Vec::new(),
            verdict_pass: None,
        }
    }

    /// One task done first try, one done after a retry, one never done.
    #[test]
    fn first_pass_vs_retry_vs_unresolved() {
        let runs = vec![
            rec("A", "codex", "done", "Done", 100),
            rec("B", "claude-code", "no-result", "Failed", 200),
            rec("B", "claude-code", "done", "Done", 300),
            rec("C", "codex", "partial", "Partial", 50),
        ];
        let summary = summarize(&runs);

        // Report-level totals.
        assert_eq!(summary.total_runs, 4);
        assert_eq!(summary.task_count(), 3);
        assert_eq!(summary.first_pass_done(), 1);
        assert_eq!(summary.retried_done(), 1);
        assert_eq!(summary.unresolved(), 1);
        assert_eq!(summary.total_wall_seconds, 650);

        // Task B took two attempts and therefore is not a first-pass success.
        let task_b = &summary.tasks["B"];
        assert_eq!(task_b.attempts, 2);
        assert!(task_b.reached_done && !task_b.first_pass);

        // Worker tallies for the worker that ran task B.
        let claude = &summary.workers["claude-code"];
        assert_eq!(claude.runs, 2);
        assert_eq!(claude.done, 1);
        assert_eq!(claude.failed, 1);
        assert_eq!(claude.no_result, 1);
        assert!((claude.done_rate() - 0.5).abs() < 1e-9);

        // Unscoped rendering carries the cumulative note.
        let unscoped = render(&summary, None);
        assert!(unscoped.contains("Needed multiple attempts"));
        assert!(unscoped.contains("B "));
        assert!(unscoped.contains("cumulative across intents"));

        // Scoped rendering names the intent and drops the note.
        let scoped = render(&summary, Some("intent-x"));
        assert!(scoped.contains("intent intent-x"));
        assert!(!scoped.contains("cumulative across intents"));
    }

    /// Scoping keeps only the active intent's runs, and falls back to every
    /// run when the intent is unknown or absent.
    #[test]
    fn scope_prefers_active_intent_then_falls_back() {
        let mut first = rec("A", "codex", "done", "Done", 10);
        first.intent_id = "i1".into();
        let mut second = rec("B", "codex", "done", "Done", 10);
        second.intent_id = "i2".into();
        // A record whose intent id is left empty.
        let legacy = rec("C", "codex", "done", "Done", 10);
        let runs = vec![first, second, legacy];

        let (slice, scoped) = scope_runs(&runs, Some("i2"));
        assert!(scoped);
        assert_eq!(slice.len(), 1);
        assert_eq!(slice[0].task_id, "B");

        let (slice, scoped) = scope_runs(&runs, Some("i9"));
        assert!(!scoped);
        assert_eq!(slice.len(), 3);

        let (slice, scoped) = scope_runs(&runs, None);
        assert!(!scoped);
        assert_eq!(slice.len(), 3);
    }

    /// A worker with 2/8 no-result runs and a kind averaging 3 attempts are
    /// both flagged; a clean history yields nothing.
    #[test]
    fn mine_flags_no_result_workers_and_high_retry_kinds() {
        // Eight single-attempt tasks on one worker, the first two with no result.
        let mut runs: Vec<RunTelemetry> = (0..8)
            .map(|i| {
                let (status, eval) = match i {
                    0 | 1 => ("no-result", "Failed"),
                    _ => ("done", "Done"),
                };
                rec(&format!("N{i}"), "flaky", status, eval, 1)
            })
            .collect();

        // Three research tasks, each failing twice before reaching Done.
        for t in 0..3 {
            for attempt in 0..3 {
                let (status, eval) = match attempt {
                    0 | 1 => ("failed", "Failed"),
                    _ => ("done", "Done"),
                };
                let mut run = rec(&format!("R{t}"), "builder", status, eval, 1);
                run.kind = "research".into();
                runs.push(run);
            }
        }

        let obs = mine(&runs);
        assert_eq!(obs.len(), 2, "{obs:?}");
        assert!(
            obs.iter()
                .any(|o| o.detail.contains("flaky") && o.detail.contains("no parseable")),
            "{obs:?}"
        );
        assert!(
            obs.iter()
                .any(|o| o.detail.contains("research") && o.detail.contains("3.0 attempts")),
            "{obs:?}"
        );

        let quiet = vec![
            rec("A", "codex", "done", "Done", 1),
            rec("B", "codex", "done", "Done", 1),
        ];
        assert!(mine(&quiet).is_empty());
    }

    /// Summarizing and rendering an empty history must not panic.
    #[test]
    fn empty_telemetry_renders_zeroes() {
        let summary = summarize(&[]);
        assert_eq!(summary.task_count(), 0);
        assert_eq!(summary.first_pass_done(), 0);
        let _ = render(&summary, None);
    }
}