Skip to main content

bn/
failure.rs

//! Structured failure summaries for failed agent runs.
//!
//! When an agent fails, this module generates a markdown summary capturing
//! what was tried, why it failed, which files were touched, and suggestions
//! for the next attempt. Designed to be appended as a bean note so context
//! survives across retries.
7use std::collections::BTreeSet;
8use std::fmt::Write;
9
/// Everything needed to produce a failure summary.
///
/// Plain data holder: all fields are public and the type is cheap to clone
/// and compare, which keeps it easy to build and assert on in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct FailureContext {
    /// Identifier of the bean the run belonged to. Not rendered by
    /// `build_failure_summary`.
    pub bean_id: String,
    /// Title of the bean. Not rendered by `build_failure_summary`.
    pub bean_title: String,
    /// Attempt number shown in the summary header.
    pub attempt: u32,
    /// Run duration in seconds; rendered as `42s`, `3m5s`, or `2m`.
    pub duration_secs: u64,
    /// Number of tool calls, shown in the "What was tried" section.
    pub tool_count: usize,
    /// Number of turns, shown in the "What was tried" section.
    pub turns: usize,
    /// Input token count; summed with `output_tokens` for the header.
    pub input_tokens: u64,
    /// Output token count; summed with `input_tokens` for the header.
    pub output_tokens: u64,
    /// Cost shown in the header after a `$`, with three decimals.
    pub cost: f64,
    /// Error message, if one was captured. Drives both the "Why it failed"
    /// section and the optional suggestion.
    pub error: Option<String>,
    /// Log lines in `[tool] ToolName path/or/args` format.
    pub tool_log: Vec<String>,
    /// Verify command, rendered in backticks when present.
    pub verify_command: Option<String>,
}
27
28/// Build a structured markdown summary of a failed agent run.
29#[must_use]
30pub fn build_failure_summary(ctx: &FailureContext) -> String {
31    let mut sections: Vec<String> = Vec::new();
32
33    // Header
34    let duration = format_duration(ctx.duration_secs);
35    let total_tokens = ctx.input_tokens + ctx.output_tokens;
36    let tokens = format_tokens(total_tokens);
37    sections.push(format!(
38        "## Attempt {} Failed ({}, {} tokens, ${:.3})",
39        ctx.attempt, duration, tokens, ctx.cost
40    ));
41
42    // What was tried
43    let tried = build_tried_section(ctx);
44    if !tried.is_empty() {
45        sections.push("### What was tried".to_string());
46        sections.push(tried.join("\n"));
47    }
48
49    // Why it failed
50    sections.push("### Why it failed".to_string());
51    sections.push(build_failure_reason(ctx));
52
53    // Files touched
54    let files = extract_files_from_logs(&ctx.tool_log);
55    if !files.is_empty() {
56        sections.push("### Files touched".to_string());
57        let list = files.iter().map(|f| format!("- {f}")).collect::<Vec<_>>();
58        sections.push(list.join("\n"));
59    }
60
61    // Verify command
62    if let Some(ref verify) = ctx.verify_command {
63        sections.push("### Verify command".to_string());
64        sections.push(format!("`{verify}`"));
65    }
66
67    // Suggestion
68    if let Some(suggestion) = build_suggestion(ctx.error.as_deref()) {
69        sections.push("### Suggestion for next attempt".to_string());
70        sections.push(suggestion.to_string());
71    }
72
73    sections.join("\n\n")
74}
75
/// Extract unique file paths associated with a specific tool from log lines.
///
/// A line contributes when it contains `[tool] <tool_name> ` — either at the
/// very start or after a timestamp/prefix. Everything after the first such
/// marker, with surrounding whitespace trimmed, is taken as the path.
///
/// Lines whose argument is empty after trimming are skipped, so a bare
/// `[tool] Read ` never produces a blank entry.
///
/// # Arguments
/// * `logs` - log lines in `[tool] ToolName path/or/args` format.
/// * `tool_name` - exact tool name to match; the trailing space in the
///   marker keeps `Read` from matching `ReadFile`.
///
/// # Returns
/// Deduplicated paths in the order they were first seen.
#[must_use]
pub fn extract_tool_paths(logs: &[String], tool_name: &str) -> Vec<String> {
    let prefix = format!("[tool] {tool_name} ");
    // Borrowed keys are enough for deduplication; only kept paths are copied.
    let mut seen: BTreeSet<&str> = BTreeSet::new();
    let mut paths = Vec::new();
    for line in logs {
        // Splitting at the first marker covers both a line that starts with
        // the marker and one where it follows a timestamp/prefix.
        if let Some((_, rest)) = line.split_once(prefix.as_str()) {
            let path = rest.trim();
            if !path.is_empty() && seen.insert(path) {
                paths.push(path.to_string());
            }
        }
    }
    paths
}
103
/// Count the log lines that record a call to `tool_name`.
///
/// A line counts when it contains `[tool] <tool_name>` followed by either
/// whitespace or the end of the line. The boundary check keeps a tool whose
/// name merely starts with `tool_name` (e.g. `BashOutput` when counting
/// `Bash`) from being included.
///
/// # Arguments
/// * `logs` - log lines in `[tool] ToolName path/or/args` format; the marker
///   may be preceded by a timestamp or other prefix.
/// * `tool_name` - exact tool name to count.
///
/// # Returns
/// The number of matching lines (each line is counted at most once).
#[must_use]
pub fn count_tool(logs: &[String], tool_name: &str) -> usize {
    let marker = format!("[tool] {tool_name}");
    logs.iter()
        .filter(|line| {
            line.match_indices(marker.as_str()).any(|(start, _)| {
                // Accept the match only if the tool name ends here.
                line.as_str()[start + marker.len()..]
                    .chars()
                    .next()
                    .map_or(true, char::is_whitespace)
            })
        })
        .count()
}
110
111/// Extract all unique file paths from log lines regardless of tool.
112///
113/// Matches `[tool] <name> <path>` where path contains no spaces (to
114/// distinguish file paths from multi-word arguments).
115#[must_use]
116pub fn extract_files_from_logs(logs: &[String]) -> Vec<String> {
117    let mut seen = BTreeSet::new();
118    let mut files = Vec::new();
119    for line in logs {
120        if let Some(path) = parse_tool_path(line) {
121            if !path.contains(' ') && seen.insert(path.clone()) {
122                files.push(path);
123            }
124        }
125    }
126    files
127}
128
129/// Return the last `n` tool names from log lines.
130#[must_use]
131pub fn extract_last_tools(logs: &[String], n: usize) -> Vec<String> {
132    let mut tools = Vec::new();
133    for line in logs {
134        if let Some(name) = parse_tool_name(line) {
135            tools.push(name);
136        }
137    }
138    let start = tools.len().saturating_sub(n);
139    tools[start..].to_vec()
140}
141
/// Join paths with `", "`, listing at most three and appending `+N more`
/// for the rest.
#[must_use]
pub fn summarize_paths(paths: &[String]) -> String {
    const SHOWN: usize = 3;
    match paths.len().checked_sub(SHOWN) {
        // More than `SHOWN` entries: list the head and count the overflow.
        Some(extra) if extra > 0 => {
            let head = paths[..SHOWN].join(", ");
            format!("{head} +{extra} more")
        }
        _ => paths.join(", "),
    }
}
152
153// ---------------------------------------------------------------------------
154// Internal helpers
155// ---------------------------------------------------------------------------
156
157fn build_tried_section(ctx: &FailureContext) -> Vec<String> {
158    let mut lines = Vec::new();
159
160    let reads = extract_tool_paths(&ctx.tool_log, "Read");
161    let edits = extract_tool_paths(&ctx.tool_log, "Edit");
162    let writes = extract_tool_paths(&ctx.tool_log, "Write");
163    let bash_count = count_tool(&ctx.tool_log, "Bash");
164
165    if !reads.is_empty() {
166        lines.push(format!("- Read {}", summarize_paths(&reads)));
167    }
168    if !edits.is_empty() {
169        lines.push(format!("- Edited {}", summarize_paths(&edits)));
170    }
171    if !writes.is_empty() {
172        lines.push(format!("- Wrote {}", summarize_paths(&writes)));
173    }
174    if bash_count > 0 {
175        let plural = if bash_count > 1 { "s" } else { "" };
176        lines.push(format!("- Ran {bash_count} bash command{plural}"));
177    }
178
179    let duration = format_duration(ctx.duration_secs);
180    lines.push(format!(
181        "- {} tool calls over {} turns in {}",
182        ctx.tool_count, ctx.turns, duration
183    ));
184
185    lines
186}
187
188fn build_failure_reason(ctx: &FailureContext) -> String {
189    let mut lines = Vec::new();
190
191    if let Some(ref error) = ctx.error {
192        lines.push(format!("- {error}"));
193    }
194
195    let last_tools = extract_last_tools(&ctx.tool_log, 3);
196    if !last_tools.is_empty() {
197        lines.push(format!(
198            "- Last tools before failure: {}",
199            last_tools.join(", ")
200        ));
201    }
202
203    if lines.is_empty() {
204        lines.push("- Unknown failure (no error captured)".to_string());
205    }
206
207    lines.join("\n")
208}
209
/// Pick a canned suggestion based on keywords in the error message.
///
/// Matching is case-insensitive and the first matching rule wins, so the
/// order of the table matters: "idle timeout" must precede the generic
/// "timeout". Returns `None` when there is no error or no keyword matches.
fn build_suggestion(error: Option<&str>) -> Option<&'static str> {
    // (keyword, suggestion) pairs, checked top to bottom.
    const RULES: [(&str, &str); 5] = [
        (
            "idle timeout",
            "- Agent went idle — it may be stuck in a loop or waiting for input. Try a more focused prompt or break the task into smaller steps.",
        ),
        (
            "timeout",
            "- Agent ran out of time. Consider increasing the timeout or simplifying the task scope.",
        ),
        (
            "aborted",
            "- Agent was manually aborted. Review progress so far before retrying.",
        ),
        (
            "claim",
            "- Could not claim the bean. Check if another agent is working on it or if it's already closed.",
        ),
        (
            "exit code",
            "- Agent exited with an error. Check the verify command output and ensure the approach is correct before retrying.",
        ),
    ];

    let lowered = error?.to_lowercase();
    RULES
        .iter()
        .find(|(keyword, _)| lowered.contains(*keyword))
        .map(|&(_, advice)| advice)
}
231
/// Pull the tool name out of a `[tool] ToolName ...` log line.
///
/// The name is the first whitespace-delimited word after the first
/// `[tool] ` marker. Returns `None` when the marker is missing or nothing
/// follows it.
fn parse_tool_name(line: &str) -> Option<String> {
    let (_, after_tag) = line.split_once("[tool] ")?;
    after_tag.split_whitespace().next().map(str::to_owned)
}
240
/// Pull the argument out of a `[tool] ToolName argument` log line.
///
/// Everything after the single space that follows the tool name is taken
/// as the argument and trimmed. Returns `None` when the `[tool] ` marker is
/// missing, when no space follows the tool name, or when the argument is
/// blank.
fn parse_tool_path(line: &str) -> Option<String> {
    let (_, after_tag) = line.split_once("[tool] ")?;
    let (_tool, argument) = after_tag.split_once(' ')?;
    let argument = argument.trim();
    (!argument.is_empty()).then(|| argument.to_owned())
}
254
/// Format a duration in seconds as `42s`, `3m5s`, or `2m`.
///
/// Minutes are not rolled over into hours, and the seconds part is omitted
/// when it is zero and at least one minute is shown.
fn format_duration(secs: u64) -> String {
    let (minutes, seconds) = (secs / 60, secs % 60);
    let mut text = String::new();
    // Writing into a `String` cannot fail, so the results are discarded.
    if minutes > 0 {
        let _ = write!(text, "{minutes}m");
    }
    if seconds > 0 || minutes == 0 {
        let _ = write!(text, "{seconds}s");
    }
    text
}
268
/// Render a token count compactly.
///
/// * below 1,000: the raw number (`500`)
/// * below roughly one million: thousands with one decimal (`62.0k`)
/// * otherwise: millions with one decimal (`1.5M`)
///
/// The switch to millions happens at 999,950 rather than 1,000,000. Totals
/// in that gap would round up to `1000.0k` at one decimal, so they are shown
/// as `1.0M` instead.
fn format_tokens(total: u64) -> String {
    // Smallest total whose thousands value rounds to "1000.0".
    const MILLIONS_FROM: u64 = 999_950;

    // The cast loses precision only above 2^53, far below the one decimal
    // digit that is displayed.
    let value = total as f64;
    if total >= MILLIONS_FROM {
        format!("{:.1}M", value / 1_000_000.0)
    } else if total >= 1_000 {
        format!("{:.1}k", value / 1_000.0)
    } else {
        total.to_string()
    }
}
278
#[cfg(test)]
mod tests {
    use super::*;

    /// Turn string literals into the owned log lines the API expects.
    fn owned(lines: &[&str]) -> Vec<String> {
        lines.iter().map(|line| (*line).to_string()).collect()
    }

    /// Seven log lines covering every tool kind, with one repeated `Read`.
    fn sample_logs() -> Vec<String> {
        owned(&[
            "[tool] Read src/main.rs",
            "[tool] Read src/lib.rs",
            "[tool] Edit src/main.rs",
            "[tool] Bash cargo test",
            "[tool] Write src/new_file.rs",
            "[tool] Bash cargo check",
            "[tool] Read src/main.rs", // repeated on purpose
        ])
    }

    /// A fully populated context whose error is an idle timeout.
    fn sample_ctx() -> FailureContext {
        FailureContext {
            bean_id: String::from("42"),
            bean_title: String::from("Add widget"),
            attempt: 2,
            duration_secs: 185,
            tool_count: 7,
            turns: 4,
            input_tokens: 50_000,
            output_tokens: 12_000,
            cost: 0.045,
            error: Some(String::from("idle timeout after 300s")),
            tool_log: sample_logs(),
            verify_command: Some(String::from("cargo test widget")),
        }
    }

    // -- extract_tool_paths --

    #[test]
    fn extract_tool_paths_deduplicates() {
        let read_paths = extract_tool_paths(&sample_logs(), "Read");
        assert_eq!(read_paths, vec!["src/main.rs", "src/lib.rs"]);
    }

    #[test]
    fn extract_tool_paths_returns_empty_for_missing_tool() {
        let grep_paths = extract_tool_paths(&sample_logs(), "Grep");
        assert!(grep_paths.is_empty());
    }

    // -- count_tool --

    #[test]
    fn count_tool_counts_all_occurrences() {
        let log = sample_logs();
        let expectations = [("Read", 3), ("Bash", 2), ("Write", 1), ("Grep", 0)];
        for (tool, expected) in &expectations {
            assert_eq!(count_tool(&log, tool), *expected);
        }
    }

    // -- extract_files_from_logs --

    #[test]
    fn extract_files_deduplicates_across_tools() {
        let touched = extract_files_from_logs(&sample_logs());
        assert_eq!(
            touched,
            vec!["src/main.rs", "src/lib.rs", "src/new_file.rs"]
        );
    }

    #[test]
    fn extract_files_skips_multi_word_args() {
        let log = owned(&["[tool] Bash cargo test --release", "[tool] Read src/foo.rs"]);
        let touched = extract_files_from_logs(&log);
        assert_eq!(touched, vec!["src/foo.rs"]);
    }

    // -- extract_last_tools --

    #[test]
    fn extract_last_tools_returns_last_n() {
        let tail = extract_last_tools(&sample_logs(), 3);
        assert_eq!(tail, vec!["Write", "Bash", "Read"]);
    }

    #[test]
    fn extract_last_tools_returns_all_when_fewer_than_n() {
        let log = owned(&["[tool] Read src/a.rs"]);
        let tail = extract_last_tools(&log, 5);
        assert_eq!(tail, vec!["Read"]);
    }

    // -- summarize_paths --

    #[test]
    fn summarize_paths_three_or_fewer() {
        let two = owned(&["a.rs", "b.rs"]);
        assert_eq!(summarize_paths(&two), "a.rs, b.rs");
    }

    #[test]
    fn summarize_paths_more_than_three() {
        let five = owned(&["a.rs", "b.rs", "c.rs", "d.rs", "e.rs"]);
        assert_eq!(summarize_paths(&five), "a.rs, b.rs, c.rs +2 more");
    }

    // -- format helpers --

    #[test]
    fn format_duration_seconds_only() {
        assert_eq!(format_duration(42), "42s");
    }

    #[test]
    fn format_duration_minutes_and_seconds() {
        assert_eq!(format_duration(185), "3m5s");
    }

    #[test]
    fn format_duration_exact_minutes() {
        assert_eq!(format_duration(120), "2m");
    }

    #[test]
    fn format_tokens_raw() {
        assert_eq!(format_tokens(500), "500");
    }

    #[test]
    fn format_tokens_thousands() {
        assert_eq!(format_tokens(62_000), "62.0k");
    }

    #[test]
    fn format_tokens_millions() {
        assert_eq!(format_tokens(1_500_000), "1.5M");
    }

    // -- build_failure_summary integration --

    #[test]
    fn summary_contains_all_sections() {
        let summary = build_failure_summary(&sample_ctx());

        let expected_fragments = [
            // Header
            "## Attempt 2 Failed",
            "3m5s",
            "62.0k tokens",
            "$0.045",
            // What was tried
            "### What was tried",
            "Read src/main.rs, src/lib.rs",
            "Edited src/main.rs",
            "Wrote src/new_file.rs",
            "Ran 2 bash commands",
            "7 tool calls over 4 turns",
            // Why it failed
            "### Why it failed",
            "idle timeout after 300s",
            "Last tools before failure:",
            // Files touched
            "### Files touched",
            "- src/main.rs",
            "- src/lib.rs",
            // Verify command
            "### Verify command",
            "`cargo test widget`",
            // Suggestion
            "### Suggestion for next attempt",
            "stuck in a loop",
        ];
        for fragment in &expected_fragments {
            assert!(
                summary.contains(*fragment),
                "summary is missing {fragment:?}"
            );
        }
    }

    #[test]
    fn summary_without_error_shows_unknown() {
        let bare = FailureContext {
            error: None,
            tool_log: Vec::new(),
            verify_command: None,
            ..sample_ctx()
        };
        let summary = build_failure_summary(&bare);
        assert!(summary.contains("Unknown failure (no error captured)"));
        // Without an error there is nothing to base a suggestion on.
        assert!(!summary.contains("### Suggestion for next attempt"));
    }

    #[test]
    fn suggestion_timeout_generic() {
        let advice = build_suggestion(Some("total timeout exceeded")).unwrap();
        assert!(advice.contains("ran out of time"));
    }

    #[test]
    fn suggestion_idle_timeout_more_specific() {
        // The "idle timeout" rule must win over the generic "timeout" rule.
        let advice = build_suggestion(Some("idle timeout after 300s")).unwrap();
        assert!(advice.contains("stuck in a loop"));
    }

    #[test]
    fn suggestion_aborted() {
        let advice = build_suggestion(Some("process aborted by user")).unwrap();
        assert!(advice.contains("manually aborted"));
    }

    #[test]
    fn suggestion_claim() {
        let advice = build_suggestion(Some("failed to claim bean")).unwrap();
        assert!(advice.contains("another agent"));
    }

    #[test]
    fn suggestion_exit_code() {
        let advice = build_suggestion(Some("exit code 1")).unwrap();
        assert!(advice.contains("verify command output"));
    }

    #[test]
    fn suggestion_none_for_unknown_error() {
        assert!(build_suggestion(Some("something weird happened")).is_none());
    }

    #[test]
    fn singular_bash_command() {
        let single_run = FailureContext {
            tool_log: owned(&["[tool] Bash cargo test"]),
            ..sample_ctx()
        };
        let summary = build_failure_summary(&single_run);
        // The trailing newline proves no plural "s" was appended.
        assert!(summary.contains("Ran 1 bash command\n"));
    }
}