atomcode-core 4.23.1

Open-source terminal AI coding agent
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
use crate::conversation::message::{Message, MessageContent};
use crate::tool::ToolResult;

/// Dispatch to per-tool truncation based on tool name, then enforce universal upper bounds.
///
/// Per-tool truncation is the first line of defense (currently a line-based
/// head + tail cut for `web_fetch` and for every tool without a dedicated
/// arm; `bash` and `read_file` deliberately have none). The universal caps
/// below are the LAST line of defense — they cap `result.output` regardless
/// of which tool produced it (except `read_file`, see below), so a single
/// oversized `ToolResult` can never dominate the ctx budget:
///
/// - `UNIVERSAL_MAX_LINES`: line-count ceiling (head 50 + tail 50 + "[N lines omitted]")
/// - `hard_char_limit`: char ceiling of `context_window / 8`, clamped to
///   the range 8_000..=32_000 chars
///
/// Both caps are skipped for `read_file`, whose truncation is owned by read.rs.
///
/// The char ceiling is measured in Unicode scalar values (`char`s), not
/// bytes, and the cut always lands on a char boundary.
///
/// 2026-04-13 context: a 14072-line `find` output contributed to a sent=0 cascade.
/// Per-tool truncate handled that case (head 10 + tail 20), but other pathological
/// outputs (unknown tools, huge grep, edit results with diffs) could still slip through
/// the old `char_limit = max(16000, context_window)` formula which scaled UP with ctx
/// window and let a single message consume 25% of a 64K budget.
pub fn truncate_output(result: &mut ToolResult, tool_name: &str, context_window: usize) {
    /// Byte offset at which the `char_idx`-th char of `text` starts, or
    /// `text.len()` when `text` has no more than `char_idx` chars.
    fn char_boundary(text: &str, char_idx: usize) -> usize {
        text.char_indices()
            .nth(char_idx)
            .map_or(text.len(), |(offset, _)| offset)
    }

    match tool_name {
        // bash: no per-tool truncation. The universal line/char caps below
        // are sufficient and purely numeric. Pattern-based "smart
        // extraction" (removed 2026-04-22) assumed English error keywords
        // (`error`/`FAILED`/`panic`) and hard-coded build tool names
        // (`cargo build`/`mvn compile`/`vite build`), which silently
        // dropped non-matching stderr — e.g. a 50-line Chinese compiler
        // trace was collapsed into `[... N lines skipped ...]` with no
        // diagnostic content surviving. Technology-stack neutrality is a
        // project rule (see `project_principles_vs_claude_md.md`), and
        // main's `turn/runner.rs::detect_call_loop` now catches the
        // retry-loop bug class that smart-extraction was trying to
        // prevent.
        "bash" => {}
        "read_file" => {} // Layer A in read.rs is the single authority. No post-hoc truncation.
        "web_fetch" => truncate_generic(result, 150, 20, 40),
        _ => truncate_generic(result, 200, 30, 50),
    }

    // ── Universal line-count ceiling ──
    // Applies after per-tool truncate. Protects against: unknown tools with no
    // per-tool logic, edge-case formats with embedded huge blobs.
    //
    // SKIP for read_file: its truncation lives in read.rs. The 300-line
    // blanket cap is too aggressive for typical source files (Vue SFC
    // 300-500 lines, Java 200-400 lines) — it cuts navItems/data definitions
    // in the middle, causing edit_file old_string mismatch on the next turn.
    if tool_name != "read_file" {
        const UNIVERSAL_MAX_LINES: usize = 300;
        const HEAD: usize = 50;
        const TAIL: usize = 50;
        let lines: Vec<&str> = result.output.lines().collect();
        let line_count = lines.len();
        // HEAD + TAIL < UNIVERSAL_MAX_LINES < line_count, so the two slices
        // below are in bounds and never overlap.
        if line_count > UNIVERSAL_MAX_LINES {
            let head_part = lines[..HEAD].join("\n");
            let tail_part = lines[line_count - TAIL..].join("\n");
            result.output = format!(
                "{}\n\n[... {} lines omitted (universal 300-line cap) ...]\n\n{}",
                head_part,
                line_count - HEAD - TAIL,
                tail_part,
            );
        }
    }

    // ── Universal char-count ceiling ──
    // ── INVARIANT (2026-04-16): read_file MUST be skipped here ──
    // read_file has its own truncation (auto_skeleton + dynamic char_limit
    // in read.rs). This universal cap was the root cause of 26-turn
    // exploration sessions: 950-line file (38K chars) truncated to 8K
    // (200 lines), forcing 20+ turns of grep/read fragments.
    // Fixed in 4fc5cda, accidentally reverted by 4f704cb (whole-file
    // revert to restore verify.rs hit this as collateral damage).
    // Other tools (bash, grep, etc.) still get the char cap.
    // ────────────────────────────────────────────────────────────
    if tool_name == "read_file" {
        // read_file: no char cap. Managed by read.rs internally
        // (auto_skeleton + budget-scaled dynamic char_limit).
        return;
    }

    let hard_char_limit = (context_window / 8).clamp(8_000, 32_000);

    // Byte length is an upper bound on the char count, so this cheap check
    // skips the char scan for every output that is already small enough.
    if result.output.len() <= hard_char_limit {
        return;
    }
    let total_chars = result.output.chars().count();
    if total_chars <= hard_char_limit {
        // Multi-byte text: many bytes but few chars. Cutting here would make
        // the head and tail windows overlap and duplicate content.
        return;
    }

    // Preserve head AND tail when cutting — tools often put errors/status at the end.
    // head_chars + tail_chars <= hard_char_limit < total_chars, so the two
    // windows are disjoint and `omitted` cannot underflow.
    let head_chars = hard_char_limit * 2 / 3;
    let tail_chars = hard_char_limit / 3;
    let omitted = total_chars - head_chars - tail_chars;
    let head_end = char_boundary(&result.output, head_chars);
    let tail_start = char_boundary(&result.output, total_chars - tail_chars);
    result.output = format!(
        "{}\n\n[... {} chars omitted (universal {} char cap) ...]\n\n{}",
        &result.output[..head_end],
        omitted,
        hard_char_limit,
        &result.output[tail_start..],
    );
}

// truncate_bash + try_compress_compile_errors + assemble_important_lines
// were removed 2026-04-22 (~250 lines) to enforce technology-stack
// neutrality. See comment at top of `truncate_output` for why.

// truncate_read_file: DELETED.
// read_file truncation is now handled exclusively by Layer A (auto_skeleton)
// in read.rs. Having two separate outline-extraction algorithms (tree-sitter
// in read.rs vs indent-based here) was redundant and caused confusion about
// which one actually controlled the output.

/// Generic truncation: head + tail, skipping middle.
///
/// When `result.output` has more than `max_lines` lines, it is replaced by
/// its first `head` lines, a `[... N lines omitted ...]` marker, and its last
/// `tail` lines. Line splitting uses `str::lines`, and the kept lines are
/// re-joined with `\n`.
///
/// The output is left untouched when it has at most `max_lines` lines, or
/// when `head + tail` covers every line (there is nothing to omit; slicing
/// would overlap or go out of bounds).
pub(crate) fn truncate_generic(
    result: &mut ToolResult,
    max_lines: usize,
    head: usize,
    tail: usize,
) {
    let lines: Vec<&str> = result.output.lines().collect();
    let total = lines.len();
    if total <= max_lines {
        return;
    }

    // Guard against misconfigured windows: with `kept >= total` the head and
    // tail slices would overlap (duplicating lines) or index past the end,
    // and the omitted-line count would underflow.
    let kept = head.saturating_add(tail);
    if kept >= total {
        return;
    }

    let head_part: String = lines[..head].join("\n");
    let tail_part: String = lines[total - tail..].join("\n");
    result.output = format!(
        "{}\n\n[... {} lines omitted ...]\n\n{}",
        head_part,
        total - kept,
        tail_part
    );
}

/// Apply truncation to all tool result messages
/// in the last `tool_count` messages of the conversation.
///
/// Two-pass: first per-result truncation, then per-turn budget enforcement.
/// Per-turn budget = 1/4 of context window, clamped to 4_000..=64_000 chars.
/// If the results in this window together exceed that, each result is
/// shrunk proportionally (head 2/3 + tail 1/3 of its share), except results
/// whose share would be 200 chars or less, which are left as they are.
///
/// `current_tool_name` is only a fallback: each result is truncated under
/// the rules of the tool named by the assistant tool call with the same
/// call id, when such a call exists anywhere in `messages`.
///
/// All sizes are measured in `char`s, and cuts land on char boundaries.
pub fn post_process_tool_results(
    messages: &mut Vec<Message>,
    tool_count: usize,
    current_tool_name: &str,
    context_window: usize,
) {
    /// Byte offset at which the `char_idx`-th char of `text` starts, or
    /// `text.len()` when `text` has no more than `char_idx` chars.
    fn char_boundary(text: &str, char_idx: usize) -> usize {
        text.char_indices()
            .nth(char_idx)
            .map_or(text.len(), |(offset, _)| offset)
    }

    // `start <= len` always holds, so `messages[start..]` cannot panic.
    let start = messages.len().saturating_sub(tool_count);

    // Build call_id → real tool_name lookup so each ToolResult is
    // truncated by the rules of the tool that actually produced it.
    // Without this a mixed-tool turn (e.g. read_file → bash) would
    // truncate every result under whichever tool ran last
    // (`current_tool_name`), which inverts read_file's cap exemption
    // and shrinks file contents to ~30 lines.
    // Keys/values are owned copies because `messages` is mutated below.
    let mut call_id_to_tool: std::collections::HashMap<String, String> =
        std::collections::HashMap::new();
    for msg in messages.iter() {
        if let MessageContent::AssistantWithToolCalls { tool_calls, .. } = &msg.content {
            for tc in tool_calls {
                call_id_to_tool.insert(tc.id.clone(), tc.name.clone());
            }
        }
    }

    // Pass 1: per-result truncation, keyed by each result's real tool.
    // `current_tool_name` is the fallback for results with no paired
    // ATC in the message vec (e.g. orphaned test fixtures).
    for msg in messages[start..].iter_mut() {
        if let MessageContent::ToolResult(result) = &mut msg.content {
            let tool_name = call_id_to_tool
                .get(&result.call_id)
                .map(String::as_str)
                .unwrap_or(current_tool_name);
            truncate_output(result, tool_name, context_window);
        }
    }

    // Pass 2: per-turn budget enforcement.
    // INVARIANT (2026-04-16): turn_budget must scale with context_window.
    // Was capped at 16K chars, which at 128K ctx meant a single turn of
    // 3 file reads got "trimmed to fit turn budget" — the model saw
    // different fragments each re-read and couldn't correlate them.
    // Now: ctx/4 with cap at 64K chars, floor 4K.
    let turn_budget = (context_window / 4).clamp(4_000, 64_000);
    let total_chars: usize = messages[start..]
        .iter()
        .filter_map(|msg| match &msg.content {
            MessageContent::ToolResult(result) => Some(result.output.chars().count()),
            _ => None,
        })
        .sum();
    if total_chars <= turn_budget {
        return;
    }

    // ratio < 1.0 here, so every `target` is strictly below its result's
    // current size unless that size is 0.
    let ratio = turn_budget as f64 / total_chars as f64;
    for msg in messages[start..].iter_mut() {
        if let MessageContent::ToolResult(result) = &mut msg.content {
            let char_len = result.output.chars().count();
            let target = (char_len as f64 * ratio) as usize;
            // Skip results whose share is too small to carry useful content.
            if char_len > target && target > 200 {
                // head + tail <= target < char_len: the two windows are
                // disjoint, so no content is duplicated.
                let head = target * 2 / 3;
                let tail = target / 3;
                let head_end = char_boundary(&result.output, head);
                let tail_start = char_boundary(&result.output, char_len - tail);
                result.output = format!(
                    "{}\n[... trimmed to fit turn budget ...]\n{}",
                    &result.output[..head_end],
                    &result.output[tail_start..],
                );
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::conversation::message::{Message, MessageContent, Role};
    use crate::tool::{ToolCall, ToolResult};

    // --- fixtures ---

    /// Successful `ToolResult` carrying `output` under the call id `"test_call"`.
    fn make_result(output: &str) -> ToolResult {
        ToolResult {
            call_id: String::from("test_call"),
            output: String::from(output),
            success: true,
        }
    }

    /// Tool-role message wrapping a successful result for `call_id`.
    fn make_tool_result_with_id(call_id: &str, output: &str) -> Message {
        let mut result = make_result(output);
        result.call_id = String::from(call_id);
        Message {
            role: Role::Tool,
            content: MessageContent::ToolResult(result),
        }
    }

    /// Tool-role message wrapping `make_result(output)`.
    fn make_tool_result_message(output: &str) -> Message {
        make_tool_result_with_id("test_call", output)
    }

    /// Assistant message issuing a single tool call `call_id` to `tool_name`.
    fn make_atc(call_id: &str, tool_name: &str) -> Message {
        let call = ToolCall {
            id: String::from(call_id),
            name: String::from(tool_name),
            arguments: String::new(),
        };
        Message {
            role: Role::Assistant,
            content: MessageContent::AssistantWithToolCalls {
                text: None,
                tool_calls: vec![call],
                reasoning_content: None,
                thinking_blocks: Vec::new(),
            },
        }
    }

    /// `count` lines "line 0" .. "line {count-1}" joined by `\n`, no trailing newline.
    fn numbered_lines(count: usize) -> String {
        let mut rows: Vec<String> = Vec::with_capacity(count);
        for i in 0..count {
            rows.push(format!("line {}", i));
        }
        rows.join("\n")
    }

    // --- bash truncation tests (A1, 2026-04-22) ---
    //
    // bash has no per-tool truncation; only the universal line/char caps in
    // `truncate_output` apply. These tests pin that down so a refactor
    // cannot quietly bring pattern-based extraction back.

    #[test]
    fn bash_short_output_passes_through_verbatim() {
        let output = numbered_lines(100);
        let mut result = make_result(&output);
        truncate_output(&mut result, "bash", 64_000);
        assert_eq!(
            result.output, output,
            "bash output under 300 lines must not be touched"
        );
    }

    #[test]
    fn bash_huge_output_hits_universal_line_cap_only() {
        // 500 lines exceeds UNIVERSAL_MAX_LINES (300): expect 50 head lines,
        // 50 tail lines and the marker. No keyword heuristic is involved.
        let mut result = make_result(&numbered_lines(500));
        truncate_output(&mut result, "bash", 64_000);
        let truncated = &result.output;
        assert!(truncated.contains("line 0"), "head must be preserved");
        assert!(truncated.contains("line 499"), "tail must be preserved");
        assert!(
            truncated.contains("lines omitted"),
            "omission marker required"
        );
        assert!(truncated.lines().count() <= 110);
    }

    #[test]
    fn bash_chinese_stderr_survives_truncation() {
        // Regression for the 2026-04-22 forensic finding: the removed
        // pattern-based `truncate_bash` dropped every line that did not
        // match English `error`/`Error`/`FAILED`/`panic`, so a 50-line
        // Chinese compiler trace lost all of its middle lines. Under A1 the
        // output is below both universal caps and must come through whole.
        let output = vec!["编译失败:找不到符号"; 50].join("\n");
        let mut result = make_result(&output);
        truncate_output(&mut result, "bash", 64_000);
        assert_eq!(result.output.matches("编译失败").count(), 50);
    }

    // truncate_read_file tests: DELETED (function removed, Layer A in read.rs handles it)

    // --- truncate_generic tests ---

    #[test]
    fn truncate_generic_under_limit_unchanged() {
        let output = "line1\nline2\nline3\n";
        let mut result = make_result(output);
        truncate_generic(&mut result, 200, 30, 50);
        assert_eq!(result.output, output);
    }

    #[test]
    fn truncate_generic_over_limit_has_head_and_tail() {
        let output = numbered_lines(300);
        let mut result = make_result(&output);
        truncate_generic(&mut result, 200, 30, 50);
        // Shorter than the input.
        assert!(result.output.len() < output.len());
        // First and last lines both survive.
        assert!(result.output.contains("line 0"));
        assert!(result.output.contains("line 299"));
        // The omission marker is present.
        assert!(result.output.contains("lines omitted"));
    }

    // --- truncate_output universal cap tests ---

    #[test]
    fn truncate_output_hard_char_limit() {
        // ctx_window=16000 → 16000/8 = 2000, raised to the 8000-char floor.
        let mut result = make_result(&"x".repeat(20000));
        truncate_output(&mut result, "unknown_tool", 16000);
        // Roughly 8000 chars of content plus the omission marker.
        assert!(
            result.output.len() <= 8_500,
            "got {} chars",
            result.output.len()
        );
        assert!(
            result.output.contains("chars omitted"),
            "got: {}",
            result.output
        );
    }

    #[test]
    fn truncate_output_universal_line_cap() {
        // 500 lines in → about 100 lines out (50 head + 50 tail) plus markers.
        let mut result = make_result(&numbered_lines(500));
        truncate_output(&mut result, "unknown_tool", 64_000);
        let line_count = result.output.lines().count();
        assert!(
            line_count <= 110,
            "got {} lines, expected ≤ 110",
            line_count
        );
        assert!(result.output.contains("lines omitted"));
    }

    #[test]
    fn truncate_output_caps_never_grow_with_huge_window() {
        // A 1M ctx window must not lift a single tool result above 32K chars.
        let mut result = make_result(&"x".repeat(200_000));
        truncate_output(&mut result, "unknown_tool", 1_000_000);
        assert!(
            result.output.len() <= 33_000,
            "single tool output should never exceed 32K chars, got {}",
            result.output.len()
        );
    }

    // --- post_process_tool_results tests ---

    #[test]
    fn post_process_truncates_results() {
        let mut messages = vec![make_tool_result_message(&"x".repeat(20000))];
        post_process_tool_results(&mut messages, 1, "unknown_tool", 16000);
        // Still an inline ToolResult, just shorter.
        assert!(matches!(messages[0].content, MessageContent::ToolResult(_)));
        if let MessageContent::ToolResult(r) = &messages[0].content {
            // 8K cap plus the omission marker stays under 8500 chars.
            assert!(r.output.len() <= 8_500);
        }
    }

    #[test]
    fn post_process_keeps_small_results_unchanged() {
        let mut messages = vec![make_tool_result_message("short output")];
        post_process_tool_results(&mut messages, 1, "bash", 16000);
        assert!(matches!(messages[0].content, MessageContent::ToolResult(_)));
        if let MessageContent::ToolResult(r) = &messages[0].content {
            assert_eq!(r.output, "short output");
        }
    }

    /// Regression: in a mixed-tool turn every ToolResult must be truncated
    /// under the rules of the tool that produced it (call_id → ATC.name),
    /// NOT under `current_tool_name`, which only names the tool that ran
    /// last. Otherwise a `read_file` result in a `read_file → bash` turn
    /// loses its exemption from the universal caps and is cut down to
    /// head + tail, breaking the file-content preservation invariant.
    #[test]
    fn post_process_keys_truncation_by_each_result_tool_not_current() {
        // 400 lines of "file content": above the universal 300-line cap that
        // would apply if the result were keyed as bash. read_file is exempt.
        let file_content = numbered_lines(400);
        let original_line_count = file_content.lines().count();

        let mut messages = vec![
            make_atc("rf1", "read_file"),
            make_tool_result_with_id("rf1", &file_content),
        ];

        // Pretend bash ran last; the result must still be treated as read_file.
        post_process_tool_results(&mut messages, 2, "bash", 128_000);

        match &messages[1].content {
            MessageContent::ToolResult(r) => {
                assert_eq!(
                    r.output.lines().count(),
                    original_line_count,
                    "read_file content must stay intact when current_tool_name \
                 is a different tool — got {} lines (expected {})",
                    r.output.lines().count(),
                    original_line_count,
                );
            }
            _ => panic!("expected ToolResult at index 1"),
        }
    }
}