Skip to main content

atomcode_core/ctx/
render.rs

1//! Default render & compression-plan policy for atomcode ctx.
2//!
3//! [`build_messages`], [`needs_compression`], and
4//! [`build_compression_content`] implement the out-of-the-box context
5//! behavior. `DefaultCtx` is a thin wrapper over them; `OllamaCtx`
6//! reuses `build_messages` / `build_compression_content` and overrides
7//! only the compression threshold (early trigger).
8//!
9//! Implementations wanting different behavior (different thresholds,
10//! different compression content format, different cold-zone layout)
11//! write their own `impl CtxBuilder` without touching this module.
12//!
13//! All functions here are free functions taking `&Conversation`,
14//! keeping `Conversation` as a pure data container — no render logic
15//! leaks back into the data layer.
16
17use crate::conversation::message::{self, Message, MessageContent, Role};
18use crate::conversation::{ContextStats, Conversation, KEEP_MESSAGES};
19
/// Return `system_prompt` with model-family behavioral directives appended.
///
/// Per-model prompt tweaks live here, in the ctx layer, next to the other
/// per-model knobs, instead of being scattered through the prompt builder.
///
/// `model_id` must already be lowercased by the caller: matching is a plain
/// case-sensitive substring test against lowercase family names.
///
/// Two directives exist today:
/// - **CN language lock** — ids containing `minimax`, `qwen`, `deepseek` or
///   `kimi` get one line asking for user-visible output in Chinese while
///   leaving tool calls and code untouched.
/// - **MiniMax thinking discipline** — ids containing `minimax` additionally
///   get a trailing `<system-reminder>` that caps `<think>` verbosity. It is
///   appended last so it sits at the very end of the prompt.
///
/// Ids matching neither group get the prompt back unchanged. Context
/// builders that do not want these directives simply never call this.
pub(crate) fn apply_model_directives(system_prompt: &str, model_id: &str) -> String {
    // Model families that receive the language lock.
    const CN_LOCK_FAMILIES: [&str; 4] = ["minimax", "qwen", "deepseek", "kimi"];
    const CN_LOCK_LINE: &str = "\n用户可见的输出请用中文。工具调用和代码保持原样。\n";
    // MiniMax has no reasoning-effort switch, so verbosity can only be
    // constrained through the prompt; keeping this block at the tail lets it
    // act as a lightweight system reminder on every turn.
    const MINIMAX_THINKING_REMINDER: &str = "\n<system-reminder>\n\
         THINKING 简洁纪律:内部思考(<think> 块)必须极简,\
         只写必要的决策线索,不要复述工具结果、不要分点展开、不要自问自答。\
         目标 ≤ 3 句话。冗长 thinking 视为严重问题。\n\
         </system-reminder>\n";

    let wants_cn_lock = CN_LOCK_FAMILIES
        .iter()
        .any(|&family| model_id.contains(family));
    let is_minimax = model_id.contains("minimax");

    let mut prompt = String::with_capacity(system_prompt.len() + 512);
    prompt.push_str(system_prompt);
    if wants_cn_lock {
        prompt.push_str(CN_LOCK_LINE);
    }
    if is_minimax {
        prompt.push_str(MINIMAX_THINKING_REMINDER);
    }
    prompt
}
69
70/// Context management with cold zone compression.
71///
72/// Structure: [System] [Cold Zone (max 3 summaries)] [Last 5 turns full]
73///
74/// The cold zone is populated by `Conversation::apply_compression` when
75/// total tokens exceed ~70% of budget. If still over 80% after cold zone
76/// injection, this function drops oldest turns inline.
77///
78/// `turn_reminder` — if non-empty, prepended to the last User message.
79/// Keeps the system prompt prefix stable across turns (好 cache),
80/// while still delivering per-turn dynamic context (git diff, current
81/// task, etc). Empty string = no injection.
82pub fn build_messages(
83    conv: &Conversation,
84    system_prompt: &str,
85    token_budget: usize,
86    turn_reminder: &str,
87) -> (Vec<Message>, ContextStats) {
88    if conv.messages.is_empty() {
89        return (
90            vec![Message::new(Role::System, system_prompt)],
91            ContextStats::default(),
92        );
93    }
94
95    let system_msg = Message::new(Role::System, system_prompt);
96    let system_tokens = system_msg.estimate_tokens();
97
98    let turns = &conv.turn_tracker.turns;
99
100    if turns.is_empty() {
101        let remaining = token_budget.saturating_sub(system_tokens);
102        return (
103            build_messages_fallback(conv, system_msg, remaining),
104            ContextStats::default(),
105        );
106    }
107
108    let mut result = Vec::with_capacity(conv.messages.len() + 3);
109    result.push(system_msg);
110
111    // Inject cold zone summaries (if any)
112    if !conv.cold_summaries.is_empty() {
113        let cold_text = format!(
114            "[Earlier conversation history ({} compression{})]\n{}",
115            conv.cold_summaries.len(),
116            if conv.cold_summaries.len() > 1 {
117                "s"
118            } else {
119                ""
120            },
121            conv.cold_summaries.join("\n---\n")
122        );
123        result.push(Message::new(Role::System, cold_text));
124    }
125
126    // Add all current messages
127    result.extend(conv.messages.iter().cloned());
128
129    // NOTE: read_file result condensation was here (83fc7ff) but reverted.
130    // 问题: 长距离重读是合理需求(旧内容被压缩后模型需要重新看),
131    // 短距离重读在 keep_recent 保护内又压缩不到。两头不讨好。
132    // 正确方案需要更深入设计,不在这里做。
133
134    // Safety: if over 80% (or 60K absolute cap), drop oldest turns.
135    // BUT: skip if cold_summaries exist — that means LLM compression just ran
136    // and we're looking at the "keep_full=5" survivor set. Dropping those too
137    // would wipe ALL context (the bug that caused sent=0 in audit sessions).
138    let budget_80pct = (token_budget * 80 / 100).min(60000);
139    let total_tokens: usize = result.iter().map(|m| m.estimate_tokens()).sum();
140    let mut dropped_tokens = 0usize;
141
142    if total_tokens > budget_80pct && conv.cold_summaries.is_empty() {
143        let tokens_to_drop = total_tokens - budget_80pct;
144
145        // ── HARD FLOOR: the last turn is sacred and NEVER dropped ──
146        // Without this floor, a single oversized tool_result could make `tokens_to_drop`
147        // exceed the sum of all earlier turns, and the `survived_start` calculation below
148        // would settle on `conv.messages.len()` → NO messages survive → sent=0 → agent
149        // goes blind and repeats searches forever (2026-04-12 21:25 session pathology).
150        let last_turn_idx = turns.len().saturating_sub(1);
151        let last_turn_start = turns
152            .get(last_turn_idx)
153            .map(|t| t.start_idx)
154            .unwrap_or(0)
155            .min(conv.messages.len());
156
157        // First pass: identify which turns to drop and extract their reasoning.
158        // Loop bound `turns.len()-1` ensures we never touch the last turn.
159        let mut drop_summaries: Vec<String> = Vec::new();
160        let mut drop_count = 0usize;
161
162        for ti in 0..turns.len().saturating_sub(1) {
163            if dropped_tokens >= tokens_to_drop {
164                break;
165            }
166            let turn = &turns[ti];
167            let end = turn.end_idx().min(conv.messages.len());
168            if turn.start_idx >= conv.messages.len() {
169                continue;
170            }
171
172            // Extract model reasoning and tool calls before dropping
173            let turn_msgs = &conv.messages[turn.start_idx..end];
174            let mut parts: Vec<String> = Vec::new();
175            for msg in turn_msgs {
176                match &msg.content {
177                    MessageContent::Text(t) if msg.role == Role::Assistant => {
178                        let short: String = t.chars().take(150).collect();
179                        if !short.trim().is_empty() {
180                            parts.push(short);
181                        }
182                    }
183                    MessageContent::AssistantWithToolCalls {
184                        text, tool_calls, ..
185                    } => {
186                        if let Some(t) = text {
187                            let short: String = t.chars().take(150).collect();
188                            if !short.trim().is_empty() {
189                                parts.push(short);
190                            }
191                        }
192                        let tools: Vec<&str> =
193                            tool_calls.iter().map(|tc| tc.name.as_str()).collect();
194                        if !tools.is_empty() {
195                            parts.push(format!("tools: {}", tools.join(", ")));
196                        }
197                    }
198                    _ => {}
199                }
200            }
201            if !parts.is_empty() {
202                drop_summaries.push(parts.join(" | "));
203            }
204
205            dropped_tokens += turn_msgs.iter().map(|m| m.estimate_tokens()).sum::<usize>();
206            drop_count += 1;
207        }
208
209        // Rebuild: system + cold zone + drop digest + surviving messages
210        let cold_msgs = if conv.cold_summaries.is_empty() { 1 } else { 2 };
211        result.truncate(cold_msgs);
212
213        // Inject mechanical digest of dropped turns so model retains reasoning chain
214        if !drop_summaries.is_empty() {
215            let digest = format!(
216                "[Context overflow: {} earlier turns compressed]\n{}",
217                drop_count,
218                drop_summaries
219                    .iter()
220                    .enumerate()
221                    .map(|(i, s)| format!("{}. {}", i + 1, s))
222                    .collect::<Vec<_>>()
223                    .join("\n")
224            );
225            result.push(Message::new(Role::System, digest));
226        }
227
228        // Find first surviving message, clamped to last_turn_start so the last turn always survives.
229        let mut survived_start = 0;
230        let mut skipped = 0usize;
231        for ti in 0..turns.len() {
232            let turn = &turns[ti];
233            let end = turn.end_idx().min(conv.messages.len());
234            if turn.start_idx >= conv.messages.len() {
235                continue;
236            }
237            let t: usize = conv.messages[turn.start_idx..end]
238                .iter()
239                .map(|m| m.estimate_tokens())
240                .sum();
241            skipped += t;
242            if skipped >= dropped_tokens {
243                survived_start = if ti + 1 < turns.len() {
244                    turns[ti + 1].start_idx
245                } else {
246                    // Old code set this to conv.messages.len() → no survivors.
247                    // Clamp to last_turn_start to preserve at least the last turn.
248                    last_turn_start
249                };
250                break;
251            }
252        }
253        // Final clamp: survived_start must not skip past the last turn.
254        survived_start = survived_start.min(last_turn_start);
255        result.extend(conv.messages[survived_start..].iter().cloned());
256    }
257
258    // Microcompact: condense PRIOR-TURN ToolResults to one-line stubs.
259    // Current turn (everything from last User message onward) is always
260    // full-fidelity — see the microcompact() docstring for the
261    // turn-aware boundary rationale (this fixes the pre-5-8
262    // `HELLO_TEST_12345` bug where fixed-window stubbing could clip
263    // the in-flight turn).
264    //
265    // Threshold = min(budget × 70%, 100K chars). The 100K cap keeps
266    // long-session token savings (kicks in around ~25K tokens of
267    // history); the 70%-of-budget floor protects small-context models
268    // from compacting too eagerly.
269    let microcompact_threshold =
270        ((token_budget as u64 * 4 * 70 / 100) as usize).min(100_000);
271    microcompact(&mut result, conv.messages.len(), microcompact_threshold);
272
273    replace_stale_reads(&mut result);
274    // sanitize_messages drops AssistantWithToolCalls whose tool_calls
275    // didn't all get followed by matching tool_result messages before
276    // a non-tool boundary (next ATC / Text / MultiPart). Required to
277    // satisfy DeepSeek's strict `insufficient tool messages following
278    // tool_calls message` 400 and the equivalent Claude/OpenAI/Gemini
279    // pairing contracts. Several upstream paths can leave the
280    // conversation in this state (cancel mid-batch, hard-truncate
281    // landing between ATC and its results, /resume of an old session)
282    // — sanitizing at send time is the defensive backstop that catches
283    // them all uniformly. Already wired into the fallback path
284    // (`build_messages_fallback`); this call extends the same safety net
285    // to the main turn-tracked path. Runs BEFORE clean_message_pipeline
286    // so the consecutive-User merger downstream can collapse any
287    // adjacent User messages that the dropped ATC was previously
288    // separating.
289    sanitize_messages(&mut result);
290    clean_message_pipeline(&mut result);
291
292    // ── ABSOLUTE FLOOR (runs AFTER all cleanup, right before sent_tokens calc) ──
293    // If compaction + cleanup somehow left us with only system messages, graft back
294    // the last user message so the LLM has *something* to respond to. This is the
295    // strictest possible invariant: whenever conv.messages is non-empty, the result
296    // must contain at least one non-system message.
297    let non_system_count = result
298        .iter()
299        .filter(|m| !matches!(m.role, Role::System))
300        .count();
301    if non_system_count == 0 {
302        if let Some(last_user) =
303            conv.messages.iter().rev().find(|m| {
304                matches!(m.role, Role::User) && matches!(m.content, MessageContent::Text(..))
305            })
306        {
307            result.push(Message::new(
308                Role::System,
309                "[Emergency: prior conversation was dropped during compaction. Only the latest user message is preserved.]"
310            ));
311            result.push(last_user.clone());
312        }
313    }
314
315    // ── FINAL BYTE CEILING (last-line-of-defense) ──
316    // microcompact protects the last 20 messages; the 80% drop cap at
317    // line ~181 skips entirely when `cold_summaries` is populated
318    // (legacy protection against a since-fixed pathology). That
319    // leaves the recent window with no byte enforcement, so accumulated
320    // mid-sized ToolResults can still blow the budget. Single
321    // oldest-first forward pass: condense each ToolResult once only
322    // (idempotent `condensed()` would otherwise spin), stop as soon
323    // as the total fits under 80% of budget. The last 4 messages
324    // (current turn's work) and Text / AssistantWithToolCalls are
325    // never touched.
326    let token_ceiling = token_budget.saturating_mul(80) / 100;
327    let keep_tail = 4.min(result.len());
328    let shrinkable_end = result.len().saturating_sub(keep_tail);
329    // Build call_id → tool_name so `condensed` can pick the right
330    // summarization strategy per tool (read_file → skeleton, others →
331    // first-line). Without this, `condensed` would have had to guess
332    // from output shape — a substring heuristic that false-positived
333    // on bash outputs with `"  N| ..."` lines.
334    let call_id_to_tool: std::collections::HashMap<String, String> = result
335        .iter()
336        .filter_map(|m| {
337            if let MessageContent::AssistantWithToolCalls { tool_calls, .. } = &m.content {
338                Some(tool_calls.iter().map(|tc| (tc.id.clone(), tc.name.clone())))
339            } else {
340                None
341            }
342        })
343        .flatten()
344        .collect();
345    for i in 1..shrinkable_end {
346        let total: usize = result.iter().map(|m| m.estimate_tokens()).sum();
347        if total <= token_ceiling {
348            break;
349        }
350        let tool_name = match &result[i].content {
351            MessageContent::ToolResult(r) => call_id_to_tool
352                .get(&r.call_id)
353                .map(|s| s.as_str())
354                .unwrap_or(""),
355            _ => continue,
356        };
357        let before = result[i].estimate_tokens();
358        let condensed = result[i].condensed(tool_name);
359        if condensed.estimate_tokens() < before {
360            result[i] = condensed;
361        }
362    }
363
364    // Turn reminder: prepend to last User message. Runs AFTER all
365    // compaction/cleanup so the reminder always rides the most recent
366    // user turn. Keeps system_prompt itself stable (cacheable).
367    if !turn_reminder.is_empty() {
368        for msg in result.iter_mut().rev() {
369            if matches!(msg.role, Role::User) {
370                if let MessageContent::Text(ref mut text) = msg.content {
371                    *text = format!("{}\n{}", turn_reminder, text);
372                    break;
373                }
374            }
375        }
376    }
377
378    let sent_tokens: usize = result
379        .iter()
380        .map(|m| m.estimate_tokens())
381        .sum::<usize>()
382        .saturating_sub(system_tokens);
383    let msg_count = result.len();
384    (
385        result,
386        ContextStats {
387            system_tokens,
388            sent_tokens,
389            dropped_tokens,
390            total_messages: msg_count,
391        },
392    )
393}
394
/// Headroom, in tokens, kept free on large context windows (the 200K class),
/// where compaction can afford to leave a generous runway for the response
/// and the next round of tool results.
pub const AUTO_COMPACT_BUFFER_LARGE: usize = 13_000;

/// Headroom, in tokens, kept free on small / proxy-bound windows (for
/// example a 65K self-hosted deployment). Enough for one streamed response
/// plus a round of tool results. Using the large 13K buffer on a 65K window
/// would trigger compaction at 52K instead of 60K, giving up 8K of usable
/// session.
pub const AUTO_COMPACT_BUFFER_SMALL: usize = 5_000;

/// Boundary between "small" and "large" windows: budgets strictly greater
/// than this use [`AUTO_COMPACT_BUFFER_LARGE`], everything else uses
/// [`AUTO_COMPACT_BUFFER_SMALL`].
pub const AUTO_COMPACT_LARGE_WINDOW_FROM: usize = 100_000;

/// Token total above which auto-compression should fire for a context
/// window of `token_budget` tokens.
///
/// The result is `token_budget - buffer`, where the buffer is:
/// - 5K for budgets ≤ 100K (65K window → 60K trigger),
/// - 13K for budgets > 100K (200K window → 187K trigger),
/// - never more than a quarter of the budget, so tiny windows keep a useful
///   threshold (8K window → 2K buffer → 6K trigger) instead of collapsing
///   towards zero.
pub fn auto_compact_threshold(token_budget: usize) -> usize {
    let quarter_cap = token_budget / 4;
    let preferred_buffer = match token_budget {
        budget if budget > AUTO_COMPACT_LARGE_WINDOW_FROM => AUTO_COMPACT_BUFFER_LARGE,
        _ => AUTO_COMPACT_BUFFER_SMALL,
    };
    token_budget.saturating_sub(preferred_buffer.min(quarter_cap))
}
432
433/// Check if context needs compression.
434///
435/// Threshold derived from `auto_compact_threshold` — fires when fewer
436/// than `buffer` tokens remain (5K for ≤100K windows, 13K for >100K).
437/// Buffer scales with the deployment: self-hosted GLM at 65K trips
438/// at 60K (4K runway is plenty for one round); Anthropic at 200K
439/// trips at 187K, matching CC's behaviour.
440///
441/// The `messages.len() < 12` guard stays — needs a non-trivial backlog
442/// before compression is worthwhile, and 1 user msg can produce 15+
443/// messages so message count is the right unit.
444pub fn needs_compression(
445    conv: &Conversation,
446    system_prompt_tokens: usize,
447    token_budget: usize,
448) -> bool {
449    if conv.messages.len() < 12 {
450        return false;
451    }
452    let total: usize = system_prompt_tokens
453        + conv
454            .messages
455            .iter()
456            .map(|m| m.estimate_tokens())
457            .sum::<usize>();
458    total > auto_compact_threshold(token_budget)
459}
460
461/// Build content for LLM compression.
462///
463/// Strategy: keep the last `KEEP_MESSAGES` messages at full fidelity,
464/// compress everything before that into one-line-per-round summaries.
465/// Returns `(compressed_text, number_of_messages_to_remove)`.
466///
467/// This operates at MESSAGE level, not turn level, because `turn_tracker`
468/// counts user messages (1 user msg = 1 turn) but a single user message
469/// can produce 15+ LLM calls with 35+ messages.
470pub fn build_compression_content(conv: &Conversation) -> (String, usize) {
471    if conv.messages.len() <= KEEP_MESSAGES {
472        return (String::new(), 0);
473    }
474
475    let mut compress_end_idx = conv.messages.len() - KEEP_MESSAGES;
476
477    // ── Pair-preserving snap ──
478    // Anthropic API requires every `tool_result` to have its paired
479    // `tool_use` in the same conversation. If the naive cut lands on a
480    // ToolResult whose ATC lives in the drop range, the surviving range
481    // begins with an orphan — `clean_message_pipeline` would silently
482    // drop it and we'd lose the edit confirmation / tool output.
483    //
484    // Advance the cut forward past any trailing ToolResults so they
485    // get dropped WITH their paired ATC (already in the drop range),
486    // not kept as orphans. `compress_msgs` below uses the same index
487    // so the summary captures these results too.
488    while compress_end_idx < conv.messages.len() {
489        match &conv.messages[compress_end_idx].content {
490            message::MessageContent::ToolResult(_) | message::MessageContent::ToolResultRef(_) => {
491                compress_end_idx += 1;
492            }
493            _ => break,
494        }
495    }
496
497    // If snapping consumed all remaining messages, nothing to compress.
498    if compress_end_idx >= conv.messages.len() {
499        return (String::new(), 0);
500    }
501
502    // Group messages into logical rounds (assistant + tool_calls + tool_results)
503    // and compress each round into a one-liner.
504    let mut content = String::new();
505    let mut round = 0usize;
506    let compress_msgs = &conv.messages[..compress_end_idx];
507    let mut i = 0;
508    while i < compress_msgs.len() {
509        // Collect messages for this round
510        let round_start = i;
511        // A round starts at a User or Assistant message and includes
512        // all subsequent tool results until the next User/Assistant.
513        i += 1;
514        while i < compress_msgs.len() {
515            match compress_msgs[i].role {
516                message::Role::User | message::Role::Assistant => break,
517                _ => i += 1,
518            }
519        }
520        round += 1;
521        let round_msgs = &compress_msgs[round_start..i];
522        content.push_str(&compress_turn(round, round_msgs));
523        content.push('\n');
524    }
525
526    // Return message count (not turn count) for apply_compression
527    (content, compress_end_idx)
528}
529
530// ─── private helpers ────────────────────────────────────────────────
531
532/// Compress a turn into a one-line mechanical summary.
533/// No LLM call — deterministic, fast, never fails.
534/// Format: "Turn N: user asked X → read file.js, edited file.js (-3 +5 lines)"
535// ── INVARIANT (2026-04-16): compress_turn MUST preserve assistant thinking ──
536// The assistant's text (thinking/reasoning) in AssistantWithToolCalls is the
537// diagnostic conclusion for that turn ("代码逻辑看起来正确", "问题找到了!ID不匹配").
538// Without it, the compressed summary says only "read main.ts, grep closeSettings"
539// — the model doesn't know it already confirmed the logic was correct, so it
540// searches the same files again. 39-turn loop sessions traced to this omission.
541fn compress_turn(turn_num: usize, turn_msgs: &[Message]) -> String {
542    let mut user_text = String::new();
543    let mut assistant_text = String::new();
544    let mut tools: Vec<String> = Vec::new();
545
546    for msg in turn_msgs {
547        match (&msg.role, &msg.content) {
548            (Role::User, MessageContent::Text(s)) => {
549                if !s.starts_with('[') {
550                    // skip system-injected messages
551                    user_text = if s.chars().count() > 60 {
552                        format!("{}...", s.chars().take(57).collect::<String>())
553                    } else {
554                        s.clone()
555                    };
556                }
557            }
558            (
559                _,
560                MessageContent::AssistantWithToolCalls {
561                    text, tool_calls, ..
562                },
563            ) => {
564                // Preserve assistant's diagnostic conclusion (first 80 chars).
565                if let Some(t) = text {
566                    let trimmed = t.trim();
567                    if !trimmed.is_empty() && assistant_text.is_empty() {
568                        assistant_text = if trimmed.chars().count() > 80 {
569                            format!("{}...", trimmed.chars().take(77).collect::<String>())
570                        } else {
571                            trimmed.to_string()
572                        };
573                    }
574                }
575                for tc in tool_calls {
576                    let short = if let Ok(args) =
577                        serde_json::from_str::<serde_json::Value>(&tc.arguments)
578                    {
579                        let fp = args.get("file_path").and_then(|v| v.as_str()).map(|p| {
580                            std::path::Path::new(p)
581                                .file_name()
582                                .map(|n| n.to_string_lossy().to_string())
583                                .unwrap_or_else(|| p.to_string())
584                        });
585                        match (tc.name.as_str(), fp) {
586                            ("read_file", Some(f)) => format!("read {}", f),
587                            ("edit_file", Some(f)) => format!("edit {}", f),
588                            ("write_file", Some(f)) => format!("write {}", f),
589                            ("grep", _) => {
590                                let pat =
591                                    args.get("pattern").and_then(|v| v.as_str()).unwrap_or("?");
592                                format!("grep({})", pat)
593                            }
594                            ("bash", _) => {
595                                let cmd =
596                                    args.get("command").and_then(|v| v.as_str()).unwrap_or("?");
597                                let short_cmd: String = cmd.chars().take(30).collect();
598                                format!("bash({})", short_cmd)
599                            }
600                            (name, _) => name.to_string(),
601                        }
602                    } else {
603                        tc.name.clone()
604                    };
605                    if !tools.contains(&short) {
606                        tools.push(short);
607                    }
608                }
609            }
610            (Role::Assistant, MessageContent::Text(s)) => {
611                if assistant_text.is_empty() {
612                    let trimmed = s.trim();
613                    if !trimmed.is_empty() {
614                        assistant_text = if trimmed.chars().count() > 80 {
615                            format!("{}...", trimmed.chars().take(77).collect::<String>())
616                        } else {
617                            trimmed.to_string()
618                        };
619                    }
620                }
621            }
622            (_, MessageContent::ToolResult(r)) if !r.success => {
623                tools.push("FAILED".to_string());
624            }
625            _ => {}
626        }
627    }
628
629    let tools_str = if tools.is_empty() {
630        "no tools".to_string()
631    } else {
632        tools.join(", ")
633    };
634
635    let prefix = if !user_text.is_empty() {
636        format!("\"{}\" ", user_text)
637    } else {
638        String::new()
639    };
640    let conclusion = if !assistant_text.is_empty() {
641        format!("[{}] ", assistant_text)
642    } else {
643        String::new()
644    };
645    format!(
646        "- Turn {}: {}{}→ {}",
647        turn_num, prefix, conclusion, tools_str
648    )
649}
650
651/// Fallback windowing when no turns are tracked.
652/// Keeps as many recent messages as fit within 60% of remaining budget.
653fn build_messages_fallback(
654    conv: &Conversation,
655    system_msg: Message,
656    remaining_budget: usize,
657) -> Vec<Message> {
658    let budget = remaining_budget * 60 / 100;
659    let mut used = 0usize;
660    let mut start = conv.messages.len();
661
662    for i in (0..conv.messages.len()).rev() {
663        let msg_tokens = conv.messages[i].estimate_tokens();
664        if used + msg_tokens > budget {
665            break;
666        }
667        used += msg_tokens;
668        start = i;
669    }
670    start = snap_to_valid_boundary(&conv.messages, start);
671
672    let mut result = Vec::with_capacity(conv.messages.len() - start + 1);
673    result.push(system_msg);
674    result.extend(conv.messages[start..].iter().cloned());
675    sanitize_messages(&mut result);
676    result
677}
678
679/// Snap an index to a valid message boundary for the API.
680fn snap_to_valid_boundary(messages: &[Message], idx: usize) -> usize {
681    let mut start = idx.min(messages.len());
682
683    // Skip orphan ToolResult/ToolResultRef messages
684    while start < messages.len() {
685        match &messages[start].content {
686            MessageContent::ToolResult(_) | MessageContent::ToolResultRef(_) => start += 1,
687            _ => break,
688        }
689    }
690
691    // Prefer starting at a User message
692    let original = start;
693    while start < messages.len() {
694        if matches!(messages[start].role, Role::User | Role::System) {
695            break;
696        }
697        start += 1;
698        if start > original + 5 {
699            return original;
700        }
701    }
702    start
703}
704
705// ─── Message-list manipulation helpers used during render ───────────
706// These operate on `&mut Vec<Message>` and are called by
707// `build_messages` to apply rolling condensation / freshness
708// replacement / sanity cleanup.
709
/// Size floor for collapsing a tool output: outputs smaller than this are
/// left alone.
///
/// Intended to double as the idempotence guarantee for compaction: a stub
/// that is itself below the floor is never stubbed again on a later pass.
/// NOTE(review): the comparison that applies this floor, and therefore its
/// unit (bytes vs. chars), lives in the callers — confirm there that every
/// generated stub really stays under it.
pub(crate) const MIN_COLLAPSE_SIZE: usize = 500;
714
/// Build the one-line stub that stands in for a compacted tool output.
///
/// Shape: `[<tool_name> <ok|FAILED>: <N> lines, first: <preview>]`.
///
/// `tool_name` is passed through verbatim, so every tool gets the same stub
/// shape and no per-tool knowledge is hardcoded here.
///
/// **Preview selection.** The preview is the first line of `output`, cut to
/// 80 characters — except that a first line starting with `[elapsed:` is
/// treated as framework metadata (the prefix our own bash tool puts in front
/// of its real output) and skipped in favour of the second line, so the stub
/// surfaces the actual output or error instead of just timing / exit code.
/// If there is no second line, the metadata line is used after all. Empty
/// output yields the preview `(empty)`.
///
/// # Parameters
/// - `tool_name`: name shown at the front of the stub.
/// - `output`: full tool output being replaced.
/// - `success`: selects the `ok` / `FAILED` status word.
pub(crate) fn build_compact_stub(tool_name: &str, output: &str, success: bool) -> String {
    let mut remaining = output.lines();
    let preview_source = match remaining.next() {
        None => "(empty)",
        // Metadata header: prefer the line after it when one exists.
        Some(head) if head.starts_with("[elapsed:") => remaining.next().unwrap_or(head),
        Some(head) => head,
    };
    let preview: String = preview_source.chars().take(80).collect();

    let total_lines = output.lines().count();
    let verdict = if success { "ok" } else { "FAILED" };

    format!(
        "[{} {}: {} lines, first: {}]",
        tool_name, verdict, total_lines, preview,
    )
}
752
753/// Build a `call_id -> tool_name` lookup from a slice of messages. The
754/// `MessageContent::AssistantWithToolCalls` variant carries the model's
755/// own tool name; this is what we surface in stubs.
756fn build_call_id_to_tool_map(
757    msgs: &[Message],
758) -> std::collections::HashMap<String, String> {
759    let mut map = std::collections::HashMap::new();
760    for msg in msgs {
761        if let MessageContent::AssistantWithToolCalls { tool_calls, .. } = &msg.content {
762            for tc in tool_calls {
763                map.insert(tc.id.clone(), tc.name.clone());
764            }
765        }
766    }
767    map
768}
769
770/// Conv-level Tier 1 compaction. Replaces tool_result bodies in turns
771/// older than `keep_recent_turns` with the same generic stub used by
772/// microcompact. This is the destructive counterpart: microcompact runs
773/// every render and is ephemeral (only mutates the rendered Vec); this
774/// runs from the agent emergency path and permanently shrinks
775/// `conv.messages` so the next `needs_compression` check sees the
776/// freed budget.
777///
778/// Idempotent: stubs already in place are smaller than MIN_COLLAPSE_SIZE
779/// and skip the rewrite.
780pub(crate) fn compact_old_tool_results_in_place(
781    conv: &mut crate::conversation::Conversation,
782    keep_recent_turns: usize,
783) {
784    let turns = &conv.turn_tracker.turns;
785    if turns.len() <= keep_recent_turns {
786        return;
787    }
788    let cutoff_turn = turns.len() - keep_recent_turns;
789    let cutoff_msg = turns[cutoff_turn].start_idx.min(conv.messages.len());
790
791    let call_id_to_tool = build_call_id_to_tool_map(&conv.messages);
792
793    for i in 0..cutoff_msg {
794        let MessageContent::ToolResult(ref tr) = conv.messages[i].content else {
795            continue;
796        };
797        if tr.output.len() <= MIN_COLLAPSE_SIZE {
798            continue;
799        }
800        let tool_name = call_id_to_tool
801            .get(&tr.call_id)
802            .map(|s| s.as_str())
803            .unwrap_or("tool");
804        let summary = build_compact_stub(tool_name, &tr.output, tr.success);
805        conv.messages[i].content = MessageContent::ToolResult(crate::tool::ToolResult {
806            call_id: tr.call_id.clone(),
807            output: summary,
808            success: tr.success,
809        });
810    }
811}
812
813/// Microcompact: condense **prior-turn** `ToolResult` messages to one-line
814/// semantic summaries. Zero LLM calls — purely mechanical compression.
815///
816/// **Turn-aware boundary (5-8 redesign).** Earlier versions used a
817/// fixed `OTHER_KEEP = 20` last-messages window. That window slid every
818/// LLM round, so within ONE user turn the model's earlier tool results
819/// got progressively stubbed as the model emitted more tool calls —
820/// the "model echoes HELLO_TEST_12345 to verify it can see anything"
821/// 5-8 atomgr session was caused by this. Now we anchor on the last
822/// `Role::User` message in the rendered Vec: everything from that
823/// message onward IS the current turn and stays full-fidelity; only
824/// strictly older content is eligible for stubbing.
825///
826/// **Threshold (5-8 redesign).** Earlier capped at 100K chars (~25K
827/// tokens) → triggered at ~20% of a 131K-token window, way too eager.
828/// Now `threshold_chars = 70% × token_budget × 4` (uncapped) so
829/// microcompact only fires when the conversation is genuinely close
830/// to filling the model's window. Below 70% it's a no-op.
831///
832/// **Stub format.** `[<tool_name> <ok|FAILED>: N lines, first: <80c>]`.
833/// Tool name comes from the model's own `tool_calls.name` (no
834/// `match tool_name { "bash" => ... }` framework branches). `read_file`
835/// is exempted by hardcoded skip — see in-line comment for rationale.
836fn microcompact(msgs: &mut Vec<Message>, _total_msg_count: usize, threshold_chars: usize) {
837    let total_chars: usize = msgs
838        .iter()
839        .map(|m| match &m.content {
840            MessageContent::ToolResult(r) => r.output.len(),
841            MessageContent::Text(t) => t.len(),
842            _ => 100,
843        })
844        .sum();
845    if total_chars < threshold_chars {
846        return;
847    }
848
849    // Anchor on the last User message — everything after it is the
850    // ACTIVE turn and must stay full. If no User message (cold start
851    // / system-only), there's nothing to compress yet.
852    let current_turn_start = match msgs
853        .iter()
854        .rposition(|m| matches!(m.role, Role::User))
855    {
856        Some(i) => i,
857        None => return,
858    };
859
860    let cold_msgs = msgs
861        .iter()
862        .position(|m| !matches!(m.role, Role::System))
863        .unwrap_or(0);
864
865    if cold_msgs >= current_turn_start {
866        return; // nothing between system and current turn
867    }
868
869    let call_id_to_tool = build_call_id_to_tool_map(msgs);
870
871    for i in cold_msgs..current_turn_start {
872        let MessageContent::ToolResult(ref r) = msgs[i].content else {
873            continue;
874        };
875
876        if r.output.len() <= MIN_COLLAPSE_SIZE {
877            continue;
878        }
879
880        let tool_name = call_id_to_tool
881            .get(&r.call_id)
882            .map(|s| s.as_str())
883            .unwrap_or("tool");
884
885        // read_file 永远不被 microcompact 压缩。stub 给模型的
886        // `first: 205| pub async fn dynamic_connect(` 信息会制造"伪自信"
887        // ——模型以为还记得函数体就直接 edit,结果反复修同一个文件
888        // (5-7 atomgr datalog T22-T29 实证 6 turn 反复修补)。保留全文
889        // 让模型在 edit 系列 turn 里始终看到最新代码。
890        // D3 FileStore 已经处理 re-read 的 disk-side 成本;prompt-side
891        // 多花 5-10% token 换"模型不丢上下文",是值得的交易。
892        //
893        // 关于硬编码: 这里直接字符串比较 "read_file",而非工具自声明
894        // (e.g. trait fn microcompact_eligible)。妥协理由:
895        // (a) "read_file" 是框架自家工具名常量,不是 cargo/npm/pytest
896        //     这类技术栈关键字,不违反"框架对技术栈中立"的项目铁律;
897        // (b) 改成 trait 方法需要把 ToolRegistry 引用穿进 render 层,
898        //     渲染路径调用面增大,收益不抵成本;
899        // (c) 仅此一处,未来如有第二个工具也要豁免,再重构成 trait。
900        if tool_name == "read_file" {
901            continue;
902        }
903
904        let summary = build_compact_stub(tool_name, &r.output, r.success);
905
906        msgs[i].content = MessageContent::ToolResult(crate::tool::ToolResult {
907            call_id: r.call_id.clone(),
908            output: summary,
909            success: r.success,
910        });
911    }
912}
913
914/// Replace stale read_file results with current disk content.
915/// When a file was read then later edited, the old read result is outdated.
916/// This replaces it so the model always sees the latest version.
917fn replace_stale_reads(msgs: &mut Vec<Message>) {
918    struct ReadInfo {
919        file_path: String,
920        offset: Option<usize>,
921        limit: Option<usize>,
922    }
923    let mut call_id_to_read: std::collections::HashMap<String, ReadInfo> =
924        std::collections::HashMap::new();
925    let mut edit_call_to_file: std::collections::HashMap<String, String> =
926        std::collections::HashMap::new();
927    let mut edited_files: std::collections::HashSet<String> = std::collections::HashSet::new();
928
929    for msg in msgs.iter() {
930        if let MessageContent::AssistantWithToolCalls { tool_calls, .. } = &msg.content {
931            for tc in tool_calls {
932                if let Ok(args) = serde_json::from_str::<serde_json::Value>(&tc.arguments) {
933                    let file_path = args
934                        .get("file_path")
935                        .and_then(|v| v.as_str())
936                        .unwrap_or("")
937                        .to_string();
938                    if tc.name == "read_file" && !file_path.is_empty() {
939                        let offset = args
940                            .get("offset")
941                            .and_then(|v| v.as_u64())
942                            .map(|v| v as usize);
943                        let limit = args
944                            .get("limit")
945                            .and_then(|v| v.as_u64())
946                            .map(|v| v as usize);
947                        call_id_to_read.insert(
948                            tc.id.clone(),
949                            ReadInfo {
950                                file_path: file_path.clone(),
951                                offset,
952                                limit,
953                            },
954                        );
955                    }
956                    if matches!(tc.name.as_str(), "edit_file" | "write_file" | "create_file")
957                        && !file_path.is_empty()
958                    {
959                        edit_call_to_file.insert(tc.id.clone(), file_path);
960                    }
961                }
962            }
963        }
964        if let MessageContent::ToolResult(ref r) = msg.content {
965            if let Some(file_path) = edit_call_to_file.get(&r.call_id) {
966                if !r.output.starts_with("Error") {
967                    edited_files.insert(file_path.clone());
968                }
969            }
970        }
971    }
972
973    if edited_files.is_empty() {
974        return;
975    }
976
977    for msg in msgs.iter_mut() {
978        if let MessageContent::ToolResult(ref mut r) = msg.content {
979            if let Some(info) = call_id_to_read.get(&r.call_id) {
980                if !edited_files.contains(&info.file_path) {
981                    continue;
982                }
983                if let Ok(content) = std::fs::read_to_string(&info.file_path) {
984                    let all_lines: Vec<&str> = content.lines().collect();
985                    let total = all_lines.len();
986
987                    if info.offset.is_some() || info.limit.is_some() {
988                        let start = info.offset.unwrap_or(1).max(1) - 1;
989                        let start = start.min(total);
990                        let end = info.limit.map(|l| (start + l).min(total)).unwrap_or(total);
991                        let display: String = all_lines[start..end]
992                            .iter()
993                            .enumerate()
994                            .map(|(i, l)| format!("{:>4}| {}", start + i + 1, l))
995                            .collect::<Vec<_>>()
996                            .join("\n");
997                        r.output = display;
998                    } else if total <= 300 {
999                        r.output = all_lines
1000                            .iter()
1001                            .enumerate()
1002                            .map(|(i, l)| format!("{:>4}| {}", i + 1, l))
1003                            .collect::<Vec<_>>()
1004                            .join("\n");
1005                    }
1006                    // else: large-file full-read, keep existing skeleton as-is.
1007                }
1008            }
1009        }
1010    }
1011}
1012
1013/// Walk forward tracking tool_call/tool_result pairing; remove orphans.
1014/// Valid sequences: System → (User → Assistant/AssistantWithToolCalls → [ToolResult]* → ...)*
1015///
1016/// Drops three kinds of broken state:
1017///
1018/// 1. **Orphan ToolResult** — appears outside any `expecting` window
1019///    (no preceding AssistantWithToolCalls awaiting it). Removed solo.
1020/// 2. **Mid-conversation under-paired AssistantWithToolCalls** — has N
1021///    tool_calls but a Text / MultiPart / next ATC arrives before all N
1022///    ToolResults have been seen. The unsatisfied ATC AND any partial
1023///    ToolResults already paired with it are removed together. This is
1024///    the path that triggers DeepSeek's `insufficient tool messages
1025///    following tool_calls message` 400 — the strictest providers
1026///    require the wire-level invariant `len(asst.tool_calls) ==
1027///    len(following tool messages)` to hold for every ATC, not just the
1028///    most recent one.
1029/// 3. **Trailing under-paired AssistantWithToolCalls** — same as (2)
1030///    but the conversation ends mid-pairing. Handled by the rev-scan
1031///    after the main loop.
1032fn sanitize_messages(msgs: &mut Vec<Message>) {
1033    let mut to_remove: Vec<usize> = Vec::new();
1034    let mut expecting_tool_results = 0usize;
1035    // Track the most recent ATC and the ToolResult indices already
1036    // paired with it. On a boundary (Text / MultiPart / next ATC) with
1037    // `expecting > 0`, both the ATC and its partial results are dropped.
1038    let mut current_atc_idx: Option<usize> = None;
1039    let mut current_atc_results: Vec<usize> = Vec::new();
1040
1041    for i in 0..msgs.len() {
1042        match &msgs[i].content {
1043            MessageContent::ToolResult(_) | MessageContent::ToolResultRef(_) => {
1044                if expecting_tool_results > 0 {
1045                    expecting_tool_results -= 1;
1046                    current_atc_results.push(i);
1047                } else {
1048                    to_remove.push(i);
1049                }
1050            }
1051            MessageContent::AssistantWithToolCalls { tool_calls, .. } => {
1052                if expecting_tool_results > 0 {
1053                    if let Some(idx) = current_atc_idx {
1054                        to_remove.push(idx);
1055                    }
1056                    to_remove.extend(current_atc_results.drain(..));
1057                } else {
1058                    current_atc_results.clear();
1059                }
1060                expecting_tool_results = tool_calls.len();
1061                current_atc_idx = Some(i);
1062            }
1063            MessageContent::Text(_) | MessageContent::MultiPart { .. } => {
1064                if expecting_tool_results > 0 {
1065                    if let Some(idx) = current_atc_idx {
1066                        to_remove.push(idx);
1067                    }
1068                    to_remove.extend(current_atc_results.drain(..));
1069                } else {
1070                    current_atc_results.clear();
1071                }
1072                expecting_tool_results = 0;
1073                current_atc_idx = None;
1074            }
1075        }
1076    }
1077
1078    if expecting_tool_results > 0 {
1079        for i in (0..msgs.len()).rev() {
1080            match &msgs[i].content {
1081                MessageContent::AssistantWithToolCalls { .. } => {
1082                    to_remove.push(i);
1083                    break;
1084                }
1085                MessageContent::ToolResult(_) | MessageContent::ToolResultRef(_) => {
1086                    to_remove.push(i);
1087                }
1088                _ => break,
1089            }
1090        }
1091    }
1092
1093    to_remove.sort_unstable();
1094    to_remove.dedup();
1095    for &idx in to_remove.iter().rev() {
1096        msgs.remove(idx);
1097    }
1098}
1099
1100/// Clean message pipeline before sending to API.
1101/// Removes noise that degrades model decision quality:
1102/// - Empty/whitespace-only assistant messages
1103/// - Orphaned tool results (no matching tool_use)
1104/// - Consecutive same-role user messages (merge into one)
1105/// - Consecutive system messages (merge into one) — MiniMax-M2.7 rejects
1106///   adjacent `system` turns with `2013 invalid chat setting`; the
1107///   post-compression layout (orig system + cold-zone + drop-digest) is
1108///   the trigger.
1109fn clean_message_pipeline(msgs: &mut Vec<Message>) {
1110    // 1. Remove empty assistant messages (e.g., after <think> stripping)
1111    msgs.retain(|m| {
1112        if m.role == Role::Assistant {
1113            match &m.content {
1114                MessageContent::Text(t) => !t.trim().is_empty(),
1115                _ => true,
1116            }
1117        } else {
1118            true
1119        }
1120    });
1121
1122    // 2. Collect valid tool_use IDs from assistant messages
1123    let mut valid_call_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
1124    for msg in msgs.iter() {
1125        if let MessageContent::AssistantWithToolCalls { tool_calls, .. } = &msg.content {
1126            for tc in tool_calls {
1127                valid_call_ids.insert(tc.id.clone());
1128            }
1129        }
1130    }
1131
1132    // 3. Remove orphaned tool results (no matching tool_use)
1133    msgs.retain(|m| {
1134        if let MessageContent::ToolResult(ref r) = m.content {
1135            valid_call_ids.contains(&r.call_id)
1136        } else if let MessageContent::ToolResultRef(ref r) = m.content {
1137            valid_call_ids.contains(&r.call_id)
1138        } else {
1139            true
1140        }
1141    });
1142
1143    // 4. Merge consecutive user messages into one
1144    let mut i = 1;
1145    while i < msgs.len() {
1146        if msgs[i].role == Role::User && msgs[i - 1].role == Role::User {
1147            if let (MessageContent::Text(prev), MessageContent::Text(curr)) =
1148                (&msgs[i - 1].content, &msgs[i].content)
1149            {
1150                let merged = format!("{}\n{}", prev, curr);
1151                msgs[i - 1].content = MessageContent::Text(merged);
1152                msgs.remove(i);
1153                continue;
1154            }
1155        }
1156        i += 1;
1157    }
1158
1159    // 5. Merge consecutive system messages into one. After compression the
1160    // wire layout is `system(orig) + system(cold-zone) [+ system(drop-digest)]`,
1161    // which MiniMax-M2.7's chat-setting validator rejects (empty stream then
1162    // 400 / 2013). Blank line between blocks preserves visual separation.
1163    let mut i = 1;
1164    while i < msgs.len() {
1165        if msgs[i].role == Role::System && msgs[i - 1].role == Role::System {
1166            if let (MessageContent::Text(prev), MessageContent::Text(curr)) =
1167                (&msgs[i - 1].content, &msgs[i].content)
1168            {
1169                let merged = format!("{}\n\n{}", prev, curr);
1170                msgs[i - 1].content = MessageContent::Text(merged);
1171                msgs.remove(i);
1172                continue;
1173            }
1174        }
1175        i += 1;
1176    }
1177}
1178
1179#[cfg(test)]
1180mod tests {
1181    use super::*;
1182    use crate::conversation::message::{Message, Role};
1183    use crate::conversation::Conversation;
1184
1185    #[test]
1186    fn apply_model_directives_noop_for_generic_model() {
1187        // gpt / claude / gemini 等模型不触发任何指令 — 原 prompt 原样返回。
1188        let out = apply_model_directives("SYS", "gpt-4o");
1189        assert_eq!(out, "SYS");
1190        let out = apply_model_directives("SYS", "claude-opus-4-7");
1191        assert_eq!(out, "SYS");
1192    }
1193
1194    #[test]
1195    fn auto_compact_threshold_large_window_uses_large_buffer() {
1196        // > 100K → 13K buffer (Anthropic / CC territory). 200K - 13K = 187K.
1197        assert_eq!(auto_compact_threshold(200_000), 187_000);
1198        // 131K → boundary above the 100K cutoff, also gets 13K buffer.
1199        assert_eq!(auto_compact_threshold(131_072), 118_072);
1200    }
1201
1202    #[test]
1203    fn auto_compact_threshold_small_window_uses_small_buffer() {
1204        // ≤ 100K → 5K buffer (proxy-bound deployments). 65K - 5K = 60K
1205        // — exactly the sweet spot for a 65K self-hosted GLM cap:
1206        // compaction kicks in 5K below the proxy hard wall, leaving
1207        // a runway for one streaming response without forcing
1208        // pre-emptive compaction so early it shrinks the usable
1209        // session.
1210        assert_eq!(auto_compact_threshold(65_000), 60_000);
1211        // 100K is the boundary — still small-buffer (the cutoff is
1212        // strictly greater-than).
1213        assert_eq!(auto_compact_threshold(100_000), 95_000);
1214        // Just over 100K trips into large-buffer territory.
1215        assert_eq!(auto_compact_threshold(101_000), 88_000);
1216    }
1217
1218    #[test]
1219    fn auto_compact_threshold_tiny_window_caps_at_quarter() {
1220        // 8K Ollama: 5K buffer would still leave only 3K usable, but
1221        // window/4 = 2K caps the buffer below 5K → 6K threshold (~75%
1222        // of window). Scales the buffer when the window is too small
1223        // for the small-buffer constant.
1224        assert_eq!(auto_compact_threshold(8_000), 6_000);
1225        assert_eq!(auto_compact_threshold(16_000), 12_000);
1226        // At 20K the small-buffer constant (5K) lands at exactly
1227        // window/4, so 5K applies straight: 20K - 5K = 15K.
1228        assert_eq!(auto_compact_threshold(20_000), 15_000);
1229    }
1230
1231    #[test]
1232    fn auto_compact_threshold_handles_degenerate_window() {
1233        // ctx_window == 0 happens transiently before the provider config
1234        // loads; saturating_sub keeps it from panicking. Threshold is 0,
1235        // so any non-empty conversation trips the gate — caller's
1236        // `messages.len() < 12` check still gates the actual fire.
1237        assert_eq!(auto_compact_threshold(0), 0);
1238    }
1239
1240    #[test]
1241    fn needs_compression_fires_at_absolute_headroom_not_percentage() {
1242        // Reproduces the user's debug confusion: under the prior formula
1243        // a 131K window's threshold was `min(131K * 50%, 50K) = 50K` —
1244        // compression fired at 38% of window, leaving 81K of phantom
1245        // "available" headroom that wasn't actually used. The new
1246        // formula fires at 118K (90% of window), matching the user's
1247        // intuition of "fire when ~13K headroom remains".
1248        //
1249        // Test fixture: 15 alternating User/Assistant messages so the
1250        // 12-message guard passes (`add_user_message` merges
1251        // consecutive User msgs, which would collapse 15 calls into 1).
1252        let mut conv = Conversation::new();
1253        for i in 0..8 {
1254            conv.messages.push(Message::new(Role::User, format!("u{}", i)));
1255            conv.messages.push(Message::new(Role::Assistant, format!("a{}", i)));
1256        }
1257        assert_eq!(conv.messages.len(), 16);
1258        assert!(!needs_compression(&conv, 0, 131_072));
1259
1260        // 500K bytes ≈ 125K tokens (byte / 4) → exceeds 118K threshold.
1261        conv.messages
1262            .push(Message::new(Role::User, "x".repeat(500_000)));
1263        assert!(needs_compression(&conv, 0, 131_072));
1264    }
1265
1266    #[test]
1267    fn tool_result_ref_token_estimate_uses_summary_not_byte_size() {
1268        // Pre-fix bug: ToolResultRef estimated from the full original
1269        // content size (could be 50K+ for a large file read), but at
1270        // send time only `r.summary` (a short string) was actually
1271        // serialised. The estimator overcounted by 5-50× on
1272        // externalised results, pushing compression to fire on phantom
1273        // budget pressure.
1274        use crate::conversation::message::MessageContent;
1275        use crate::tool::result_store::ToolResultRef;
1276
1277        let big_ref = ToolResultRef {
1278            call_id: "call_1".into(),
1279            hash: "deadbeef".into(),
1280            summary: "hello".into(), // 5 bytes
1281            byte_size: 200_000,      // pretend the disk-cached blob is 200KB
1282            success: true,
1283        };
1284        let msg = Message {
1285            role: Role::User,
1286            content: MessageContent::ToolResultRef(big_ref),
1287        };
1288        // (5 + 10) / 4 + 4 = 7. Pre-fix this was (200000 + 10) / 4 + 4 = 50006.
1289        assert!(
1290            msg.estimate_tokens() < 20,
1291            "expected estimate to track summary size, got {}",
1292            msg.estimate_tokens()
1293        );
1294    }
1295
1296    #[test]
1297    fn apply_model_directives_cn_lock_for_cjk_tier() {
1298        for id in ["qwen3-max", "deepseek-v3", "kimi-k2"] {
1299            let out = apply_model_directives("SYS", id);
1300            assert!(
1301                out.contains("用户可见的输出请用中文"),
1302                "model {id} missing CN lock"
1303            );
1304            assert!(
1305                !out.contains("THINKING 简洁纪律"),
1306                "model {id} got MiniMax directive erroneously"
1307            );
1308        }
1309    }
1310
1311    #[test]
1312    fn apply_model_directives_minimax_gets_both_blocks() {
1313        let out = apply_model_directives("SYS", "minimax-m2");
1314        assert!(out.contains("用户可见的输出请用中文"));
1315        assert!(out.contains("THINKING 简洁纪律"));
1316        // MiniMax 指令必须在 CN lock 之后(recency: 更尾部 = 更高优先级)
1317        let cn_idx = out.find("用户可见的输出").unwrap();
1318        let thinking_idx = out.find("THINKING").unwrap();
1319        assert!(thinking_idx > cn_idx);
1320    }
1321
1322    #[test]
1323    fn apply_model_directives_preserves_system_prompt_prefix() {
1324        // 追加模式:原 prompt 必须 100% 保留在开头,cache key 不破坏。
1325        let sys = "You are AtomCode. Working directory: /tmp\n";
1326        let out = apply_model_directives(sys, "minimax-m2");
1327        assert!(out.starts_with(sys));
1328    }
1329
1330    #[test]
1331    fn test_budgeted_empty_conversation() {
1332        let conv = Conversation::new();
1333        let (msgs, _stats) = build_messages(&conv, "system prompt", 8000, "");
1334        assert_eq!(msgs.len(), 1);
1335        assert!(matches!(msgs[0].role, Role::System));
1336    }
1337
1338    #[test]
1339    fn test_budgeted_includes_recent_messages() {
1340        let mut conv = Conversation::new();
1341        conv.add_user_message("hello");
1342        conv.messages
1343            .push(Message::new(Role::Assistant, "hi there"));
1344        conv.add_user_message("do something");
1345
1346        let (msgs, _stats) = build_messages(&conv, "sys", 8000, "");
1347        assert_eq!(msgs.len(), 4); // system + 3 messages
1348        assert!(matches!(msgs[0].role, Role::System));
1349    }
1350
1351    #[test]
1352    fn test_budgeted_sends_all_when_under_80pct() {
1353        use crate::tool::{ToolCall, ToolResult};
1354        let mut conv = Conversation::new();
1355
1356        // Create 2 turns with small tool results — should all fit
1357        for turn in 0..2 {
1358            conv.add_user_message(&format!("task {}", turn));
1359            let call = ToolCall {
1360                id: format!("call_{}", turn),
1361                name: "read_file".to_string(),
1362                arguments: format!(r#"{{"file_path":"/tmp/file_{}.rs"}}"#, turn),
1363            };
1364            conv.add_assistant_tool_calls(None, vec![call], None);
1365            conv.add_tool_result(ToolResult {
1366                call_id: format!("call_{}", turn),
1367                output: "short result".to_string(),
1368                success: true,
1369            });
1370        }
1371        conv.add_user_message("now what?");
1372
1373        // Large budget — everything fits
1374        let (msgs, stats) = build_messages(&conv, "sys", 100000, "");
1375        // system + 7 messages (2 turns * 3 msgs each + final user)
1376        assert_eq!(msgs.len(), 8);
1377        assert!(matches!(msgs[0].role, Role::System));
1378        assert_eq!(msgs.last().unwrap().text(), Some("now what?"));
1379        assert_eq!(stats.dropped_tokens, 0, "Nothing should be dropped");
1380    }
1381
1382    #[test]
1383    fn test_budgeted_drops_oldest_turns_when_over_budget() {
1384        use crate::tool::{ToolCall, ToolResult};
1385        let mut conv = Conversation::new();
1386
1387        // Create 5 turns with large tool results (2000 chars each ≈ 500 tokens)
1388        // Total ≈ 5 * 4 * 500 = 10000 tokens + overhead, budget 80% of 4000 = 3200
1389        for turn in 0..5 {
1390            conv.add_user_message(&format!("task {}", turn));
1391            for i in 0..4 {
1392                let idx = turn * 4 + i;
1393                let call = ToolCall {
1394                    id: format!("call_{}", idx),
1395                    name: "read_file".to_string(),
1396                    arguments: format!(r#"{{"file_path":"/tmp/file_{}.rs"}}"#, idx),
1397                };
1398                conv.add_assistant_tool_calls(None, vec![call], None);
1399                conv.add_tool_result(ToolResult {
1400                    call_id: format!("call_{}", idx),
1401                    output: "x".repeat(2000),
1402                    success: true,
1403                });
1404            }
1405        }
1406        conv.add_user_message("now what?");
1407
1408        let (msgs, stats) = build_messages(&conv, "sys", 4000, "");
1409        // Oldest turns should be dropped
1410        assert!(
1411            stats.dropped_tokens > 0,
1412            "Some turns should have been dropped"
1413        );
1414        // Most recent user message must survive
1415        assert_eq!(msgs.last().unwrap().text(), Some("now what?"));
1416        // System prompt must be first
1417        assert!(matches!(msgs[0].role, Role::System));
1418    }
1419
1420    #[test]
1421    fn test_budgeted_always_keeps_latest_turn() {
1422        use crate::tool::{ToolCall, ToolResult};
1423        let mut conv = Conversation::new();
1424
1425        // Create a single turn with very large output
1426        conv.add_user_message("big task");
1427        let call = ToolCall {
1428            id: "c0".to_string(),
1429            name: "bash".to_string(),
1430            arguments: "{}".to_string(),
1431        };
1432        conv.add_assistant_tool_calls(Some("running..."), vec![call], None);
1433        conv.add_tool_result(ToolResult {
1434            call_id: "c0".to_string(),
1435            output: "z".repeat(50000),
1436            success: true,
1437        });
1438
1439        // Very small budget — system prompt is always kept
1440        let (msgs, _stats) = build_messages(&conv, "sys", 1000, "");
1441        assert!(!msgs.is_empty(), "Must at least have system prompt");
1442        assert!(matches!(msgs[0].role, Role::System));
1443    }
1444
1445    #[test]
1446    fn test_budgeted_never_returns_system_only_when_messages_exist() {
1447        // Regression for 2026-04-13 bug: a single oversized tool_result caused
1448        // `survived_start = self.messages.len()` → no non-system messages in result
1449        // → sent=0 → agent blind.
1450        //
1451        // Invariant: if self.messages is non-empty, to_provider_messages_budgeted
1452        // must always include at least one non-system message.
1453        use crate::tool::{ToolCall, ToolResult};
1454        let mut conv = Conversation::new();
1455
1456        // 5 normal turns
1457        for i in 0..5 {
1458            conv.add_user_message(&format!("task {}", i));
1459            let call = ToolCall {
1460                id: format!("c{}", i),
1461                name: "bash".to_string(),
1462                arguments: "{}".to_string(),
1463            };
1464            conv.add_assistant_tool_calls(Some("ok"), vec![call], None);
1465            conv.add_tool_result(ToolResult {
1466                call_id: format!("c{}", i),
1467                output: "x".repeat(500),
1468                success: true,
1469            });
1470        }
1471
1472        // 6th turn with a pathologically oversized output (50K tokens worth of 'z')
1473        conv.add_user_message("find everything");
1474        let call = ToolCall {
1475            id: "c5".to_string(),
1476            name: "bash".to_string(),
1477            arguments: "{}".to_string(),
1478        };
1479        conv.add_assistant_tool_calls(Some("finding..."), vec![call], None);
1480        conv.add_tool_result(ToolResult {
1481            call_id: "c5".to_string(),
1482            output: "z".repeat(200_000), // huge
1483            success: true,
1484        });
1485
1486        // Budget too small to fit the huge output — compaction MUST still leave
1487        // at least one non-system message.
1488        let (msgs, _stats) = build_messages(&conv, "sys", 10_000, "");
1489        let non_system = msgs
1490            .iter()
1491            .filter(|m| !matches!(m.role, Role::System))
1492            .count();
1493        assert!(
1494            non_system > 0,
1495            "never return system-only result when messages exist — got msgs.len()={}",
1496            msgs.len()
1497        );
1498    }
1499
1500    #[test]
1501    fn test_budgeted_emergency_restores_last_user_when_all_else_dropped() {
1502        // Even if every turn gets dropped by some path, the emergency fallback at
1503        // the bottom of to_provider_messages_budgeted should graft back the last
1504        // user message rather than return system-only.
1505        let mut conv = Conversation::new();
1506        conv.add_user_message("original question");
1507        // Add 20 turns of huge assistant+tool content to force aggressive drop
1508        for i in 0..20 {
1509            use crate::tool::{ToolCall, ToolResult};
1510            conv.add_assistant_tool_calls(
1511                Some(&format!("reasoning {}", i)),
1512                vec![ToolCall {
1513                    id: format!("c{}", i),
1514                    name: "bash".to_string(),
1515                    arguments: "{}".to_string(),
1516                }],
1517                None,
1518            );
1519            conv.add_tool_result(ToolResult {
1520                call_id: format!("c{}", i),
1521                output: "y".repeat(10_000),
1522                success: true,
1523            });
1524        }
1525
1526        let (msgs, _stats) = build_messages(&conv, "sys", 5_000, "");
1527        let has_user = msgs.iter().any(|m| matches!(m.role, Role::User));
1528        assert!(
1529            has_user,
1530            "last user message must always survive, got {} msgs",
1531            msgs.len()
1532        );
1533    }
1534
1535    #[test]
1536    fn microcompact_uses_generic_format_with_tool_label_from_call_id() {
1537        // microcompact emits a single generic format:
1538        // `[<tool> <ok|FAILED>: N lines, first: <line>]`. Tool label comes
1539        // from the model's own `tool_calls.name`, not a `match` on
1540        // hardcoded strings — passes the project's tech-stack-neutrality
1541        // rule. Bash, grep, glob, and unknown-tool calls all flow
1542        // through the same template.
1543        //
1544        // read_file is exempted (5-7 atomgr datalog showed weak models
1545        // build "伪自信" from `first: 205| pub async fn dynamic_connect(`
1546        // and edit blind). Skip behavior is covered by
1547        // `microcompact_skips_read_file_to_preserve_long_session_context`.
1548        //
1549        // Calls `microcompact` directly so the test isolates stub format
1550        // from the rendering pipeline's drop / compression logic.
1551        use crate::tool::{ToolCall, ToolResult};
1552
1553        let mut msgs: Vec<Message> = vec![Message::new(Role::System, "sys")];
1554        msgs.push(Message::new(Role::User, "explore"));
1555
1556        let kinds = [
1557            ("c_bok", "bash", true),
1558            ("c_bfail", "bash", false),
1559            ("c_grep", "grep", true),
1560            ("c_mcp", "mcp_remote.exec", true),
1561        ];
1562        for (id, name, success) in &kinds {
1563            msgs.push(Message {
1564                role: Role::Assistant,
1565                content: MessageContent::AssistantWithToolCalls {
1566                    text: None,
1567                    tool_calls: vec![ToolCall {
1568                        id: (*id).to_string(),
1569                        name: (*name).to_string(),
1570                        arguments: "{}".into(),
1571                    }],
1572                    reasoning_content: None,
1573                    thinking_blocks: Vec::new(),
1574                },
1575            });
1576            msgs.push(Message {
1577                role: Role::Tool,
1578                content: MessageContent::ToolResult(ToolResult {
1579                    call_id: (*id).to_string(),
1580                    output: format!("first line for {}\n{}", name, "x".repeat(4_000)),
1581                    success: *success,
1582                }),
1583            });
1584        }
1585
1586        // Anchor the next turn so the prior tool results above are
1587        // eligible for compaction (turn-aware boundary).
1588        msgs.push(Message::new(Role::User, "now what"));
1589
1590        let n = msgs.len();
1591        // Low threshold so microcompact fires deterministically.
1592        microcompact(&mut msgs, n, 1_000);
1593
1594        let find_by_id = |id: &str| -> Option<String> {
1595            msgs.iter().find_map(|m| {
1596                if let MessageContent::ToolResult(r) = &m.content {
1597                    if r.call_id == id {
1598                        return Some(r.output.clone());
1599                    }
1600                }
1601                None
1602            })
1603        };
1604
1605        // bash (success) → compacted with `bash ok: ...` label.
1606        let bok = find_by_id("c_bok").expect("c_bok must survive");
1607        assert!(
1608            bok.starts_with("[bash ok: ") && bok.contains("first: "),
1609            "bash success format mismatch: {}",
1610            bok
1611        );
1612
1613        // bash (failure) → `bash FAILED: ...` label preserves the
1614        // success/fail axis the model needs for retry reasoning.
1615        let bfail = find_by_id("c_bfail").expect("c_bfail must survive");
1616        assert!(
1617            bfail.starts_with("[bash FAILED: ") && bfail.contains("first: "),
1618            "bash failure format mismatch: {}",
1619            bfail
1620        );
1621
1622        // grep and an unknown tool name use the same template — no
1623        // special-case match arms inside microcompact (read_file is
1624        // exempted; see `microcompact_skips_read_file_*`).
1625        for (id, expected_label) in [
1626            ("c_grep", "grep"),
1627            ("c_mcp", "mcp_remote.exec"),
1628        ] {
1629            let body = find_by_id(id).unwrap_or_else(|| panic!("{} must survive", id));
1630            assert!(
1631                body.starts_with(&format!("[{} ok: ", expected_label)),
1632                "{} expected generic `[{} ok: ...]` format, got: {}",
1633                id,
1634                expected_label,
1635                body
1636            );
1637            assert!(
1638                body.contains("first: first line for"),
1639                "{} should preserve first-line snippet, got: {}",
1640                id,
1641                body
1642            );
1643        }
1644    }
1645
1646    /// 5-7 atomgr datalog (build 942b615): 1704/1704 bash stubs surfaced
1647    /// `first: [elapsed: Xs, exit: N]` — framework metadata, zero signal.
1648    /// Stub now skips that line and shows line 2 (the real output / real
1649    /// error). Failed bash retry decisions go from "exit 101 of unknown
1650    /// origin" to "actual error: ...".
1651    #[test]
1652    fn build_compact_stub_skips_bash_elapsed_metadata() {
1653        let bash_failure = "[elapsed: 1.9s, exit: 101]\nerror: cannot find type `Foo` in this scope";
1654        let stub = build_compact_stub("bash", bash_failure, false);
1655        assert!(
1656            stub.contains("error: cannot find type"),
1657            "bash stub must surface the actual error, not the elapsed metadata: {}",
1658            stub
1659        );
1660        assert!(
1661            !stub.contains("first: [elapsed:"),
1662            "bash stub first-line must skip the elapsed metadata: {}",
1663            stub
1664        );
1665    }
1666
1667    /// Single-line bash (`wc -l`, `echo $?`, etc.) has no line 2 to fall
1668    /// through to. Stub must use whatever line 1 is rather than blanking.
1669    #[test]
1670    fn build_compact_stub_falls_back_to_line1_when_only_one_line() {
1671        let one_liner = "42";
1672        let stub = build_compact_stub("bash", one_liner, true);
1673        assert!(stub.contains("first: 42"), "got: {}", stub);
1674    }
1675
1676    /// `[elapsed:` skip is bash-only by virtue of the prefix being unique
1677    /// to our bash tool. grep / edit_file / web_fetch outputs do NOT
1678    /// start with `[elapsed:` so they hit the normal line-1 path. This
1679    /// test pins that the skip doesn't accidentally eat the first useful
1680    /// line of those tools.
1681    #[test]
1682    fn build_compact_stub_unaffected_for_non_bash_tools() {
1683        let grep = "src/foo.rs:42:    fn bar() {}\nsrc/baz.rs:10:    fn baz()";
1684        let stub = build_compact_stub("grep", grep, true);
1685        assert!(
1686            stub.contains("first: src/foo.rs:42:"),
1687            "grep stub must keep line 1 intact: {}",
1688            stub
1689        );
1690
1691        let edit = "Edited /path/to/file.rs (-3 +5 lines).";
1692        let stub = build_compact_stub("edit_file", edit, true);
1693        assert!(stub.contains("first: Edited /path"), "got: {}", stub);
1694    }
1695
1696    /// 5-7 atomgr datalog (atomgr-2d99b47d/2026-05-07_00-28-34): T22-T29
1697    /// reveal weak models develop "伪自信" when read_file is stubbed —
1698    /// `[read_file ok: 115 lines, first: 205| pub async fn dynamic_connect(]`
1699    /// gives just enough surface (line number + function name) for the
1700    /// model to think it remembers the body, then it edits blind. Result:
1701    /// 6 turns of patch-and-repatch the same file. Keeping read_file
1702    /// FULL preserves attention on the actual code; D3 FileStore handles
1703    /// the disk-side cost of re-reads transparently.
1704    #[test]
1705    fn microcompact_skips_read_file_to_preserve_long_session_context() {
1706        use crate::tool::{ToolCall, ToolResult};
1707        let mut conv = Conversation::new();
1708        conv.add_user_message("explore");
1709
1710        // One read_file call with a large body — would normally be
1711        // compacted under the generic path.
1712        conv.add_assistant_tool_calls(
1713            None,
1714            vec![ToolCall {
1715                id: "c_read".into(),
1716                name: "read_file".into(),
1717                arguments: "{}".into(),
1718            }],
1719            None,
1720        );
1721        let read_body = format!("first line of read\n{}", "x".repeat(5_000));
1722        conv.add_tool_result(ToolResult {
1723            call_id: "c_read".into(),
1724            output: read_body.clone(),
1725            success: true,
1726        });
1727
1728        // Pad with bash so total_chars crosses microcompact's
1729        // threshold. Use a small budget (8K tokens → 22_400 char
1730        // threshold) so the 30 padding bashes + the read_file body
1731        // (~125K chars total) reliably triggers microcompact.
1732        for i in 0..30 {
1733            let id = format!("c_pad{}", i);
1734            conv.add_assistant_tool_calls(
1735                None,
1736                vec![ToolCall {
1737                    id: id.clone(),
1738                    name: "bash".into(),
1739                    arguments: "{}".into(),
1740                }],
1741                None,
1742            );
1743            conv.add_tool_result(ToolResult {
1744                call_id: id,
1745                output: format!("[elapsed: 0.0s, exit: 0]\n{}", "x".repeat(4_000)),
1746                success: true,
1747            });
1748        }
1749        conv.add_user_message("now what");
1750
1751        // 40K budget → 112K char threshold. Payload (read body 5K +
1752        // 30 × 4K padding ≈ 125K chars / ~31K tokens) crosses
1753        // threshold but fits budget without triggering build_messages
1754        // pre-microcompact drops.
1755        let (msgs, _) = build_messages(&conv, "sys", 40_000, "");
1756
1757        // Locate the read_file ToolResult in the rendered messages.
1758        let body = msgs
1759            .iter()
1760            .find_map(|m| {
1761                if let MessageContent::ToolResult(r) = &m.content {
1762                    if r.call_id == "c_read" {
1763                        return Some(r.output.clone());
1764                    }
1765                }
1766                None
1767            })
1768            .expect("c_read must survive in rendered messages");
1769
1770        // Read body must remain FULL — never replaced with the generic
1771        // `[read_file ok: ... first: ...]` stub.
1772        assert!(
1773            !body.starts_with("[read_file "),
1774            "read_file got compacted (伪自信 risk): {}",
1775            &body[..body.len().min(200)]
1776        );
1777        assert_eq!(
1778            body.len(),
1779            read_body.len(),
1780            "read_file body length must equal original (uncompacted)"
1781        );
1782        assert!(
1783            body.contains("first line of read"),
1784            "first line lost: {}",
1785            &body[..body.len().min(200)]
1786        );
1787
1788        // Sanity: bash padding ToolResults DID get compacted — confirms
1789        // the threshold actually triggered, the test isn't passing
1790        // because microcompact was a no-op.
1791        let any_bash_compacted = msgs.iter().any(|m| {
1792            if let MessageContent::ToolResult(r) = &m.content {
1793                r.output.starts_with("[bash ok: ")
1794            } else {
1795                false
1796            }
1797        });
1798        assert!(
1799            any_bash_compacted,
1800            "bash padding should have been compacted; if not, the \
1801             threshold isn't actually triggering and read_file passing \
1802             through is a false positive"
1803        );
1804    }
1805
1806    /// 5-8 atomgr session bug — microcompact was stubbing the CURRENT
1807    /// turn's earlier tool results, leading the model to echo
1808    /// `HELLO_TEST_12345` self-checks because mid-turn it could no
1809    /// longer see what it had just done. The fix: anchor on the last
1810    /// `Role::User` message in the rendered Vec — everything from
1811    /// that message onward is the active turn and stays full-fidelity.
1812    /// Only strictly older content is eligible for stubbing.
1813    ///
1814    /// Calls `microcompact` directly (not through `build_messages`) so
1815    /// the test isolates the boundary logic from the rendering
1816    /// pipeline's drop / token-budget handling.
1817    #[test]
1818    fn microcompact_preserves_current_turn_in_full() {
1819        use crate::tool::{ToolCall, ToolResult};
1820
1821        // Build a Vec<Message> manually with a clear turn boundary:
1822        // System | User#1 | (Asst tool_calls + Tool results)×15 | User#2 | (Asst+Tool)×10
1823        // Last User is User#2 → current turn is everything after it.
1824        let mut msgs: Vec<Message> = vec![Message::new(Role::System, "sys")];
1825
1826        // ── PRIOR turn ────────────────────────────────────────
1827        msgs.push(Message::new(Role::User, "first task"));
1828        for i in 0..15 {
1829            let id = format!("prior_{}", i);
1830            msgs.push(Message {
1831                role: Role::Assistant,
1832                content: MessageContent::AssistantWithToolCalls {
1833                    text: None,
1834                    tool_calls: vec![ToolCall {
1835                        id: id.clone(),
1836                        name: "bash".into(),
1837                        arguments: "{}".into(),
1838                    }],
1839                    reasoning_content: None,
1840                    thinking_blocks: Vec::new(),
1841                },
1842            });
1843            msgs.push(Message {
1844                role: Role::Tool,
1845                content: MessageContent::ToolResult(ToolResult {
1846                    call_id: id,
1847                    output: format!("[elapsed: 0.0s, exit: 0]\n{}", "p".repeat(4_000)),
1848                    success: true,
1849                }),
1850            });
1851        }
1852
1853        // ── CURRENT turn (must stay full) ──────────────────────
1854        msgs.push(Message::new(Role::User, "second task"));
1855        for i in 0..10 {
1856            let id = format!("current_{}", i);
1857            msgs.push(Message {
1858                role: Role::Assistant,
1859                content: MessageContent::AssistantWithToolCalls {
1860                    text: None,
1861                    tool_calls: vec![ToolCall {
1862                        id: id.clone(),
1863                        name: "bash".into(),
1864                        arguments: "{}".into(),
1865                    }],
1866                    reasoning_content: None,
1867                    thinking_blocks: Vec::new(),
1868                },
1869            });
1870            msgs.push(Message {
1871                role: Role::Tool,
1872                content: MessageContent::ToolResult(ToolResult {
1873                    call_id: id,
1874                    output: format!("[elapsed: 0.0s, exit: 0]\n{}", "c".repeat(4_000)),
1875                    success: true,
1876                }),
1877            });
1878        }
1879
1880        let total_chars: usize = msgs
1881            .iter()
1882            .map(|m| match &m.content {
1883                MessageContent::ToolResult(r) => r.output.len(),
1884                MessageContent::Text(t) => t.len(),
1885                _ => 100,
1886            })
1887            .sum();
1888        // Set threshold low so microcompact fires deterministically.
1889        let n = msgs.len();
1890        microcompact(&mut msgs, n, 1_000);
1891
1892        let collect = |prefix: &str| -> Vec<(String, String)> {
1893            msgs.iter()
1894                .filter_map(|m| match &m.content {
1895                    MessageContent::ToolResult(r) if r.call_id.starts_with(prefix) => {
1896                        Some((r.call_id.clone(), r.output.clone()))
1897                    }
1898                    _ => None,
1899                })
1900                .collect()
1901        };
1902
1903        // PRIOR turn: every tool result must be stubbed.
1904        let prior = collect("prior_");
1905        assert_eq!(prior.len(), 15, "expected 15 prior tool results");
1906        for (cid, body) in &prior {
1907            assert!(
1908                body.starts_with("[bash "),
1909                "prior turn `{}` must be stubbed; got body of len={} starting {:?}\n\
1910                 (total_chars before microcompact was {})",
1911                cid,
1912                body.len(),
1913                &body[..body.len().min(80)],
1914                total_chars
1915            );
1916            assert!(
1917                body.len() < 200,
1918                "prior stub should be < 200 bytes, got {}",
1919                body.len()
1920            );
1921        }
1922
1923        // CURRENT turn: every tool result must remain FULL.
1924        let current = collect("current_");
1925        assert_eq!(current.len(), 10, "expected 10 current tool results");
1926        for (cid, body) in &current {
1927            assert!(
1928                !body.starts_with("[bash "),
1929                "current turn `{}` must NOT be stubbed (turn-aware preservation): \
1930                 got {:?}",
1931                cid,
1932                &body[..body.len().min(80)]
1933            );
1934            assert!(
1935                body.len() > 4_000,
1936                "current tool result must keep its full payload (>4K chars), \
1937                 got {} bytes",
1938                body.len()
1939            );
1940        }
1941    }
1942
1943    /// Running compaction twice MUST be idempotent — the upgraded
1944    /// microcompact's `len <= MIN_COLLAPSE_SIZE` guard ensures that
1945    /// once a stub is in place, the next pass sees a < 500-char
1946    /// result and skips it rather than re-stubbing into a less-useful
1947    /// "[older tool result collapsed (60 chars dropped)]" form
1948    /// (the bug pattern from before this unification).
1949    #[test]
1950    fn microcompact_is_idempotent_no_double_stub() {
1951        use crate::tool::{ToolCall, ToolResult};
1952        let mut conv = Conversation::new();
1953        conv.add_user_message("trigger");
1954        for i in 0..30 {
1955            let id = format!("c{}", i);
1956            conv.add_assistant_tool_calls(
1957                None,
1958                vec![ToolCall {
1959                    id: id.clone(),
1960                    name: "bash".into(),
1961                    arguments: "{}".into(),
1962                }],
1963                None,
1964            );
1965            conv.add_tool_result(ToolResult {
1966                call_id: id,
1967                output: format!("first line\n{}", "x".repeat(4_000)),
1968                success: true,
1969            });
1970        }
1971        conv.add_user_message("done");
1972
1973        let (msgs1, _) = build_messages(&conv, "sys", 131_072, "");
1974        let (msgs2, _) = build_messages(&conv, "sys", 131_072, "");
1975
1976        // Compaction is pure over (conv, threshold) — two passes must
1977        // produce byte-identical compacted bodies, no degradation.
1978        let collect_tr = |m: &[Message]| -> Vec<String> {
1979            m.iter()
1980                .filter_map(|m| {
1981                    if let MessageContent::ToolResult(r) = &m.content {
1982                        Some(r.output.clone())
1983                    } else {
1984                        None
1985                    }
1986                })
1987                .collect()
1988        };
1989        assert_eq!(collect_tr(&msgs1), collect_tr(&msgs2));
1990        // And concretely: every stub stays in `[bash ok: ...]` form,
1991        // never devolves into `[older tool result collapsed ...]`.
1992        for body in collect_tr(&msgs1) {
1993            if body.starts_with("[bash") {
1994                assert!(
1995                    body.contains("first: "),
1996                    "stub lost its first-line slot: {}",
1997                    body
1998                );
1999            }
2000        }
2001    }
2002
2003    #[test]
2004    fn test_cold_zone_compression() {
2005        use crate::tool::{ToolCall, ToolResult};
2006        let mut conv = Conversation::new();
2007
2008        // Create 8 turns
2009        for turn in 0..8 {
2010            conv.add_user_message(&format!("task {}", turn));
2011            let call = ToolCall {
2012                id: format!("c{}", turn),
2013                name: "bash".to_string(),
2014                arguments: "{}".to_string(),
2015            };
2016            conv.add_assistant_tool_calls(Some("ok"), vec![call], None);
2017            conv.add_tool_result(ToolResult {
2018                call_id: format!("c{}", turn),
2019                output: "x".repeat(100),
2020                success: true,
2021            });
2022        }
2023
2024        // Apply compression: remove first 9 messages (3 turns × 3 msgs each)
2025        conv.apply_compression(9, "User ran tasks 0, 1, 2 with bash.".to_string());
2026
2027        // Cold zone should have 1 entry
2028        assert_eq!(conv.cold_summaries.len(), 1);
2029        // Messages should be reduced (first 3 turns removed)
2030        assert_eq!(conv.turn_tracker.turns.len(), 5); // 8 - 3
2031
2032        // Budget check: cold zone should appear in output
2033        let (msgs, _stats) = build_messages(&conv, "sys", 100000, "");
2034        let has_cold = msgs.iter().any(|m| {
2035            m.text()
2036                .map_or(false, |t| t.contains("Earlier conversation history"))
2037        });
2038        assert!(has_cold, "Cold zone summary should appear in output");
2039    }
2040
2041    /// Regression: MiniMax-M2.7 returns empty content + 400 (`2013 invalid
2042    /// chat setting`) when the request contains adjacent `system` messages.
2043    /// Post-compression layout used to ship `system(orig) + system(cold-zone)`
2044    /// straight to the wire — `clean_message_pipeline` now coalesces them.
2045    #[test]
2046    fn test_no_consecutive_system_messages_after_compression() {
2047        use crate::tool::{ToolCall, ToolResult};
2048        let mut conv = Conversation::new();
2049
2050        for turn in 0..8 {
2051            conv.add_user_message(&format!("task {}", turn));
2052            let call = ToolCall {
2053                id: format!("c{}", turn),
2054                name: "bash".to_string(),
2055                arguments: "{}".to_string(),
2056            };
2057            conv.add_assistant_tool_calls(Some("ok"), vec![call], None);
2058            conv.add_tool_result(ToolResult {
2059                call_id: format!("c{}", turn),
2060                output: "x".repeat(100),
2061                success: true,
2062            });
2063        }
2064
2065        conv.apply_compression(9, "User ran tasks 0, 1, 2 with bash.".to_string());
2066        assert_eq!(conv.cold_summaries.len(), 1);
2067
2068        let (msgs, _stats) = build_messages(&conv, "you are atomcode", 100_000, "");
2069
2070        for pair in msgs.windows(2) {
2071            assert!(
2072                !(pair[0].role == Role::System && pair[1].role == Role::System),
2073                "consecutive system messages found at the wire boundary"
2074            );
2075        }
2076
2077        // The merged system message must still carry both the original
2078        // prompt and the cold-zone summary so the model retains context.
2079        let merged = msgs
2080            .iter()
2081            .find(|m| matches!(m.role, Role::System))
2082            .and_then(|m| m.text())
2083            .expect("at least one system message");
2084        assert!(
2085            merged.contains("you are atomcode"),
2086            "merged system must keep original prompt"
2087        );
2088        assert!(
2089            merged.contains("Earlier conversation history"),
2090            "merged system must keep cold-zone summary"
2091        );
2092    }
2093
2094    #[test]
2095    fn test_budgeted_drops_when_no_summary_and_over_budget() {
2096        use crate::tool::{ToolCall, ToolResult};
2097        let mut conv = Conversation::new();
2098
2099        // Create 3 turns with large content (no summaries)
2100        for turn in 0..3 {
2101            conv.add_user_message(&format!("task {}", turn));
2102            let call = ToolCall {
2103                id: format!("c{}", turn),
2104                name: "bash".to_string(),
2105                arguments: "{}".to_string(),
2106            };
2107            conv.add_assistant_tool_calls(Some("ok"), vec![call], None);
2108            conv.add_tool_result(ToolResult {
2109                call_id: format!("c{}", turn),
2110                output: "x".repeat(4000),
2111                success: true,
2112            });
2113        }
2114
2115        // Small budget — force dropping
2116        let (msgs, stats) = build_messages(&conv, "sys", 2000, "");
2117        assert!(
2118            stats.dropped_tokens > 0,
2119            "Should drop turns when over budget"
2120        );
2121        assert!(matches!(msgs[0].role, Role::System));
2122    }
2123
2124    /// Bug b regression: after compression has run once, `cold_summaries`
2125    /// is non-empty, which disables the 80% drop cap above (legacy
2126    /// pathology guard). Microcompact still skips the last
2127    /// `OTHER_KEEP=20` messages. That leaves the recent window with no
2128    /// byte enforcement, so many mid-sized ToolResults can blow budget.
2129    /// The final post-cleanup byte ceiling must condense oldest
2130    /// ToolResults in `result` until total estimated tokens fit under
2131    /// 80% of the budget.
2132    #[test]
2133    fn test_final_byte_ceiling_condenses_oversized_recent_toolresults() {
2134        use crate::tool::{ToolCall, ToolResult};
2135        let mut conv = Conversation::new();
2136        // Mark that a prior compression already ran — cold_summaries
2137        // non-empty is the precondition that disables the earlier cap.
2138        conv.cold_summaries.push("earlier task summary".to_string());
2139
2140        // 20 turns, each with a 6K-char bash result. microcompact's
2141        // OTHER_KEEP=20 leaves the trailing 20 messages (≈ last 6-7 turns)
2142        // untouched — those alone sum to > 36K chars ≈ 9K+ est tokens,
2143        // which exceeds the 80% ceiling of the chosen budget.
2144        for turn in 0..20 {
2145            conv.add_user_message(&format!("task {}", turn));
2146            conv.add_assistant_tool_calls(
2147                Some("ok"),
2148                vec![ToolCall {
2149                    id: format!("c{}", turn),
2150                    name: "bash".to_string(),
2151                    arguments: "{}".to_string(),
2152                }],
2153                None,
2154            );
2155            conv.add_tool_result(ToolResult {
2156                call_id: format!("c{}", turn),
2157                output: "x".repeat(6000),
2158                success: true,
2159            });
2160        }
2161
2162        // token_budget = 10K tokens → ceiling = 8K tokens.
2163        let (msgs, _stats) = build_messages(&conv, "sys", 10_000, "");
2164        let total_tokens: usize = msgs.iter().map(|m| m.estimate_tokens()).sum();
2165        assert!(
2166            total_tokens <= 8_000,
2167            "Total estimated tokens {} exceeded 80% ceiling 8000 — \
2168             final byte ceiling did not run",
2169            total_tokens,
2170        );
2171        // The newest turn's tool result must survive in full (not condensed).
2172        let newest_still_full = msgs
2173            .iter()
2174            .any(|m| m.text().map_or(false, |t| t.contains(&"x".repeat(100))));
2175        assert!(
2176            newest_still_full,
2177            "Newest turn's full-size tool result must be preserved",
2178        );
2179    }
2180
2181    #[test]
2182    fn test_budgeted_preserves_message_order() {
2183        let mut conv = Conversation::new();
2184        conv.add_user_message("first");
2185        conv.messages
2186            .push(Message::new(Role::Assistant, "response 1"));
2187        conv.add_user_message("second");
2188        conv.messages
2189            .push(Message::new(Role::Assistant, "response 2"));
2190        conv.add_user_message("third");
2191
2192        let (msgs, _stats) = build_messages(&conv, "sys", 100000, "");
2193        // system + 5 messages
2194        assert_eq!(msgs.len(), 6);
2195        assert_eq!(msgs[1].text(), Some("first"));
2196        assert_eq!(msgs[2].text(), Some("response 1"));
2197        assert_eq!(msgs[3].text(), Some("second"));
2198        assert_eq!(msgs[4].text(), Some("response 2"));
2199        assert_eq!(msgs[5].text(), Some("third"));
2200    }
2201
2202    #[test]
2203    fn test_sanitize_removes_orphan_tool_results() {
2204        use crate::tool::ToolResult;
2205        let mut msgs = vec![
2206            Message::new(Role::System, "sys"),
2207            // Orphan tool result (no matching AssistantWithToolCalls)
2208            Message {
2209                role: Role::Tool,
2210                content: MessageContent::ToolResult(ToolResult {
2211                    call_id: "orphan_1".to_string(),
2212                    output: "some output".to_string(),
2213                    success: true,
2214                }),
2215            },
2216            Message::new(Role::User, "hello"),
2217        ];
2218        sanitize_messages(&mut msgs);
2219        // Orphan should be removed, leaving System + User
2220        assert_eq!(msgs.len(), 2);
2221        assert!(matches!(msgs[0].role, Role::System));
2222        assert!(matches!(msgs[1].role, Role::User));
2223    }
2224
2225    #[test]
2226    fn test_sanitize_preserves_valid_pairs() {
2227        use crate::tool::{ToolCall, ToolResult};
2228        let mut msgs = vec![
2229            Message::new(Role::System, "sys"),
2230            Message::new(Role::User, "do it"),
2231            Message {
2232                role: Role::Assistant,
2233                content: MessageContent::AssistantWithToolCalls {
2234                    text: None,
2235                    tool_calls: vec![ToolCall {
2236                        id: "c1".to_string(),
2237                        name: "bash".to_string(),
2238                        arguments: "{}".to_string(),
2239                    }],
2240                    reasoning_content: None,
2241                    thinking_blocks: Vec::new(),
2242                },
2243            },
2244            Message {
2245                role: Role::Tool,
2246                content: MessageContent::ToolResult(ToolResult {
2247                    call_id: "c1".to_string(),
2248                    output: "ok".to_string(),
2249                    success: true,
2250                }),
2251            },
2252        ];
2253        sanitize_messages(&mut msgs);
2254        // All 4 messages should be preserved (valid pair)
2255        assert_eq!(msgs.len(), 4);
2256    }
2257
2258    /// Regression for DeepSeek `insufficient tool messages following
2259    /// tool_calls message` 400. An assistant emitted N=3 tool_calls but
2260    /// only 2 ToolResults arrived before a User text message — the third
2261    /// call_id never gets a tool message, and strict providers reject.
2262    /// Sanitize must drop the offending ATC + its partial results so the
2263    /// surviving prefix preserves the wire-level invariant.
2264    #[test]
2265    fn test_sanitize_drops_under_paired_atc_in_middle_of_history() {
2266        use crate::tool::{ToolCall, ToolResult};
2267        let mut msgs = vec![
2268            Message::new(Role::System, "sys"),
2269            Message::new(Role::User, "first"),
2270            Message {
2271                role: Role::Assistant,
2272                content: MessageContent::AssistantWithToolCalls {
2273                    text: None,
2274                    tool_calls: vec![
2275                        ToolCall {
2276                            id: "c1".into(),
2277                            name: "bash".into(),
2278                            arguments: "{}".into(),
2279                        },
2280                        ToolCall {
2281                            id: "c2".into(),
2282                            name: "bash".into(),
2283                            arguments: "{}".into(),
2284                        },
2285                        ToolCall {
2286                            id: "c3".into(),
2287                            name: "bash".into(),
2288                            arguments: "{}".into(),
2289                        },
2290                    ],
2291                    reasoning_content: None,
2292                    thinking_blocks: Vec::new(),
2293                },
2294            },
2295            Message {
2296                role: Role::Tool,
2297                content: MessageContent::ToolResult(ToolResult {
2298                    call_id: "c1".into(),
2299                    output: "ok1".into(),
2300                    success: true,
2301                }),
2302            },
2303            Message {
2304                role: Role::Tool,
2305                content: MessageContent::ToolResult(ToolResult {
2306                    call_id: "c2".into(),
2307                    output: "ok2".into(),
2308                    success: true,
2309                }),
2310            },
2311            // c3 result MISSING — the source of the 400.
2312            Message::new(Role::User, "second"),
2313        ];
2314        sanitize_messages(&mut msgs);
2315        // ATC + 2 partial results gone; surviving = sys + user1 + user2.
2316        assert_eq!(msgs.len(), 3, "got: {:?}", msgs);
2317        assert!(matches!(msgs[0].role, Role::System));
2318        assert_eq!(msgs[1].text(), Some("first"));
2319        assert_eq!(msgs[2].text(), Some("second"));
2320    }
2321
2322    /// Same situation as above, but the boundary is a *next* ATC instead
2323    /// of a Text message. The first (under-paired) ATC and its partial
2324    /// results must be dropped; the second (well-paired) ATC stays.
2325    #[test]
2326    fn test_sanitize_drops_under_paired_atc_when_followed_by_another_atc() {
2327        use crate::tool::{ToolCall, ToolResult};
2328        let mut msgs = vec![
2329            Message::new(Role::User, "go"),
2330            Message {
2331                role: Role::Assistant,
2332                content: MessageContent::AssistantWithToolCalls {
2333                    text: None,
2334                    tool_calls: vec![
2335                        ToolCall {
2336                            id: "a1".into(),
2337                            name: "bash".into(),
2338                            arguments: "{}".into(),
2339                        },
2340                        ToolCall {
2341                            id: "a2".into(),
2342                            name: "bash".into(),
2343                            arguments: "{}".into(),
2344                        },
2345                    ],
2346                    reasoning_content: None,
2347                    thinking_blocks: Vec::new(),
2348                },
2349            },
2350            Message {
2351                role: Role::Tool,
2352                content: MessageContent::ToolResult(ToolResult {
2353                    call_id: "a1".into(),
2354                    output: "ok".into(),
2355                    success: true,
2356                }),
2357            },
2358            // a2 missing.
2359            Message {
2360                role: Role::Assistant,
2361                content: MessageContent::AssistantWithToolCalls {
2362                    text: None,
2363                    tool_calls: vec![ToolCall {
2364                        id: "b1".into(),
2365                        name: "bash".into(),
2366                        arguments: "{}".into(),
2367                    }],
2368                    reasoning_content: None,
2369                    thinking_blocks: Vec::new(),
2370                },
2371            },
2372            Message {
2373                role: Role::Tool,
2374                content: MessageContent::ToolResult(ToolResult {
2375                    call_id: "b1".into(),
2376                    output: "ok".into(),
2377                    success: true,
2378                }),
2379            },
2380        ];
2381        sanitize_messages(&mut msgs);
2382        // First ATC + a1 result removed; second ATC + b1 result kept.
2383        assert_eq!(msgs.len(), 3, "got: {:?}", msgs);
2384        assert_eq!(msgs[0].text(), Some("go"));
2385        assert!(matches!(
2386            msgs[1].content,
2387            MessageContent::AssistantWithToolCalls { .. }
2388        ));
2389        assert!(matches!(msgs[2].content, MessageContent::ToolResult(_)));
2390    }
2391
2392    /// Trailing under-paired ATC (no Text / next ATC after it) is the
2393    /// case the original sanitize already handled. Pinning it here so
2394    /// the new mid-history logic doesn't accidentally regress the tail
2395    /// path.
2396    #[test]
2397    fn test_sanitize_drops_under_paired_atc_at_tail() {
2398        use crate::tool::{ToolCall, ToolResult};
2399        let mut msgs = vec![
2400            Message::new(Role::User, "go"),
2401            Message {
2402                role: Role::Assistant,
2403                content: MessageContent::AssistantWithToolCalls {
2404                    text: None,
2405                    tool_calls: vec![
2406                        ToolCall {
2407                            id: "c1".into(),
2408                            name: "bash".into(),
2409                            arguments: "{}".into(),
2410                        },
2411                        ToolCall {
2412                            id: "c2".into(),
2413                            name: "bash".into(),
2414                            arguments: "{}".into(),
2415                        },
2416                    ],
2417                    reasoning_content: None,
2418                    thinking_blocks: Vec::new(),
2419                },
2420            },
2421            Message {
2422                role: Role::Tool,
2423                content: MessageContent::ToolResult(ToolResult {
2424                    call_id: "c1".into(),
2425                    output: "ok".into(),
2426                    success: true,
2427                }),
2428            },
2429            // c2 missing, conversation ends here.
2430        ];
2431        sanitize_messages(&mut msgs);
2432        // ATC + 1 partial result both removed; just the user message remains.
2433        assert_eq!(msgs.len(), 1);
2434        assert_eq!(msgs[0].text(), Some("go"));
2435    }
2436
2437    /// Negative control: when every ATC's tool_calls are fully paired,
2438    /// nothing must be removed even though the new mid-history logic
2439    /// runs over Text boundaries. Catches "fix that throws away valid
2440    /// history" regressions.
2441    #[test]
2442    fn test_sanitize_preserves_fully_paired_history_through_text_boundaries() {
2443        use crate::tool::{ToolCall, ToolResult};
2444        let mut msgs = vec![
2445            Message::new(Role::User, "first"),
2446            Message {
2447                role: Role::Assistant,
2448                content: MessageContent::AssistantWithToolCalls {
2449                    text: None,
2450                    tool_calls: vec![
2451                        ToolCall {
2452                            id: "c1".into(),
2453                            name: "bash".into(),
2454                            arguments: "{}".into(),
2455                        },
2456                        ToolCall {
2457                            id: "c2".into(),
2458                            name: "bash".into(),
2459                            arguments: "{}".into(),
2460                        },
2461                    ],
2462                    reasoning_content: None,
2463                    thinking_blocks: Vec::new(),
2464                },
2465            },
2466            Message {
2467                role: Role::Tool,
2468                content: MessageContent::ToolResult(ToolResult {
2469                    call_id: "c1".into(),
2470                    output: "ok1".into(),
2471                    success: true,
2472                }),
2473            },
2474            Message {
2475                role: Role::Tool,
2476                content: MessageContent::ToolResult(ToolResult {
2477                    call_id: "c2".into(),
2478                    output: "ok2".into(),
2479                    success: true,
2480                }),
2481            },
2482            Message::new(Role::Assistant, "done"),
2483            Message::new(Role::User, "second"),
2484        ];
2485        let len_before = msgs.len();
2486        sanitize_messages(&mut msgs);
2487        assert_eq!(msgs.len(), len_before, "must not drop fully-paired history");
2488    }
2489
2490    /// End-to-end regression for the DeepSeek `insufficient tool
2491    /// messages following tool_calls message` 400 via the main
2492    /// turn-tracked `build_messages` path. The function-level
2493    /// `sanitize_messages` tests cover the unit; this test pins the
2494    /// wiring — sanitize_messages must run from `build_messages`, not
2495    /// just from the fallback. Constructs a Conversation with a
2496    /// turn-bearing under-paired ATC mid-history (ATC(3) + only 2
2497    /// tool_results, then a fresh user turn) and verifies the wire-
2498    /// level invariant holds in the output: every surviving ATC is
2499    /// followed by exactly N tool messages.
2500    #[test]
2501    fn build_messages_satisfies_atc_pairing_after_under_paired_mid_history() {
2502        use crate::tool::{ToolCall, ToolResult};
2503        let mut conv = Conversation::new();
2504        conv.add_user_message("first task");
2505        conv.add_assistant_tool_calls(
2506            None,
2507            vec![
2508                ToolCall { id: "c1".into(), name: "bash".into(), arguments: "{}".into() },
2509                ToolCall { id: "c2".into(), name: "bash".into(), arguments: "{}".into() },
2510                ToolCall { id: "c3".into(), name: "bash".into(), arguments: "{}".into() },
2511            ],
2512            None,
2513        );
2514        conv.add_tool_result(ToolResult {
2515            call_id: "c1".into(),
2516            output: "ok1".into(),
2517            success: true,
2518        });
2519        conv.add_tool_result(ToolResult {
2520            call_id: "c2".into(),
2521            output: "ok2".into(),
2522            success: true,
2523        });
2524        // c3's ToolResult never lands — repro for DeepSeek 400.
2525        conv.add_user_message("second task");
2526
2527        let (msgs, _stats) = build_messages(&conv, "sys", 8000, "");
2528
2529        // Walk the result and assert every ATC is followed by exactly
2530        // N consecutive tool-role messages — the wire invariant
2531        // OpenAI / DeepSeek / Claude / Gemini all require.
2532        let mut i = 0;
2533        while i < msgs.len() {
2534            if let MessageContent::AssistantWithToolCalls { tool_calls, .. } = &msgs[i].content {
2535                let n = tool_calls.len();
2536                for j in 0..n {
2537                    let next_idx = i + 1 + j;
2538                    assert!(
2539                        next_idx < msgs.len(),
2540                        "ATC at {} expects {} tool_results but messages end at {}: {:?}",
2541                        i,
2542                        n,
2543                        msgs.len(),
2544                        msgs.iter().map(|m| &m.role).collect::<Vec<_>>()
2545                    );
2546                    assert!(
2547                        matches!(
2548                            msgs[next_idx].content,
2549                            MessageContent::ToolResult(_) | MessageContent::ToolResultRef(_)
2550                        ),
2551                        "ATC at {} expects tool_result at {} but found {:?}",
2552                        i,
2553                        next_idx,
2554                        msgs[next_idx].role
2555                    );
2556                }
2557                i += 1 + n;
2558            } else {
2559                i += 1;
2560            }
2561        }
2562
2563        // Defensive: the orphan c3 must NOT appear as a tool_call_id
2564        // anywhere in the output (the under-paired ATC was dropped, so
2565        // c1 and c2 are gone with it).
2566        for m in &msgs {
2567            if let MessageContent::AssistantWithToolCalls { tool_calls, .. } = &m.content {
2568                for tc in tool_calls {
2569                    assert_ne!(tc.id, "c3", "dropped ATC's call_ids must not survive");
2570                    assert_ne!(tc.id, "c1");
2571                    assert_ne!(tc.id, "c2");
2572                }
2573            }
2574            if let MessageContent::ToolResult(r) = &m.content {
2575                assert_ne!(r.call_id, "c1", "partial tool_results must not survive");
2576                assert_ne!(r.call_id, "c2");
2577            }
2578        }
2579    }
2580
2581    /// Regression: `microcompact` gate tied to `threshold_chars`.
2582    ///
2583    /// Before: hardcoded `total_chars < 100_000` meant any ctx with a
2584    /// real budget under ~25K tokens (Ollama at 8K) could never hit
2585    /// the gate — per-model `tool_output_cap` optimization was silently
2586    /// neutralized. Now the threshold is passed in; small-window ctx
2587    /// passes a proportionally smaller value.
2588    #[test]
2589    fn microcompact_respects_threshold_parameter() {
2590        use crate::tool::{ToolCall, ToolResult};
2591
2592        // Build 25 turns each with a 1000-char bash result. Total
2593        // tool-result bytes ≈ 25_000 — well below the old 100K gate
2594        // but above a 10K gate.
2595        fn build_msgs() -> Vec<Message> {
2596            let mut msgs = vec![Message::new(Role::System, "sys")];
2597            for i in 0..25 {
2598                msgs.push(Message::new(Role::User, format!("task {}", i)));
2599                msgs.push(Message {
2600                    role: Role::Assistant,
2601                    content: MessageContent::AssistantWithToolCalls {
2602                        text: None,
2603                        tool_calls: vec![ToolCall {
2604                            id: format!("c{}", i),
2605                            name: "bash".to_string(),
2606                            arguments: "{}".to_string(),
2607                        }],
2608                        reasoning_content: None,
2609                        thinking_blocks: Vec::new(),
2610                    },
2611                });
2612                msgs.push(Message {
2613                    role: Role::Tool,
2614                    content: MessageContent::ToolResult(ToolResult {
2615                        call_id: format!("c{}", i),
2616                        output: "x".repeat(1000),
2617                        success: true,
2618                    }),
2619                });
2620            }
2621            msgs
2622        }
2623
2624        fn total_tool_bytes(msgs: &[Message]) -> usize {
2625            msgs.iter()
2626                .map(|m| match &m.content {
2627                    MessageContent::ToolResult(r) => r.output.len(),
2628                    _ => 0,
2629                })
2630                .sum()
2631        }
2632
2633        // High threshold (100K) → total 25K < 100K → no-op.
2634        let mut msgs_high = build_msgs();
2635        let before_high_len = msgs_high.len();
2636        let before_high_bytes = total_tool_bytes(&msgs_high);
2637        let msg_count_high = msgs_high.len();
2638        microcompact(&mut msgs_high, msg_count_high, 100_000);
2639        assert_eq!(
2640            msgs_high.len(),
2641            before_high_len,
2642            "high-threshold run must not drop msgs"
2643        );
2644        assert_eq!(
2645            total_tool_bytes(&msgs_high),
2646            before_high_bytes,
2647            "high threshold (25K < 100K) must leave tool_result bytes untouched"
2648        );
2649
2650        // Low threshold (10K) → total 25K >= 10K → microcompact kicks
2651        // in and shrinks older ToolResults.
2652        let mut msgs_low = build_msgs();
2653        let before_low_bytes = total_tool_bytes(&msgs_low);
2654        let msg_count_low = msgs_low.len();
2655        microcompact(&mut msgs_low, msg_count_low, 10_000);
2656        let after_low_bytes = total_tool_bytes(&msgs_low);
2657        assert!(
2658            after_low_bytes < before_low_bytes,
2659            "low threshold (25K > 10K) must shrink tool_result bytes, before={} after={}",
2660            before_low_bytes,
2661            after_low_bytes
2662        );
2663    }
2664
2665    /// Regression: `build_compression_content` must not cut between an
2666    /// `AssistantWithToolCalls` and its trailing `ToolResult`(s). Cutting
2667    /// mid-pair leaves orphan tool_results which `clean_message_pipeline`
2668    /// silently drops — the model loses edit confirmations. Anthropic API
2669    /// also rejects orphan tool_results.
2670    ///
2671    /// Construct a conversation where the naive cut index
2672    /// (`len - KEEP_MESSAGES`) lands on a ToolResult whose paired ATC
2673    /// sits in the drop range. Verify the returned cut index skips past
2674    /// ALL trailing ToolResults so no orphan survives.
2675    #[test]
2676    fn compression_cut_never_splits_tool_use_result_pair() {
2677        use crate::tool::{ToolCall, ToolResult};
2678
2679        // Helper: build a conv where messages[cut_idx] = ToolResult
2680        // with its ATC at messages[cut_idx - 1] (in drop range).
2681        let build_conv = || {
2682            let mut conv = Conversation::new();
2683
2684            // Pad with plain text turns until we reach the position where
2685            // the problematic tool pair will land.
2686            // KEEP_MESSAGES = 20. We want naive_cut = len - 20 to hit a
2687            // ToolResult. If we put ATC at msg[N-21] and ToolResult at
2688            // msg[N-20], then `conv.len() = N`, `naive_cut = N-20` →
2689            // lands on the ToolResult. ✓
2690            //
2691            // Put a text-only prefix of 20 messages, then ATC+ToolResult,
2692            // then another 20 text-only suffix → len = 42, naive_cut = 22
2693            // which SHOULD be the ToolResult we planted.
2694
2695            for i in 0..10 {
2696                conv.add_user_message(&format!("prefix task {}", i));
2697                conv.push_delta(&format!("prefix reply {}", i));
2698                conv.finalize_stream();
2699            }
2700            // After 10 text turns: 20 messages.
2701
2702            // Position 20 would be the next user msg. But we want ATC here
2703            // (msg[20]) and ToolResult at msg[21]. Problem: ATC must be
2704            // preceded by a User in a normal turn. Use a real tool round.
2705            conv.add_user_message("trigger tool"); // msg[20]
2706            conv.add_assistant_tool_calls(
2707                Some("r"),
2708                vec![ToolCall {
2709                    // msg[21]
2710                    id: "call_would_orphan".to_string(),
2711                    name: "bash".to_string(),
2712                    arguments: "{}".to_string(),
2713                }],
2714                None,
2715            );
2716            conv.add_tool_result(ToolResult {
2717                // msg[22]
2718                call_id: "call_would_orphan".to_string(),
2719                output: "tool output that must not be lost".to_string(),
2720                success: true,
2721            });
2722            // After the tool round: 23 messages.
2723
2724            // Suffix: pad with text turns so len - KEEP_MESSAGES = 22.
2725            // Need len = 42. Currently 23. Add 19 more → 42.
2726            // Adding in user/assistant pairs: 19/2 = 9 full + 1 extra.
2727            for i in 0..9 {
2728                conv.add_user_message(&format!("suffix task {}", i));
2729                conv.push_delta(&format!("suffix reply {}", i));
2730                conv.finalize_stream();
2731            }
2732            // 23 + 18 = 41. Add one more user message.
2733            conv.add_user_message("final task");
2734            conv
2735        };
2736
2737        let conv = build_conv();
2738        let len = conv.messages.len();
2739        assert_eq!(len, 42, "conv layout wrong");
2740
2741        let naive_cut = len - KEEP_MESSAGES;
2742        assert_eq!(naive_cut, 22);
2743        // Confirm msg[22] is indeed the ToolResult we planted.
2744        assert!(
2745            matches!(conv.messages[22].content, MessageContent::ToolResult(_)),
2746            "test layout broken: msg[22] should be ToolResult"
2747        );
2748
2749        // Now query the real fn. Fix guarantees the cut index points at
2750        // a position that is NOT a ToolResult (advanced past trailing
2751        // ToolResults so no orphan survives).
2752        let (_summary, actual_cut) = build_compression_content(&conv);
2753
2754        if actual_cut < conv.messages.len() {
2755            let first_survivor = &conv.messages[actual_cut];
2756            let is_tool_result = matches!(
2757                first_survivor.content,
2758                MessageContent::ToolResult(_) | MessageContent::ToolResultRef(_)
2759            );
2760            assert!(
2761                !is_tool_result,
2762                "cut index {} lands on ToolResult (naive was {}); \
2763                 surviving range would start with orphan",
2764                actual_cut, naive_cut
2765            );
2766        }
2767
2768        // Applied-cut invariant: after draining [..actual_cut], every
2769        // surviving ToolResult has its paired ATC in the surviving range.
2770        let mut c2 = build_conv();
2771        c2.apply_compression(actual_cut, "summary".to_string());
2772
2773        let mut live_call_ids = std::collections::HashSet::<String>::new();
2774        for msg in &c2.messages {
2775            match &msg.content {
2776                MessageContent::AssistantWithToolCalls { tool_calls, .. } => {
2777                    for tc in tool_calls {
2778                        live_call_ids.insert(tc.id.clone());
2779                    }
2780                }
2781                MessageContent::ToolResult(r) => {
2782                    assert!(
2783                        live_call_ids.contains(&r.call_id),
2784                        "orphan ToolResult({}) in surviving range — its ATC was dropped",
2785                        r.call_id
2786                    );
2787                }
2788                _ => {}
2789            }
2790        }
2791    }
2792
2793    /// Conversation compression is correct only when it reduces the next
2794    /// wire payload. A generated summary (plus any post-compress state note)
2795    /// can be larger than the messages it replaces, so callers must judge
2796    /// compression by before/after `build_messages` tokens, not by raw
2797    /// history length.
2798    #[test]
2799    fn compression_must_be_judged_by_wire_token_savings() {
2800        let mut conv = Conversation::new();
2801        for i in 0..16 {
2802            conv.add_user_message(&format!("task {}", i));
2803            conv.push_delta("ok");
2804            conv.finalize_stream();
2805        }
2806
2807        let before_tokens: usize = build_messages(&conv, "sys", 64_000, "")
2808            .0
2809            .iter()
2810            .map(|m| m.estimate_tokens())
2811            .sum();
2812        let (_mechanical_summary, remove_count) = build_compression_content(&conv);
2813        assert!(remove_count > 0, "test conversation should be compressible");
2814
2815        conv.apply_compression(remove_count, "expanded summary ".repeat(2_000));
2816        conv.add_user_message(
2817            "[Context was compressed. Here is your current state:]\n\
2818             TASK: continue the current issue analysis\n\
2819             RECENTLY READ: crates/atomcode-core/src/agent/mod.rs",
2820        );
2821
2822        let after_tokens: usize = build_messages(&conv, "sys", 64_000, "")
2823            .0
2824            .iter()
2825            .map(|m| m.estimate_tokens())
2826            .sum();
2827
2828        assert!(
2829            after_tokens > before_tokens,
2830            "dropped messages alone is not a valid compaction success metric: \
2831             before={before_tokens}, after={after_tokens}"
2832        );
2833    }
2834
2835}