Skip to main content

atomcode_core/agent/
mod.rs

1//! The AgentLoop — a standalone agent that processes user messages,
2//! calls LLM providers, executes tools, and communicates with the UI
3//! via channels. Decoupled from any TUI concerns.
4
5pub mod background;
6pub mod git_auto_commit;
7pub mod git_checkpoint;
8pub mod sub_agent;
9pub mod subtask_driver;
10
11mod diagnose;
12mod discipline;
13pub mod execute;
14mod prompt;
15mod services;
16mod tool_dispatch;
17mod verify;
18
19use std::path::PathBuf;
20use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
21use std::time::{Duration, Instant};
22
23use tokio::sync::mpsc;
24use tokio_util::sync::CancellationToken;
25
26use crate::config::Config;
27use crate::conversation::Conversation;
28use crate::provider::LlmProvider;
29use crate::skill::SkillRegistry;
30use crate::tool::use_skill::UseSkillTool;
31use crate::tool::{PermissionDecision, PermissionStore, ToolCall, ToolContext, ToolRegistry};
32use crate::turn::event::{TurnEvent, TurnResult};
33use crate::turn::runner::TurnRunner;
34
/// Commands sent FROM the UI TO the agent loop.
///
/// Delivered over the `cmd_tx` / `cmd_rx` unbounded channel pair held by
/// `AgentClient` and `AgentLoop` respectively.
#[derive(Debug)]
pub enum AgentCommand {
    /// User sent a message (may include attached file content and/or images).
    /// `image_markers[i]` is the `[Image #N]` number printed for `images[i]`
    /// at paste time. Round-tripped through `AgentEvent::RestorePendingImages`
    /// so that on VL preprocess failure the TUI can re-attach images with
    /// their ORIGINAL markers — otherwise an UP-recalled `[Image #5]` text
    /// wouldn't match a freshly-renumbered restored image. Empty when the
    /// caller has no images (slash commands, queued text from streaming,
    /// CLI single-shot).
    SendMessage {
        /// The user's message text.
        text: String,
        /// Images attached to this message; parallel to `image_markers`.
        images: Vec<crate::conversation::message::ImagePart>,
        #[allow(dead_code)] // used in 2026-05-09 vision-preprocessor retry; agent reflects on Failed
        image_markers: Vec<usize>,
    },
    /// Cancel current operation.
    Cancel,
    /// Approve a pending tool call.
    ApproveTool,
    /// Approve and always allow this tool for the session.
    ApproveToolAlways,
    /// Deny a pending tool call.
    DenyTool,
    /// Reload config from TUI (the single source of truth for in-memory config,
    /// including ephemeral OAuth providers). Switches to the new default provider.
    ReloadConfig(crate::config::Config),
    /// Change working directory.
    ChangeDir(String),
    /// Append input during streaming — queued and injected before next LLM call.
    AppendInput(String),
    /// Clear conversation history.
    ClearConversation,
    /// Set messages from a resumed session.
    SetMessages(Vec<crate::conversation::message::Message>),
    /// Set plan mode (read-only exploration, no edits).
    SetPlanMode(bool),
    /// Manually compact conversation history. `prompt` is accepted for
    /// forward-compat with an eventual LLM-backed summarize-with-instruction
    /// path; currently unused — this is the mechanical path only.
    Compact {
        /// Optional summarization instruction; see the variant docs — not
        /// consumed by the current mechanical compaction path.
        prompt: Option<String>,
    },
    /// Memory command carrying the text to remember.
    /// NOTE(review): the handler is outside this view — presumably persists
    /// `content` as a memory entry, with `global` choosing a user-wide store
    /// over a project-scoped one; confirm against the handler.
    Remember {
        content: String,
        global: bool,
    },
    /// Memory command carrying a `keyword`.
    /// NOTE(review): presumably removes remembered entries matching
    /// `keyword`; matching rules are not visible here — confirm.
    Forget {
        keyword: String,
    },
    /// Memory command with no payload.
    /// NOTE(review): presumably asks the agent to display stored memory;
    /// the reply event is not visible here — confirm.
    ShowMemory,
    /// Run a one-shot task in an isolated background context (read-only-ish
    /// tool subset, independent conversation, capped turns + timeout).
    /// Result is returned via `AgentEvent::BackgroundComplete`.
    Background {
        /// The task description handed to the background run.
        task: String,
    },
    /// Recompute and re-emit a rich ContextStats snapshot. `/context` sends
    /// this before rendering so the user never sees a stale cache — the
    /// cache is only refreshed on LLM round-trips, so between turns (or
    /// after out-of-turn mutations like `inject_post_compress_state`) the
    /// snapshot can lag the actual conversation state.
    RefreshContextStats,
    /// Rebuild the hook executor from disk after a `/plugin install|uninstall`
    /// or other change to plugin state. Cheap (just re-reads JSON files);
    /// does NOT touch provider/model state, unlike ReloadConfig.
    ReloadHooks,
    /// Request a snapshot of the current conversation messages.
    /// The agent responds with `AgentEvent::MessagesSync` carrying
    /// `conversation.messages`. Used by the TUI before `/bg` to ensure
    /// the session has up-to-date message history even when a turn is
    /// still in progress (e.g. waiting for tool approval).
    SyncMessages,
    /// Shutdown the agent.
    Shutdown,
}
112
/// Reason the agent's turn loop stopped. Carried on TurnComplete so downstream
/// consumers (CLI [done] line, eval harness) can distinguish natural completion
/// from budget-enforced truncation.
///
/// Each variant has a stable snake_case tag returned by
/// `TurnStopReason::as_tag`; the tag is listed on the variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TurnStopReason {
    /// Model responded with text only — no more tool calls, conversation done.
    /// Tag: `natural`.
    Natural,
    /// Turn budget (AgentLoop.max_turns) was reached. Tag: `turn_limit`.
    TurnLimit,
    /// Step budget (check_step_limit tool-call cap) was reached.
    /// Tag: `step_limit`.
    StepLimit,
    /// User cancelled the turn. Tag: `cancelled`.
    Cancelled,
    /// API or internal error terminated the loop. Tag: `error`.
    Error,
}
129
/// Plain-data summary of one conversation-compression attempt.
///
/// NOTE(review): the code that produces and consumes this struct is outside
/// this view; the per-field notes are inferred from the field names and
/// should be confirmed against the compression routine.
#[derive(Debug, Clone, Copy)]
struct CompressionOutcome {
    /// Presumably `true` when the attempt actually changed the conversation.
    applied: bool,
    /// Presumably the token estimate before compression ran.
    before_tokens: usize,
    /// Presumably the token estimate after compression ran.
    after_tokens: usize,
    /// Presumably the number of messages dropped by the attempt.
    removed_messages: usize,
}
137
138impl TurnStopReason {
139    /// Short machine-parseable tag (snake_case) for logs / CLI output.
140    pub fn as_tag(&self) -> &'static str {
141        match self {
142            TurnStopReason::Natural => "natural",
143            TurnStopReason::TurnLimit => "turn_limit",
144            TurnStopReason::StepLimit => "step_limit",
145            TurnStopReason::Cancelled => "cancelled",
146            TurnStopReason::Error => "error",
147        }
148    }
149}
150
/// One descriptor per sub-agent in a `SubAgentDispatchStart` batch.
/// Mirrored 1:1 with the `tasks` vector built in `parallel_edit::execute`
/// so callers can reuse the index across the lifecycle events
/// (`SubAgentTaskStarted` / `SubAgentTaskDone` / `SubAgentTaskFailed` all
/// carry an `index` into that vector).
#[derive(Debug, Clone)]
pub struct SubAgentTaskInfo {
    /// Workspace-relative file path the sub-agent will edit. Renderer
    /// shows this in full (not basename-only) so multi-component paths
    /// like `src/server/tunnel.rs` vs `src/client/tunnel.rs` stay
    /// visibly distinct.
    pub path: String,
    /// User-facing duplicate-instance qualifier. Empty when the path
    /// is unique within this dispatch; `" (#2)"`, `" (#3)"` when the
    /// dispatcher is forking >1 sub-agent against the same path.
    pub dedup_suffix: String,
}
166
/// Events sent FROM the agent loop TO the UI.
///
/// Delivered over the `event_tx` / `event_rx` unbounded channel pair held by
/// `AgentLoop` and `AgentHandle` respectively.
#[derive(Debug, Clone)]
pub enum AgentEvent {
    /// LLM text delta (streaming).
    TextDelta(String),
    /// LLM reasoning/thinking content (e.g., DeepSeek-R1, MiniMax-M2.7, o1-series).
    /// Emitted when the model produces thinking content separately from the final response.
    /// UI can optionally display this in verbose mode (Ctrl+O).
    ReasoningDelta(String),
    /// LLM has started emitting a tool call — only the name is known so far,
    /// arguments are still streaming. UI uses this to display the tool name
    /// immediately instead of waiting for the full args.
    ///
    /// NOTE(review): `hint` is filled by the emitter, which is outside this
    /// view — presumably a short preview to show next to the name; confirm.
    ToolCallStreaming { name: String, hint: String },
    /// A tool call is about to execute (for display).
    /// `id` pairs with `ToolCallResult.call_id` so the UI can match start→result
    /// across parallel or interleaved calls without reconstructing ids from counters.
    ToolCallStarted {
        id: String,
        name: String,
        arguments: String,
    },
    /// Multiple tool calls fan out from one assistant message. Fires BEFORE
    /// the per-call `ToolCallStarted` events, only when ≥ 2 non-duplicate
    /// calls are about to dispatch. UI uses this to render a single
    /// grouped block (`▸ Reading 4 files (parallel)` + child rows) rather
    /// than N independent `▸` rows. Per-call events still fire for
    /// backward compat — UI dedupes via `batch_id` membership.
    ToolBatchStarted {
        batch_id: String,
        calls: Vec<crate::turn::event::ToolBatchCall>,
    },
    /// Closes the batch opened by `ToolBatchStarted`. UI finalizes the
    /// group header with `· N/M ok · Xs wall` summary.
    ToolBatchCompleted {
        /// Same id as the matching `ToolBatchStarted`.
        batch_id: String,
        /// Number of calls in the batch that succeeded (the `N` in `N/M ok`).
        ok: usize,
        /// Number of calls in the batch (the `M` in `N/M ok`).
        total: usize,
        /// Wall-clock duration of the batch, in milliseconds.
        elapsed_ms: u64,
    },
    /// Real-time output chunk from a running tool (e.g., bash command).
    /// Sent during tool execution before ToolCallResult. `call_id`
    /// identifies the tool call the chunk belongs to.
    ToolOutputChunk { call_id: String, chunk: String },
    /// A tool call completed with a result. `call_id` matches the `id` of
    /// the earlier `ToolCallStarted`.
    ToolCallResult {
        call_id: String,
        name: String,
        output: String,
        success: bool,
        duration: Duration,
    },
    /// Waiting for user approval of a tool call.
    ApprovalNeeded {
        /// Name of the tool awaiting approval.
        tool_name: String,
        /// Why approval is being requested.
        reason: String,
        /// The tool call that will run if approved.
        call: ToolCall,
        /// Snapshot of `conversation.messages` at the time the approval
        /// request was raised. Lets the TUI persist mid-turn session
        /// state (e.g. when `/bg` backgrounds a session that is waiting
        /// for approval).
        messages: Vec<crate::conversation::message::Message>,
    },
    /// Token usage update.
    TokenUsage(crate::stream::TokenUsage),
    /// The agent's current phase changed.
    PhaseChange(AgentPhase),
    /// Turn finished. Despite the name this is not success-only:
    /// `stop_reason` distinguishes natural completion from turn/step
    /// budget truncation, cancellation, and error (see `TurnStopReason`).
    TurnComplete {
        duration: Duration,
        total_tokens: usize,
        /// LLM round-trips (standard agent metric).
        turn_count: usize,
        /// Total individual tool calls.
        tool_call_count: usize,
        /// Why the loop stopped. `Natural` for ordinary completion; see
        /// TurnStopReason for budget / cancel / error variants.
        stop_reason: TurnStopReason,
        /// Snapshot of the conversation messages at the moment the turn
        /// ended. Mirrors `TurnCancelled.messages` so UIs have one uniform
        /// path for persisting session state on either terminal event.
        messages: Vec<crate::conversation::message::Message>,
    },
    /// Turn was cancelled by user before completion.
    /// The conversation has been cleaned up - partial messages removed.
    /// Contains the cleaned message list for TUI to sync.
    TurnCancelled {
        messages: Vec<crate::conversation::message::Message>,
    },
    /// Response to `AgentCommand::SyncMessages`. Carries a snapshot of
    /// `conversation.messages` at the time the agent processed the command.
    /// Used by the TUI to sync session state before backgrounding a session
    /// that is mid-turn (e.g. waiting for tool approval).
    MessagesSync {
        messages: Vec<crate::conversation::message::Message>,
    },
    /// An error occurred. Carries a snapshot of `conversation.messages`
    /// so the TUI can persist mid-turn state even when the turn dies
    /// before TurnComplete/TurnCancelled fire — without this, a
    /// first-turn LLM failure silently drops the user's typed message
    /// from disk and `/resume` shows nothing for that conversation.
    /// Producers that don't hold the conversation (the inline
    /// streaming-error forwarder in `run_turn_loop`) send `messages:
    /// Vec::new()`; the terminal error path captured at
    /// `handle_send_message` provides the full snapshot.
    Error {
        error: String,
        messages: Vec<crate::conversation::message::Message>,
    },
    /// Non-fatal advisory from a provider or other subsystem. UI renders
    /// this as a one-line yellow banner; does not abort the turn.
    /// Currently sourced from the OpenAI provider's truncation detector
    /// when the proxy reports implausibly few prompt_tokens.
    Warning(String),
    /// VL preprocessing failed; the agent is returning the user's pending
    /// images so the TUI can re-attach them to the input state. Lets the
    /// user retry the same image without re-pasting from clipboard. Hashes
    /// are TUI-side state, so the renderer recomputes them from the
    /// returned base64 bytes (best-effort; clipboard-equality dedup may
    /// fire on a fresh paste of the same image — minor UX, not breaking).
    RestorePendingImages {
        images: Vec<crate::conversation::message::ImagePart>,
        /// Original `[Image #N]` numbers, parallel to `images`. Round-tripped
        /// from `AgentCommand::SendMessage::image_markers` so the TUI can
        /// re-attach with the SAME marker numbers — keeps UP-recalled
        /// caption text matching after retry.
        markers: Vec<usize>,
    },
    /// VL preprocessing succeeded — surface a one-line success notice
    /// without dumping the (possibly long, sometimes uninformative) VL
    /// description into the UI. The description still rides into
    /// conversation history for the main model. `vl_key` is the provider
    /// key from config; `char_count` is `text.chars().count()` so users
    /// can spot zero/near-zero outputs that would mislead the main model.
    VisionPreprocessSuccess {
        vl_key: String,
        char_count: usize,
    },
    /// Sub-agent batch began. `tasks` is the ordered list of children
    /// the dispatcher is about to fork — same order as the resulting
    /// `SubAgentTaskDone`/`SubAgentTaskFailed` events will arrive in,
    /// so the UI can pre-allocate one display slot per child and
    /// disambiguate same-basename tasks via the index.
    SubAgentDispatchStart {
        /// Per-task descriptors. `path` is the workspace-relative file
        /// path (preserved as the model wrote it — no basename-only
        /// truncation). `dedup_suffix` is the user-facing `(#2)`,
        /// `(#3)` qualifier when the same path appears N times in one
        /// dispatch; empty for unique entries.
        tasks: Vec<SubAgentTaskInfo>,
    },
    /// Sub-agent batch ended (all tasks settled or pool returned). UI
    /// clears the override so subsequent thinks/tools resume normal
    /// label behaviour.
    SubAgentDispatchEnd,
    /// One sub-agent has been claimed from the pool and is now running.
    /// `index` indexes into the `tasks` vector emitted with the
    /// matching DispatchStart so the UI can locate its slot.
    SubAgentTaskStarted { index: usize },
    /// Sub-agent finished successfully. `summary` is a one-sentence
    /// human-readable result, already truncated to a reasonable length
    /// by the agent loop.
    SubAgentTaskDone {
        index: usize,
        elapsed_ms: u64,
        turns: usize,
        summary: String,
    },
    /// Sub-agent failed (error, timeout, no-edit). `reason` is one
    /// short phrase, not a stack trace.
    SubAgentTaskFailed {
        index: usize,
        elapsed_ms: u64,
        turns: usize,
        reason: String,
    },
    /// `/background` task finished. `summary` is the final assistant text
    /// (truncated if long). `success` is false on error / timeout / cancel.
    BackgroundComplete {
        summary: String,
        files_edited: Vec<String>,
        turns: usize,
        success: bool,
    },
    /// Working directory changed.
    WorkingDirChanged(PathBuf),
    /// Context budget stats — piped into datalog and cached by the TUI
    /// for `/context`. Emitted after every turn's `ctx.build_messages`
    /// call, so stats reflect the snapshot the model actually saw.
    ///
    /// The rich breakdown (tool defs / cold zone / ctx window / ctx name)
    /// only appears on the second emission path in
    /// `handle_send_message` — the first path (TurnEvent forwarding) uses
    /// the narrow stats from the ctx::render output. TUI merges both.
    ContextStats {
        system_tokens: usize,
        sent_tokens: usize,
        dropped_tokens: usize,
        working_set_tokens: usize,
        total_messages: usize,
        /// Total bytes of tool definitions / 4. 0 when not yet computed.
        tool_defs_tokens: usize,
        /// Tokens used by cold-zone compressed summaries.
        cold_zone_tokens: usize,
        /// Effective token budget from the active ctx strategy
        /// (`ctx.ctx_window()`), including any defensive clamping.
        ctx_window: usize,
        /// Ctx strategy name — `default` / `ollama` / future impls.
        ctx_name: String,
        /// Full assembled system prompt for the turn — lets the TUI's
        /// `/context prompt` show the exact bytes sent. Empty on the
        /// narrow TurnEvent-forwarded path; only the rich emission in
        /// `handle_send_message` fills this.
        system_prompt: String,
    },
}
381
/// The current phase of the agent (for UI display).
///
/// Sent to the UI inside `AgentEvent::PhaseChange` and stored on
/// `AgentLoop::phase`. Derives `Eq` alongside `PartialEq`: every payload
/// (`String`) has total equality, so the type can be used wherever an `Eq`
/// bound is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AgentPhase {
    /// Not in any of the active phases below.
    Idle,
    /// LLM generating text.
    Thinking,
    /// Executing a tool (with name).
    CallingTool(String),
    /// Waiting for user to approve.
    WaitingApproval,
}
390
/// Discipline tracking state — counters for loop detection, stagnation,
/// error streaks, and tool usage patterns. Extracted from AgentLoop to
/// keep the God Object manageable.
///
/// Derives `Default`, so a fresh state has every counter at 0, every flag
/// `false`, every string / vec / map empty, and `consecutive_edits_file`
/// set to `None`.
///
/// NOTE(review): the logic that reads and updates these fields lives in
/// other modules not visible here; the grouping comments below are inferred
/// from field names only — confirm before relying on them.
#[derive(Default)]
pub(crate) struct DisciplineState {
    // Read / exploration tracking (presumed).
    pub consecutive_reads: usize,
    pub stagnant_turns: usize,
    pub last_known_files: usize,
    pub targeted_read_count: usize,
    pub last_targeted_reads: usize,
    // Per-turn behaviour flags and counters (presumed).
    pub verify_injected: bool,
    pub model_produced_text: bool,
    pub silent_tool_rounds: usize,
    pub is_negative_feedback: bool,
    pub build_fail_count: usize,
    pub scouting_count: usize,
    pub api_confirmed_working: bool,
    // Repeated-edit detection: a file path plus a running count (presumed
    // to be the file most recently edited back-to-back).
    pub consecutive_edits_file: Option<String>,
    pub consecutive_edits_count: usize,
    pub sleep_count: usize,
    pub consecutive_verify_count: usize,
    // Error and command history (presumed).
    pub recent_errors: Vec<String>,
    // String-keyed counters; key format is defined by the writers elsewhere.
    pub executed_cmds: std::collections::HashMap<String, usize>,
    pub category_fail_streak: std::collections::HashMap<String, usize>,
    pub last_bash_cmd: String,
    pub last_diagnosed_error: String,
}
418
/// The agent loop state.
///
/// Owns the conversation, the turn runner, per-turn bookkeeping, and both
/// ends of the agent side of the UI channels (`cmd_rx` in, `event_tx` out).
pub struct AgentLoop {
    // Core components
    /// The conversation this loop drives; its `messages` are what the
    /// snapshot-carrying `AgentEvent` variants copy.
    pub conversation: Conversation,
    /// Tool registry, shared via `Arc` (also handed to `AgentClient`).
    pub tool_registry: std::sync::Arc<ToolRegistry>,
    /// TurnRunner owns the provider, tools, and context.
    pub turn_runner: TurnRunner,
    /// Shared, lock-protected store of tool permission decisions.
    pub permission_store: std::sync::Arc<std::sync::RwLock<PermissionStore>>,
    /// In-memory configuration currently in effect.
    pub config: Config,
    /// Context construction strategy for the active provider. Selected
    /// at construction via `ctx::for_provider` and rebuilt on
    /// `AgentCommand::ReloadConfig` when the provider changes.
    ///
    /// `Arc` (not `Box`) — shared with `turn_runner.ctx` so datalog's
    /// `build_messages` call and runner's actual send go through the
    /// same instance. Rebuilds on `ReloadConfig` update both clones
    /// (see the reload handler below).
    pub ctx: std::sync::Arc<dyn crate::ctx::CtxBuilder>,

    /// Session-start environment snapshot — git branch / HEAD / status.
    /// Captured once in `new()`, refreshed on `ChangeDir` (new working
    /// tree ⇒ new repo). Stale-by-design: rendered with a disclaimer
    /// in `build_system_prompt` so the model knows it's not live.
    /// See `crate::ctx::env`.
    pub env_snapshot: crate::ctx::EnvSnapshot,

    // Execution state
    /// Current phase, as reported to the UI via `AgentEvent::PhaseChange`.
    pub phase: AgentPhase,
    /// Token count for the current turn (presumably reset per turn — confirm).
    pub turn_tokens: usize,
    /// Token count accumulated across turns (presumably session-wide — confirm).
    pub total_tokens: usize,
    /// When the current turn started; `None` when no turn is being timed.
    pub turn_start: Option<Instant>,

    // Per-turn counters
    /// Individual tool calls made in the current turn.
    tool_call_count: usize,
    /// LLM round-trip count (standard "turn" metric).
    /// Each iteration of run_turn_loop = 1 turn, regardless of how many
    /// tools were called in that iteration.
    turn_count: usize,
    /// Optional hard cap on turn_count. When Some(n), run_turn_loop exits
    /// via finish_turn(TurnStopReason::TurnLimit) before starting turn n+1.
    /// None = unbounded (historical behavior — loop stops naturally when the
    /// LLM returns no tool calls, or when the step budget is hit).
    max_turns: Option<usize>,
    /// Retry counter (presumably for failed LLM requests — confirm).
    retry_count: usize,
    /// Tool-call IDs already forwarded to the renderer in the current
    /// user turn. Cleared at the start of each new user message (in
    /// `process_user_input` per-turn reset block).
    ///
    /// Dedupes the case where 429 / stream-ended retries cause the
    /// runner to re-emit `TurnEvent::ToolCallStarted` with the same
    /// provider-assigned tool_call_id. Without this, every retry adds
    /// a duplicate `▸ Bash(...)` row in scrollback — at extreme rate-
    /// limit scenarios users see the same command 30+ times.
    emitted_tool_ids: std::collections::HashSet<String>,

    // Approval channel endpoints for InteractivePermissionDecider
    /// Receives approval requests from InteractivePermissionDecider
    approval_req_rx: mpsc::UnboundedReceiver<crate::turn::permission::ApprovalRequest>,
    /// Sends approval decisions back to InteractivePermissionDecider
    approval_resp_tx: mpsc::UnboundedSender<PermissionDecision>,
    /// Last approval request (for ApproveToolAlways — need to know which tool)
    last_approval_request: Option<crate::turn::permission::ApprovalRequest>,

    // Cancellation token for the current turn
    cancel_token: CancellationToken,

    /// Cancellation token for the background code-graph indexer.
    /// Fresh-cancelled-then-rebuilt on every `/cd` so a prior indexer
    /// (still parsing files) yields CPU instead of racing the new one.
    indexer_cancel: CancellationToken,

    /// Guard against concurrent `/background` tasks. Set on dispatch,
    /// cleared by the spawned task when it completes. Acquire/Release
    /// ordering so the cleared write is visible to the next dispatcher
    /// check on a different thread.
    background_running: std::sync::Arc<AtomicBool>,

    /// Discipline tracking — all counters for loop detection, stagnation,
    /// error streaks, and tool usage patterns. Extracted from AgentLoop to
    /// reduce God Object complexity (was 22 fields inline).
    pub(crate) discipline_state: DisciplineState,

    /// Files read this turn (for tracking read-but-not-edit waste)
    files_read_this_turn: Vec<String>,
    /// Files edited/written this turn
    files_edited_this_turn: Vec<String>,
    /// The user's original task message for this turn (re-injected as reminders).
    current_task: String,
    /// Name of the tool currently being executed (for smart truncation).
    current_tool_name: String,

    /// Last git checkpoint ref (SHA) for /undo rollback.
    pub last_checkpoint: Option<String>,

    /// Most recently edited file (absolute path). Injected as full content in system prompt
    /// so the model doesn't need to re-read it next turn. Capped at ~6K tokens.
    active_file: Option<PathBuf>,

    /// Pending user input appended during streaming. Injected before next LLM call.
    pending_input: Option<String>,
    /// Session-level file tracker: all files read/edited across the entire session.
    /// Used to build the "working set" — tree-sitter skeletons injected before each LLM call.
    /// This replaces the old recent_file_cache with a smarter, budget-aware approach.
    session_files: std::collections::HashMap<String, PathBuf>,
    /// Whether planning phase is active (first LLM call without tools to force a plan).
    planning_phase: bool,
    /// Remaining read-only turns for diagnosis tasks. When > 0, only read-only tools are available.
    /// Decremented each turn. Forces the model to read code before curl/edit.
    diagnosis_read_only_turns: usize,
    /// Plan mode: restrict to read-only tools and inject planning instructions.
    /// Toggled via `/plan` command or `SetPlanMode` agent command.
    pub plan_mode: bool,
    // NOTE(review): a stray doc line — "Current task type — drives dynamic
    // prompt selection and planning." — used to sit on top of the
    // `subtask_driver` doc. It does not describe `subtask_driver` and looks
    // like a leftover from a field no longer declared here; kept as this
    // plain comment so it stops leaking into rustdoc. Confirm and delete.
    /// ATLAS-style subtask driver: decomposes plan into per-file subtasks.
    subtask_driver: subtask_driver::SubtaskDriver,
    /// Original plan text from model's first response — used for plan adherence reminders.
    plan_text: Option<String>,

    /// Completion detection: model indicated task is done.
    /// Set when text contains completion marker AND recent tool results all succeeded.
    /// Next turn: if model only does read/grep → stop (unnecessary verification).
    /// If model does edit/write/bash → cancel grace, continue (more substantive work).
    #[allow(dead_code)]
    completion_grace: bool,

    /// Track whether all tool results in the last turn were successful.
    /// Used by completion detection: only trigger grace when tools succeeded.
    #[allow(dead_code)]
    last_turn_tools_all_success: bool,

    // Skill registry — provides descriptions for system prompt and powers use_skill tool
    skill_registry: std::sync::Arc<std::sync::RwLock<SkillRegistry>>,

    /// Hook executor for lifecycle events.
    hook_executor: std::sync::Arc<crate::hook::executor::HookExecutor>,

    // Code graph background indexer channel
    reindex_tx: Option<mpsc::UnboundedSender<PathBuf>>,

    // Datalog writer — writes per-turn markdown logs to datalog/ directory.
    datalog: crate::turn::datalog::DatalogWriter,

    // Channels
    /// Incoming `AgentCommand`s from the UI.
    cmd_rx: mpsc::UnboundedReceiver<AgentCommand>,
    /// Outgoing `AgentEvent`s to the UI.
    event_tx: mpsc::UnboundedSender<AgentEvent>,
}
565
/// Cloneable sender side for UI/runtime code to communicate with the agent.
///
/// Every field is a channel sender or an `Arc`, so cloning yields another
/// handle onto the same agent rather than a copy of its state.
#[derive(Clone)]
pub struct AgentClient {
    /// Command channel into the agent loop (`AgentLoop::cmd_rx` is the
    /// receiving end).
    pub cmd_tx: mpsc::UnboundedSender<AgentCommand>,
    /// Shared tool registry for dynamic MCP tool registration.
    pub tool_registry: std::sync::Arc<ToolRegistry>,
    /// Loaded skills, shared with the agent loop. The TUI uses this
    /// to populate the slash-command palette with `user_invocable()`
    /// entries, and to expand the template when a user picks one.
    /// Same `Arc` the agent loop holds — reload(...) calls there are
    /// visible here without extra plumbing.
    pub skill_registry: std::sync::Arc<std::sync::RwLock<SkillRegistry>>,
}
579
/// Handle for the UI to communicate with the agent.
///
/// Pairs the cloneable command side (`client`) with the single event
/// receiver. Not `Clone`: an `UnboundedReceiver` has exactly one owner.
pub struct AgentHandle {
    /// Cloneable command/registry handle.
    pub client: AgentClient,
    /// Stream of `AgentEvent`s emitted by the agent loop.
    pub event_rx: mpsc::UnboundedReceiver<AgentEvent>,
}
585
/// Recipe for spawning additional agent runtimes.
///
/// `spawn_runtime` uses these fields to build a provider, a fresh
/// `ToolContext`, and an `AgentLoop`, then runs the loop on a tokio task.
/// The factory is `Clone`; the `Arc` fields (including `runtime_counter`)
/// are shared between clones.
#[derive(Clone)]
pub struct AgentRuntimeFactory {
    /// Configuration cloned into each spawned loop; `build_provider` reads
    /// `default_provider` and `providers` from it.
    pub config: Config,
    /// Working directory given to each spawned runtime's `ToolContext`.
    pub working_dir: PathBuf,
    /// Telemetry handle passed to `ToolContext::with_telemetry`.
    pub telemetry: std::sync::Arc<atomcode_telemetry::Telemetry>,
    /// Optional LSP manager, copied onto each spawned `ToolContext`.
    pub lsp: Option<std::sync::Arc<crate::lsp::manager::LspManager>>,
    /// Tool registry shared by every spawned runtime.
    pub shared_tools: std::sync::Arc<ToolRegistry>,
    /// Skill registry shared by every spawned runtime.
    pub skill_registry: std::sync::Arc<std::sync::RwLock<SkillRegistry>>,
    /// Turn cap applied to each spawned loop via `set_max_turns`.
    pub max_turns: Option<usize>,
    /// Monotonic counter behind the `runtime-{id}` labels produced by
    /// `next_runtime_label`. Both constructors seed it with 1 and the label
    /// uses the post-increment value, so the first label is `runtime-2`.
    runtime_counter: std::sync::Arc<AtomicU64>,
}
597
598impl AgentRuntimeFactory {
599    pub fn set_config(&mut self, config: Config) {
600        self.config = config;
601    }
602
603    pub fn set_working_dir(&mut self, working_dir: PathBuf) {
604        self.working_dir = working_dir;
605    }
606
607    fn next_runtime_label(&self) -> String {
608        let id = self.runtime_counter.fetch_add(1, Ordering::Relaxed) + 1;
609        format!("runtime-{id}")
610    }
611
612    pub fn build_provider(&self) -> Box<dyn LlmProvider> {
613        let Some(provider_config) = self.config.providers.get(&self.config.default_provider) else {
614            return crate::provider::unavailable_provider(
615                "未配置 provider。请使用 /provider 添加 provider 后再试。",
616            );
617        };
618
619        match crate::provider::create_provider(provider_config) {
620            Ok(provider) => provider,
621            Err(e) => crate::provider::unavailable_provider(format!("provider 初始化失败: {e:#}")),
622        }
623    }
624
625    pub fn spawn_runtime(
626        &self,
627        conversation: Conversation,
628    ) -> (
629        AgentClient,
630        tokio::sync::mpsc::UnboundedReceiver<AgentEvent>,
631    ) {
632        let provider = self.build_provider();
633        let mut tool_context = ToolContext::with_telemetry(
634            self.working_dir.clone(),
635            "default",
636            self.telemetry.clone(),
637        );
638        let runtime_label = self.next_runtime_label();
639        tool_context.file_history = std::sync::Arc::new(tokio::sync::Mutex::new(
640            crate::tool::file_history::FileHistory::new(&runtime_label),
641        ));
642        tool_context.lsp = self.lsp.clone();
643
644        let (mut loop_, handle) = AgentLoop::new_with_shared_parts(
645            self.config.clone(),
646            provider,
647            self.shared_tools.clone(),
648            self.skill_registry.clone(),
649            Some(runtime_label),
650            tool_context,
651            conversation,
652        );
653        loop_.set_max_turns(self.max_turns);
654
655        let ctx = atomcode_telemetry::CurrentContext::current();
656        tokio::spawn(async move {
657            atomcode_telemetry::CurrentContext::scope(ctx, || loop_.run()).await
658        });
659
660        (handle.client, handle.event_rx)
661    }
662
663    pub fn from_initial_loop(agent_loop: &AgentLoop, max_turns: Option<usize>) -> Self {
664        let working_dir = agent_loop
665            .turn_runner
666            .context
667            .working_dir
668            .try_read()
669            .map(|g| g.clone())
670            .unwrap_or_else(|_| PathBuf::from("."));
671
672        Self {
673            config: agent_loop.config.clone(),
674            working_dir,
675            telemetry: agent_loop.turn_runner.context.telemetry.clone(),
676            lsp: agent_loop.turn_runner.context.lsp.clone(),
677            shared_tools: agent_loop.tool_registry.clone(),
678            skill_registry: agent_loop.skill_registry.clone(),
679            max_turns,
680            runtime_counter: std::sync::Arc::new(AtomicU64::new(1)),
681        }
682    }
683
684    pub fn new_for_test(
685        config: Config,
686        working_dir: PathBuf,
687        shared_tools: std::sync::Arc<ToolRegistry>,
688        skill_registry: std::sync::Arc<std::sync::RwLock<SkillRegistry>>,
689    ) -> Self {
690        let telemetry = ToolContext::new(working_dir.clone()).telemetry;
691        Self {
692            config,
693            working_dir,
694            telemetry,
695            lsp: None,
696            shared_tools,
697            skill_registry,
698            max_turns: None,
699            runtime_counter: std::sync::Arc::new(AtomicU64::new(1)),
700        }
701    }
702}
703
704impl AgentLoop {
705    /// Create a new agent loop and its corresponding UI handle.
706    pub fn new(
707        config: Config,
708        provider: Box<dyn LlmProvider>,
709        mut tool_registry: ToolRegistry,
710        tool_context: ToolContext,
711        conversation: Conversation,
712    ) -> (Self, AgentHandle) {
713        // Load skills from disk and register the use_skill tool.
714        let working_dir = tool_context
715            .working_dir
716            .try_read()
717            .map(|g| g.clone())
718            .unwrap_or_else(|_| std::path::PathBuf::from("."));
719
720        let mut registry = SkillRegistry::new();
721        let _ = registry.reload(&working_dir);
722        let skill_registry = std::sync::Arc::new(std::sync::RwLock::new(registry));
723        let disabled_internal: std::collections::HashSet<String> =
724            std::env::var("ATOMCODE_DISABLE_TOOLS")
725                .ok()
726                .map(|v| {
727                    v.split(',')
728                        .map(|s| s.trim().to_string())
729                        .filter(|s| !s.is_empty())
730                        .collect()
731                })
732                .unwrap_or_default();
733        let internal_enabled = |name: &str| !disabled_internal.contains(name);
734
735        // Always register use_skill — the tool itself gracefully reports
736        // "no skills available" when the registry is empty. Gating on
737        // has_skills breaks Windows where skills are installed via plugins
738        // that may not be present at compile time. The model only wastes
739        // a turn calling use_skill with an empty registry, not 5+ turns
740        // re-describing the task that a skill would have covered.
741        if internal_enabled("use_skill") {
742            tool_registry.register_sync(Box::new(UseSkillTool {
743                registry: skill_registry.clone(),
744            }));
745        }
746
747        // Graph query tools: not exposed to model (adds 5 tool definitions that
748        // weak models never use correctly). Graph data is still injected automatically
749        // via grep's graph header and auto_inject_graph_context — the model benefits
750        // from graph without needing to call these tools directly.
751        // To re-enable: set ATOMCODE_GRAPH_TOOLS=1
752        if std::env::var("ATOMCODE_GRAPH_TOOLS")
753            .map(|v| v == "1")
754            .unwrap_or(false)
755        {
756            if internal_enabled("trace_callers") {
757                tool_registry.register_sync(Box::new(crate::tool::trace_callers::TraceCallersTool));
758            }
759            if internal_enabled("trace_callees") {
760                tool_registry.register_sync(Box::new(crate::tool::trace_callees::TraceCalleesTool));
761            }
762            if internal_enabled("trace_chain") {
763                tool_registry.register_sync(Box::new(crate::tool::trace_chain::TraceChainTool));
764            }
765            if internal_enabled("file_dependencies") {
766                tool_registry.register_sync(Box::new(crate::tool::file_deps::FileDependenciesTool));
767            }
768            if internal_enabled("blast_radius") {
769                tool_registry.register_sync(Box::new(crate::tool::blast_radius::BlastRadiusTool));
770            }
771        }
772
773        let shared_tools = std::sync::Arc::new(tool_registry);
774        Self::new_from_shared_bootstrap(
775            config,
776            provider,
777            shared_tools,
778            skill_registry,
779            None,
780            tool_context,
781            conversation,
782        )
783    }
784
785    /// Create a new runtime using the already-shared tool and skill registries.
786    /// This path intentionally does not reload skills or register `use_skill` /
787    /// graph tools; the initial runtime owns that one-time setup.
788    pub fn new_with_shared_parts(
789        config: Config,
790        provider: Box<dyn LlmProvider>,
791        shared_tools: std::sync::Arc<ToolRegistry>,
792        skill_registry: std::sync::Arc<std::sync::RwLock<SkillRegistry>>,
793        runtime_label: Option<String>,
794        tool_context: ToolContext,
795        conversation: Conversation,
796    ) -> (Self, AgentHandle) {
797        Self::new_from_shared_bootstrap(
798            config,
799            provider,
800            shared_tools,
801            skill_registry,
802            runtime_label,
803            tool_context,
804            conversation,
805        )
806    }
807
808    #[allow(clippy::too_many_arguments)]
809    fn new_from_shared_bootstrap(
810        config: Config,
811        provider: Box<dyn LlmProvider>,
812        shared_tools: std::sync::Arc<ToolRegistry>,
813        skill_registry: std::sync::Arc<std::sync::RwLock<SkillRegistry>>,
814        runtime_label: Option<String>,
815        mut tool_context: ToolContext,
816        conversation: Conversation,
817    ) -> (Self, AgentHandle) {
818        let (cmd_tx, cmd_rx) = mpsc::unbounded_channel();
819        let (event_tx, event_rx) = mpsc::unbounded_channel();
820
821        let working_dir = tool_context
822            .working_dir
823            .try_read()
824            .map(|g| g.clone())
825            .unwrap_or_else(|_| std::path::PathBuf::from("."));
826
827        // Load persisted code graph from disk and share with ToolContext.
828        let graph_path = working_dir.join(".atomcode").join("graph.bin");
829        let code_graph = crate::graph::persist::load(&graph_path);
830        let graph = std::sync::Arc::new(tokio::sync::RwLock::new(code_graph));
831        tool_context.graph = graph;
832
833        // Build approval channels for interactive permission flow
834        let (approval_req_tx, approval_req_rx) = mpsc::unbounded_channel();
835        let (approval_resp_tx, approval_resp_rx) = mpsc::unbounded_channel();
836
837        let permission_store = std::sync::Arc::new(std::sync::RwLock::new(PermissionStore::new()));
838
839        let interactive_permission =
840            Box::new(crate::turn::permission::InteractivePermissionDecider::new(
841                approval_req_tx,
842                approval_resp_rx,
843                permission_store.clone(),
844            ));
845
846        // Hand the registry handle to ToolContext so active-dispatch tools
847        // (parallel_edit_files) can read it at execute time without
848        // creating a Tool ↔ Registry Arc cycle.
849        tool_context.tool_registry = Some(shared_tools.clone());
850        // Convert Box → Arc so provider can be shared with sub-agents.
851        let provider: std::sync::Arc<dyn LlmProvider> = std::sync::Arc::from(provider);
852
853        // Build the datalog writer before `config` is moved into the agent below.
854        let datalog = match runtime_label.as_deref() {
855            Some(label) => crate::turn::datalog::DatalogWriter::new_with_filename_tag(
856                &working_dir,
857                &config.datalog,
858                label,
859            ),
860            None => crate::turn::datalog::DatalogWriter::new(&working_dir, &config.datalog),
861        };
862
863        // Select the context-construction strategy once for this session.
864        // Rebuilds on ReloadConfig when the provider changes.
865        let ctx: std::sync::Arc<dyn crate::ctx::CtxBuilder> =
866            match config.providers.get(&config.default_provider) {
867                Some(pc) => crate::ctx::for_provider(pc),
868                // Fallback for first-run / broken-config path: synthesize a
869                // minimal provider so `for_provider` still gets its hands on
870                // a context_window. Matches Config::default_context_window()
871                // behavior (128_000) so sessions without a provider don't
872                // panic before the user runs /login or /model.
873                None => crate::ctx::for_provider(&crate::config::provider::ProviderConfig {
874                    provider_type: String::new(),
875                    api_key: None,
876                    model: String::new(),
877                    base_url: None,
878                    system_prompt: None,
879                    user_agent: None,
880                    context_window: 128_000,
881                    max_tokens: None,
882                    thinking_type: None,
883                    thinking_keep: None,
884                    reasoning_history: None,
885                    thinking_enabled: None,
886                    thinking_budget: None,
887                    skip_tls_verify: false,
888                    ephemeral: true,
889
890}),
891            };
892
893        let hooks = crate::hook::json_config::load_hooks_config(&working_dir);
894        let hook_executor = std::sync::Arc::new(crate::hook::executor::HookExecutor::new(hooks));
895
896        let turn_runner = TurnRunner {
897            provider,
898            tools: shared_tools.clone(),
899            context: tool_context.clone(),
900            config: config.clone(),
901            ctx: ctx.clone(),
902            permission: interactive_permission,
903            recently_edited_files: Vec::new(),
904            hook_executor: hook_executor.clone(),
905            loop_guard: Default::default(),
906        };
907
908        // Capture session-start env snapshot (git status, branch, HEAD).
909        // Blocking I/O here is fine: `new()` runs once at startup, the
910        // capture is ~tens of ms for typical repos, and it's required
911        // before the first turn's system prompt is assembled.
912        let env_snapshot = crate::ctx::EnvSnapshot::capture(&working_dir);
913
914        let agent = Self {
915            conversation,
916            tool_registry: shared_tools.clone(),
917            turn_runner,
918            permission_store,
919            config,
920            ctx,
921            env_snapshot,
922            phase: AgentPhase::Idle,
923            turn_tokens: 0,
924            total_tokens: 0,
925            turn_start: None,
926            tool_call_count: 0,
927            turn_count: 0,
928            max_turns: None,
929            retry_count: 0,
930            emitted_tool_ids: std::collections::HashSet::new(),
931            approval_req_rx,
932            approval_resp_tx,
933            last_approval_request: None,
934            cancel_token: CancellationToken::new(),
935            indexer_cancel: CancellationToken::new(),
936            background_running: std::sync::Arc::new(AtomicBool::new(false)),
937            discipline_state: DisciplineState::default(),
938            files_read_this_turn: Vec::new(),
939            files_edited_this_turn: Vec::new(),
940            current_task: String::new(),
941            current_tool_name: String::new(),
942            last_checkpoint: None,
943            active_file: None,
944            pending_input: None,
945            planning_phase: false,
946            diagnosis_read_only_turns: 0,
947            plan_mode: false,
948            completion_grace: false,
949            last_turn_tools_all_success: false,
950            subtask_driver: subtask_driver::SubtaskDriver::new(),
951            plan_text: None,
952            session_files: std::collections::HashMap::new(),
953            skill_registry,
954            hook_executor,
955            reindex_tx: None,
956            datalog,
957            cmd_rx,
958            event_tx,
959        };
960
961        let client = AgentClient {
962            cmd_tx,
963            tool_registry: shared_tools.clone(),
964            skill_registry: agent.skill_registry.clone(),
965        };
966        let handle = AgentHandle { client, event_rx };
967
968        (agent, handle)
969    }
970
971    /// Set an optional hard cap on the number of LLM turns this agent will
972    /// run. When the cap is reached, run_turn_loop exits via
973    /// finish_turn(TurnStopReason::TurnLimit). `None` (the default) is
974    /// unbounded. Used by the CLI `--max-turns` flag.
975    pub fn set_max_turns(&mut self, max: Option<usize>) {
976        self.max_turns = max;
977    }
978
979    /// Run the agent loop. This is the main entry point — call from a tokio task.
980    /// The loop processes commands from the UI and emits events back.
981    pub async fn run(mut self) {
982        // Active-dispatch tool registration. The model invokes
983        // `parallel_edit_files` explicitly when it judges parallel edit
984        // is the right move; the framework no longer infers from text.
985        // Gated on `subagent.enabled` so users can disable fork
986        // dispatch via `/config subagent.enabled false` without code
987        // changes — the tool simply isn't advertised to the model.
988        // Registered here (not in `new()`) because `register_arc` is
989        // async and `new()` is sync.
990        if self.config.subagent.enabled {
991            let tool = crate::tool::parallel_edit::ParallelEditTool {
992                provider: self.turn_runner.provider.clone(),
993                config: self.config.clone(),
994                event_tx: self.event_tx.clone(),
995            };
996            self.tool_registry
997                .register_arc("parallel_edit_files".to_string(), std::sync::Arc::new(tool))
998                .await;
999        }
1000
1001        // Spawn background code graph indexer
1002        {
1003            let working_dir = self.turn_runner.context.working_dir.read().await.clone();
1004            let graph = self.turn_runner.context.graph.clone();
1005            let (reindex_tx, mut reindex_rx) = mpsc::unbounded_channel::<PathBuf>();
1006            let wd_for_indexer = working_dir.clone();
1007            let cancel = self.indexer_cancel.clone();
1008            tokio::spawn(async move {
1009                let mut indexer =
1010                    crate::graph::indexer::GraphIndexer::new(graph.clone(), wd_for_indexer.clone());
1011                indexer.index_all(cancel).await;
1012                // Persist after initial indexing
1013                let gp = wd_for_indexer.join(".atomcode").join("graph.bin");
1014                if let Ok(g) = graph.try_read() {
1015                    let _ = crate::graph::persist::save(&g, &gp);
1016                }
1017                // Listen for reindex requests
1018                while let Some(path) = reindex_rx.recv().await {
1019                    indexer.reindex_file(&path).await;
1020                }
1021            });
1022            self.reindex_tx = Some(reindex_tx);
1023        }
1024
1025        // --- SessionStart Hook ---
1026        if self.hook_executor.has_hooks() {
1027            let wd = self
1028                .turn_runner
1029                .context
1030                .working_dir
1031                .try_read()
1032                .map(|g| g.display().to_string())
1033                .unwrap_or_default();
1034            let ctx = crate::hook::HookContext {
1035                event: "session_start".into(),
1036                tool_name: None,
1037                tool_args: None,
1038                tool_result: None,
1039                tool_success: None,
1040                session_id: String::new(),
1041                working_dir: wd,
1042            };
1043            self.hook_executor
1044                .run_session_event(crate::hook::HookEvent::SessionStart, &ctx)
1045                .await;
1046        }
1047
1048        while let Some(cmd) = self.cmd_rx.recv().await {
1049            match cmd {
1050                AgentCommand::SendMessage { text, images, image_markers } => {
1051                    self.handle_send_message(text, images, image_markers).await;
1052                }
1053                AgentCommand::Cancel => {
1054                    self.cancel_token.cancel();
1055                    self.cancel_token = CancellationToken::new();
1056                    self.phase = AgentPhase::Idle;
1057                    // Cancel the current turn — preserve completed content, backfill
1058                    // (cancelled) for unpaired tool calls, and mark turn as Completed.
1059                    self.conversation.cancel_current_turn();
1060                    // Sync the preserved messages to TUI
1061                    let messages = self.conversation.messages.clone();
1062                    let _ = self.event_tx.send(AgentEvent::TurnCancelled { messages });
1063                }
1064                AgentCommand::ApproveTool => {
1065                    // Approval handled inside run_turn_loop via channels
1066                }
1067                AgentCommand::ApproveToolAlways => {
1068                    // Approval handled inside run_turn_loop via channels
1069                }
1070                AgentCommand::DenyTool => {
1071                    // Denial handled inside run_turn_loop via channels
1072                }
1073                AgentCommand::ReloadConfig(new_config) => {
1074                    let old_provider_name = self.config.default_provider.clone();
1075                    let old_type = self
1076                        .config
1077                        .providers
1078                        .get(&old_provider_name)
1079                        .map(|p| p.provider_type.clone());
1080                    self.config = new_config;
1081                    // Rebuild hook executor from JSON config files.
1082                    let wd = self
1083                        .turn_runner
1084                        .context
1085                        .working_dir
1086                        .try_read()
1087                        .map(|g| g.clone())
1088                        .unwrap_or_else(|_| std::path::PathBuf::from("."));
1089                    let hooks = crate::hook::json_config::load_hooks_config(&wd);
1090                    self.hook_executor =
1091                        std::sync::Arc::new(crate::hook::executor::HookExecutor::new(hooks));
1092                    self.turn_runner.hook_executor = self.hook_executor.clone();
1093                    let new_provider_name = self.config.default_provider.clone();
1094                    let new_type = self
1095                        .config
1096                        .providers
1097                        .get(&new_provider_name)
1098                        .map(|p| p.provider_type.clone());
1099
1100                    let should_clear = reload_should_clear_conversation(
1101                        &old_provider_name,
1102                        old_type.as_deref(),
1103                        &new_provider_name,
1104                        new_type.as_deref(),
1105                    );
1106                    if should_clear {
1107                        self.conversation.messages.clear();
1108                        self.conversation.turn_tracker =
1109                            crate::conversation::turn::TurnTracker::new();
1110                        self.session_files.clear();
1111                    }
1112
1113                    if let Some(provider_config) = self.config.providers.get(&new_provider_name) {
1114                        // Rebuild the context strategy for the new provider.
1115                        // Selected once per provider; per-model customizations
1116                        // (e.g. Ollama schema trimming, Claude cache markers)
1117                        // take effect from the next turn. Assign the same
1118                        // `Arc` to both `self.ctx` and `self.turn_runner.ctx`
1119                        // so datalog and the send path stay locked together.
1120                        let new_ctx = crate::ctx::for_provider(provider_config);
1121                        self.ctx = new_ctx.clone();
1122                        self.turn_runner.ctx = new_ctx;
1123                        match crate::provider::create_provider(provider_config) {
1124                            Ok(new_provider) => {
1125                                self.turn_runner.provider = std::sync::Arc::from(new_provider);
1126                                self.turn_runner.config = self.config.clone();
1127                            }
1128                            Err(e) => {
1129                                let msg = format!("{:#}", e);
1130                                let is_auth_gap = msg.contains("Not logged in")
1131                                    || msg.contains("Invalid auth.toml")
1132                                    || msg.contains("Token expired")
1133                                    || msg.contains("Token refresh failed");
1134                                if is_auth_gap {
1135                                    self.turn_runner.provider = std::sync::Arc::from(
1136                                        crate::provider::unavailable_provider(format!(
1137                                            "Provider 凭证不可用:{}。请使用 /login 或 /codingplan 完成配置后再试。",
1138                                            msg
1139                                        )),
1140                                    );
1141                                    self.turn_runner.config = self.config.clone();
1142                                } else {
1143                                    let _ = self.event_tx.send(AgentEvent::TextDelta(format!(
1144                                        "**Warning: failed to reload provider: {}**\n\n",
1145                                        e
1146                                    )));
1147                                }
1148                            }
1149                        }
1150                    } else {
1151                        self.turn_runner.provider =
1152                            std::sync::Arc::from(crate::provider::unavailable_provider(
1153                                "No active provider configured. Use /provider to add one.",
1154                            ));
1155                        self.turn_runner.config = self.config.clone();
1156                    }
1157                }
1158                AgentCommand::ChangeDir(path) => {
1159                    self.change_dir(&path).await;
1160                }
1161                AgentCommand::AppendInput(text) => {
1162                    // Queue user input to be injected before the next LLM call.
1163                    if let Some(ref mut existing) = self.pending_input {
1164                        existing.push('\n');
1165                        existing.push_str(&text);
1166                    } else {
1167                        self.pending_input = Some(text);
1168                    }
1169                }
1170                AgentCommand::ClearConversation => {
1171                    // Clear the conversation history in the agent loop.
1172                    self.conversation = Conversation::new();
1173                    self.datalog.clear();
1174                }
1175                AgentCommand::SetMessages(messages) => {
1176                    // Set messages from a resumed session.
1177                    // Rebuild turn_tracker so the context builder can use
1178                    // proper turn-based windowing instead of the fallback path.
1179                    let turn_tracker =
1180                        crate::conversation::turn::TurnTracker::rebuild(&messages);
1181                    self.conversation.messages = messages;
1182                    self.conversation.turn_tracker = turn_tracker;
1183                }
1184                AgentCommand::SetPlanMode(enabled) => {
1185                    self.plan_mode = enabled;
1186                }
1187                AgentCommand::Compact { prompt } => {
1188                    self.run_compact(prompt).await;
1189                }
1190                AgentCommand::Remember { content, global } => {
1191                    use crate::config::memory::MemoryStore;
1192                    let store = if global {
1193                        MemoryStore::global()
1194                    } else {
1195                        let wd = self
1196                            .turn_runner
1197                            .context
1198                            .working_dir
1199                            .try_read()
1200                            .map(|g| g.clone())
1201                            .unwrap_or_default();
1202                        MemoryStore::project(&wd)
1203                    };
1204                    match store.append(&content) {
1205                        Ok(_) => {
1206                            let scope = if global { "global" } else { "project" };
1207                            let _ = self.event_tx.send(AgentEvent::TextDelta(format!(
1208                                "(remembered in {} memory: {})\n",
1209                                scope, content
1210                            )));
1211                        }
1212                        Err(e) => {
1213                            let _ = self.event_tx.send(AgentEvent::TextDelta(format!(
1214                                "(failed to save memory: {})\n",
1215                                e
1216                            )));
1217                        }
1218                    }
1219                }
1220                AgentCommand::Forget { keyword } => {
1221                    use crate::config::memory::MemoryStore;
1222                    let wd = self
1223                        .turn_runner
1224                        .context
1225                        .working_dir
1226                        .try_read()
1227                        .map(|g| g.clone())
1228                        .unwrap_or_default();
1229                    let global = MemoryStore::global();
1230                    let project = MemoryStore::project(&wd);
1231                    let g_matches = global.find_matching(&keyword);
1232                    let p_matches = project.find_matching(&keyword);
1233                    if g_matches.is_empty() && p_matches.is_empty() {
1234                        let _ = self.event_tx.send(AgentEvent::TextDelta(format!(
1235                            "(no memory entries matching '{}')\n",
1236                            keyword
1237                        )));
1238                    } else {
1239                        let mut msg = String::new();
1240                        for entry in &g_matches {
1241                            msg.push_str(&format!("  [global] - {}\n", entry));
1242                        }
1243                        for entry in &p_matches {
1244                            msg.push_str(&format!("  [project] - {}\n", entry));
1245                        }
1246                        let g_result = global.remove_matching(&keyword);
1247                        let p_result = project.remove_matching(&keyword);
1248                        if g_result.is_err() || p_result.is_err() {
1249                            msg.push_str(
1250                                "(warning: some entries could not be removed from disk)\n",
1251                            );
1252                        }
1253                        let total = g_matches.len() + p_matches.len();
1254                        msg.push_str(&format!(
1255                            "(removed {} matching entr{})\n",
1256                            total,
1257                            if total == 1 { "y" } else { "ies" }
1258                        ));
1259                        let _ = self.event_tx.send(AgentEvent::TextDelta(msg));
1260                    }
1261                }
1262                AgentCommand::ShowMemory => {
1263                    use crate::config::memory::MemoryStore;
1264                    let wd = self
1265                        .turn_runner
1266                        .context
1267                        .working_dir
1268                        .try_read()
1269                        .map(|g| g.clone())
1270                        .unwrap_or_default();
1271                    let global = MemoryStore::global();
1272                    let project = MemoryStore::project(&wd);
1273                    let g_entries = global.load();
1274                    let p_entries = project.load();
1275                    if g_entries.is_empty() && p_entries.is_empty() {
1276                        let _ = self.event_tx.send(AgentEvent::TextDelta(
1277                            "(no memories saved yet — use /remember <fact> to add one)\n"
1278                                .to_string(),
1279                        ));
1280                    } else {
1281                        let mut msg = String::new();
1282                        if !g_entries.is_empty() {
1283                            msg.push_str(&format!("  [Global] ({})\n", global.path().display()));
1284                            for e in &g_entries {
1285                                msg.push_str(&format!("    - {}\n", e));
1286                            }
1287                        }
1288                        if !p_entries.is_empty() {
1289                            msg.push_str(&format!("  [Project] ({})\n", project.path().display()));
1290                            for e in &p_entries {
1291                                msg.push_str(&format!("    - {}\n", e));
1292                            }
1293                        }
1294                        let _ = self.event_tx.send(AgentEvent::TextDelta(msg));
1295                    }
1296                }
1297                AgentCommand::Background { task } => {
1298                    // AcqRel: pair with the spawned task's Release store on
1299                    // completion so the next dispatcher sees the cleared flag.
1300                    if self.background_running.swap(true, Ordering::AcqRel) {
1301                        let _ = self.event_tx.send(AgentEvent::Error {
1302                            error: "A background task is already running. Wait for it to finish."
1303                                .to_string(),
1304                            messages: self.conversation.messages.clone(),
1305                        });
1306                    } else {
1307                        let provider = self.turn_runner.provider.clone();
1308                        let tools = self.turn_runner.tools.clone();
1309                        let context = self.turn_runner.context.clone();
1310                        let context_for_commit = context.clone();
1311                        let config = self.config.clone();
1312                        let ctx = self.ctx.clone();
1313                        let event_tx = self.event_tx.clone();
1314                        let flag = self.background_running.clone();
1315                        tokio::spawn(async move {
1316                            let result = background::run_background_task(
1317                                &task,
1318                                provider,
1319                                tools,
1320                                context,
1321                                config,
1322                                ctx,
1323                                event_tx.clone(),
1324                            )
1325                            .await;
1326                            if let AgentEvent::BackgroundComplete {
1327                                files_edited,
1328                                success: true,
1329                                ..
1330                            } = &result
1331                            {
1332                                if !files_edited.is_empty() {
1333                                    let wd = context_for_commit
1334                                        .working_dir
1335                                        .try_read()
1336                                        .map(|g| g.clone())
1337                                        .unwrap_or_default();
1338                                    match git_auto_commit::auto_commit_edited_files(
1339                                        &wd,
1340                                        files_edited,
1341                                    ) {
1342                                        git_auto_commit::AutoCommitOutcome::Committed {
1343                                            sha,
1344                                            message,
1345                                        } => {
1346                                            let _ = event_tx.send(AgentEvent::TextDelta(format!(
1347                                                "\n[auto-commit {sha}] {message}\n"
1348                                            )));
1349                                        }
1350                                        git_auto_commit::AutoCommitOutcome::Failed { reason } => {
1351                                            let _ = event_tx.send(AgentEvent::TextDelta(format!(
1352                                                "\n[auto-commit skipped] {reason}\n"
1353                                            )));
1354                                        }
1355                                        git_auto_commit::AutoCommitOutcome::Skipped { .. } => {}
1356                                    }
1357                                }
1358                            }
1359                            let _ = event_tx.send(result);
1360                            flag.store(false, Ordering::Release);
1361                        });
1362                    }
1363                }
1364                AgentCommand::RefreshContextStats => {
1365                    let system_prompt = self.build_system_prompt();
1366                    let (msgs, _) = self
1367                        .ctx
1368                        .build_messages(&self.conversation, &system_prompt, "");
1369                    self.emit_rich_context_stats(&self.conversation, &msgs)
1370                        .await;
1371                }
1372                AgentCommand::ReloadHooks => {
1373                    // Triggered by /plugin install|uninstall in the TUI so
1374                    // newly-contributed hooks (especially UserPromptSubmit)
1375                    // fire on the very next user message instead of waiting
1376                    // for /cd or restart.
1377                    let wd = self
1378                        .turn_runner
1379                        .context
1380                        .working_dir
1381                        .try_read()
1382                        .map(|g| g.clone())
1383                        .unwrap_or_else(|_| std::path::PathBuf::from("."));
1384                    let hooks = crate::hook::json_config::load_hooks_config(&wd);
1385                    self.hook_executor =
1386                        std::sync::Arc::new(crate::hook::executor::HookExecutor::new(hooks));
1387                    self.turn_runner.hook_executor = self.hook_executor.clone();
1388                }
1389                AgentCommand::SyncMessages => {
1390                    let messages = self.conversation.messages.clone();
1391                    let _ = self.event_tx.send(AgentEvent::MessagesSync { messages });
1392                }
1393                AgentCommand::Shutdown => {
1394                    // --- SessionEnd Hook ---
1395                    if self.hook_executor.has_hooks() {
1396                        let wd = self
1397                            .turn_runner
1398                            .context
1399                            .working_dir
1400                            .try_read()
1401                            .map(|g| g.display().to_string())
1402                            .unwrap_or_default();
1403                        let ctx = crate::hook::HookContext {
1404                            event: "session_end".into(),
1405                            tool_name: None,
1406                            tool_args: None,
1407                            tool_result: None,
1408                            tool_success: None,
1409                            session_id: String::new(),
1410                            working_dir: wd,
1411                        };
1412                        self.hook_executor
1413                            .run_session_event(crate::hook::HookEvent::SessionEnd, &ctx)
1414                            .await;
1415                    }
1416                    break;
1417                }
1418            }
1419        }
1420    }
1421
1422    // -------------------------------------------------------------------------
1423    // Core agent logic
1424    // -------------------------------------------------------------------------
1425
1426    async fn handle_send_message(
1427        &mut self,
1428        mut content: String,
1429        images: Vec<crate::conversation::message::ImagePart>,
1430        image_markers: Vec<usize>,
1431    ) {
1432        self.current_task = content.clone();
1433
1434        if let Some(reason) = self.turn_runner.provider.availability_error() {
1435            let _ = self.event_tx.send(AgentEvent::Error {
1436                error: reason.to_string(),
1437                messages: self.conversation.messages.clone(),
1438            });
1439            self.finish_turn(TurnStopReason::Error);
1440            return;
1441        }
1442
1443        // ── UserPromptSubmit hooks ──
1444        // Run before any preprocessing so plugin hooks see the raw user
1445        // input. A hook can either block the turn (CC `decision: "block"`
1446        // or non-zero exit) or inject extra context that we splice into
1447        // the user message before the LLM sees it.
1448        if self.hook_executor.has_hooks() {
1449            let cwd = self
1450                .turn_runner
1451                .context
1452                .working_dir
1453                .try_read()
1454                .map(|g| g.display().to_string())
1455                .unwrap_or_default();
1456            match self
1457                .hook_executor
1458                .run_user_prompt_submit(&content, "", &cwd)
1459                .await
1460            {
1461                crate::hook::UserPromptHookResult::Continue => {}
1462                crate::hook::UserPromptHookResult::Inject(extra) => {
1463                    // Append rather than prepend so the user's wording stays
1464                    // at the top of the message — the hook context reads as
1465                    // supplementary, not as a rewrite.
1466                    content.push_str("\n\n");
1467                    content.push_str(&extra);
1468                }
1469                crate::hook::UserPromptHookResult::Block(reason) => {
1470                    let _ = self.event_tx.send(AgentEvent::Error {
1471                        error: format!("hook blocked: {}", reason),
1472                        messages: self.conversation.messages.clone(),
1473                    });
1474                    self.finish_turn(TurnStopReason::Error);
1475                    return;
1476                }
1477            }
1478        }
1479
1480        // Detect negative feedback — user is unhappy with previous turn's work.
1481        let lower = content.to_lowercase();
1482        let negative_keywords = [
1483            "改错",
1484            "不对",
1485            "错了",
1486            "还是不行",
1487            "没用",
1488            "不是这样",
1489            "搞错",
1490            "又错",
1491            "白做",
1492            "越改越差",
1493            "恢复",
1494            "回滚",
1495            "撤销",
1496            "不行",
1497            "wrong",
1498            "not right",
1499            "still broken",
1500            "doesn't work",
1501            "undo",
1502            "revert",
1503            "go back",
1504            "that's worse",
1505            "stop",
1506            "broken",
1507        ];
1508        self.discipline_state.is_negative_feedback =
1509            content.chars().count() < 80 && negative_keywords.iter().any(|kw| lower.contains(kw));
1510
1511        // Git checkpoint: snapshot working tree before agent starts editing.
1512        let wd = self
1513            .turn_runner
1514            .context
1515            .working_dir
1516            .try_read()
1517            .map(|g| g.clone())
1518            .unwrap_or_default();
1519        self.last_checkpoint = git_checkpoint::create_checkpoint(&wd);
1520
1521        // Reset ctx_budget_hint to full window at start of each user message.
1522        // Without this, the first tool call in a new turn reads the stale budget
1523        // from the previous turn's last LLM call (when ctx was full), causing
1524        // 670-line files to skeleton when there's plenty of room.
1525        //
1526        // Read from `self.ctx` not `self.config` — ctx applies defensive
1527        // clamps (e.g. OllamaCtx floors at 4K) that config's raw
1528        // `context_window` doesn't reflect. Using config would tell
1529        // read_file "you have 128K" when actual budget is 4K.
1530        self.turn_runner
1531            .context
1532            .ctx_budget_hint
1533            .store(self.ctx.ctx_window(), std::sync::atomic::Ordering::Relaxed);
1534
1535        // Auto-diagnose: if user mentions error keywords, scan logs and attach findings.
1536        // This gives the model the real error from Turn 1, instead of spending 3-5 turns grepping.
1537        let enriched = self.auto_diagnose_errors(&content).await;
1538        // Extract and store exception signature for recurrence detection across turns.
1539        if let Some(pos) = enriched.find("<!-- diag_exception:") {
1540            let rest = &enriched[pos + 20..];
1541            if let Some(end) = rest.find(" -->") {
1542                self.discipline_state.last_diagnosed_error = rest[..end].to_string();
1543            }
1544        }
1545        // Strip the hidden marker before adding to conversation
1546        let clean = if let Some(pos) = enriched.find("\n<!-- diag_exception:") {
1547            enriched[..pos].to_string()
1548        } else {
1549            enriched
1550        };
1551
1552        // ── Task boundary cleanup ──
1553        // New user message = new task. If there's old context from the
1554        // previous task (>12 messages), compress it unconditionally.
1555        // This prevents dirty-start degradation where 20K+ of stale
1556        // conversation dilutes the batch prompt for the new task.
1557        // Unlike maybe_compress_history (which checks the 50% threshold),
1558        // this fires at every task boundary regardless of token count.
1559        if self.conversation.messages.len() > 12 {
1560            // Task-boundary compression goes through the active ctx strategy.
1561            // No LLM call — the compressed content is already
1562            // one-line-per-round summaries (DefaultCtx) compact enough
1563            // for cold zone.
1564            if let Some((content, n_msgs)) = self.ctx.compression_plan(&self.conversation) {
1565                let system_prompt = self.build_system_prompt();
1566                let _ = self.try_apply_compression(&system_prompt, n_msgs, content, false);
1567            }
1568        }
1569
1570        // Vision preprocessing: when the active provider can't accept images
1571        // and the user pasted some, run them through the configured VL model
1572        // first and turn the result into plain text. See
1573        // `vision_preprocessor` module doc for the data-flow contract.
1574        let mut vision_warning: Option<String> = None;
1575        let (clean, images) = if !images.is_empty() {
1576            use crate::vision_preprocessor::{maybe_preprocess, PreprocessOutcome};
1577            match maybe_preprocess(&self.config, &*self.turn_runner.provider, &clean, &images).await {
1578                PreprocessOutcome::Skipped => (clean, images),
1579                PreprocessOutcome::Replaced { text, vl_key } => {
1580                    // Surface a one-line success notice (provider key in
1581                    // muted gray, char count for sanity-check). The full
1582                    // description is intentionally NOT shown in the UI —
1583                    // it would either be redundant with what the main
1584                    // model proceeds to discuss or, on bad VL output,
1585                    // mislead the user that "success" means useful
1586                    // content. Description still rides into conversation
1587                    // history below.
1588                    let _ = self.event_tx.send(AgentEvent::VisionPreprocessSuccess {
1589                        vl_key: vl_key.clone(),
1590                        char_count: text.chars().count(),
1591                    });
1592                    let merged = if clean.is_empty() {
1593                        format!("[图片内容(由 {vl_key} 识别)]\n{text}")
1594                    } else {
1595                        format!("{clean}\n\n[图片内容(由 {vl_key} 识别)]\n{text}")
1596                    };
1597                    (merged, Vec::new())
1598                }
1599                PreprocessOutcome::Failed { reason } => {
1600                    vision_warning = Some(format!(
1601                        "VL 预处理失败:{reason} · 图片已自动保留,可直接重试",
1602                    ));
1603                    // Layer-1 retry support: hand the image bytes back to
1604                    // TUIX so the user doesn't have to re-paste from
1605                    // clipboard. Without this the bytes are gone after
1606                    // submit and Ctrl+V is the only way to re-attach.
1607                    let _ = self.event_tx.send(AgentEvent::RestorePendingImages {
1608                        images: images.clone(),
1609                        markers: image_markers.clone(),
1610                    });
1611                    let merged = if clean.is_empty() {
1612                        "[图片识别失败]".to_string()
1613                    } else {
1614                        format!("{clean}\n\n[图片识别失败]")
1615                    };
1616                    (merged, Vec::new())
1617                }
1618            }
1619        } else {
1620            (clean, images)
1621        };
1622        if let Some(w) = vision_warning {
1623            let _ = self.event_tx.send(AgentEvent::Warning(w));
1624        }
1625
1626        if images.is_empty() {
1627            self.conversation.add_user_message(&clean);
1628        } else {
1629            use crate::conversation::message::{Message, MessageContent, Role};
1630            let msg = Message {
1631                role: Role::User,
1632                content: MessageContent::MultiPart {
1633                    text: if clean.is_empty() { None } else { Some(clean.clone()) },
1634                    images,
1635                },
1636            };
1637            let idx = self.conversation.messages.len();
1638            self.conversation.messages.push(msg);
1639            self.conversation.turn_tracker.on_user_message(idx);
1640        }
1641        self.turn_tokens = 0;
1642        self.tool_call_count = 0;
1643        self.turn_count = 0;
1644        self.retry_count = 0;
1645        self.emitted_tool_ids.clear();
1646        self.files_read_this_turn.clear();
1647        self.files_edited_this_turn.clear();
1648        self.turn_runner.recently_edited_files.clear();
1649        // Cross-batch loop guard is scoped to a single user-message
1650        // turn — every new user message = fresh slate. See
1651        // `turn::loop_guard` for why this clear() is the entire
1652        // per-turn-only contract on the caller side.
1653        self.turn_runner.loop_guard.clear();
1654        self.discipline_state.consecutive_reads = 0;
1655        self.discipline_state.verify_injected = false;
1656        self.discipline_state.model_produced_text = false;
1657        self.discipline_state.silent_tool_rounds = 0;
1658        // Note: is_negative_feedback is set above, do not reset here.
1659        self.discipline_state.build_fail_count = 0;
1660        self.discipline_state.scouting_count = 0;
1661        self.discipline_state.api_confirmed_working = false;
1662        self.discipline_state.consecutive_edits_file = None;
1663        self.discipline_state.consecutive_edits_count = 0;
1664        self.discipline_state.sleep_count = 0;
1665        self.discipline_state.consecutive_verify_count = 0;
1666        self.discipline_state.recent_errors.clear();
1667        self.discipline_state.executed_cmds.clear();
1668        self.discipline_state.category_fail_streak.clear();
1669        // Reset stagnation tracking — new user message = fresh turn,
1670        // previous stagnation state must not carry over.
1671        self.discipline_state.stagnant_turns = 0;
1672        self.discipline_state.last_known_files = 0;
1673        self.discipline_state.last_targeted_reads = 0;
1674        self.discipline_state.targeted_read_count = 0;
1675        // Reset subtask driver and plan — previous turn's plan must not
1676        // bleed into the new turn. Without this, a text-only Q&A response
1677        // that mentions file names (e.g. as examples) triggers extract_from_plan,
1678        // and the plan completion guard then forces the loop to continue
1679        // editing files that were never part of the user's actual request.
1680        self.subtask_driver = subtask_driver::SubtaskDriver::new();
1681        self.plan_text = None;
1682        // Clear session_files on each new user message.
1683        // Working Set only tracks files from the CURRENT task.
1684        self.session_files.clear();
1685        self.turn_start = Some(Instant::now());
1686        self.cancel_token = CancellationToken::new();
1687
1688        // Initialize datalog for this turn
1689        {
1690            let model_name = self.turn_runner.provider.model_name().to_string();
1691            // Use ctx's effective window so datalog matches what build_messages
1692            // actually renders with (OllamaCtx 4K floor, etc).
1693            self.datalog
1694                .begin_turn(&content, &model_name, self.ctx.ctx_window());
1695        }
1696
1697        // State-based decisions (replaces keyword-based task_classifier).
1698        // Two facts, not guesses:
1699
1700        // 1. Has the model read any files this session? If not → read-only first turn.
1701        let has_file_context =
1702            !self.files_read_this_turn.is_empty() || !self.files_edited_this_turn.is_empty();
1703        self.diagnosis_read_only_turns = if has_file_context { 0 } else { 1 };
1704        self.planning_phase = !has_file_context;
1705
1706        // Unified prepend — no task classification, no auto-build injection.
1707        // Build command detection deferred to Phase 5 (LLM-inferred project config).
1708        let _content = format!(
1709            "Read the relevant code first, then plan and implement.\n\n{}",
1710            content
1711        );
1712
1713        self.phase = AgentPhase::Thinking;
1714        let _ = self
1715            .event_tx
1716            .send(AgentEvent::PhaseChange(AgentPhase::Thinking));
1717
1718        self.run_turn_loop().await;
1719    }
1720
1721    // needs_planning replaced by task_classifier::TaskType::needs_planning()
1722
1723    // auto_diagnose_errors → diagnose.rs
1724    // find_file_in_project → diagnose.rs
1725
1726    /// Multi-turn execution loop using TurnRunner.
1727    /// Each iteration calls TurnRunner.run() for one LLM turn, then applies
1728    /// discipline (reminders, step limits) and decides whether to continue.
1729    async fn run_turn_loop(&mut self) {
1730        loop {
1731            // Turn budget check BEFORE incrementing, so the reported
1732            // turn_count equals the number of turns actually executed
1733            // (not including the "would-be" next turn we refuse to run).
1734            // The stop reason is propagated via TurnComplete.stop_reason;
1735            // the CLI [done] line surfaces it as `stopped=turn_limit`.
1736            if self.check_turn_limit() {
1737                self.finish_turn(TurnStopReason::TurnLimit);
1738                return;
1739            }
1740            self.turn_count += 1;
1741
1742            // Decrement diagnosis read-only counter each turn.
1743            if self.diagnosis_read_only_turns > 0 {
1744                self.diagnosis_read_only_turns -= 1;
1745            }
1746
1747            // Inject any pending user input appended during streaming.
1748            if let Some(input) = self.pending_input.take() {
1749                self.conversation
1750                    .add_user_message(&format!("[Additional context from user]: {}", input));
1751            }
1752
1753            // Planning phase: inject planning reminder on turn 3.
1754            // Turn 1-2: model reads files to understand the task.
1755            // Planning phase injection: REMOVED.
1756            // Was injecting "[PLAN NOW]" at turn 3, but this is arbitrary timing.
1757            // The system prompt WORKFLOW section already guides planning.
1758
1759            // NOTE: Negative feedback injection disabled — adds a System message that
1760            // confuses weak models and wastes context. The model sees the user's complaint
1761            // directly; no extra injection needed.
1762
1763            // DIAGNOSTIC STRATEGY injection removed — the model decides its own
1764            // debugging approach. System prompt PLAN FIRST section is sufficient.
1765
1766            // Stagnation detection: REMOVED.
1767            // Was injecting "[STAGNATION WARNING]" after 3 turns without edits.
1768            // Bug: triggered after model output a completion summary (pure text,
1769            // no edits), preventing it from stopping. The warning was interpreted
1770            // as "keep working" by the model. Stagnation detection was harmful —
1771            // the prompt guides the model to work efficiently.
1772
1773            let system_prompt = self.build_system_prompt();
1774            // Per-turn reminder removed: verbatim task now rides on the cadence
1775            // reflection checkpoint — see agent::discipline::reflection_prompt.
1776            let turn_reminder = String::new();
1777            let cancel = self.cancel_token.clone();
1778
1779            // Context compression: when > 70% budget, pause and compress
1780            // old turns via LLM call. Keeps last 5 turns full, compressed
1781            // history goes to cold zone (FIFO, max 3 entries).
1782            self.maybe_compress_history(&system_prompt).await;
1783
1784            // Batch reminder: REMOVED.
1785            // Was injecting fake user messages ("[Batch reminder: call MULTIPLE tools...]")
1786            // every turn after turn 3 when last turn was single-tool. In a 24-turn session,
1787            // this injected 19 fake user messages that disrupted model's diagnostic focus.
1788            // The system prompt already contains batch guidance — injecting mid-conversation
1789            // user messages is counterproductive.
1790
1791            // Move conversation out to avoid borrow conflicts with self in select!
1792            let mut conv = std::mem::take(&mut self.conversation);
1793
1794            // Datalog: mark the start of a new LLM round-trip
1795            self.datalog.log_llm_call();
1796
1797            // Rich ContextStats for `/context` + inline datalog dump.
1798            // The file-level request log (`log_llm_request`) now lives
1799            // inside `TurnRunner::run_with_filter`, paired with
1800            // `log_llm_response`, so any caller — AgentLoop or daemon —
1801            // gets symmetric request/response files. This block only
1802            // feeds UI state + datalog md inline debug.
1803            {
1804                let context_window = self.ctx.ctx_window();
1805                // Same `Arc` instance as `self.turn_runner.ctx`, so
1806                // `build_messages` here and in the runner produce
1807                // byte-identical output (same system prompt, same
1808                // per-model directives, same reminder placement).
1809                let (msgs, _) = self
1810                    .ctx
1811                    .build_messages(&conv, &system_prompt, &turn_reminder);
1812                let tool_defs = self.turn_runner.tools.get_definitions().await;
1813                // Dump request to datalog for inline debugging
1814                self.datalog.log_llm_dump(
1815                    &msgs,
1816                    tool_defs.len(),
1817                    self.turn_runner.provider.model_name(),
1818                    context_window,
1819                );
1820
1821                self.emit_rich_context_stats(&conv, &msgs).await;
1822            }
1823
1824            // Run the turn in a scoped block so all borrows of self.turn_runner
1825            // end before we use self.conversation again.
1826            let (result, mut turn_rx, context_collapsed) = {
1827                let (turn_tx, mut turn_rx) = mpsc::unbounded_channel::<TurnEvent>();
1828
1829                // Destructure self to get split borrows — the borrow checker needs to see
1830                // that turn_runner and the other fields are disjoint borrows.
1831                let mut context_collapsed = false;
1832                let context_collapsed = &mut context_collapsed;
1833                let runner = &mut self.turn_runner;
1834                let cmd_rx = &mut self.cmd_rx;
1835                let approval_req_rx = &mut self.approval_req_rx;
1836                let event_tx = &self.event_tx;
1837                let approval_resp_tx = &self.approval_resp_tx;
1838                let permission_store = &self.permission_store;
1839                let cancel_token = &mut self.cancel_token;
1840                let last_approval_request = &mut self.last_approval_request;
1841                let pending_input = &mut self.pending_input;
1842                let phase = &mut self.phase;
1843                let model_produced_text = &mut self.discipline_state.model_produced_text;
1844                let current_tool_name = &mut self.current_tool_name;
1845                let datalog = &mut self.datalog;
1846                let files_edited_this_turn = &mut self.files_edited_this_turn;
1847                let active_file = &mut self.active_file;
1848                let files_read_this_turn = &mut self.files_read_this_turn;
1849                let consecutive_reads = &mut self.discipline_state.consecutive_reads;
1850                let targeted_read_count = &mut self.discipline_state.targeted_read_count;
1851                let last_bash_cmd = &mut self.discipline_state.last_bash_cmd;
1852                let session_files = &mut self.session_files;
1853                let reindex_tx = &self.reindex_tx;
1854                let emitted_tool_ids = &mut self.emitted_tool_ids;
1855
1856                // Tool filtering: diagnosis phase uses read-only tools.
1857                // All other turns have full tool access (including edit_file).
1858                // EXECUTE thinking is applied INSIDE edit_file (fresh file read,
1859                // ±5 lines context return, fuzzy match, delta validation) —
1860                // not by blocking tools at the agent loop level.
1861                let read_only_tools: &[&str] = &[
1862                    "read_file",
1863                    "grep",
1864                    "glob",
1865                    "list_directory",
1866                    "web_search",
1867                    "web_fetch",
1868                    "trace_callees",
1869                    "trace_callers",
1870                    "trace_chain",
1871                    "file_dependencies",
1872                    "blast_radius",
1873                ];
1874                let use_read_only = self.plan_mode || self.diagnosis_read_only_turns > 0;
1875                let tool_filter: Option<&[&str]> = if use_read_only {
1876                    Some(read_only_tools)
1877                } else {
1878                    None // Full tool access — model can read, edit, bash, search_replace
1879                };
1880                let turn_fut = runner.run_with_filter(
1881                    &mut conv,
1882                    &system_prompt,
1883                    &turn_reminder,
1884                    &turn_tx,
1885                    cancel,
1886                    tool_filter,
1887                );
1888                tokio::pin!(turn_fut);
1889
1890                // Accumulate text deltas for datalog (flushed on tool call or turn end)
1891                let mut datalog_text_accum = String::new();
1892
1893                let result = loop {
1894                    tokio::select! {
1895                        biased;
1896
1897                        result = &mut turn_fut => break result,
1898
1899                        Some(event) = turn_rx.recv() => {
1900                            // Inline forward_turn_event to avoid borrowing self
1901                            match event {
1902                                TurnEvent::TextDelta(text) => {
1903                                    *model_produced_text = true;
1904                                    datalog_text_accum.push_str(&text);
1905                                    let _ = event_tx.send(AgentEvent::TextDelta(text));
1906                                }
1907                                TurnEvent::ReasoningDelta(text) => {
1908                                    let _ = event_tx.send(AgentEvent::ReasoningDelta(text));
1909                                }
1910                                TurnEvent::ToolBatchStarted { ref batch_id, ref calls } => {
1911                                    let _ = event_tx.send(AgentEvent::ToolBatchStarted {
1912                                        batch_id: batch_id.clone(),
1913                                        calls: calls.clone(),
1914                                    });
1915                                }
1916                                TurnEvent::ToolBatchCompleted { ref batch_id, ok, total, elapsed_ms } => {
1917                                    let _ = event_tx.send(AgentEvent::ToolBatchCompleted {
1918                                        batch_id: batch_id.clone(),
1919                                        ok,
1920                                        total,
1921                                        elapsed_ms,
1922                                    });
1923                                }
1924                                TurnEvent::ToolCallStarted { ref id, ref name, ref arguments } => {
1925                                    // Dedupe across retries: the same provider-assigned tool_call_id
1926                                    // arrives again whenever a 429 / stream-ended attempt is retried.
1927                                    // Without this guard, every retry paints another `▸ Bash(...)` row.
1928                                    // Skip ALL downstream side effects (datalog, phase, file tracking,
1929                                    // event emission) for the duplicate — the first emission has
1930                                    // already accounted for them.
1931                                    if !emitted_tool_ids.insert(id.clone()) {
1932                                        continue;
1933                                    }
1934                                    // Forward tool name immediately for UI spinner
1935                                    let _ = event_tx.send(AgentEvent::ToolCallStreaming { name: name.clone(), hint: String::new() });
1936                                    // Flush accumulated model text to datalog before logging tool call
1937                                    if !datalog_text_accum.is_empty() {
1938                                        datalog.log_model_text(&datalog_text_accum);
1939                                        datalog_text_accum.clear();
1940                                    }
1941                                    datalog.log_tool_call(name, arguments);
1942
1943                                    *current_tool_name = name.clone();
1944                                    *phase = AgentPhase::CallingTool(name.clone());
1945                                    let _ = event_tx.send(AgentEvent::PhaseChange(phase.clone()));
1946
1947                                    if name == "bash" {
1948                                        if let Ok(args) = serde_json::from_str::<serde_json::Value>(arguments) {
1949                                            *last_bash_cmd = args
1950                                                .get("command")
1951                                                .and_then(|v| v.as_str())
1952                                                .unwrap_or("")
1953                                                .to_string();
1954                                        }
1955                                    }
1956
1957                                    // Track files for Working Set + read counts
1958                                    if matches!(name.as_str(), "read_file" | "edit_file" | "create_file" | "search_replace" | "glob" | "grep") {
1959                                        if let Ok(args) = serde_json::from_str::<serde_json::Value>(arguments) {
1960                                            // Try file_path first, then path (glob/grep use path)
1961                                            let fp = args.get("file_path").and_then(|v| v.as_str())
1962                                                .or_else(|| args.get("path").and_then(|v| v.as_str()));
1963                                            if let Some(fp) = fp {
1964                                                let short = std::path::Path::new(fp)
1965                                                    .file_name()
1966                                                    .map(|n| n.to_string_lossy().to_string())
1967                                                    .unwrap_or_else(|| fp.to_string());
1968                                                session_files.insert(short.clone(), std::path::PathBuf::from(fp));
1969                                                if name == "read_file" {
1970                                                    if !files_read_this_turn.contains(&short) {
1971                                                        files_read_this_turn.push(short);
1972                                                    }
1973                                                    // Targeted reads (offset/limit) are always progress
1974                                                    let has_offset = args.get("offset").is_some() || args.get("limit").is_some();
1975                                                    if has_offset {
1976                                                        *targeted_read_count += 1;
1977                                                    }
1978                                                }
1979                                            }
1980                                        }
1981                                    }
1982
1983                                    let _ = event_tx.send(AgentEvent::ToolCallStarted { id: id.clone(), name: name.clone(), arguments: arguments.clone() });
1984                                }
1985                                TurnEvent::ToolOutputChunk { call_id, chunk } => {
1986                                    // Forward real-time tool output to UI
1987                                    let _ = event_tx.send(AgentEvent::ToolOutputChunk { call_id, chunk });
1988                                }
1989                                TurnEvent::ToolCallResult { call_id, name, output, success, duration } => {
1990                                    // Track files for discipline
1991                                    if let Some(pos) = output.find("Edited ") {
1992                                        let rest = &output[pos + 7..];
1993                                        let fp_end = rest.find(|c: char| c == ' ' || c == '\n' || c == '(').unwrap_or(rest.len());
1994                                        let fp = rest[..fp_end].trim();
1995                                        if !fp.is_empty() {
1996                                            *active_file = Some(PathBuf::from(fp));
1997                                        }
1998                                        if !fp.is_empty() {
1999                                            let file = fp.to_string();
2000                                            if !files_edited_this_turn.contains(&file) {
2001                                                files_edited_this_turn.push(file);
2002                                            }
2003                                        }
2004                                    }
2005                                    if let Some(pos) = output.find("Wrote ").or_else(|| output.find("Overwrote ")).or_else(|| output.find("Created new file ")) {
2006                                        let keyword_len = if output[pos..].starts_with("Overwrote ") { 10 }
2007                                            else if output[pos..].starts_with("Created new file ") { 17 }
2008                                            else { 6 };
2009                                        let rest = &output[pos + keyword_len..];
2010                                        let fp_end = rest.find(|c: char| c == ' ' || c == '\n' || c == '(').unwrap_or(rest.len());
2011                                        let fp = rest[..fp_end].trim();
2012                                        if !fp.is_empty() {
2013                                            *active_file = Some(PathBuf::from(fp));
2014                                        }
2015                                        if !fp.is_empty() {
2016                                            let file = fp.to_string();
2017                                            if !files_edited_this_turn.contains(&file) {
2018                                                files_edited_this_turn.push(file);
2019                                            }
2020                                        }
2021                                    }
2022                                    if success {
2023                                        track_tool_modified_files(
2024                                            &name,
2025                                            last_bash_cmd,
2026                                            &output,
2027                                            files_edited_this_turn,
2028                                        );
2029                                    }
2030                                    if matches!(name.as_str(), "read_file" | "list_directory" | "glob" | "grep") {
2031                                        *consecutive_reads += 1;
2032                                    } else if matches!(name.as_str(), "edit_file" | "create_file") {
2033                                        *consecutive_reads = 0;
2034                                    }
2035                                    // Notify background indexer to reindex edited/created files
2036                                    if matches!(name.as_str(), "edit_file" | "create_file") && success {
2037                                        if let Some(ref tx) = reindex_tx {
2038                                            let path_str = output.lines().next().unwrap_or("")
2039                                                .trim_start_matches("Edited ")
2040                                                .trim_start_matches("Created new file ")
2041                                                .trim_start_matches("Created ")
2042                                                .trim_start_matches("Wrote ")
2043                                                .trim_start_matches("Overwrote ")
2044                                                .split_whitespace().next().unwrap_or("");
2045                                            if !path_str.is_empty() {
2046                                                let _ = tx.send(PathBuf::from(path_str));
2047                                            }
2048                                        }
2049                                    }
2050                                    datalog.log_tool_result(&output, success);
2051                                    let _ = event_tx.send(AgentEvent::ToolCallResult {
2052                                        call_id, name, output, success, duration,
2053                                    });
2054                                }
2055                                TurnEvent::TokenUsage { prompt_tokens, completion_tokens, total_tokens: _, cached_tokens } => {
2056                                    datalog.log_token_usage(prompt_tokens, completion_tokens, cached_tokens);
2057                                    if cached_tokens > 0 {
2058                                        datalog.log_cache_hit(prompt_tokens, cached_tokens);
2059                                    }
2060                                    let _ = event_tx.send(AgentEvent::TokenUsage(
2061                                        crate::stream::TokenUsage {
2062                                            prompt_tokens,
2063                                            completion_tokens,
2064                                            cached_tokens,
2065                                        }
2066                                    ));
2067                                }
2068                                TurnEvent::ContextStats { system_tokens, sent_tokens, dropped_tokens, working_set_tokens, total_messages } => {
2069                                    datalog.log_context_stats(system_tokens, sent_tokens, dropped_tokens, working_set_tokens, total_messages);
2070
2071                                    // Detect context collapse: if sent tokens drop dramatically,
2072                                    // model has lost most history. Reset edit tracking so BLOCKED
2073                                    // doesn't prevent the model from re-reading files it forgot about.
2074                                    if sent_tokens < 3000 {
2075                                        *context_collapsed = true;
2076                                    }
2077
2078                                    // Narrow stats path — rich fields (tool_defs / cold_zone /
2079                                    // ctx_window / ctx_name) are sent from the datalog block in
2080                                    // handle_send_message, which has access to self.ctx.
2081                                    // TUI side merges both emissions into a single cache.
2082                                    let _ = event_tx.send(AgentEvent::ContextStats {
2083                                        system_tokens, sent_tokens, dropped_tokens, working_set_tokens, total_messages,
2084                                        tool_defs_tokens: 0,
2085                                        cold_zone_tokens: 0,
2086                                        ctx_window: 0,
2087                                        ctx_name: String::new(),
2088                                        system_prompt: String::new(),
2089                                    });
2090                                }
2091                                TurnEvent::ToolCallStreaming { name, hint } => {
2092                                    let _ = event_tx.send(AgentEvent::ToolCallStreaming { name, hint });
2093                                }
2094                                TurnEvent::Error(e) => {
2095                                    // Streaming-error forwarder: `conv` is borrowed
2096                                    // by the in-flight `turn_fut`, so we can't snapshot
2097                                    // `conv.messages` from here. The terminal-error
2098                                    // branches in `handle_send_message` fire after
2099                                    // turn_fut completes with the proper snapshot.
2100                                    let _ = event_tx.send(AgentEvent::Error {
2101                                        error: e,
2102                                        messages: Vec::new(),
2103                                    });
2104                                }
2105                                TurnEvent::Warning(w) => {
2106                                    datalog.log_warning(&w);
2107                                    let _ = event_tx.send(AgentEvent::Warning(w));
2108                                }
2109                                TurnEvent::WorkingDirChanged(new_dir) => {
2110                                    // A tool (change_dir / bash cd) mutated the shared
2111                                    // cwd. Surface it so the TUI footer can update.
2112                                    // Intentionally does not mirror `services.rs::change_dir`
2113                                    // (which clears the conversation, reloads the code graph,
2114                                    // respawns the indexer) — those side effects are right for
2115                                    // a user-initiated `/cd` but would destroy mid-turn state
2116                                    // when the LLM is just navigating.
2117                                    let _ = event_tx.send(AgentEvent::WorkingDirChanged(new_dir));
2118                                }
2119                                TurnEvent::ApprovalRequested { tool_name, reason, call, messages } => {
2120                                    // Forward approval request to TUI, including
2121                                    // a snapshot of conversation.messages so the
2122                                    // TUI can persist mid-turn session state.
2123                                    let _ = event_tx.send(AgentEvent::ApprovalNeeded {
2124                                        tool_name,
2125                                        reason,
2126                                        call,
2127                                        messages,
2128                                    });
2129                                    *phase = AgentPhase::WaitingApproval;
2130                                    let _ = event_tx.send(AgentEvent::PhaseChange(AgentPhase::WaitingApproval));
2131                                }
2132                            }
2133                        }
2134
2135                        Some(req) = approval_req_rx.recv() => {
2136                            // The ApprovalNeeded event was already sent from the
2137                            // TurnEvent::ApprovalRequested handler above (which
2138                            // has access to conversation.messages).  Here we
2139                            // only record the request for later approve/deny.
2140                            *last_approval_request = Some(req);
2141                        }
2142
2143                        Some(cmd) = cmd_rx.recv() => {
2144                            match cmd {
2145                                AgentCommand::Cancel => {
2146                                    cancel_token.cancel();
2147                                    *cancel_token = CancellationToken::new();
2148                                }
2149                                AgentCommand::ApproveTool => {
2150                                    *phase = AgentPhase::Thinking;
2151                                    let _ = event_tx.send(AgentEvent::PhaseChange(AgentPhase::Thinking));
2152                                    let _ = approval_resp_tx.send(PermissionDecision::Allow);
2153                                }
2154                                AgentCommand::ApproveToolAlways => {
2155                                    if let Some(ref req) = last_approval_request {
2156                                        if let Ok(mut store) = permission_store.write() {
2157                                            store.grant_session(&req.call.name);
2158                                        }
2159                                    }
2160                                    *phase = AgentPhase::Thinking;
2161                                    let _ = event_tx.send(AgentEvent::PhaseChange(AgentPhase::Thinking));
2162                                    let _ = approval_resp_tx.send(PermissionDecision::Allow);
2163                                }
2164                                AgentCommand::DenyTool => {
2165                                    *phase = AgentPhase::Thinking;
2166                                    let _ = event_tx.send(AgentEvent::PhaseChange(AgentPhase::Thinking));
2167                                    let _ = approval_resp_tx.send(PermissionDecision::Deny);
2168                                }
2169                                AgentCommand::Shutdown => {
2170                                    cancel_token.cancel();
2171                                }
2172                                AgentCommand::AppendInput(text) => {
2173                                    if let Some(ref mut existing) = pending_input {
2174                                        existing.push('\n');
2175                                        existing.push_str(&text);
2176                                    } else {
2177                                        *pending_input = Some(text);
2178                                    }
2179                                }
2180                                // SyncMessages is handled in the outer loop
2181                                // (after turn completes) because `conv` is
2182                                // mutably borrowed by `turn_fut` here.
2183                                _ => {} // Other commands ignored during turn
2184                            }
2185                        }
2186                    }
2187                };
2188
2189                // Flush any remaining accumulated text to datalog
2190                if !datalog_text_accum.is_empty() {
2191                    datalog.log_model_text(&datalog_text_accum);
2192                }
2193
2194                // turn_tx drops here (owned by this block), turn_fut also drops
2195                (result, turn_rx, *context_collapsed)
2196            };
2197            // All borrows of self.turn_runner are now released.
2198
2199            // Handle context collapse: clear edit tracking so model can re-read
2200            if context_collapsed {
2201                self.turn_runner.recently_edited_files.clear();
2202            }
2203
2204            // Restore conversation
2205            self.conversation = conv;
2206
2207            // Drain remaining events
2208            while let Ok(event) = turn_rx.try_recv() {
2209                self.forward_turn_event(event);
2210            }
2211
2212            // Handle result
2213            match result {
2214                TurnResult::Responded {
2215                    ref text,
2216                    tokens,
2217                    truncated,
2218                } => {
2219                    self.turn_tokens += tokens;
2220                    self.total_tokens += tokens;
2221                    // Log the final assistant text to datalog (TUI used to do this —
2222                    // absorbed here now that TUI's duplicate TurnLog was removed).
2223                    if !text.trim().is_empty() {
2224                        self.datalog.log_text(text);
2225                    }
2226
2227                    // ATLAS subtask extraction: if model just output a plan (FeatureDev,
2228                    // first response with text, no tools used yet), extract subtasks
2229                    // and drive execution file-by-file.
2230                    //
2231                    // Guard: only extract when the model was truncated (it wanted to
2232                    // continue but hit max_tokens). A Natural stop means the model
2233                    // considers its response complete — it may be answering a question,
2234                    // discussing design, or giving examples that mention file names.
2235                    // Extracting subtasks from such text produces phantom plans
2236                    // (e.g. "auth.rs" mentioned as an example gets treated as an
2237                    // edit target, and plan-completion-guard then forces the loop
2238                    // to keep running).
2239                    if self.tool_call_count == 0
2240                        && truncated
2241                        && !text.trim().is_empty()
2242                        && !self.subtask_driver.active
2243                    {
2244                        self.subtask_driver.extract_from_plan(text);
2245                        // Store plan text for adherence reminders
2246                        self.plan_text = Some(text.clone());
2247
2248                        // Graph: check if plan covers all dependent files.
2249                        // If the plan mentions router.rs and weather.rs but both depend
2250                        // on types.rs, warn that types.rs might also need changes.
2251                        if self.subtask_driver.active {
2252                            let graph = self.turn_runner.context.graph.read().await;
2253                            if graph.is_ready() {
2254                                let plan_files: Vec<&str> = self
2255                                    .subtask_driver
2256                                    .subtasks
2257                                    .iter()
2258                                    .map(|s| s.file.as_str())
2259                                    .collect();
2260                                let mut missing_deps: Vec<String> = Vec::new();
2261                                let mut seen = std::collections::HashSet::new();
2262
2263                                for plan_file in &plan_files {
2264                                    seen.insert(plan_file.to_string());
2265                                }
2266
2267                                for plan_file in &plan_files {
2268                                    // Find this file in graph and get its dependencies
2269                                    for (path, _) in &graph.file_symbols {
2270                                        let basename = path
2271                                            .file_name()
2272                                            .map(|f| f.to_string_lossy().to_string())
2273                                            .unwrap_or_default();
2274                                        if basename == *plan_file {
2275                                            // Check files this file depends on (callees' files)
2276                                            let sym_ids = graph.symbols_in_file(path);
2277                                            if let Some(ids) = sym_ids {
2278                                                for &sid in ids.iter().take(20) {
2279                                                    if let Some(edges) = graph.callees(sid) {
2280                                                        for edge in edges {
2281                                                            if let Some(node) = graph.node(edge.to)
2282                                                            {
2283                                                                let dep_name = node
2284                                                                    .file
2285                                                                    .file_name()
2286                                                                    .map(|f| {
2287                                                                        f.to_string_lossy()
2288                                                                            .to_string()
2289                                                                    })
2290                                                                    .unwrap_or_default();
2291                                                                if !dep_name.is_empty()
2292                                                                    && !seen.contains(&dep_name)
2293                                                                    && dep_name != basename
2294                                                                {
2295                                                                    seen.insert(dep_name.clone());
2296                                                                    missing_deps.push(dep_name);
2297                                                                }
2298                                                            }
2299                                                        }
2300                                                    }
2301                                                }
2302                                            }
2303                                            break;
2304                                        }
2305                                    }
2306                                }
2307
2308                                // PLAN CHECK injection: REMOVED. Dependency warnings are not
2309                                // needed — the model discovers deps itself.
2310                                let _ = missing_deps; // suppress unused warning
2311                            }
2312                            drop(graph);
2313                        }
2314
2315                        // Subtask driver serial execution: REMOVED.
2316                        // Was injecting "now edit file X" instructions from regex-extracted
2317                        // plan. Batch prompt now lets model handle multi-file work itself.
2318                        // Sub-agent dispatch also disabled (try_sub_agent_dispatch returns None).
2319                    }
2320
2321                    // finish_reason-based termination dispatch (2026-04-22).
2322                    //
2323                    // The previous code injected `(continuing...)` + `Continue.`
2324                    // when the model returned empty text, under the theory that
2325                    // empty = "was about to say more". In practice this conflated:
2326                    //   (a) finish_reason="length" — real max-token cutoff
2327                    //       mid-generation, retrying does salvage the session
2328                    //   (b) finish_reason="stop" + no text — model cleanly
2329                    //       decided to stop after reading tool results
2330                    //       (e.g. `cargo check` passed, nothing more to say)
2331                    // and cycled case (b) into meaningless `Continue.` loops.
2332                    //
2333                    // CC has no such recovery mechanism — empty-on-stop IS the
2334                    // natural termination (`project_cc_prompt_philosophy.md`).
2335                    //
2336                    // Briefly tried adding an "empty-after-failure" branch
2337                    // (2026-04-22 20:44) but the hermes 20-41 session showed
2338                    // the real issue was upstream in edit.rs `find_closest_match_inner`
2339                    // producing garbage "closest match" hints — the model
2340                    // gave up because the framework's hint was actively
2341                    // misleading, not because it needed more nudging.
2342                    // Reverting to the principled state machine.
2343                    if truncated && self.retry_count < 1 {
2344                        self.retry_count += 1;
2345                        self.conversation.add_user_message(
2346                            "Output limit hit. If the task is already complete, just output a \
2347                             short summary and stop (no tool calls). Otherwise resume where you left off."
2348                        );
2349                        continue;
2350                    }
2351
2352                    self.finish_turn(TurnStopReason::Natural);
2353                    return;
2354                }
2355                TurnResult::UsedTools {
2356                    tool_count,
2357                    tokens,
2358                    text,
2359                } => {
2360                    self.turn_tokens += tokens;
2361                    self.total_tokens += tokens;
2362                    self.tool_call_count += tool_count;
2363                    // Track silent rounds: model used tools without explaining anything.
2364                    let had_text = text.as_ref().map(|t| !t.trim().is_empty()).unwrap_or(false);
2365                    if had_text {
2366                        self.discipline_state.silent_tool_rounds = 0;
2367                    } else {
2368                        self.discipline_state.silent_tool_rounds += 1;
2369                    }
2370
2371                    // Fork sub-agent dispatch is no longer driven by parsing this
2372                    // turn's text. The model invokes `parallel_edit_files`
2373                    // explicitly when it judges parallel edit is the right move.
2374                    // See `crate::tool::parallel_edit` for the active-dispatch
2375                    // tool and `agent/mod.rs::run` for its registration.
2376
2377                    // Post-process: truncate large outputs + externalize to disk
2378                    self.post_process_tool_results(tool_count);
2379
2380                    // ATLAS auto-verify: removed along with the verify module.
2381                    // Model runs build/lint itself when needed.
2382                    // See docs/archive/guardian-auto-compile.md if re-introducing.
2383
2384                    // Safety cap at 200 tool calls — only for runaway cost protection.
2385                    if self.check_step_limit() {
2386                        self.finish_turn(TurnStopReason::StepLimit);
2387                        return;
2388                    }
2389                    // Continue to next turn
2390                    self.phase = AgentPhase::Thinking;
2391                    let _ = self
2392                        .event_tx
2393                        .send(AgentEvent::PhaseChange(AgentPhase::Thinking));
2394                    continue;
2395                }
2396                TurnResult::Failed(e) => {
2397                    // Retry logic for transient errors
2398                    let is_rate_limited = is_rate_limited_error(&e);
2399                    let is_auth_error = is_auth_error(&e);
2400                    let is_messages_illegal = e.contains("illegal") || e.contains("messages");
2401                    // Upstream context-length overflow (OpenRouter 400, OpenAI
2402                    // context_length_exceeded, Anthropic "prompt is too long").
2403                    // Without this, the error fell through to the generic
2404                    // retry branch which slept and re-sent the same oversized
2405                    // request — guaranteed to fail again.
2406                    let is_context_overflow = is_context_overflow_error(&e);
2407                    // Open-source build attempted a CodingPlan-signed request.
2408                    // The signing module isn't compiled in; retrying is
2409                    // guaranteed to fail again. Fail-fast skips the otherwise-
2410                    // useless 3-shot retry (3+6+9s of wasted time + 3 spurious
2411                    // "[API error 请求失败]" lines hardcoded in Chinese that
2412                    // would also display to English-locale users).
2413                    let is_official_build_required = is_codingplan_unavailable_error(&e);
2414
2415                    if is_official_build_required {
2416                        self.datalog.log_error(&e);
2417                        let _ = self.event_tx.send(AgentEvent::Error {
2418                            error: public_error_message(&e),
2419                            messages: self.conversation.messages.clone(),
2420                        });
2421                        self.finish_turn(TurnStopReason::Error);
2422                        return;
2423                    } else if (is_messages_illegal || is_context_overflow) && self.retry_count < 2 {
2424                        self.retry_count += 1;
2425                        let sys_prompt = self.build_system_prompt();
2426                        // Auto-discover the proxy's actually-enforced limit
2427                        // from the error body. Self-built proxies for
2428                        // open-weight models often enforce far less than
2429                        // the configured ctx_window — without parsing the
2430                        // rejection we'd compact toward the wrong target.
2431                        let limit = extract_provider_ctx_limit(&e)
2432                            .unwrap_or_else(|| self.ctx.ctx_window());
2433                        // 5K safety buffer — leaves room for the streaming
2434                        // response and one round of tool results before the
2435                        // next compact would be needed.
2436                        let target = limit.saturating_sub(5_000);
2437                        let recovered = self
2438                            .emergency_compact_to_target(target, &sys_prompt)
2439                            .await;
2440                        let msg = if recovered {
2441                            "\n[Context overflow — recovered via layered compact, retrying...]\n"
2442                                .to_string()
2443                        } else {
2444                            format!(
2445                                "\n[Context overflow — compacted toward {}T but still over, \
2446                                 retrying anyway...]\n",
2447                                target
2448                            )
2449                        };
2450                        let _ = self.event_tx.send(AgentEvent::TextDelta(msg));
2451                        continue;
2452                    } else if is_rate_limited && self.retry_count < 5 {
2453                        self.retry_count += 1;
2454                        let wait = (self.retry_count as u64 * 3).min(30);
2455                        let _ = self.event_tx.send(AgentEvent::TextDelta(format!(
2456                            "\n[Rate limited — retrying in {}s...]\n",
2457                            wait
2458                        )));
2459                        tokio::time::sleep(Duration::from_secs(wait)).await;
2460                        continue;
2461                    } else if is_auth_error {
2462                        self.datalog.log_error(&e);
2463                        let _ = self.event_tx.send(AgentEvent::Error {
2464                            error: public_error_message(&e),
2465                            messages: self.conversation.messages.clone(),
2466                        });
2467                        self.finish_turn(TurnStopReason::Error);
2468                        return;
2469                    } else if self.retry_count < 3 {
2470                        self.retry_count += 1;
2471                        let wait = (self.retry_count as u64 * 3).min(15);
2472                        let reason = public_error_reason(&e);
2473                        let _ = self.event_tx.send(AgentEvent::TextDelta(format!(
2474                            "\n[API error {},{} 秒后重试({}/3)...]\n",
2475                            reason, wait, self.retry_count
2476                        )));
2477                        tokio::time::sleep(Duration::from_secs(wait)).await;
2478                        continue;
2479                    } else {
2480                        self.datalog.log_error(&e);
2481                        let _ = self.event_tx.send(AgentEvent::Error {
2482                            error: public_error_message(&e),
2483                            messages: self.conversation.messages.clone(),
2484                        });
2485                        self.finish_turn(TurnStopReason::Error);
2486                        return;
2487                    }
2488                }
2489                TurnResult::Cancelled => {
2490                    // Check if turn was already cancelled by AgentCommand::Cancel
2491                    // (which marks the turn as Completed immediately)
2492                    if self.conversation.turn_tracker.active_turn().is_none() {
2493                        // Already handled by AgentCommand::Cancel - just return
2494                        return;
2495                    }
2496                    // Preserve completed content + backfill (cancelled) for unpaired tool calls
2497                    self.conversation.cancel_current_turn();
2498                    // Send TurnCancelled event for TUI to sync
2499                    let messages = self.conversation.messages.clone();
2500                    let _ = self.event_tx.send(AgentEvent::TurnCancelled { messages });
2501                    // Do finish_turn's bookkeeping WITHOUT emitting TurnComplete.
2502                    // TurnCancelled already tells the TUI the turn ended; emitting
2503                    // TurnComplete on top buffers a stale "✓ done · N rounds" line
2504                    // that fires the next time the TUI's phase becomes Streaming —
2505                    // i.e. right after the user's next submission.
2506                    // Note: cancel_current_turn() already marks the turn Completed,
2507                    // so complete_current() is a no-op; kept as defensive safety net.
2508                    self.datalog
2509                        .end_turn(self.turn_tokens, self.tool_call_count);
2510                    self.turn_start = None;
2511                    self.phase = AgentPhase::Idle;
2512                    let _ = self
2513                        .event_tx
2514                        .send(AgentEvent::PhaseChange(AgentPhase::Idle));
2515                    self.conversation.save(&Conversation::history_path());
2516                    return;
2517                }
2518            }
2519        }
2520    }
2521
2522    // forward_turn_event → tool_dispatch.rs
2523    // post_process_tool_results → tool_dispatch.rs
2524
2525    /// Pro-active context compaction. Two-stage:
2526    ///
2527    /// 1. **Tier 1 (cheap, mechanical):** collapse old `ToolResult`
2528    ///    bodies into stubs (`compact_old_tool_results_in_place`, the
2529    ///    same generic stub format `microcompact` uses at render time;
2530    ///    keeps the last 3 turns full). Zero LLM calls. Cheap to fire,
2531    ///    easy to revert if model needs the bytes back via re-read.
2532    ///
2533    /// 2. **Tier 2 (expensive, LLM-driven):** if Tier 1 didn't bring
2534    ///    the context under threshold, fall through to LLM-summarize
2535    ///    older turns into the cold zone (existing path).
2536    ///
2537    /// Buffer was retuned 2026-05-06: small windows (≤100K, e.g.
2538    /// self-hosted GLM 65K) now trigger at 60K instead of 52K, so
2539    /// the 5K runway above the trigger lets Tier 1 absorb hits
2540    /// before the proxy 65K wall. Datalog 2026-05-06_19-06-50: 4
2541    /// reactive emergency compactions, each dropping 18-30K
2542    /// catastrophically. With proactive Tier 1 firing 5K below the
2543    /// wall, expected pattern is 3-4 mild Tier 1 events dropping
2544    /// 5-10K each, model retains skeleton + recent turns.
2545    async fn maybe_compress_history(&mut self, system_prompt: &str) {
2546        let sys_tokens = system_prompt.len() / 4 + 4;
2547        if !self.ctx.needs_compression(&self.conversation, sys_tokens) {
2548            return;
2549        }
2550
2551        // ── Tier 1: collapse old tool_results (no LLM call) ──
2552        // Keep the most recent 3 turns at full fidelity; older
2553        // turns get their tool_result bodies replaced with the same
2554        // generic stub microcompact uses at render time. One stub
2555        // format, one place to maintain.
2556        crate::ctx::render::compact_old_tool_results_in_place(
2557            &mut self.conversation,
2558            /* keep_recent_turns */ 3,
2559        );
2560
2561        // Re-check: if Tier 1 was enough, stop here and skip the
2562        // LLM summarization round-trip. This is the common case for
2563        // sessions where the bulk of context is heavy bash/cargo
2564        // outputs.
2565        if !self.ctx.needs_compression(&self.conversation, sys_tokens) {
2566            return;
2567        }
2568
2569        // ── Tier 2: LLM-summarize oldest turns into cold zone ──
2570        let (content, n_turns) = match self.ctx.compression_plan(&self.conversation) {
2571            Some(plan) => plan,
2572            None => return,
2573        };
2574
2575        let summarize_prompt = Self::default_summarize_prompt(&content);
2576
2577        let summary = self.run_llm_summary(&summarize_prompt).await;
2578        let final_summary = if summary.trim().is_empty() {
2579            content
2580        } else {
2581            summary
2582        };
2583
2584        let _ = self.try_apply_compression(system_prompt, n_turns, final_summary, true);
2585    }
2586
2587    /// Emit a full ContextStats snapshot for the `/context` command.
2588    /// Callers pass the conversation and the already-built `msgs` (from
2589    /// `self.ctx.build_messages`) so the estimate reflects exactly what
2590    /// the model would see on the next turn — directives and all. Used by
2591    /// both `handle_send_message` (once per turn, post-build_messages) and
2592    /// `run_compact` (to refresh the cached stats TUI reads for `/context`
2593    /// after an out-of-turn compaction).
2594    async fn emit_rich_context_stats(
2595        &self,
2596        conv: &Conversation,
2597        msgs: &[crate::conversation::message::Message],
2598    ) {
2599        let tool_defs = self.turn_runner.tools.get_definitions().await;
2600        let tool_defs_tokens: usize = tool_defs
2601            .iter()
2602            .map(|d| {
2603                let params = serde_json::to_string(&d.parameters).unwrap_or_default();
2604                (d.name.len() + d.description.len() + params.len()) / 4
2605            })
2606            .sum();
2607        let cold_zone_tokens: usize = conv.cold_summaries.iter().map(|s| s.len() / 4 + 4).sum();
2608        let actual_system_prompt = msgs
2609            .iter()
2610            .find(|m| matches!(m.role, crate::conversation::message::Role::System))
2611            .and_then(|m| m.text().map(|s| s.to_string()))
2612            .unwrap_or_default();
2613        let system_tokens_local = msgs
2614            .iter()
2615            .find(|m| matches!(m.role, crate::conversation::message::Role::System))
2616            .map(|m| m.estimate_tokens())
2617            .unwrap_or(0);
2618        let sent_tokens_local: usize = msgs
2619            .iter()
2620            .map(|m| m.estimate_tokens())
2621            .sum::<usize>()
2622            .saturating_sub(system_tokens_local);
2623        let total_messages_local = msgs.len();
2624        let _ = self.event_tx.send(AgentEvent::ContextStats {
2625            system_tokens: system_tokens_local,
2626            sent_tokens: sent_tokens_local,
2627            dropped_tokens: 0,
2628            working_set_tokens: 0,
2629            total_messages: total_messages_local,
2630            tool_defs_tokens,
2631            cold_zone_tokens,
2632            ctx_window: self.ctx.ctx_window(),
2633            ctx_name: self.ctx.name().to_string(),
2634            system_prompt: actual_system_prompt,
2635        });
2636    }
2637
2638    /// Post-compression task state restoration. After compression the model
2639    /// loses track of what it was doing — inject a short status so it can
2640    /// resume without re-exploring. Shared by auto-compact (threshold-driven
2641    /// in `maybe_compress_history`) and manual `/compact`.
2642    fn inject_post_compress_state(&mut self) {
2643        if let Some(msg) = build_post_compress_state(
2644            &self.current_task,
2645            &self.files_edited_this_turn,
2646            &self.files_read_this_turn,
2647        ) {
2648            self.conversation.add_user_message(&msg);
2649        }
2650    }
2651
2652    fn rendered_token_count(&self, system_prompt: &str) -> usize {
2653        self.ctx
2654            .build_messages(&self.conversation, system_prompt, "")
2655            .0
2656            .iter()
2657            .map(|m| m.estimate_tokens())
2658            .sum()
2659    }
2660
2661    /// Apply a compression candidate only when it reduces the next request
2662    /// payload. This is the single success criterion for all compression
2663    /// entry points: manual `/compact`, threshold-driven auto-compression,
2664    /// and task-boundary cleanup.
2665    fn try_apply_compression(
2666        &mut self,
2667        system_prompt: &str,
2668        remove_count: usize,
2669        summary: String,
2670        inject_state: bool,
2671    ) -> CompressionOutcome {
2672        let before_msg_count = self.conversation.messages.len();
2673        let before_tokens = self.rendered_token_count(system_prompt);
2674
2675        let msgs_snapshot = self.conversation.messages.clone();
2676        let cold_snapshot = self.conversation.cold_summaries.clone();
2677        let turns_snapshot = self.conversation.turn_tracker.clone();
2678
2679        self.conversation.apply_compression(remove_count, summary);
2680        if inject_state {
2681            self.inject_post_compress_state();
2682        }
2683
2684        let after_tokens = self.rendered_token_count(system_prompt);
2685        let removed_messages = before_msg_count.saturating_sub(self.conversation.messages.len());
2686
2687        if after_tokens >= before_tokens {
2688            self.conversation.messages = msgs_snapshot;
2689            self.conversation.cold_summaries = cold_snapshot;
2690            self.conversation.turn_tracker = turns_snapshot;
2691            CompressionOutcome {
2692                applied: false,
2693                before_tokens,
2694                after_tokens,
2695                removed_messages: 0,
2696            }
2697        } else {
2698            CompressionOutcome {
2699                applied: true,
2700                before_tokens,
2701                after_tokens,
2702                removed_messages,
2703            }
2704        }
2705    }
2706
2707    /// D2 emergency compact — layered, measured, never combines destructive
2708    /// ops. Replaces the previous "LLM-compress + blind truncate(len-4)"
2709    /// path that destroyed last-turn context (datalog atomgr-2d99b47d/
2710    /// 2026-05-06_08-43-12: 65K → 8516 tokens because compression THEN a
2711    /// 4-message truncate ran back-to-back, and the truncate dropped
2712    /// exactly the recent file reads the user needed for "继续").
2713    ///
2714    /// Each tier checks budget against `target` and breaks at the first
2715    /// sufficient tier. Returns true if any tier reached the target.
2716    ///
2717    /// Tiers (least → most destructive):
2718    /// 1. Collapse old tool_results (keep last 3 turns full).
2719    /// 2. LLM-summarize older turns into cold zone.
2720    /// 3. Hard token-driven truncate (drops oldest until under target,
2721    ///    snapping to safe boundaries; the last user message is sacred).
2722    async fn emergency_compact_to_target(
2723        &mut self,
2724        target_tokens: usize,
2725        system_prompt: &str,
2726    ) -> bool {
2727        let sys_tokens = system_prompt.len() / 4 + 4;
2728        let estimate = |conv: &Conversation| -> usize {
2729            sys_tokens + conv.messages.iter().map(|m| m.estimate_tokens()).sum::<usize>()
2730        };
2731
2732        if estimate(&self.conversation) <= target_tokens {
2733            return true;
2734        }
2735
2736        // Tier 1: collapse heavy tool results in older turns.
2737        crate::ctx::render::compact_old_tool_results_in_place(
2738            &mut self.conversation,
2739            /* keep_recent_turns */ 3,
2740        );
2741        if estimate(&self.conversation) <= target_tokens {
2742            return true;
2743        }
2744
2745        // Tier 2: LLM-summarize older turns into the cold zone. This is
2746        // the most expensive tier (it makes a network round trip), so
2747        // we only reach it after Tier 1 already failed.
2748        self.maybe_compress_history(system_prompt).await;
2749        if estimate(&self.conversation) <= target_tokens {
2750            return true;
2751        }
2752
2753        // Tier 3: hard truncate to fit. Token-driven, not message-count
2754        // driven. The previous code did `truncate(len - 4)` blindly
2755        // which is what produced the 8516-token catastrophe.
2756        hard_truncate_to_target(&mut self.conversation, target_tokens, sys_tokens);
2757        estimate(&self.conversation) <= target_tokens
2758    }
2759
2760    /// Manual `/compact` entry point. Mechanical only — reuses the active
2761    /// ctx strategy's `compression_plan` (same path as the task-boundary
2762    /// cleanup in `handle_send_message`) so behavior stays consistent with
2763    /// the rest of the codebase. `_prompt` is accepted for forward-compat
2764    /// with a future LLM-guided summarize path and ignored today.
2765    ///
2766    /// Net-savings guard: on terse conversations the cold-zone summary
2767    /// header + `inject_post_compress_state` inject can weigh more than
2768    /// the dropped messages, so compaction would silently inflate the
2769    /// prompt. We measure before/after token totals via `build_messages`
2770    /// (post all render-pipeline effects — `clean_message_pipeline`,
2771    /// microcompact, etc.) and roll the conversation back if the
2772    /// operation didn't actually shrink the wire payload. Analytical
2773    /// projection was tried first but too many render-pipeline branches
2774    /// made it unreliable.
2775    async fn run_compact(&mut self, prompt: Option<String>) {
2776        let system_prompt = self.build_system_prompt();
2777        let Some((mechanical_content, n_msgs)) = self.ctx.compression_plan(&self.conversation)
2778        else {
2779            let _ = self.event_tx.send(AgentEvent::TextDelta(
2780                crate::i18n::t(crate::i18n::Msg::CompactNothingShort).into_owned(),
2781            ));
2782            return;
2783        };
2784
2785        let _ = self.event_tx.send(AgentEvent::TextDelta(
2786            crate::i18n::t(crate::i18n::Msg::CompactStarting).into_owned(),
2787        ));
2788
2789        // Try LLM summarization (with optional custom prompt)
2790        let summarize_prompt = if let Some(ref custom) = prompt {
2791            format!(
2792                "Summarize this conversation history, focusing on: {}.\n\
2793                 Keep: file names, what was changed, key decisions, errors encountered.\n\
2794                 Drop: exact code content, tool arguments, line numbers.\n\n{}",
2795                custom, mechanical_content
2796            )
2797        } else {
2798            Self::default_summarize_prompt(&mechanical_content)
2799        };
2800
2801        let summary = self.run_llm_summary(&summarize_prompt).await;
2802        let content = if summary.trim().is_empty() {
2803            mechanical_content
2804        } else {
2805            summary
2806        };
2807
2808        let outcome = self.try_apply_compression(&system_prompt, n_msgs, content, true);
2809
2810        if !outcome.applied {
2811            let before = fmt_k_tokens(outcome.before_tokens);
2812            let after = fmt_k_tokens(outcome.after_tokens);
2813            let _ = self.event_tx.send(AgentEvent::TextDelta(
2814                crate::i18n::t(crate::i18n::Msg::CompactNothingNoSavings {
2815                    before: &before,
2816                    after: &after,
2817                })
2818                .into_owned(),
2819            ));
2820            let (msgs, _) =
2821                self.ctx
2822                    .build_messages(&self.conversation, &system_prompt, "");
2823            self.emit_rich_context_stats(&self.conversation, &msgs).await;
2824            return;
2825        }
2826
2827        let before = fmt_k_tokens(outcome.before_tokens);
2828        let after = fmt_k_tokens(outcome.after_tokens);
2829        let _ = self.event_tx.send(AgentEvent::TextDelta(
2830            crate::i18n::t(crate::i18n::Msg::CompactDropped {
2831                messages: outcome.removed_messages,
2832                before: &before,
2833                after: &after,
2834            })
2835            .into_owned(),
2836        ));
2837
2838        let (msgs, _) = self
2839            .ctx
2840            .build_messages(&self.conversation, &system_prompt, "");
2841        self.emit_rich_context_stats(&self.conversation, &msgs)
2842            .await;
2843    }
2844
2845    fn default_summarize_prompt(content: &str) -> String {
2846        format!(
2847            "Summarize this conversation history in 3-5 concise sentences. \
2848             Keep: file names, what was changed, key decisions, errors encountered. \
2849             Drop: exact code content, tool arguments, line numbers.\n\n{}",
2850            content
2851        )
2852    }
2853
2854    /// Run a lightweight LLM call to summarize content. Returns empty string on failure.
2855    async fn run_llm_summary(&self, prompt: &str) -> String {
2856        let mut mini_conv = crate::conversation::Conversation::new();
2857        mini_conv.add_user_message(prompt);
2858        let msgs = mini_conv
2859            .to_provider_messages("You are a conversation summarizer. Output ONLY the summary.");
2860
2861        let mut summary = String::new();
2862        if let Ok(mut stream) = self.turn_runner.provider.chat_stream(&msgs, None) {
2863            use futures::StreamExt;
2864            let first_timeout = std::time::Duration::from_secs(30);
2865            let stream_timeout = std::time::Duration::from_secs(30);
2866            let mut got_token = false;
2867            loop {
2868                let timeout = if got_token {
2869                    stream_timeout
2870                } else {
2871                    first_timeout
2872                };
2873                match tokio::time::timeout(timeout, stream.next()).await {
2874                    Ok(Some(Ok(crate::stream::StreamEvent::Delta(text)))) => {
2875                        got_token = true;
2876                        let clean = text
2877                            .replace("<think>", "")
2878                            .replace("</think>", "")
2879                            .replace("<|im_start|>", "")
2880                            .replace("<|im_end|>", "");
2881                        summary.push_str(&clean);
2882                    }
2883                    Ok(Some(Ok(crate::stream::StreamEvent::Done { .. }))) => break,
2884                    Ok(Some(Ok(_))) => continue,
2885                    _ => break,
2886                }
2887            }
2888        }
2889        summary
2890    }
2891
2892    fn finish_turn(&mut self, stop_reason: TurnStopReason) {
2893        // Error exits must not leave the user's message in the history
2894        // as an "orphan turn" (user message with no assistant reply).
2895        // The next send_message would then stack another user message
2896        // on top of it — an API call with two consecutive user turns
2897        // and no intervening assistant, which weak models respond to
2898        // with 0 tokens (see test 3 / 4: MiniMax-M2.7 returns empty
2899        // after a failed localhost turn). Cancel the turn instead so
2900        // the next user message starts from a clean transcript.
2901        //
2902        // Counters (turn_count / turn_tokens / tool_call_count) stay
2903        // UNTOUCHED here so the TurnComplete event below still carries
2904        // accurate stats for the UI's "✓ Nailed it · N rounds · M tok"
2905        // line. `start_turn` resets them for the next message.
2906        if matches!(stop_reason, TurnStopReason::Error) {
2907            self.conversation.cancel_current_turn_including_user();
2908        } else {
2909            self.conversation.turn_tracker.complete_current();
2910        }
2911
2912        // Auto-commit edited files if enabled
2913        if self.config.auto_commit
2914            && !matches!(stop_reason, TurnStopReason::Error)
2915            && !self.files_edited_this_turn.is_empty()
2916        {
2917            let wd = self
2918                .turn_runner
2919                .context
2920                .working_dir
2921                .try_read()
2922                .map(|g| g.clone())
2923                .unwrap_or_default();
2924            match git_auto_commit::auto_commit_edited_files(&wd, &self.files_edited_this_turn) {
2925                git_auto_commit::AutoCommitOutcome::Committed { sha, message } => {
2926                    let notice = format!("\n[auto-commit {sha}] {message}\n");
2927                    self.datalog.log_model_text(&notice);
2928                    let _ = self.event_tx.send(AgentEvent::TextDelta(notice));
2929                }
2930                git_auto_commit::AutoCommitOutcome::Failed { reason } => {
2931                    let notice = format!("\n[auto-commit skipped] {reason}\n");
2932                    self.datalog.log_error(&notice);
2933                    let _ = self.event_tx.send(AgentEvent::TextDelta(notice));
2934                }
2935                git_auto_commit::AutoCommitOutcome::Skipped { reason } => {
2936                    self.datalog
2937                        .log_model_text(&format!("[auto-commit skipped] {reason}"));
2938                }
2939            }
2940        }
2941
2942        // Flush datalog with final stats
2943        self.datalog
2944            .end_turn(self.turn_tokens, self.tool_call_count);
2945
2946        let duration = self.turn_start.map(|t| t.elapsed()).unwrap_or_default();
2947        self.turn_start = None;
2948        self.phase = AgentPhase::Idle;
2949        let _ = self.event_tx.send(AgentEvent::TurnComplete {
2950            duration,
2951            total_tokens: self.turn_tokens,
2952            turn_count: self.turn_count,
2953            tool_call_count: self.tool_call_count,
2954            stop_reason,
2955            messages: self.conversation.messages.clone(),
2956        });
2957        let _ = self
2958            .event_tx
2959            .send(AgentEvent::PhaseChange(AgentPhase::Idle));
2960        self.conversation.save(&Conversation::history_path());
2961    }
2962
2963    // store_tool_result → tool_dispatch.rs
2964
2965    // change_dir → services.rs
2966
2967    // try_sub_agent_dispatch → REMOVED. Fork sub-agent dispatch is now
2968    // ACTIVE: the model invokes `parallel_edit_files` (see
2969    // `crate::tool::parallel_edit`) when it judges parallel edit is the
2970    // right move. The framework no longer parses plan text or guesses
2971    // intent — eliminating ~250 lines of heuristics, ~70 hardcoded
2972    // intent-keywords across two iterations of failed gate logic, and
2973    // an entire class of mis-fire failures (read-only turns dispatching
2974    // 6 fork sub-agents that fake edits or no-op).
2975}
2976
2977
2978fn track_tool_modified_files(
2979    tool_name: &str,
2980    bash_command: &str,
2981    output: &str,
2982    edited_files: &mut Vec<String>,
2983) {
2984    if tool_name == "bash" {
2985        track_bash_modified_files(bash_command, output, edited_files);
2986    } else if tool_name == "search_replace" {
2987        track_search_replace_files(output, edited_files);
2988    }
2989}
2990
2991fn track_bash_modified_files(command: &str, output: &str, edited_files: &mut Vec<String>) {
2992    let Some(cwd) = bash_output_cwd(output) else {
2993        return;
2994    };
2995
2996    for file in rm_file_targets(command, &cwd) {
2997        push_edited_file(edited_files, file);
2998    }
2999    for file in bash_workspace_modified_files(output, &cwd) {
3000        push_edited_file(edited_files, file);
3001    }
3002}
3003
/// Find the working directory reported in bash tool output.
///
/// Scans lines from the end and returns the path of the last line that
/// has the exact shape `[cwd: <path>]`; `None` if there is no such line.
fn bash_output_cwd(output: &str) -> Option<PathBuf> {
    for line in output.lines().rev() {
        let Some(rest) = line.strip_prefix("[cwd: ") else {
            continue;
        };
        if let Some(dir) = rest.strip_suffix(']') {
            return Some(PathBuf::from(dir));
        }
    }
    None
}
3011
/// Parse the first `[workspace modified via bash: …` line of bash tool
/// output into a list of paths.
///
/// The file list is the comma-separated text up to the first `". If "`
/// (or the whole remainder when that separator is absent). Empty entries
/// and entries starting with `+` are dropped. Relative entries are
/// resolved against `cwd`; absolute ones are kept as they are. Returns an
/// empty vector when no marker line exists.
fn bash_workspace_modified_files(output: &str, cwd: &std::path::Path) -> Vec<String> {
    const MARKER: &str = "[workspace modified via bash: ";

    let Some(rest) = output.lines().find_map(|line| line.strip_prefix(MARKER)) else {
        return Vec::new();
    };

    let listed = match rest.split(". If ").next() {
        Some(head) => head,
        None => rest,
    };

    let mut files = Vec::new();
    for entry in listed.split(',') {
        let entry = entry.trim();
        if entry.is_empty() || entry.starts_with('+') {
            continue;
        }
        let candidate = std::path::Path::new(entry);
        let resolved = if candidate.is_absolute() {
            candidate.to_path_buf()
        } else {
            cwd.join(candidate)
        };
        files.push(resolved.to_string_lossy().to_string());
    }
    files
}
3039
3040fn track_search_replace_files(output: &str, edited_files: &mut Vec<String>) {
3041    for line in output.lines() {
3042        let trimmed = line.trim_start();
3043        let Some((path, _summary)) = trimmed.split_once(" (") else {
3044            continue;
3045        };
3046        if path.is_empty() {
3047            continue;
3048        }
3049        push_edited_file(edited_files, path.to_string());
3050    }
3051}
3052
/// Collect the paths named by non-recursive `rm` invocations in a bash
/// command line.
///
/// The command is tokenized with [`shell_words`]. Every token equal to
/// `rm` starts an invocation that runs until a `&&`, `||`, `;` or `|`
/// token (or the end of the command). Within an invocation:
///
/// * option tokens (leading `-`) are skipped;
/// * a bare `--` ends option parsing, so later dash-prefixed tokens are
///   treated as paths;
/// * all other tokens are paths, resolved against `cwd` unless absolute.
///
/// If the invocation carries a recursive flag (see
/// `is_recursive_rm_flag`) its paths are discarded entirely, because
/// they name directory trees rather than individual files.
fn rm_file_targets(command: &str, cwd: &std::path::Path) -> Vec<String> {
    let tokens = shell_words(command);
    let mut targets = Vec::new();
    let mut i = 0;
    while i < tokens.len() {
        if tokens[i] != "rm" {
            i += 1;
            continue;
        }

        i += 1;
        let mut rm_targets = Vec::new();
        let mut recursive = false;
        let mut options_ended = false;
        while i < tokens.len() {
            let token = tokens[i].as_str();
            if matches!(token, "&&" | "||" | ";" | "|") {
                break;
            }
            i += 1;

            if !options_ended && token.starts_with('-') {
                if token == "--" {
                    options_ended = true;
                } else if is_recursive_rm_flag(token) {
                    recursive = true;
                }
                continue;
            }

            let path = std::path::Path::new(token);
            let full_path = if path.is_absolute() {
                path.to_path_buf()
            } else {
                cwd.join(path)
            };
            rm_targets.push(full_path.to_string_lossy().to_string());
        }

        if !recursive {
            targets.extend(rm_targets);
        }
    }
    targets
}

/// Whether an `rm` option token requests recursive deletion.
///
/// Short option clusters (`-r`, `-R`, `-rf`, `-fR`, …) are recursive when
/// they contain `r` or `R`. Long options are recursive only when they
/// spell `--recursive` or a prefix of it (GNU option parsing accepts
/// abbreviated long options); other long options that merely contain
/// the letter `r` — `--force`, `--verbose`, `--interactive`,
/// `--preserve-root` — are not.
fn is_recursive_rm_flag(flag: &str) -> bool {
    match flag.strip_prefix("--") {
        Some(long) => !long.is_empty() && "recursive".starts_with(long),
        None => flag.contains('r') || flag.contains('R'),
    }
}

/// Append `file` to `edited_files` unless an equal entry is already
/// present, preserving first-seen order.
fn push_edited_file(edited_files: &mut Vec<String>, file: String) {
    if !edited_files.contains(&file) {
        edited_files.push(file);
    }
}

/// Rough shell tokenizer: splits on whitespace, strips quote, backtick,
/// bracket/brace/paren and comma characters from both ends of every
/// token, and drops tokens that end up empty.
///
/// This is not a real shell parser — quoted arguments containing spaces
/// are split, and operators glued to a word (`foo;`) stay attached.
fn shell_words(raw: &str) -> Vec<String> {
    raw.split_whitespace()
        .map(|token| {
            token.trim_matches(|c| {
                matches!(
                    c,
                    '"' | '\'' | '`' | '(' | ')' | '[' | ']' | '{' | '}' | ','
                )
            })
        })
        .filter(|token| !token.is_empty())
        .map(|token| token.to_string())
        .collect()
}
3116
/// Decide whether a `ReloadConfig` must wipe the conversation history.
///
/// Historically the history was cleared whenever the `default_provider`
/// name changed. That was too aggressive: CodingPlan registers one provider
/// entry per model, so swapping Kimi ↔ GLM via `/model` threw the context
/// away every time, even though both entries share the `openai` type and
/// the known cross-model differences (reasoning_content echo policy,
/// DeepSeek content-field requirement, tool_call args JSON repair) are
/// handled in the per-provider send path.
///
/// Policy implemented here:
/// - Both provider types known and equal → keep history (names are not
///   consulted). Covers the common Kimi/GLM/DeepSeek-through-AtomGit swap.
/// - Both provider types known and different (e.g. openai → claude) →
///   clear, because tool_call id formats and tool_use block translation
///   between OpenAI-shaped and Anthropic-shaped messages have not been
///   proven round-trip clean.
/// - Either type unresolved (e.g. the old provider was removed from the
///   config) → clear only when the provider name changed, which is the
///   pre-existing safe default.
fn reload_should_clear_conversation(
    old_name: &str,
    old_type: Option<&str>,
    new_name: &str,
    new_type: Option<&str>,
) -> bool {
    if let (Some(previous), Some(current)) = (old_type, new_type) {
        return previous != current;
    }
    old_name != new_name
}
3148
// NOTE: the paragraph below describes D2 Tier 1, not the function that
// follows. Judging by the tests in this file, Tier 1 appears to be
// implemented by `crate::ctx::render::compact_old_tool_results_in_place`;
// the text is kept here as background only.
//
// D2 Tier 1: replace the `output` of every ToolResult in turns older
// than the last `keep_recent_turns` with a one-line stub. Cheapest
// destructive tier — preserves the conversation skeleton (assistant
// text, tool-call shapes, paired result IDs) so the model can still
// reason about *what was attempted*, just not the heavy outputs.
//
// The previous emergency path (`truncate(len - 4)`) destroyed the
// skeleton too. Keeping it intact is what lets the model resume
// after compaction without re-exploring.
3158/// D2 Tier 3: drop oldest messages until total tokens (incl. system) <=
3159/// `target_tokens`. Token-driven — never drops a fixed number of
3160/// messages, since that's how `truncate(len - 4)` corrupted state.
3161///
3162/// Sacred invariants (won't violate even if it means staying over budget):
3163/// 1. The last `User` message is kept (current task anchor).
3164/// 2. The drop boundary snaps to a turn boundary so we never split a
3165///    `tool_call` from its paired `tool_result`.
3166fn hard_truncate_to_target(
3167    conv: &mut crate::conversation::Conversation,
3168    target_tokens: usize,
3169    sys_tokens: usize,
3170) {
3171    use crate::conversation::message::{MessageContent, Role};
3172    if conv.messages.is_empty() {
3173        return;
3174    }
3175    let total_budget = target_tokens.saturating_sub(sys_tokens);
3176
3177    // Find the last User message — it must survive.
3178    let last_user_idx = conv
3179        .messages
3180        .iter()
3181        .enumerate()
3182        .rev()
3183        .find(|(_, m)| m.role == Role::User)
3184        .map(|(i, _)| i);
3185
3186    let mut kept_tokens = 0usize;
3187    let mut keep_from = conv.messages.len();
3188    for i in (0..conv.messages.len()).rev() {
3189        let mt = conv.messages[i].estimate_tokens();
3190        // Always keep the last user message regardless of budget.
3191        let is_sacred = Some(i) == last_user_idx;
3192        if !is_sacred && kept_tokens + mt > total_budget && keep_from < conv.messages.len() {
3193            break;
3194        }
3195        kept_tokens += mt;
3196        keep_from = i;
3197    }
3198
3199    // Snap forward: don't start at a ToolResult orphan (its paired
3200    // assistant tool_call would be in the dropped section). Keep
3201    // walking forward until we land on a User or AssistantText message.
3202    while keep_from < conv.messages.len() {
3203        match &conv.messages[keep_from].content {
3204            MessageContent::ToolResult(_) | MessageContent::ToolResultRef(_) => {
3205                keep_from += 1;
3206            }
3207            _ => break,
3208        }
3209    }
3210    // Don't skip past the sacred last-user index even if the boundary
3211    // walker would have taken us there — better to ship one stub
3212    // tool_result than to drop the user msg.
3213    if let Some(lu) = last_user_idx {
3214        keep_from = keep_from.min(lu);
3215    }
3216
3217    if keep_from > 0 {
3218        conv.messages.drain(0..keep_from);
3219        conv.turn_tracker = crate::conversation::turn::TurnTracker::rebuild(&conv.messages);
3220    }
3221}
3222
/// Report whether an upstream API error string says the request exceeded
/// the model's context-length budget.
///
/// The check is a case-sensitive substring match against the phrasings used
/// by OpenRouter's verbose 400 message, OpenAI's `context_length_exceeded`
/// code and Anthropic's "prompt is too long". The retry path uses it to
/// route into the compression branch instead of re-sending the same
/// oversized request.
fn is_context_overflow_error(e: &str) -> bool {
    const OVERFLOW_MARKERS: [&str; 5] = [
        "context length",
        "context_length_exceeded",
        "maximum context",
        "prompt is too long",
        "reduce the length",
    ];
    OVERFLOW_MARKERS.iter().any(|&marker| e.contains(marker))
}
3236
3237/// Extract the provider's actually-enforced context limit from a 400/
3238/// overflow error message, if it's discoverable. Used by D2 emergency
3239/// compaction so we compact toward the *real* limit (proxy-enforced)
3240/// rather than the configured ctx_window — which can be much larger
3241/// than what the upstream actually accepts.
3242///
3243/// Self-built proxies for open-weight models are the worst offender:
3244/// the model is nominally 128K but the proxy enforces 64K, and the
3245/// framework can't know that without parsing the rejection.
3246///
3247/// Recognised shapes (case-sensitive, all observed in real datalogs):
3248/// - OpenAI / GLM proxy: `maximum context length is 65536 tokens`
3249/// - OpenRouter: `This endpoint's maximum context length is 200000 tokens`
3250/// - Generic: `context length of 32768`
3251/// - Anthropic-ish: `prompt is too long: 200000 tokens > 200000 maximum`
3252fn extract_provider_ctx_limit(e: &str) -> Option<usize> {
3253    use std::sync::OnceLock;
3254    static RE: OnceLock<regex::Regex> = OnceLock::new();
3255    let re = RE.get_or_init(|| {
3256        // Three anchors, any of them satisfies. Number captured is the
3257        // smallest plausible limit token count (≥ 1024 — drop very small
3258        // numbers that appear in unrelated parts of error bodies).
3259        regex::Regex::new(
3260            r"(?:maximum context length (?:is|of)|context length of|context length limit (?:is|of)|tokens? > (?P<rhs>\d+))\s*(?P<lhs>\d+)?",
3261        )
3262        .expect("valid regex")
3263    });
3264    for caps in re.captures_iter(e) {
3265        let n = caps
3266            .name("lhs")
3267            .or_else(|| caps.name("rhs"))
3268            .and_then(|m| m.as_str().parse::<usize>().ok());
3269        // Filter out tiny numbers that aren't real ctx limits — every
3270        // realistic context window is at least a few thousand tokens.
3271        if let Some(n) = n {
3272            if n >= 1024 {
3273                return Some(n);
3274            }
3275        }
3276    }
3277    None
3278}
3279
/// Report whether an upstream error string indicates rate limiting or
/// gateway overload.
///
/// Recognised signals:
/// - the HTTP status `429` as a standalone number (not embedded in a longer
///   digit run such as a token count like `14290`);
/// - the phrase `Too Many` (as in "429 Too Many Requests");
/// - a word starting with `rate`, ASCII case-insensitive (`rate limit`,
///   `rate_limit_exceeded`, `Rate limit reached`, `X-RateLimit-…`). The
///   letters `rate` inside another word ("generate", "moderate") do not
///   count;
/// - Chinese gateway-side overload / throttling messages.
fn is_rate_limited_error(e: &str) -> bool {
    // English / HTTP standard patterns.
    let has_status_429 = e.match_indices("429").any(|(start, hit)| {
        let digit_before = e[..start]
            .chars()
            .next_back()
            .map_or(false, |c| c.is_ascii_digit());
        let digit_after = e[start + hit.len()..]
            .chars()
            .next()
            .map_or(false, |c| c.is_ascii_digit());
        !digit_before && !digit_after
    });
    if has_status_429 || e.contains("Too Many") {
        return true;
    }

    // ASCII lowercasing keeps byte offsets stable, so slicing by match
    // index stays on char boundaries even with non-ASCII text around.
    let lowered = e.to_ascii_lowercase();
    let mentions_rate = lowered.match_indices("rate").any(|(start, _)| {
        !lowered[..start]
            .chars()
            .next_back()
            .map_or(false, |c| c.is_ascii_alphabetic())
    });
    if mentions_rate {
        return true;
    }

    // Chinese / gateway-side patterns. GitCode's litellm proxy on
    // glm-5.1 returns the user-facing 「模型「X」的请求负载过高,
    // 请稍后再试」 message via in-stream SSE (then closes the
    // connection without [DONE], surfaced as StreamEvent::Error by
    // openai.rs's abrupt-close discriminator). Without these
    // patterns the error fell through to the generic 3-shot retry
    // branch — proper rate-limit handling (5 retries, 3-30s
    // exponential backoff) only fires when this matches.
    e.contains("请求负载过高")
        || e.contains("请求过于频繁")
        || e.contains("服务繁忙")
        || e.contains("限流")
}
3298
/// Report whether an upstream error string looks like an authentication or
/// authorization failure.
///
/// Case-sensitive substring match. The status codes are matched with a
/// trailing space (`"401 "`, `"403 "`), so a bare `401` at the very end of
/// the message does not count on its own.
fn is_auth_error(e: &str) -> bool {
    const AUTH_MARKERS: [&str; 6] = [
        "401 ",
        "403 ",
        "Unauthorized",
        "Forbidden",
        "invalid_api_key",
        "incorrect_api_key",
    ];
    AUTH_MARKERS.iter().any(|&marker| e.contains(marker))
}
3307
/// Report whether the error came from `build_codingplan_headers` failing
/// with `SignError::Unavailable` — an open-source AtomCode build attempted
/// a request that needs the closed-source signing module.
///
/// This condition is **terminal**: retrying cannot produce a valid
/// signature in this binary, the user has to install the official release.
/// The retry classifier therefore fails fast instead of running the usual
/// 3-shot retry cycle.
///
/// Detection keys on the official releases URL path. Both the English
/// (`Msg::CpOfficialBuildRequired`) and the Chinese message embed it
/// verbatim and the URL is not localised, so one substring covers both
/// locales without depending on translated text.
fn is_codingplan_unavailable_error(e: &str) -> bool {
    const OFFICIAL_RELEASES_PATH: &str = "atomgit_atomcode/atomcode/releases";
    e.find(OFFICIAL_RELEASES_PATH).is_some()
}
3323
/// Whether raw upstream API errors should be shown to the user as-is.
///
/// Controlled by the `ATOMCODE_SHOW_RAW_API_ERROR` environment variable.
/// Raw output is the default: it is switched off only when the variable is
/// set to exactly `0`, `false`, `FALSE`, `no` or `NO`. An unset or
/// non-Unicode variable, or any other value, leaves raw output enabled.
fn should_show_raw_api_error() -> bool {
    const OFF_VALUES: [&str; 5] = ["0", "false", "FALSE", "no", "NO"];

    match std::env::var("ATOMCODE_SHOW_RAW_API_ERROR") {
        Ok(value) => !OFF_VALUES.contains(&value.as_str()),
        Err(_) => true,
    }
}
3330
3331fn public_error_reason(e: &str) -> &'static str {
3332    if is_context_overflow_error(e) {
3333        "上下文过长"
3334    } else if is_auth_error(e) {
3335        "认证失败或无权限"
3336    } else if is_rate_limited_error(e) {
3337        "请求过于频繁或额度已用尽"
3338    } else if e.contains("Stream timeout") || e.contains("no event for") {
3339        "模型响应超时"
3340    } else if e.contains("Connection failed")
3341        || e.contains("dns")
3342        || e.contains("TLS")
3343        || e.contains("certificate")
3344        || e.contains("connect")
3345    {
3346        "网络连接失败"
3347    } else if e.contains("500")
3348        || e.contains("502")
3349        || e.contains("503")
3350        || e.contains("504")
3351        || e.contains("Internal Server Error")
3352        || e.contains("Bad Gateway")
3353        || e.contains("Service Unavailable")
3354        || e.contains("Gateway Timeout")
3355    {
3356        "上游服务暂时不可用"
3357    } else if e.contains("400") {
3358        "请求参数无效"
3359    } else {
3360        "请求失败"
3361    }
3362}
3363
3364fn public_error_message(e: &str) -> String {
3365    if should_show_raw_api_error() {
3366        return e.to_string();
3367    }
3368
3369    match public_error_reason(e) {
3370        "上下文过长" => {
3371            "请求超过了模型上下文长度限制。请减少附加内容或缩短会话历史后重试。".to_string()
3372        }
3373        "认证失败或无权限" => {
3374            "认证失败或当前账号无权限访问该模型。请检查 API Key 和提供方权限配置。".to_string()
3375        }
3376        "请求过于频繁或额度已用尽" => {
3377            "请求过于频繁,或当前额度已用尽。请稍后再试。".to_string()
3378        }
3379        "模型响应超时" => "模型响应超时,请稍后重试。".to_string(),
3380        "网络连接失败" => "连接模型服务失败,请检查网络后重试。".to_string(),
3381        "上游服务暂时不可用" => "模型服务暂时不可用,请稍后重试。".to_string(),
3382        "请求参数无效" => "请求被模型服务拒绝,请调整输入后重试。".to_string(),
3383        _ => e.to_string(),
3384    }
3385}
3386
/// Assemble the status note injected after compaction so the model can
/// resume without re-exploring.
///
/// The note has up to three lines, in this order:
/// - `TASK:` — the first 200 characters of `current_task`;
/// - `FILES EDITED:` — every entry of `files_edited`, comma-separated;
/// - `RECENTLY READ:` — the last five entries of `files_read`, newest first.
///
/// A line is omitted when its input is empty. Returns `None` when all three
/// inputs are empty, in which case the caller skips the injection.
///
/// Kept as a free function so truncation and formatting can be tested
/// without constructing a full `AgentLoop`.
fn build_post_compress_state(
    current_task: &str,
    files_edited: &[String],
    files_read: &[String],
) -> Option<String> {
    let task_line = if current_task.is_empty() {
        None
    } else {
        // Truncate by chars, not bytes: user messages may be multi-byte
        // (CJK) and a byte slice could split a character.
        let clipped: String = current_task.chars().take(200).collect();
        Some(format!("TASK: {}", clipped))
    };

    let edited_line = if files_edited.is_empty() {
        None
    } else {
        Some(format!("FILES EDITED: {}", files_edited.join(", ")))
    };

    let read_line = if files_read.is_empty() {
        None
    } else {
        let newest_first: Vec<&str> = files_read
            .iter()
            .rev()
            .take(5)
            .map(String::as_str)
            .collect();
        Some(format!("RECENTLY READ: {}", newest_first.join(", ")))
    };

    let lines: Vec<String> = vec![task_line, edited_line, read_line]
        .into_iter()
        .flatten()
        .collect();
    if lines.is_empty() {
        return None;
    }
    Some(format!(
        "[Context was compressed. Here is your current state:]\n{}",
        lines.join("\n")
    ))
}
3426
/// Render a token count for user-facing banners.
///
/// Counts below 1000 are printed as plain integers (`137` → `"137"`);
/// larger counts are divided by 1000 and printed with one decimal and a `K`
/// suffix (`9800` → `"9.8K"`). Mirrors the `k(...)` closure in the TUI's
/// `format_context_report` so `/compact` output uses the same units as
/// `/context`.
fn fmt_k_tokens(t: usize) -> String {
    match t {
        0..=999 => t.to_string(),
        _ => format!("{:.1}K", t as f64 / 1000.0),
    }
}
3438
/// Tests for `AgentClient` / `AgentHandle` wiring and for the test-only
/// constructor and setters of `AgentRuntimeFactory`.
#[cfg(test)]
mod agent_handle_tests {
    use std::path::PathBuf;
    use std::sync::{Arc, RwLock};

    use super::{AgentClient, AgentHandle, AgentRuntimeFactory};
    use crate::config::Config;
    use crate::skill::SkillRegistry;
    use crate::tool::ToolRegistry;

    /// Cloning an `AgentClient` into an `AgentHandle` must share the same
    /// registries (pointer-equal `Arc`s), not copies of them.
    #[test]
    fn agent_client_clones_command_sender_and_registries() {
        let (cmd_tx, _cmd_rx) = tokio::sync::mpsc::unbounded_channel();
        let (_event_tx, event_rx) = tokio::sync::mpsc::unbounded_channel();
        let tools = Arc::new(ToolRegistry::new());
        let skills = Arc::new(RwLock::new(SkillRegistry::new()));

        let client = AgentClient {
            cmd_tx,
            tool_registry: Arc::clone(&tools),
            skill_registry: Arc::clone(&skills),
        };
        let handle = AgentHandle {
            client: client.clone(),
            event_rx,
        };

        let same_tools = Arc::ptr_eq(&client.tool_registry, &handle.client.tool_registry);
        let same_skills = Arc::ptr_eq(&client.skill_registry, &handle.client.skill_registry);
        assert!(same_tools);
        assert!(same_skills);
    }

    /// With no providers configured, the provider built by the factory
    /// reports the "no provider configured" availability error.
    #[test]
    fn runtime_factory_reports_missing_provider_as_unavailable() {
        let mut cfg = Config::default();
        cfg.default_provider = "missing".to_string();
        cfg.providers.clear();

        let factory = AgentRuntimeFactory::new_for_test(
            cfg,
            PathBuf::from("/tmp/project"),
            Arc::new(ToolRegistry::new()),
            Arc::new(RwLock::new(SkillRegistry::new())),
        );

        let built = factory.build_provider();
        let reported = built.availability_error();

        assert_eq!(
            reported,
            Some("未配置 provider。请使用 /provider 添加 provider 后再试。")
        );
    }

    /// `set_config` and `set_working_dir` replace the factory's snapshots.
    #[test]
    fn runtime_factory_setters_update_snapshots() {
        let mut factory = AgentRuntimeFactory::new_for_test(
            Config::default(),
            PathBuf::from("/tmp/old"),
            Arc::new(ToolRegistry::new()),
            Arc::new(RwLock::new(SkillRegistry::new())),
        );

        let mut replacement = Config::default();
        replacement.default_provider = "fresh".to_string();
        factory.set_config(replacement);
        factory.set_working_dir(PathBuf::from("/tmp/new"));

        assert_eq!(factory.config.default_provider, "fresh");
        assert_eq!(factory.working_dir, PathBuf::from("/tmp/new"));
    }

    /// A factory and its clone draw runtime labels from one sequence: the
    /// original hands out `runtime-2`, the clone then hands out `runtime-3`.
    #[test]
    fn cloned_runtime_factories_allocate_unique_labels() {
        let original = AgentRuntimeFactory::new_for_test(
            Config::default(),
            PathBuf::from("/tmp/project"),
            Arc::new(ToolRegistry::new()),
            Arc::new(RwLock::new(SkillRegistry::new())),
        );
        let duplicate = original.clone();

        let first_label = original.next_runtime_label();
        assert_eq!(first_label, "runtime-2");
        let second_label = duplicate.next_runtime_label();
        assert_eq!(second_label, "runtime-3");
    }
}
3524
3525#[cfg(test)]
3526mod classifier_tests {
3527    use super::{
3528        extract_provider_ctx_limit, is_auth_error, is_codingplan_unavailable_error,
3529        is_context_overflow_error, is_rate_limited_error, public_error_message,
3530        public_error_reason, reload_should_clear_conversation,
3531    };
3532
3533    // ── reload_should_clear_conversation ──
3534
3535    #[test]
3536    fn reload_same_type_different_name_keeps_history() {
3537        // The common CodingPlan case: one provider entry per model, all
3538        // `openai`-typed. User swaps Kimi ↔ GLM via `/model` — history MUST
3539        // survive, otherwise every model switch is a brand-new session.
3540        assert!(!reload_should_clear_conversation(
3541            "AtomGit-kimi-k2.6",
3542            Some("openai"),
3543            "AtomGit-glm5",
3544            Some("openai"),
3545        ));
3546    }
3547
3548    #[test]
3549    fn reload_different_type_clears() {
3550        // Cross-type (openai → claude) is not proven round-trip clean:
3551        // tool_call id formats differ, tool_use block translation is
3552        // non-trivial. Stay safe and clear.
3553        assert!(reload_should_clear_conversation(
3554            "kimi",
3555            Some("openai"),
3556            "claude-sonnet",
3557            Some("claude"),
3558        ));
3559    }
3560
3561    #[test]
3562    fn reload_missing_old_type_falls_back_to_name_change() {
3563        // Old provider was removed from new_config (rename, delete, config
3564        // rewritten by wizard). We can't tell whether types match, so fall
3565        // back to the historical safe default: clear when the name flips.
3566        assert!(reload_should_clear_conversation(
3567            "old-gone",
3568            None,
3569            "new-arrival",
3570            Some("openai"),
3571        ));
3572        assert!(!reload_should_clear_conversation(
3573            "same",
3574            None,
3575            "same",
3576            Some("openai"),
3577        ));
3578    }
3579
3580    #[test]
3581    fn reload_same_name_never_clears() {
3582        // A no-op ReloadConfig (same default, same type) is a noop here too.
3583        // Sanity — should not accidentally wipe history.
3584        assert!(!reload_should_clear_conversation(
3585            "kimi",
3586            Some("openai"),
3587            "kimi",
3588            Some("openai"),
3589        ));
3590    }
3591
3592    #[test]
3593    fn openrouter_400_is_overflow() {
3594        let msg = "API error (400 Bad Request): This endpoint's maximum context \
3595                   length is 204800 tokens. However, you requested about 745279 \
3596                   tokens... Please reduce the length of either one.";
3597        assert!(is_context_overflow_error(msg));
3598    }
3599
3600    #[test]
3601    fn openai_context_length_exceeded_is_overflow() {
3602        assert!(is_context_overflow_error(
3603            "{\"error\":{\"code\":\"context_length_exceeded\"}}"
3604        ));
3605    }
3606
3607    #[test]
3608    fn anthropic_prompt_too_long_is_overflow() {
3609        assert!(is_context_overflow_error(
3610            "prompt is too long: 250000 tokens"
3611        ));
3612    }
3613
3614    #[test]
3615    fn generic_rate_limit_is_not_overflow() {
3616        assert!(!is_context_overflow_error("429 Too Many Requests"));
3617    }
3618
3619    #[test]
3620    fn auth_error_is_not_overflow() {
3621        assert!(!is_context_overflow_error("401 Unauthorized"));
3622    }
3623
3624    #[test]
3625    fn extract_glm_proxy_ctx_limit() {
3626        // From the actual datalog that motivated D2.
3627        let msg = "API error (400 Bad Request) at `http://115.120.18.212:18005/v1/chat/completions`: \
3628                   {\"error\":{\"message\":\"This model's maximum context length is 65536 tokens. \
3629                   However, you requested 15210 output tokens and your prompt contains at least \
3630                   50327 input tokens, for a total of at least 65537 tokens.\"}}";
3631        assert_eq!(extract_provider_ctx_limit(msg), Some(65536));
3632    }
3633
3634    #[test]
3635    fn extract_openrouter_ctx_limit() {
3636        let msg = "API error (400): This endpoint's maximum context length is 204800 tokens. \
3637                   However, you requested about 745279 tokens";
3638        assert_eq!(extract_provider_ctx_limit(msg), Some(204800));
3639    }
3640
3641    #[test]
3642    fn extract_anthropic_prompt_too_long() {
3643        let msg = "prompt is too long: 200000 tokens > 200000 maximum";
3644        assert_eq!(extract_provider_ctx_limit(msg), Some(200000));
3645    }
3646
3647    #[test]
3648    fn extract_no_limit_returns_none_for_non_overflow_errors() {
3649        assert_eq!(extract_provider_ctx_limit("429 Too Many Requests"), None);
3650        assert_eq!(extract_provider_ctx_limit("401 Unauthorized"), None);
3651        assert_eq!(extract_provider_ctx_limit(""), None);
3652    }
3653
3654    #[test]
3655    fn extract_filters_out_implausibly_small_numbers() {
3656        // Status codes and small ints in error bodies must not be
3657        // mistaken for context limits.
3658        let msg = "Error 400: maximum context length is 200 tokens";
3659        assert_eq!(extract_provider_ctx_limit(msg), None);
3660    }
3661
3662    // ── D2 emergency compact tier helpers ──
3663
3664    use crate::conversation::{Conversation, message::MessageContent};
3665    use crate::tool::{ToolCall, ToolResult};
3666
3667    /// Build a synthetic conversation with `n_turns` turns, each carrying
3668    /// one user message + one assistant tool_call + one tool_result of
3669    /// `result_size` chars.
3670    fn build_conv(n_turns: usize, result_size: usize) -> Conversation {
3671        let mut conv = Conversation::new();
3672        for t in 0..n_turns {
3673            conv.add_user_message(&format!("turn {} request", t));
3674            conv.add_assistant_tool_calls(
3675                None,
3676                vec![ToolCall {
3677                    id: format!("call_{}", t),
3678                    name: "read_file".into(),
3679                    arguments: r#"{"file_path":"/x"}"#.into(),
3680                }],
3681                None,
3682            );
3683            conv.add_tool_result(ToolResult {
3684                call_id: format!("call_{}", t),
3685                output: "x".repeat(result_size),
3686                success: true,
3687            });
3688        }
3689        conv
3690    }
3691
3692    fn count_collapsed_results(conv: &Conversation) -> usize {
3693        // New unified stub format: `[<tool> <ok|FAILED>: N lines, first: …]`.
3694        // The substring " lines, first:" is unique to the stub shape and
3695        // robust whether the tool name is "bash", "grep", or "tool".
3696        conv.messages
3697            .iter()
3698            .filter(|m| match &m.content {
3699                MessageContent::ToolResult(tr) => tr.output.contains(" lines, first:"),
3700                _ => false,
3701            })
3702            .count()
3703    }
3704
3705    /// Phase 1 proactive compact: Tier 1 (collapse) is enough for the
3706    /// common case — heavy old tool_result bodies become stubs and
3707    /// the conversation token total drops below threshold without
3708    /// invoking the LLM-summary round trip. Tier 2 only fires when
3709    /// Tier 1 wasn't enough. This test pins the contract that Tier 1
3710    /// is invoked first; Tier 2 path is covered separately by the
3711    /// existing emergency-compact tests.
3712    #[test]
3713    fn proactive_tier1_collapses_old_tool_results_only() {
3714        // Build a conversation heavy with old, large tool_results
3715        // (typical bash/cargo session shape). After Tier 1 with
3716        // keep_recent_turns=3, the 3 OLDEST turns' tool_results
3717        // should be stubs while the 3 RECENT turns retain full
3718        // payload. Pins the "older=collapsed, newer=intact" split.
3719        let mut conv = build_conv(/* n_turns */ 6, /* result_size */ 4_000);
3720        crate::ctx::render::compact_old_tool_results_in_place(&mut conv, 3);
3721
3722        // Walk the messages: each turn pushes (User, AssistantToolCall,
3723        // ToolResult). 6 turns × 3 msgs = 18 msgs. The first 3 turns
3724        // are "old"; turns 4-6 are "recent".
3725        let mut tr_sizes: Vec<usize> = Vec::new();
3726        for m in &conv.messages {
3727            if let MessageContent::ToolResult(tr) = &m.content {
3728                tr_sizes.push(tr.output.len());
3729            }
3730        }
3731        assert_eq!(tr_sizes.len(), 6, "expected 6 tool_results");
3732        // Old: index 0, 1, 2 — must be stubs (small).
3733        for &s in &tr_sizes[..3] {
3734            assert!(
3735                s < 200,
3736                "old tool_result must collapse to stub; got len={}",
3737                s
3738            );
3739        }
3740        // Recent: index 3, 4, 5 — must remain full (4_000 chars + the
3741        // 'x' chars).
3742        for &s in &tr_sizes[3..] {
3743            assert!(
3744                s >= 4_000,
3745                "recent tool_result must remain full; got len={}",
3746                s
3747            );
3748        }
3749    }
3750
3751    #[test]
3752    fn collapse_keeps_last_n_turns_full() {
3753        let mut conv = build_conv(5, 1024);
3754        crate::ctx::render::compact_old_tool_results_in_place(&mut conv, 2);
3755        // 5 turns, keep last 2 → first 3 should have stubbed tool_results.
3756        assert_eq!(count_collapsed_results(&conv), 3);
3757    }
3758
3759    #[test]
3760    fn collapse_skips_already_tiny_results() {
3761        // Tool results under 200 chars aren't worth collapsing — the stub
3762        // would weigh more than the original.
3763        let mut conv = build_conv(5, 50);
3764        crate::ctx::render::compact_old_tool_results_in_place(&mut conv, 2);
3765        assert_eq!(count_collapsed_results(&conv), 0);
3766    }
3767
3768    #[test]
3769    fn collapse_no_op_when_under_keep_threshold() {
3770        let mut conv = build_conv(2, 1024);
3771        crate::ctx::render::compact_old_tool_results_in_place(&mut conv, 3);
3772        // Only 2 turns total, keep 3 — nothing to collapse.
3773        assert_eq!(count_collapsed_results(&conv), 0);
3774    }
3775
3776    #[test]
3777    fn collapse_preserves_call_id_and_success_flag() {
3778        let mut conv = build_conv(3, 1024);
3779        crate::ctx::render::compact_old_tool_results_in_place(&mut conv, 1);
3780        // Verify call_0's tool_result still has the right call_id even
3781        // though its body was stubbed — preserves tool_call/tool_result
3782        // pairing for OpenAI-style providers.
3783        let tr = conv
3784            .messages
3785            .iter()
3786            .find_map(|m| match &m.content {
3787                MessageContent::ToolResult(tr) if tr.call_id == "call_0" => Some(tr),
3788                _ => None,
3789            })
3790            .expect("call_0 result must still exist");
3791        // New unified stub format: `[<tool> <ok|FAILED>: N lines, first: …]`.
3792        assert!(tr.output.contains(" lines, first:"));
3793        assert!(tr.output.starts_with("["));
3794        assert!(tr.success);
3795    }
3796
3797    #[test]
3798    fn hard_truncate_keeps_last_user_message_even_under_budget() {
3799        // Tight budget that forces aggressive drops; sacred invariant
3800        // says the last user msg must survive regardless. This is the
3801        // structural guarantee the previous `truncate(len-4)` code
3802        // *violated*, producing the 8516-token catastrophe.
3803        let mut conv = build_conv(10, 2048);
3804        super::hard_truncate_to_target(&mut conv, /* target */ 100, /* sys */ 50);
3805        let has_user = conv
3806            .messages
3807            .iter()
3808            .any(|m| matches!(m.role, crate::conversation::message::Role::User));
3809        assert!(has_user, "last user message must survive even at tight budget");
3810    }
3811
3812    #[test]
3813    fn hard_truncate_does_not_start_with_orphan_tool_result() {
3814        // After truncate the first surviving message must NOT be a
3815        // ToolResult — that would orphan it from its paired assistant
3816        // tool_call, which OpenAI-style APIs reject with 400.
3817        let mut conv = build_conv(8, 1024);
3818        super::hard_truncate_to_target(&mut conv, /* target */ 2000, /* sys */ 100);
3819        if let Some(first) = conv.messages.first() {
3820            assert!(
3821                !matches!(
3822                    first.content,
3823                    MessageContent::ToolResult(_) | MessageContent::ToolResultRef(_)
3824                ),
3825                "first surviving message must not be an orphan tool_result"
3826            );
3827        }
3828    }
3829
3830    #[test]
3831    fn hard_truncate_no_op_when_already_under_target() {
3832        let mut conv = build_conv(3, 100);
3833        let before = conv.messages.len();
3834        super::hard_truncate_to_target(&mut conv, /* target */ 100_000, /* sys */ 100);
3835        assert_eq!(conv.messages.len(), before);
3836    }
3837
3838    #[test]
3839    fn hard_truncate_rebuilds_turn_tracker() {
3840        // After draining messages from the front, the turn_tracker must
3841        // be rebuilt so its Turn entries point at valid indices. Without
3842        // this, the next build_messages crashes or silently emits wrong
3843        // boundaries (the bug the old `truncate(len-4)` path also
3844        // patched, but inconsistently — see the rebuild call there).
3845        let mut conv = build_conv(10, 2048);
3846        super::hard_truncate_to_target(&mut conv, /* target */ 1000, /* sys */ 100);
3847        // Every turn's start_idx must be a valid index into messages.
3848        for t in &conv.turn_tracker.turns {
3849            assert!(
3850                t.start_idx <= conv.messages.len(),
3851                "turn start_idx {} out of bounds (messages.len()={})",
3852                t.start_idx,
3853                conv.messages.len()
3854            );
3855        }
3856    }
3857
3858    #[test]
3859    fn stream_timeout_is_summarized() {
3860        // public_error_message defers to ATOMCODE_SHOW_RAW_API_ERROR (raw by
3861        // default), so the user-facing string can't be tested deterministically
3862        // without env-var manipulation that races other parallel tests.
3863        // public_error_reason covers the routing logic regardless of env state.
3864        assert_eq!(
3865            public_error_reason("Stream timeout: no event for 300s"),
3866            "模型响应超时"
3867        );
3868    }
3869
3870    #[test]
3871    fn upstream_5xx_is_summarized() {
3872        assert_eq!(
3873            public_error_reason(
3874                "API error (503 Service Unavailable) at `https://x`:\nbackend trace"
3875            ),
3876            "上游服务暂时不可用"
3877        );
3878    }
3879
3880    #[test]
3881    fn auth_error_is_detected() {
3882        assert!(is_auth_error(
3883            "API error (401 Unauthorized): invalid_api_key"
3884        ));
3885    }
3886
3887    /// CpOfficialBuildRequired (English variant) — surfaces from
3888    /// build_codingplan_headers in open-source builds when an
3889    /// AtomGit-bound request is attempted.
3890    #[test]
3891    fn codingplan_unavailable_detected_in_english_message() {
3892        let en = "This feature requires the official AtomCode build. \
3893                  Download it from https://atomgit.com/atomgit_atomcode/atomcode/releases.";
3894        assert!(is_codingplan_unavailable_error(en));
3895    }
3896
3897    /// Same error, Chinese locale. The Releases URL is the substring
3898    /// match — it's not localised, so the same classifier handles
3899    /// both en and zh-CN without coupling to translation text.
3900    #[test]
3901    fn codingplan_unavailable_detected_in_chinese_message() {
3902        let zh = "此功能需要官方 AtomCode 构建,请前往 \
3903                  https://atomgit.com/atomgit_atomcode/atomcode/releases 下载安装。";
3904        assert!(is_codingplan_unavailable_error(zh));
3905    }
3906
3907    /// Negative: an unrelated network error must NOT trip the
3908    /// classifier. Verifies the URL anchor is narrow enough to avoid
3909    /// false positives.
3910    #[test]
3911    fn codingplan_unavailable_does_not_match_unrelated_errors() {
3912        assert!(!is_codingplan_unavailable_error(
3913            "API error (500 Internal Server Error) at `https://api.openai.com/v1/chat/completions`"
3914        ));
3915        assert!(!is_codingplan_unavailable_error("Stream timeout: no event for 300s"));
3916        assert!(!is_codingplan_unavailable_error(""));
3917    }
3918
3919    #[test]
3920    fn rate_limit_error_is_detected() {
3921        assert!(is_rate_limited_error("API error (429 Too Many Requests)"));
3922    }
3923
3924    /// Chinese gateway-side rate-limit blobs streamed in-band by
3925    /// GitCode litellm (and similar proxies) must route to the
3926    /// proper rate-limit retry path (5 attempts × 3-30s backoff),
3927    /// not the generic 3-shot fallback. Without this the
3928    /// abrupt-close discriminator in openai.rs converts the SSE
3929    /// blob to StreamEvent::Error but the agent then mis-retries
3930    /// it.
3931    #[test]
3932    fn rate_limit_error_detects_chinese_gateway_patterns() {
3933        assert!(is_rate_limited_error("模型「GLM-5.1」的请求负载过高,请稍后再试。"));
3934        assert!(is_rate_limited_error("请求过于频繁,请稍后再试"));
3935        assert!(is_rate_limited_error("服务繁忙"));
3936        assert!(is_rate_limited_error("当前已被限流"));
3937        // Negative: a vanilla error must NOT be classified as rate
3938        // limit just because it mentions "请稍后再试" alone
3939        // (which is generic Chinese "try again later").
3940        assert!(!is_rate_limited_error("请稍后再试"));
3941        assert!(!is_rate_limited_error("API error (500 Internal Server Error)"));
3942    }
3943
3944    #[test]
3945    fn invalid_request_is_summarized_without_raw_body() {
3946        let old = std::env::var("ATOMCODE_SHOW_RAW_API_ERROR").ok();
3947        unsafe { std::env::set_var("ATOMCODE_SHOW_RAW_API_ERROR", "0") };
3948        let raw = "API error (400 Bad Request) at `https://x`:\nstack=secret detail";
3949        assert_eq!(public_error_reason(raw), "请求参数无效");
3950        assert!(!public_error_message(raw).contains("secret detail"));
3951        if let Some(v) = old {
3952            unsafe { std::env::set_var("ATOMCODE_SHOW_RAW_API_ERROR", v) };
3953        } else {
3954            unsafe { std::env::remove_var("ATOMCODE_SHOW_RAW_API_ERROR") };
3955        }
3956    }
3957
3958    #[test]
3959    fn raw_error_is_shown_by_default() {
3960        let old = std::env::var("ATOMCODE_SHOW_RAW_API_ERROR").ok();
3961        unsafe { std::env::remove_var("ATOMCODE_SHOW_RAW_API_ERROR") };
3962        let raw = "API error (400 Bad Request) at `https://x`:\nstack=secret detail";
3963        assert_eq!(public_error_message(raw), raw);
3964        if let Some(v) = old {
3965            unsafe { std::env::set_var("ATOMCODE_SHOW_RAW_API_ERROR", v) };
3966        }
3967    }
3968}
3969
/// Tests for `build_post_compress_state`, called here as
/// `(task, edited files, read files)`: it returns `None` when all three
/// inputs are empty, otherwise a state string with TASK / FILES EDITED /
/// RECENTLY READ lines.
#[cfg(test)]
mod post_compress_state_tests {
    use super::build_post_compress_state;

    /// Returns the text after the `TASK: ` prefix of the state's TASK line.
    /// Panics if the state has no such line.
    fn task_payload(state: &str) -> &str {
        state
            .lines()
            .find_map(|line| line.strip_prefix("TASK: "))
            .expect("state should contain a TASK line")
    }

    #[test]
    fn empty_inputs_return_none() {
        assert!(build_post_compress_state("", &[], &[]).is_none());
    }

    #[test]
    fn task_only() {
        let out = build_post_compress_state("fix login bug", &[], &[]).unwrap();
        assert!(out.starts_with("[Context was compressed. Here is your current state:]\n"));
        assert!(out.contains("TASK: fix login bug"));
        // Sections with no input must be omitted entirely.
        assert!(!out.contains("FILES EDITED"));
        assert!(!out.contains("RECENTLY READ"));
    }

    #[test]
    fn task_exact_200_is_unchanged() {
        // A task of exactly 200 chars is at the limit and must pass through.
        let exact: String = "字".repeat(200);
        let out = build_post_compress_state(&exact, &[], &[]).unwrap();
        let payload = task_payload(&out);
        assert_eq!(payload.chars().count(), 200);
        assert_eq!(payload, exact);
    }

    #[test]
    fn task_201_drops_exactly_one_char() {
        // Boundary: 201 → 200. The result must be the first 200 characters
        // of the input, i.e. the cut lands on a char boundary instead of
        // splitting the last 3-byte "字".
        let over: String = "字".repeat(201);
        let out = build_post_compress_state(&over, &[], &[]).unwrap();
        let payload = task_payload(&out);
        assert_eq!(payload.chars().count(), 200);
        // Comparing against the expected prefix actually checks the cut; the
        // earlier `is_char_boundary(payload.len())` check could never fail.
        assert_eq!(payload, "字".repeat(200));
    }

    #[test]
    fn task_long_multibyte_truncates_safely() {
        // Regression guard: byte-slicing here would panic mid-codepoint.
        let long: String = "字".repeat(500);
        let out = build_post_compress_state(&long, &[], &[]).unwrap();
        assert_eq!(task_payload(&out).chars().count(), 200);
    }

    #[test]
    fn files_edited_comma_joined() {
        let edited = vec!["a.rs".to_string(), "b.rs".to_string()];
        let out = build_post_compress_state("", &edited, &[]).unwrap();
        assert!(out.contains("FILES EDITED: a.rs, b.rs"));
    }

    #[test]
    fn files_read_last_five_reversed() {
        // Of eight read files only the last five are listed, newest first.
        let read: Vec<String> = (1..=8).map(|i| format!("f{}.rs", i)).collect();
        let out = build_post_compress_state("", &[], &read).unwrap();
        let line = out
            .lines()
            .find(|l| l.starts_with("RECENTLY READ: "))
            .unwrap();
        assert_eq!(line, "RECENTLY READ: f8.rs, f7.rs, f6.rs, f5.rs, f4.rs");
    }

    #[test]
    fn all_three_parts_combined() {
        let out = build_post_compress_state("task x", &["a.rs".to_string()], &["b.rs".to_string()])
            .unwrap();
        assert!(out.contains("TASK: task x"));
        assert!(out.contains("FILES EDITED: a.rs"));
        assert!(out.contains("RECENTLY READ: b.rs"));
    }
}
4049
/// Tests for `fmt_k_tokens`: values below 1000 print as plain integers,
/// values from 1000 up print in thousands with one decimal and a `K` suffix.
#[cfg(test)]
mod fmt_k_tokens_tests {
    use super::fmt_k_tokens;

    #[test]
    fn under_1000_no_suffix() {
        let cases = [(0, "0"), (137, "137"), (999, "999")];
        for (tokens, expected) in cases {
            assert_eq!(fmt_k_tokens(tokens), expected);
        }
    }

    #[test]
    fn one_thousand_and_above_use_k_suffix_with_one_decimal() {
        let cases = [
            (1000, "1.0K"),
            (3700, "3.7K"),
            (9800, "9.8K"),
            (64000, "64.0K"),
        ];
        for (tokens, expected) in cases {
            assert_eq!(fmt_k_tokens(tokens), expected);
        }
    }
}
4069
/// Tests for the helpers that derive the list of touched files from tool
/// invocations and their textual output (bash `rm`, the "workspace modified
/// via bash" banner, and search/replace summaries).
#[cfg(test)]
mod bash_deleted_file_tracking_tests {
    use super::{
        bash_workspace_modified_files, rm_file_targets, track_search_replace_files,
        track_tool_modified_files,
    };
    use std::path::Path;

    /// A bare `rm <file>` resolves the target against the supplied cwd.
    #[test]
    fn tracks_simple_rm_target_from_cwd() {
        let cwd = Path::new("/tmp/project");
        let resolved = rm_file_targets("rm numbers.txt", cwd);
        assert_eq!(resolved, vec!["/tmp/project/numbers.txt"]);
    }

    /// `rm -rf <dir>` yields no tracked targets.
    #[test]
    fn skips_recursive_rm_targets() {
        let cwd = Path::new("/tmp/project");
        let resolved = rm_file_targets("rm -rf dist", cwd);
        assert!(resolved.is_empty());
    }

    /// For a bash `rm` reporting exit 0, the cwd is taken from the `[cwd: …]`
    /// line of the tool output and the removed file is recorded.
    #[test]
    fn tracks_successful_bash_rm_from_output_cwd() {
        let tool_output = "[elapsed: 0.0s, exit: 0]\n[cwd: /tmp/project]";
        let mut edited = Vec::new();
        track_tool_modified_files("bash", "rm numbers.txt", tool_output, &mut edited);
        assert_eq!(edited, vec!["/tmp/project/numbers.txt"]);
    }

    /// Files listed in the "workspace modified via bash" banner are returned
    /// in order; the relative entry comes back joined onto the cwd and the
    /// absolute entry comes back as-is.
    #[test]
    fn tracks_workspace_modified_bash_output() {
        let tool_output = "[workspace modified via bash: src/a.rs, /tmp/project/b.txt. If you meant to edit source, use edit_file next time]\n[cwd: /tmp/project]";
        let expected = vec![
            String::from("/tmp/project/src/a.rs"),
            String::from("/tmp/project/b.txt"),
        ];
        let files = bash_workspace_modified_files(tool_output, Path::new("/tmp/project"));
        assert_eq!(files, expected);
    }

    /// Each per-file line of a search/replace summary contributes its path.
    #[test]
    fn tracks_search_replace_output_files() {
        let summary = "Replaced 'old' -> 'new': 2 replacements across 2 files.\n  /tmp/project/a.rs (1 replacements)\n  /tmp/project/b.rs (1 replacements)";
        let expected = vec![
            String::from("/tmp/project/a.rs"),
            String::from("/tmp/project/b.rs"),
        ];
        let mut edited = Vec::new();
        track_search_replace_files(summary, &mut edited);
        assert_eq!(edited, expected);
    }
}
4133