atomcode_core/agent/mod.rs
1//! The AgentLoop — a standalone agent that processes user messages,
2//! calls LLM providers, executes tools, and communicates with the UI
3//! via channels. Decoupled from any TUI concerns.
4
5pub mod background;
6pub mod git_auto_commit;
7pub mod git_checkpoint;
8pub mod sub_agent;
9pub mod subtask_driver;
10
11mod diagnose;
12mod discipline;
13pub mod execute;
14mod prompt;
15mod services;
16mod tool_dispatch;
17mod verify;
18
19use std::path::PathBuf;
20use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
21use std::time::{Duration, Instant};
22
23use tokio::sync::mpsc;
24use tokio_util::sync::CancellationToken;
25
26use crate::config::Config;
27use crate::conversation::Conversation;
28use crate::provider::LlmProvider;
29use crate::skill::SkillRegistry;
30use crate::tool::use_skill::UseSkillTool;
31use crate::tool::{PermissionDecision, PermissionStore, ToolCall, ToolContext, ToolRegistry};
32use crate::turn::event::{TurnEvent, TurnResult};
33use crate::turn::runner::TurnRunner;
34
35/// Commands sent FROM the UI TO the agent loop.
36#[derive(Debug)]
37pub enum AgentCommand {
38 /// User sent a message (may include attached file content and/or images).
39 /// `image_markers[i]` is the `[Image #N]` number printed for `images[i]`
40 /// at paste time. Round-tripped through `AgentEvent::RestorePendingImages`
41 /// so that on VL preprocess failure the TUI can re-attach images with
42 /// their ORIGINAL markers — otherwise an UP-recalled `[Image #5]` text
43 /// wouldn't match a freshly-renumbered restored image. Empty when the
44 /// caller has no images (slash commands, queued text from streaming,
45 /// CLI single-shot).
46 SendMessage {
47 text: String,
48 images: Vec<crate::conversation::message::ImagePart>,
49 #[allow(dead_code)] // used in 2026-05-09 vision-preprocessor retry; agent reflects on Failed
50 image_markers: Vec<usize>,
51 },
52 /// Cancel current operation.
53 Cancel,
54 /// Approve a pending tool call.
55 ApproveTool,
56 /// Approve and always allow this tool for the session.
57 ApproveToolAlways,
58 /// Deny a pending tool call.
59 DenyTool,
60 /// Reload config from TUI (the single source of truth for in-memory config,
61 /// including ephemeral OAuth providers). Switches to the new default provider.
62 ReloadConfig(crate::config::Config),
63 /// Change working directory.
64 ChangeDir(String),
65 /// Append input during streaming — queued and injected before next LLM call.
66 AppendInput(String),
67 /// Clear conversation history.
68 ClearConversation,
69 /// Set messages from a resumed session.
70 SetMessages(Vec<crate::conversation::message::Message>),
71 /// Set plan mode (read-only exploration, no edits).
72 SetPlanMode(bool),
73 /// Manually compact conversation history. `prompt` is accepted for
74 /// forward-compat with an eventual LLM-backed summarize-with-instruction
75 /// path; currently unused — this is the mechanical path only.
76 Compact {
77 prompt: Option<String>,
78 },
79 Remember {
80 content: String,
81 global: bool,
82 },
83 Forget {
84 keyword: String,
85 },
86 ShowMemory,
87 /// Run a one-shot task in an isolated background context (read-only-ish
88 /// tool subset, independent conversation, capped turns + timeout).
89 /// Result is returned via `AgentEvent::BackgroundComplete`.
90 Background {
91 task: String,
92 },
93 /// Recompute and re-emit a rich ContextStats snapshot. `/context` sends
94 /// this before rendering so the user never sees a stale cache — the
95 /// cache is only refreshed on LLM round-trips, so between turns (or
96 /// after out-of-turn mutations like `inject_post_compress_state`) the
97 /// snapshot can lag the actual conversation state.
98 RefreshContextStats,
99 /// Rebuild the hook executor from disk after a `/plugin install|uninstall`
100 /// or other change to plugin state. Cheap (just re-reads JSON files);
101 /// does NOT touch provider/model state, unlike ReloadConfig.
102 ReloadHooks,
103 /// Request a snapshot of the current conversation messages.
104 /// The agent responds with `AgentEvent::MessagesSync` carrying
105 /// `conversation.messages`. Used by the TUI before `/bg` to ensure
106 /// the session has up-to-date message history even when a turn is
107 /// still in progress (e.g. waiting for tool approval).
108 SyncMessages,
109 /// Shutdown the agent.
110 Shutdown,
111}
112
/// Why the agent's turn loop came to a halt.
///
/// Travels on `TurnComplete` so downstream consumers (the CLI `[done]` line,
/// the eval harness) can tell a natural finish apart from a budget-enforced
/// cut-off.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TurnStopReason {
    /// The model answered with text only: no further tool calls, so the
    /// conversation is done.
    Natural,
    /// The turn budget (`AgentLoop.max_turns`) was exhausted.
    TurnLimit,
    /// The step budget (the `check_step_limit` tool-call cap) was exhausted.
    StepLimit,
    /// The user cancelled the turn.
    Cancelled,
    /// An API or internal error ended the loop.
    Error,
}
129
/// Plain-data summary of one conversation-compression attempt.
///
/// NOTE(review): the code that fills this in is outside this chunk; the field
/// notes below are read off the field names and should be confirmed there.
#[derive(Debug, Clone, Copy)]
struct CompressionOutcome {
    /// Presumably whether the compression changed anything.
    applied: bool,
    /// Presumably the token estimate before compression.
    before_tokens: usize,
    /// Presumably the token estimate after compression.
    after_tokens: usize,
    /// Presumably how many messages were dropped.
    removed_messages: usize,
}
137
138impl TurnStopReason {
139 /// Short machine-parseable tag (snake_case) for logs / CLI output.
140 pub fn as_tag(&self) -> &'static str {
141 match self {
142 TurnStopReason::Natural => "natural",
143 TurnStopReason::TurnLimit => "turn_limit",
144 TurnStopReason::StepLimit => "step_limit",
145 TurnStopReason::Cancelled => "cancelled",
146 TurnStopReason::Error => "error",
147 }
148 }
149}
150
/// Descriptor for a single sub-agent inside a `SubAgentDispatchStart` batch.
///
/// The list of descriptors mirrors, 1:1, the `tasks` vector built in
/// `parallel_edit::execute`, which lets callers reuse one index across all
/// lifecycle events of the batch.
#[derive(Debug, Clone)]
pub struct SubAgentTaskInfo {
    /// Workspace-relative path of the file this sub-agent will edit. The
    /// renderer prints it in full rather than basename-only, so
    /// multi-component paths such as `src/server/tunnel.rs` and
    /// `src/client/tunnel.rs` remain visibly distinct.
    pub path: String,
    /// User-facing qualifier for duplicate instances. Empty when the path
    /// occurs once in the dispatch; `" (#2)"`, `" (#3)"`, ... when the
    /// dispatcher forks more than one sub-agent against the same path.
    pub dedup_suffix: String,
}
166
167/// Events sent FROM the agent loop TO the UI.
168#[derive(Debug, Clone)]
169pub enum AgentEvent {
170 /// LLM text delta (streaming).
171 TextDelta(String),
172 /// LLM reasoning/thinking content (e.g., DeepSeek-R1, MiniMax-M2.7, o1-series).
173 /// Emitted when the model produces thinking content separately from the final response.
174 /// UI can optionally display this in verbose mode (Ctrl+O).
175 ReasoningDelta(String),
176 /// LLM has started emitting a tool call — only the name is known so far,
177 /// arguments are still streaming. UI uses this to display the tool name
178 /// immediately instead of waiting for the full args.
179 ToolCallStreaming { name: String, hint: String },
180 /// A tool call is about to execute (for display).
181 /// `id` pairs with `ToolCallResult.call_id` so the UI can match start→result
182 /// across parallel or interleaved calls without reconstructing ids from counters.
183 ToolCallStarted {
184 id: String,
185 name: String,
186 arguments: String,
187 },
188 /// Multiple tool calls fan out from one assistant message. Fires BEFORE
189 /// the per-call `ToolCallStarted` events, only when ≥ 2 non-duplicate
190 /// calls are about to dispatch. UI uses this to render a single
191 /// grouped block (`▸ Reading 4 files (parallel)` + child rows) rather
192 /// than N independent `▸` rows. Per-call events still fire for
193 /// backward compat — UI dedupes via `batch_id` membership.
194 ToolBatchStarted {
195 batch_id: String,
196 calls: Vec<crate::turn::event::ToolBatchCall>,
197 },
198 /// Closes the batch opened by `ToolBatchStarted`. UI finalizes the
199 /// group header with `· N/M ok · Xs wall` summary.
200 ToolBatchCompleted {
201 batch_id: String,
202 ok: usize,
203 total: usize,
204 elapsed_ms: u64,
205 },
206 /// Real-time output chunk from a running tool (e.g., bash command).
207 /// Sent during tool execution before ToolCallResult.
208 ToolOutputChunk { call_id: String, chunk: String },
209 /// A tool call completed with a result.
210 ToolCallResult {
211 call_id: String,
212 name: String,
213 output: String,
214 success: bool,
215 duration: Duration,
216 },
217 /// Waiting for user approval of a tool call.
218 ApprovalNeeded {
219 tool_name: String,
220 reason: String,
221 call: ToolCall,
222 /// Snapshot of `conversation.messages` at the time the approval
223 /// request was raised. Lets the TUI persist mid-turn session
224 /// state (e.g. when `/bg` backgrounds a session that is waiting
225 /// for approval).
226 messages: Vec<crate::conversation::message::Message>,
227 },
228 /// Token usage update.
229 TokenUsage(crate::stream::TokenUsage),
230 /// The agent's current phase changed.
231 PhaseChange(AgentPhase),
232 /// Turn completed successfully.
233 TurnComplete {
234 duration: Duration,
235 total_tokens: usize,
236 /// LLM round-trips (standard agent metric).
237 turn_count: usize,
238 /// Total individual tool calls.
239 tool_call_count: usize,
240 /// Why the loop stopped. `Natural` for ordinary completion; see
241 /// TurnStopReason for budget / cancel / error variants.
242 stop_reason: TurnStopReason,
243 /// Snapshot of the conversation messages at the moment the turn
244 /// ended. Mirrors `TurnCancelled.messages` so UIs have one uniform
245 /// path for persisting session state on either terminal event.
246 messages: Vec<crate::conversation::message::Message>,
247 },
248 /// Turn was cancelled by user before completion.
249 /// The conversation has been cleaned up - partial messages removed.
250 /// Contains the cleaned message list for TUI to sync.
251 TurnCancelled {
252 messages: Vec<crate::conversation::message::Message>,
253 },
254 /// Response to `AgentCommand::SyncMessages`. Carries a snapshot of
255 /// `conversation.messages` at the time the agent processed the command.
256 /// Used by the TUI to sync session state before backgrounding a session
257 /// that is mid-turn (e.g. waiting for tool approval).
258 MessagesSync {
259 messages: Vec<crate::conversation::message::Message>,
260 },
261 /// An error occurred. Carries a snapshot of `conversation.messages`
262 /// so the TUI can persist mid-turn state even when the turn dies
263 /// before TurnComplete/TurnCancelled fire — without this, a
264 /// first-turn LLM failure silently drops the user's typed message
265 /// from disk and `/resume` shows nothing for that conversation.
266 /// Producers that don't hold the conversation (the inline
267 /// streaming-error forwarder in `run_turn_loop`) send `messages:
268 /// Vec::new()`; the terminal error path captured at
269 /// `handle_send_message` provides the full snapshot.
270 Error {
271 error: String,
272 messages: Vec<crate::conversation::message::Message>,
273 },
274 /// Non-fatal advisory from a provider or other subsystem. UI renders
275 /// this as a one-line yellow banner; does not abort the turn.
276 /// Currently sourced from the OpenAI provider's truncation detector
277 /// when the proxy reports implausibly few prompt_tokens.
278 Warning(String),
279 /// VL preprocessing failed; the agent is returning the user's pending
280 /// images so the TUI can re-attach them to the input state. Lets the
281 /// user retry the same image without re-pasting from clipboard. Hashes
282 /// are TUI-side state, so the renderer recomputes them from the
283 /// returned base64 bytes (best-effort; clipboard-equality dedup may
284 /// fire on a fresh paste of the same image — minor UX, not breaking).
285 RestorePendingImages {
286 images: Vec<crate::conversation::message::ImagePart>,
287 /// Original `[Image #N]` numbers, parallel to `images`. Round-tripped
288 /// from `AgentCommand::SendMessage::image_markers` so the TUI can
289 /// re-attach with the SAME marker numbers — keeps UP-recalled
290 /// caption text matching after retry.
291 markers: Vec<usize>,
292 },
293 /// VL preprocessing succeeded — surface a one-line success notice
294 /// without dumping the (possibly long, sometimes uninformative) VL
295 /// description into the UI. The description still rides into
296 /// conversation history for the main model. `vl_key` is the provider
297 /// key from config; `char_count` is `text.chars().count()` so users
298 /// can spot zero/near-zero outputs that would mislead the main model.
299 VisionPreprocessSuccess {
300 vl_key: String,
301 char_count: usize,
302 },
303 /// Sub-agent batch began. `tasks` is the ordered list of children
304 /// the dispatcher is about to fork — same order as the resulting
305 /// `SubAgentTaskDone`/`SubAgentTaskFailed` events will arrive in,
306 /// so the UI can pre-allocate one display slot per child and
307 /// disambiguate same-basename tasks via the index.
308 SubAgentDispatchStart {
309 /// Per-task descriptors. `path` is the workspace-relative file
310 /// path (preserved as the model wrote it — no basename-only
311 /// truncation). `dedup_suffix` is the user-facing `(#2)`,
312 /// `(#3)` qualifier when the same path appears N times in one
313 /// dispatch; empty for unique entries.
314 tasks: Vec<SubAgentTaskInfo>,
315 },
316 /// Sub-agent batch ended (all tasks settled or pool returned). UI
317 /// clears the override so subsequent thinks/tools resume normal
318 /// label behaviour.
319 SubAgentDispatchEnd,
320 /// One sub-agent has been claimed from the pool and is now running.
321 /// `index` indexes into the `tasks` vector emitted with the
322 /// matching DispatchStart so the UI can locate its slot.
323 SubAgentTaskStarted { index: usize },
324 /// Sub-agent finished successfully. `summary` is a one-sentence
325 /// human-readable result, already truncated to a reasonable length
326 /// by the agent loop.
327 SubAgentTaskDone {
328 index: usize,
329 elapsed_ms: u64,
330 turns: usize,
331 summary: String,
332 },
333 /// Sub-agent failed (error, timeout, no-edit). `reason` is one
334 /// short phrase, not a stack trace.
335 SubAgentTaskFailed {
336 index: usize,
337 elapsed_ms: u64,
338 turns: usize,
339 reason: String,
340 },
341 /// `/background` task finished. `summary` is the final assistant text
342 /// (truncated if long). `success` is false on error / timeout / cancel.
343 BackgroundComplete {
344 summary: String,
345 files_edited: Vec<String>,
346 turns: usize,
347 success: bool,
348 },
349 /// Working directory changed.
350 WorkingDirChanged(PathBuf),
351 /// Context budget stats — piped into datalog and cached by the TUI
352 /// for `/context`. Emitted after every turn's `ctx.build_messages`
353 /// call, so stats reflect the snapshot the model actually saw.
354 ///
355 /// The rich breakdown (tool defs / cold zone / ctx window / ctx name)
356 /// only appears on the second emission path in
357 /// `handle_send_message` — the first path (TurnEvent forwarding) uses
358 /// the narrow stats from the ctx::render output. TUI merges both.
359 ContextStats {
360 system_tokens: usize,
361 sent_tokens: usize,
362 dropped_tokens: usize,
363 working_set_tokens: usize,
364 total_messages: usize,
365 /// Total bytes of tool definitions / 4. 0 when not yet computed.
366 tool_defs_tokens: usize,
367 /// Tokens used by cold-zone compressed summaries.
368 cold_zone_tokens: usize,
369 /// Effective token budget from the active ctx strategy
370 /// (`ctx.ctx_window()`), including any defensive clamping.
371 ctx_window: usize,
372 /// Ctx strategy name — `default` / `ollama` / future impls.
373 ctx_name: String,
374 /// Full assembled system prompt for the turn — lets the TUI's
375 /// `/context prompt` show the exact bytes sent. Empty on the
376 /// narrow TurnEvent-forwarded path; only the rich emission in
377 /// `handle_send_message` fills this.
378 system_prompt: String,
379 },
380}
381
/// What the agent is doing right now, as reported to the UI.
///
/// Derives `Eq` alongside `PartialEq`: every payload (`String`) has total
/// equality, so the type can be used wherever an `Eq` bound is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AgentPhase {
    /// Nothing in flight.
    Idle,
    /// The LLM is generating text.
    Thinking,
    /// A tool is executing; the payload is the tool's name.
    CallingTool(String),
    /// Blocked until the user approves or denies a tool call.
    WaitingApproval,
}
390
/// Bookkeeping for agent "discipline": counters and flags used for loop
/// detection, stagnation, error streaks and tool usage patterns. Split out
/// of `AgentLoop` so that struct stays manageable.
///
/// Every field starts at its `Default` value (zero / `false` / `None` /
/// empty). NOTE(review): the code that updates and reads these fields is
/// outside this chunk; the grouping comments below are inferred from the
/// field names only.
#[derive(Default)]
pub(crate) struct DisciplineState {
    // Read / exploration tracking (presumably).
    pub consecutive_reads: usize,
    pub stagnant_turns: usize,
    pub last_known_files: usize,
    pub targeted_read_count: usize,
    pub last_targeted_reads: usize,

    // Flags about the current turn's behaviour (presumably).
    pub verify_injected: bool,
    pub model_produced_text: bool,
    pub silent_tool_rounds: usize,
    pub is_negative_feedback: bool,

    // Build, scouting and edit streaks (presumably).
    pub build_fail_count: usize,
    pub scouting_count: usize,
    pub api_confirmed_working: bool,
    pub consecutive_edits_file: Option<String>,
    pub consecutive_edits_count: usize,
    pub sleep_count: usize,
    pub consecutive_verify_count: usize,

    // Error and command history (presumably).
    pub recent_errors: Vec<String>,
    pub executed_cmds: std::collections::HashMap<String, usize>,
    pub category_fail_streak: std::collections::HashMap<String, usize>,
    pub last_bash_cmd: String,
    pub last_diagnosed_error: String,
}
418
419/// The agent loop state.
420pub struct AgentLoop {
421 // Core components
422 pub conversation: Conversation,
423 pub tool_registry: std::sync::Arc<ToolRegistry>,
424 /// TurnRunner owns the provider, tools, and context.
425 pub turn_runner: TurnRunner,
426 pub permission_store: std::sync::Arc<std::sync::RwLock<PermissionStore>>,
427 pub config: Config,
428 /// Context construction strategy for the active provider. Selected
429 /// at construction via `ctx::for_provider` and rebuilt on
430 /// `AgentCommand::ReloadConfig` when the provider changes.
431 ///
432 /// `Arc` (not `Box`) — shared with `turn_runner.ctx` so datalog's
433 /// `build_messages` call and runner's actual send go through the
434 /// same instance. Rebuilds on `ReloadConfig` update both clones
435 /// (see the reload handler below).
436 pub ctx: std::sync::Arc<dyn crate::ctx::CtxBuilder>,
437
438 /// Session-start environment snapshot — git branch / HEAD / status.
439 /// Captured once in `new()`, refreshed on `ChangeDir` (new working
440 /// tree ⇒ new repo). Stale-by-design: rendered with a disclaimer
441 /// in `build_system_prompt` so the model knows it's not live.
442 /// See `crate::ctx::env`.
443 pub env_snapshot: crate::ctx::EnvSnapshot,
444
445 // Execution state
446 pub phase: AgentPhase,
447 pub turn_tokens: usize,
448 pub total_tokens: usize,
449 pub turn_start: Option<Instant>,
450
451 // Per-turn counters
452 tool_call_count: usize,
453 /// LLM round-trip count (standard "turn" metric).
454 /// Each iteration of run_turn_loop = 1 turn, regardless of how many
455 /// tools were called in that iteration.
456 turn_count: usize,
457 /// Optional hard cap on turn_count. When Some(n), run_turn_loop exits
458 /// via finish_turn(TurnStopReason::TurnLimit) before starting turn n+1.
459 /// None = unbounded (historical behavior — loop stops naturally when the
460 /// LLM returns no tool calls, or when the step budget is hit).
461 max_turns: Option<usize>,
462 retry_count: usize,
463 /// Tool-call IDs already forwarded to the renderer in the current
464 /// user turn. Cleared at the start of each new user message (in
465 /// `process_user_input` per-turn reset block).
466 ///
467 /// Dedupes the case where 429 / stream-ended retries cause the
468 /// runner to re-emit `TurnEvent::ToolCallStarted` with the same
469 /// provider-assigned tool_call_id. Without this, every retry adds
470 /// a duplicate `▸ Bash(...)` row in scrollback — at extreme rate-
471 /// limit scenarios users see the same command 30+ times.
472 emitted_tool_ids: std::collections::HashSet<String>,
473
474 // Approval channel endpoints for InteractivePermissionDecider
475 /// Receives approval requests from InteractivePermissionDecider
476 approval_req_rx: mpsc::UnboundedReceiver<crate::turn::permission::ApprovalRequest>,
477 /// Sends approval decisions back to InteractivePermissionDecider
478 approval_resp_tx: mpsc::UnboundedSender<PermissionDecision>,
479 /// Last approval request (for ApproveToolAlways — need to know which tool)
480 last_approval_request: Option<crate::turn::permission::ApprovalRequest>,
481
482 // Cancellation token for the current turn
483 cancel_token: CancellationToken,
484
485 /// Cancellation token for the background code-graph indexer.
486 /// Fresh-cancelled-then-rebuilt on every `/cd` so a prior indexer
487 /// (still parsing files) yields CPU instead of racing the new one.
488 indexer_cancel: CancellationToken,
489
490 /// Guard against concurrent `/background` tasks. Set on dispatch,
491 /// cleared by the spawned task when it completes. Acquire/Release
492 /// ordering so the cleared write is visible to the next dispatcher
493 /// check on a different thread.
494 background_running: std::sync::Arc<AtomicBool>,
495
496 /// Discipline tracking — all counters for loop detection, stagnation,
497 /// error streaks, and tool usage patterns. Extracted from AgentLoop to
498 /// reduce God Object complexity (was 22 fields inline).
499 pub(crate) discipline_state: DisciplineState,
500
501 /// Files read this turn (for tracking read-but-not-edit waste)
502 files_read_this_turn: Vec<String>,
503 /// Files edited/written this turn
504 files_edited_this_turn: Vec<String>,
505 /// The user's original task message for this turn (re-injected as reminders).
506 current_task: String,
507 /// Name of the tool currently being executed (for smart truncation).
508 current_tool_name: String,
509
510 /// Last git checkpoint ref (SHA) for /undo rollback.
511 pub last_checkpoint: Option<String>,
512
513 /// Most recently edited file (absolute path). Injected as full content in system prompt
514 /// so the model doesn't need to re-read it next turn. Capped at ~6K tokens.
515 active_file: Option<PathBuf>,
516
517 /// Pending user input appended during streaming. Injected before next LLM call.
518 pending_input: Option<String>,
519 /// Session-level file tracker: all files read/edited across the entire session.
520 /// Used to build the "working set" — tree-sitter skeletons injected before each LLM call.
521 /// This replaces the old recent_file_cache with a smarter, budget-aware approach.
522 session_files: std::collections::HashMap<String, PathBuf>,
523 /// Whether planning phase is active (first LLM call without tools to force a plan).
524 planning_phase: bool,
525 /// Remaining read-only turns for diagnosis tasks. When > 0, only read-only tools are available.
526 /// Decremented each turn. Forces the model to read code before curl/edit.
527 diagnosis_read_only_turns: usize,
528 /// Plan mode: restrict to read-only tools and inject planning instructions.
529 /// Toggled via `/plan` command or `SetPlanMode` agent command.
530 pub plan_mode: bool,
531 /// Current task type — drives dynamic prompt selection and planning.
532 /// ATLAS-style subtask driver: decomposes plan into per-file subtasks.
533 subtask_driver: subtask_driver::SubtaskDriver,
534 /// Original plan text from model's first response — used for plan adherence reminders.
535 plan_text: Option<String>,
536
537 /// Completion detection: model indicated task is done.
538 /// Set when text contains completion marker AND recent tool results all succeeded.
539 /// Next turn: if model only does read/grep → stop (unnecessary verification).
540 /// If model does edit/write/bash → cancel grace, continue (more substantive work).
541 #[allow(dead_code)]
542 completion_grace: bool,
543
544 /// Track whether all tool results in the last turn were successful.
545 /// Used by completion detection: only trigger grace when tools succeeded.
546 #[allow(dead_code)]
547 last_turn_tools_all_success: bool,
548
549 // Skill registry — provides descriptions for system prompt and powers use_skill tool
550 skill_registry: std::sync::Arc<std::sync::RwLock<SkillRegistry>>,
551
552 /// Hook executor for lifecycle events.
553 hook_executor: std::sync::Arc<crate::hook::executor::HookExecutor>,
554
555 // Code graph background indexer channel
556 reindex_tx: Option<mpsc::UnboundedSender<PathBuf>>,
557
558 // Datalog writer — writes per-turn markdown logs to datalog/ directory.
559 datalog: crate::turn::datalog::DatalogWriter,
560
561 // Channels
562 cmd_rx: mpsc::UnboundedReceiver<AgentCommand>,
563 event_tx: mpsc::UnboundedSender<AgentEvent>,
564}
565
566/// Cloneable sender side for UI/runtime code to communicate with the agent.
567#[derive(Clone)]
568pub struct AgentClient {
569 pub cmd_tx: mpsc::UnboundedSender<AgentCommand>,
570 /// Shared tool registry for dynamic MCP tool registration.
571 pub tool_registry: std::sync::Arc<ToolRegistry>,
572 /// Loaded skills, shared with the agent loop. The TUI uses this
573 /// to populate the slash-command palette with `user_invocable()`
574 /// entries, and to expand the template when a user picks one.
575 /// Same `Arc` the agent loop holds — reload(...) calls there are
576 /// visible here without extra plumbing.
577 pub skill_registry: std::sync::Arc<std::sync::RwLock<SkillRegistry>>,
578}
579
580/// Handle for the UI to communicate with the agent.
581pub struct AgentHandle {
582 pub client: AgentClient,
583 pub event_rx: mpsc::UnboundedReceiver<AgentEvent>,
584}
585
586#[derive(Clone)]
587pub struct AgentRuntimeFactory {
588 pub config: Config,
589 pub working_dir: PathBuf,
590 pub telemetry: std::sync::Arc<atomcode_telemetry::Telemetry>,
591 pub lsp: Option<std::sync::Arc<crate::lsp::manager::LspManager>>,
592 pub shared_tools: std::sync::Arc<ToolRegistry>,
593 pub skill_registry: std::sync::Arc<std::sync::RwLock<SkillRegistry>>,
594 pub max_turns: Option<usize>,
595 runtime_counter: std::sync::Arc<AtomicU64>,
596}
597
598impl AgentRuntimeFactory {
599 pub fn set_config(&mut self, config: Config) {
600 self.config = config;
601 }
602
603 pub fn set_working_dir(&mut self, working_dir: PathBuf) {
604 self.working_dir = working_dir;
605 }
606
607 fn next_runtime_label(&self) -> String {
608 let id = self.runtime_counter.fetch_add(1, Ordering::Relaxed) + 1;
609 format!("runtime-{id}")
610 }
611
612 pub fn build_provider(&self) -> Box<dyn LlmProvider> {
613 let Some(provider_config) = self.config.providers.get(&self.config.default_provider) else {
614 return crate::provider::unavailable_provider(
615 "未配置 provider。请使用 /provider 添加 provider 后再试。",
616 );
617 };
618
619 match crate::provider::create_provider(provider_config) {
620 Ok(provider) => provider,
621 Err(e) => crate::provider::unavailable_provider(format!("provider 初始化失败: {e:#}")),
622 }
623 }
624
625 pub fn spawn_runtime(
626 &self,
627 conversation: Conversation,
628 ) -> (
629 AgentClient,
630 tokio::sync::mpsc::UnboundedReceiver<AgentEvent>,
631 ) {
632 let provider = self.build_provider();
633 let mut tool_context = ToolContext::with_telemetry(
634 self.working_dir.clone(),
635 "default",
636 self.telemetry.clone(),
637 );
638 let runtime_label = self.next_runtime_label();
639 tool_context.file_history = std::sync::Arc::new(tokio::sync::Mutex::new(
640 crate::tool::file_history::FileHistory::new(&runtime_label),
641 ));
642 tool_context.lsp = self.lsp.clone();
643
644 let (mut loop_, handle) = AgentLoop::new_with_shared_parts(
645 self.config.clone(),
646 provider,
647 self.shared_tools.clone(),
648 self.skill_registry.clone(),
649 Some(runtime_label),
650 tool_context,
651 conversation,
652 );
653 loop_.set_max_turns(self.max_turns);
654
655 let ctx = atomcode_telemetry::CurrentContext::current();
656 tokio::spawn(async move {
657 atomcode_telemetry::CurrentContext::scope(ctx, || loop_.run()).await
658 });
659
660 (handle.client, handle.event_rx)
661 }
662
663 pub fn from_initial_loop(agent_loop: &AgentLoop, max_turns: Option<usize>) -> Self {
664 let working_dir = agent_loop
665 .turn_runner
666 .context
667 .working_dir
668 .try_read()
669 .map(|g| g.clone())
670 .unwrap_or_else(|_| PathBuf::from("."));
671
672 Self {
673 config: agent_loop.config.clone(),
674 working_dir,
675 telemetry: agent_loop.turn_runner.context.telemetry.clone(),
676 lsp: agent_loop.turn_runner.context.lsp.clone(),
677 shared_tools: agent_loop.tool_registry.clone(),
678 skill_registry: agent_loop.skill_registry.clone(),
679 max_turns,
680 runtime_counter: std::sync::Arc::new(AtomicU64::new(1)),
681 }
682 }
683
684 pub fn new_for_test(
685 config: Config,
686 working_dir: PathBuf,
687 shared_tools: std::sync::Arc<ToolRegistry>,
688 skill_registry: std::sync::Arc<std::sync::RwLock<SkillRegistry>>,
689 ) -> Self {
690 let telemetry = ToolContext::new(working_dir.clone()).telemetry;
691 Self {
692 config,
693 working_dir,
694 telemetry,
695 lsp: None,
696 shared_tools,
697 skill_registry,
698 max_turns: None,
699 runtime_counter: std::sync::Arc::new(AtomicU64::new(1)),
700 }
701 }
702}
703
704impl AgentLoop {
705 /// Create a new agent loop and its corresponding UI handle.
706 pub fn new(
707 config: Config,
708 provider: Box<dyn LlmProvider>,
709 mut tool_registry: ToolRegistry,
710 tool_context: ToolContext,
711 conversation: Conversation,
712 ) -> (Self, AgentHandle) {
713 // Load skills from disk and register the use_skill tool.
714 let working_dir = tool_context
715 .working_dir
716 .try_read()
717 .map(|g| g.clone())
718 .unwrap_or_else(|_| std::path::PathBuf::from("."));
719
720 let mut registry = SkillRegistry::new();
721 let _ = registry.reload(&working_dir);
722 let skill_registry = std::sync::Arc::new(std::sync::RwLock::new(registry));
723 let disabled_internal: std::collections::HashSet<String> =
724 std::env::var("ATOMCODE_DISABLE_TOOLS")
725 .ok()
726 .map(|v| {
727 v.split(',')
728 .map(|s| s.trim().to_string())
729 .filter(|s| !s.is_empty())
730 .collect()
731 })
732 .unwrap_or_default();
733 let internal_enabled = |name: &str| !disabled_internal.contains(name);
734
735 // Always register use_skill — the tool itself gracefully reports
736 // "no skills available" when the registry is empty. Gating on
737 // has_skills breaks Windows where skills are installed via plugins
738 // that may not be present at compile time. The model only wastes
739 // a turn calling use_skill with an empty registry, not 5+ turns
740 // re-describing the task that a skill would have covered.
741 if internal_enabled("use_skill") {
742 tool_registry.register_sync(Box::new(UseSkillTool {
743 registry: skill_registry.clone(),
744 }));
745 }
746
747 // Graph query tools: not exposed to model (adds 5 tool definitions that
748 // weak models never use correctly). Graph data is still injected automatically
749 // via grep's graph header and auto_inject_graph_context — the model benefits
750 // from graph without needing to call these tools directly.
751 // To re-enable: set ATOMCODE_GRAPH_TOOLS=1
752 if std::env::var("ATOMCODE_GRAPH_TOOLS")
753 .map(|v| v == "1")
754 .unwrap_or(false)
755 {
756 if internal_enabled("trace_callers") {
757 tool_registry.register_sync(Box::new(crate::tool::trace_callers::TraceCallersTool));
758 }
759 if internal_enabled("trace_callees") {
760 tool_registry.register_sync(Box::new(crate::tool::trace_callees::TraceCalleesTool));
761 }
762 if internal_enabled("trace_chain") {
763 tool_registry.register_sync(Box::new(crate::tool::trace_chain::TraceChainTool));
764 }
765 if internal_enabled("file_dependencies") {
766 tool_registry.register_sync(Box::new(crate::tool::file_deps::FileDependenciesTool));
767 }
768 if internal_enabled("blast_radius") {
769 tool_registry.register_sync(Box::new(crate::tool::blast_radius::BlastRadiusTool));
770 }
771 }
772
773 let shared_tools = std::sync::Arc::new(tool_registry);
774 Self::new_from_shared_bootstrap(
775 config,
776 provider,
777 shared_tools,
778 skill_registry,
779 None,
780 tool_context,
781 conversation,
782 )
783 }
784
    /// Create a new runtime on top of tool and skill registries that are
    /// already shared.
    ///
    /// This path intentionally does not reload skills or register
    /// `use_skill` / graph tools; the initial runtime owns that one-time
    /// setup. Every argument is forwarded unchanged to
    /// `new_from_shared_bootstrap`, and its `(Self, AgentHandle)` result is
    /// returned as-is.
    pub fn new_with_shared_parts(
        config: Config,
        provider: Box<dyn LlmProvider>,
        shared_tools: std::sync::Arc<ToolRegistry>,
        skill_registry: std::sync::Arc<std::sync::RwLock<SkillRegistry>>,
        runtime_label: Option<String>,
        tool_context: ToolContext,
        conversation: Conversation,
    ) -> (Self, AgentHandle) {
        Self::new_from_shared_bootstrap(
            config,
            provider,
            shared_tools,
            skill_registry,
            runtime_label,
            tool_context,
            conversation,
        )
    }
807
808 #[allow(clippy::too_many_arguments)]
809 fn new_from_shared_bootstrap(
810 config: Config,
811 provider: Box<dyn LlmProvider>,
812 shared_tools: std::sync::Arc<ToolRegistry>,
813 skill_registry: std::sync::Arc<std::sync::RwLock<SkillRegistry>>,
814 runtime_label: Option<String>,
815 mut tool_context: ToolContext,
816 conversation: Conversation,
817 ) -> (Self, AgentHandle) {
818 let (cmd_tx, cmd_rx) = mpsc::unbounded_channel();
819 let (event_tx, event_rx) = mpsc::unbounded_channel();
820
821 let working_dir = tool_context
822 .working_dir
823 .try_read()
824 .map(|g| g.clone())
825 .unwrap_or_else(|_| std::path::PathBuf::from("."));
826
827 // Load persisted code graph from disk and share with ToolContext.
828 let graph_path = working_dir.join(".atomcode").join("graph.bin");
829 let code_graph = crate::graph::persist::load(&graph_path);
830 let graph = std::sync::Arc::new(tokio::sync::RwLock::new(code_graph));
831 tool_context.graph = graph;
832
833 // Build approval channels for interactive permission flow
834 let (approval_req_tx, approval_req_rx) = mpsc::unbounded_channel();
835 let (approval_resp_tx, approval_resp_rx) = mpsc::unbounded_channel();
836
837 let permission_store = std::sync::Arc::new(std::sync::RwLock::new(PermissionStore::new()));
838
839 let interactive_permission =
840 Box::new(crate::turn::permission::InteractivePermissionDecider::new(
841 approval_req_tx,
842 approval_resp_rx,
843 permission_store.clone(),
844 ));
845
846 // Hand the registry handle to ToolContext so active-dispatch tools
847 // (parallel_edit_files) can read it at execute time without
848 // creating a Tool ↔ Registry Arc cycle.
849 tool_context.tool_registry = Some(shared_tools.clone());
850 // Convert Box → Arc so provider can be shared with sub-agents.
851 let provider: std::sync::Arc<dyn LlmProvider> = std::sync::Arc::from(provider);
852
853 // Build the datalog writer before `config` is moved into the agent below.
854 let datalog = match runtime_label.as_deref() {
855 Some(label) => crate::turn::datalog::DatalogWriter::new_with_filename_tag(
856 &working_dir,
857 &config.datalog,
858 label,
859 ),
860 None => crate::turn::datalog::DatalogWriter::new(&working_dir, &config.datalog),
861 };
862
863 // Select the context-construction strategy once for this session.
864 // Rebuilds on ReloadConfig when the provider changes.
865 let ctx: std::sync::Arc<dyn crate::ctx::CtxBuilder> =
866 match config.providers.get(&config.default_provider) {
867 Some(pc) => crate::ctx::for_provider(pc),
868 // Fallback for first-run / broken-config path: synthesize a
869 // minimal provider so `for_provider` still gets its hands on
870 // a context_window. Matches Config::default_context_window()
871 // behavior (128_000) so sessions without a provider don't
872 // panic before the user runs /login or /model.
873 None => crate::ctx::for_provider(&crate::config::provider::ProviderConfig {
874 provider_type: String::new(),
875 api_key: None,
876 model: String::new(),
877 base_url: None,
878 system_prompt: None,
879 user_agent: None,
880 context_window: 128_000,
881 max_tokens: None,
882 thinking_type: None,
883 thinking_keep: None,
884 reasoning_history: None,
885 thinking_enabled: None,
886 thinking_budget: None,
887 skip_tls_verify: false,
888 ephemeral: true,
889
890}),
891 };
892
893 let hooks = crate::hook::json_config::load_hooks_config(&working_dir);
894 let hook_executor = std::sync::Arc::new(crate::hook::executor::HookExecutor::new(hooks));
895
896 let turn_runner = TurnRunner {
897 provider,
898 tools: shared_tools.clone(),
899 context: tool_context.clone(),
900 config: config.clone(),
901 ctx: ctx.clone(),
902 permission: interactive_permission,
903 recently_edited_files: Vec::new(),
904 hook_executor: hook_executor.clone(),
905 loop_guard: Default::default(),
906 };
907
908 // Capture session-start env snapshot (git status, branch, HEAD).
909 // Blocking I/O here is fine: `new()` runs once at startup, the
910 // capture is ~tens of ms for typical repos, and it's required
911 // before the first turn's system prompt is assembled.
912 let env_snapshot = crate::ctx::EnvSnapshot::capture(&working_dir);
913
914 let agent = Self {
915 conversation,
916 tool_registry: shared_tools.clone(),
917 turn_runner,
918 permission_store,
919 config,
920 ctx,
921 env_snapshot,
922 phase: AgentPhase::Idle,
923 turn_tokens: 0,
924 total_tokens: 0,
925 turn_start: None,
926 tool_call_count: 0,
927 turn_count: 0,
928 max_turns: None,
929 retry_count: 0,
930 emitted_tool_ids: std::collections::HashSet::new(),
931 approval_req_rx,
932 approval_resp_tx,
933 last_approval_request: None,
934 cancel_token: CancellationToken::new(),
935 indexer_cancel: CancellationToken::new(),
936 background_running: std::sync::Arc::new(AtomicBool::new(false)),
937 discipline_state: DisciplineState::default(),
938 files_read_this_turn: Vec::new(),
939 files_edited_this_turn: Vec::new(),
940 current_task: String::new(),
941 current_tool_name: String::new(),
942 last_checkpoint: None,
943 active_file: None,
944 pending_input: None,
945 planning_phase: false,
946 diagnosis_read_only_turns: 0,
947 plan_mode: false,
948 completion_grace: false,
949 last_turn_tools_all_success: false,
950 subtask_driver: subtask_driver::SubtaskDriver::new(),
951 plan_text: None,
952 session_files: std::collections::HashMap::new(),
953 skill_registry,
954 hook_executor,
955 reindex_tx: None,
956 datalog,
957 cmd_rx,
958 event_tx,
959 };
960
961 let client = AgentClient {
962 cmd_tx,
963 tool_registry: shared_tools.clone(),
964 skill_registry: agent.skill_registry.clone(),
965 };
966 let handle = AgentHandle { client, event_rx };
967
968 (agent, handle)
969 }
970
971 /// Set an optional hard cap on the number of LLM turns this agent will
972 /// run. When the cap is reached, run_turn_loop exits via
973 /// finish_turn(TurnStopReason::TurnLimit). `None` (the default) is
974 /// unbounded. Used by the CLI `--max-turns` flag.
975 pub fn set_max_turns(&mut self, max: Option<usize>) {
976 self.max_turns = max;
977 }
978
979 /// Run the agent loop. This is the main entry point — call from a tokio task.
980 /// The loop processes commands from the UI and emits events back.
981 pub async fn run(mut self) {
982 // Active-dispatch tool registration. The model invokes
983 // `parallel_edit_files` explicitly when it judges parallel edit
984 // is the right move; the framework no longer infers from text.
985 // Gated on `subagent.enabled` so users can disable fork
986 // dispatch via `/config subagent.enabled false` without code
987 // changes — the tool simply isn't advertised to the model.
988 // Registered here (not in `new()`) because `register_arc` is
989 // async and `new()` is sync.
990 if self.config.subagent.enabled {
991 let tool = crate::tool::parallel_edit::ParallelEditTool {
992 provider: self.turn_runner.provider.clone(),
993 config: self.config.clone(),
994 event_tx: self.event_tx.clone(),
995 };
996 self.tool_registry
997 .register_arc("parallel_edit_files".to_string(), std::sync::Arc::new(tool))
998 .await;
999 }
1000
1001 // Spawn background code graph indexer
1002 {
1003 let working_dir = self.turn_runner.context.working_dir.read().await.clone();
1004 let graph = self.turn_runner.context.graph.clone();
1005 let (reindex_tx, mut reindex_rx) = mpsc::unbounded_channel::<PathBuf>();
1006 let wd_for_indexer = working_dir.clone();
1007 let cancel = self.indexer_cancel.clone();
1008 tokio::spawn(async move {
1009 let mut indexer =
1010 crate::graph::indexer::GraphIndexer::new(graph.clone(), wd_for_indexer.clone());
1011 indexer.index_all(cancel).await;
1012 // Persist after initial indexing
1013 let gp = wd_for_indexer.join(".atomcode").join("graph.bin");
1014 if let Ok(g) = graph.try_read() {
1015 let _ = crate::graph::persist::save(&g, &gp);
1016 }
1017 // Listen for reindex requests
1018 while let Some(path) = reindex_rx.recv().await {
1019 indexer.reindex_file(&path).await;
1020 }
1021 });
1022 self.reindex_tx = Some(reindex_tx);
1023 }
1024
1025 // --- SessionStart Hook ---
1026 if self.hook_executor.has_hooks() {
1027 let wd = self
1028 .turn_runner
1029 .context
1030 .working_dir
1031 .try_read()
1032 .map(|g| g.display().to_string())
1033 .unwrap_or_default();
1034 let ctx = crate::hook::HookContext {
1035 event: "session_start".into(),
1036 tool_name: None,
1037 tool_args: None,
1038 tool_result: None,
1039 tool_success: None,
1040 session_id: String::new(),
1041 working_dir: wd,
1042 };
1043 self.hook_executor
1044 .run_session_event(crate::hook::HookEvent::SessionStart, &ctx)
1045 .await;
1046 }
1047
1048 while let Some(cmd) = self.cmd_rx.recv().await {
1049 match cmd {
1050 AgentCommand::SendMessage { text, images, image_markers } => {
1051 self.handle_send_message(text, images, image_markers).await;
1052 }
1053 AgentCommand::Cancel => {
1054 self.cancel_token.cancel();
1055 self.cancel_token = CancellationToken::new();
1056 self.phase = AgentPhase::Idle;
1057 // Cancel the current turn — preserve completed content, backfill
1058 // (cancelled) for unpaired tool calls, and mark turn as Completed.
1059 self.conversation.cancel_current_turn();
1060 // Sync the preserved messages to TUI
1061 let messages = self.conversation.messages.clone();
1062 let _ = self.event_tx.send(AgentEvent::TurnCancelled { messages });
1063 }
1064 AgentCommand::ApproveTool => {
1065 // Approval handled inside run_turn_loop via channels
1066 }
1067 AgentCommand::ApproveToolAlways => {
1068 // Approval handled inside run_turn_loop via channels
1069 }
1070 AgentCommand::DenyTool => {
1071 // Denial handled inside run_turn_loop via channels
1072 }
1073 AgentCommand::ReloadConfig(new_config) => {
1074 let old_provider_name = self.config.default_provider.clone();
1075 let old_type = self
1076 .config
1077 .providers
1078 .get(&old_provider_name)
1079 .map(|p| p.provider_type.clone());
1080 self.config = new_config;
1081 // Rebuild hook executor from JSON config files.
1082 let wd = self
1083 .turn_runner
1084 .context
1085 .working_dir
1086 .try_read()
1087 .map(|g| g.clone())
1088 .unwrap_or_else(|_| std::path::PathBuf::from("."));
1089 let hooks = crate::hook::json_config::load_hooks_config(&wd);
1090 self.hook_executor =
1091 std::sync::Arc::new(crate::hook::executor::HookExecutor::new(hooks));
1092 self.turn_runner.hook_executor = self.hook_executor.clone();
1093 let new_provider_name = self.config.default_provider.clone();
1094 let new_type = self
1095 .config
1096 .providers
1097 .get(&new_provider_name)
1098 .map(|p| p.provider_type.clone());
1099
1100 let should_clear = reload_should_clear_conversation(
1101 &old_provider_name,
1102 old_type.as_deref(),
1103 &new_provider_name,
1104 new_type.as_deref(),
1105 );
1106 if should_clear {
1107 self.conversation.messages.clear();
1108 self.conversation.turn_tracker =
1109 crate::conversation::turn::TurnTracker::new();
1110 self.session_files.clear();
1111 }
1112
1113 if let Some(provider_config) = self.config.providers.get(&new_provider_name) {
1114 // Rebuild the context strategy for the new provider.
1115 // Selected once per provider; per-model customizations
1116 // (e.g. Ollama schema trimming, Claude cache markers)
1117 // take effect from the next turn. Assign the same
1118 // `Arc` to both `self.ctx` and `self.turn_runner.ctx`
1119 // so datalog and the send path stay locked together.
1120 let new_ctx = crate::ctx::for_provider(provider_config);
1121 self.ctx = new_ctx.clone();
1122 self.turn_runner.ctx = new_ctx;
1123 match crate::provider::create_provider(provider_config) {
1124 Ok(new_provider) => {
1125 self.turn_runner.provider = std::sync::Arc::from(new_provider);
1126 self.turn_runner.config = self.config.clone();
1127 }
1128 Err(e) => {
1129 let msg = format!("{:#}", e);
1130 let is_auth_gap = msg.contains("Not logged in")
1131 || msg.contains("Invalid auth.toml")
1132 || msg.contains("Token expired")
1133 || msg.contains("Token refresh failed");
1134 if is_auth_gap {
1135 self.turn_runner.provider = std::sync::Arc::from(
1136 crate::provider::unavailable_provider(format!(
1137 "Provider 凭证不可用:{}。请使用 /login 或 /codingplan 完成配置后再试。",
1138 msg
1139 )),
1140 );
1141 self.turn_runner.config = self.config.clone();
1142 } else {
1143 let _ = self.event_tx.send(AgentEvent::TextDelta(format!(
1144 "**Warning: failed to reload provider: {}**\n\n",
1145 e
1146 )));
1147 }
1148 }
1149 }
1150 } else {
1151 self.turn_runner.provider =
1152 std::sync::Arc::from(crate::provider::unavailable_provider(
1153 "No active provider configured. Use /provider to add one.",
1154 ));
1155 self.turn_runner.config = self.config.clone();
1156 }
1157 }
1158 AgentCommand::ChangeDir(path) => {
1159 self.change_dir(&path).await;
1160 }
1161 AgentCommand::AppendInput(text) => {
1162 // Queue user input to be injected before the next LLM call.
1163 if let Some(ref mut existing) = self.pending_input {
1164 existing.push('\n');
1165 existing.push_str(&text);
1166 } else {
1167 self.pending_input = Some(text);
1168 }
1169 }
1170 AgentCommand::ClearConversation => {
1171 // Clear the conversation history in the agent loop.
1172 self.conversation = Conversation::new();
1173 self.datalog.clear();
1174 }
1175 AgentCommand::SetMessages(messages) => {
1176 // Set messages from a resumed session.
1177 // Rebuild turn_tracker so the context builder can use
1178 // proper turn-based windowing instead of the fallback path.
1179 let turn_tracker =
1180 crate::conversation::turn::TurnTracker::rebuild(&messages);
1181 self.conversation.messages = messages;
1182 self.conversation.turn_tracker = turn_tracker;
1183 }
1184 AgentCommand::SetPlanMode(enabled) => {
1185 self.plan_mode = enabled;
1186 }
1187 AgentCommand::Compact { prompt } => {
1188 self.run_compact(prompt).await;
1189 }
1190 AgentCommand::Remember { content, global } => {
1191 use crate::config::memory::MemoryStore;
1192 let store = if global {
1193 MemoryStore::global()
1194 } else {
1195 let wd = self
1196 .turn_runner
1197 .context
1198 .working_dir
1199 .try_read()
1200 .map(|g| g.clone())
1201 .unwrap_or_default();
1202 MemoryStore::project(&wd)
1203 };
1204 match store.append(&content) {
1205 Ok(_) => {
1206 let scope = if global { "global" } else { "project" };
1207 let _ = self.event_tx.send(AgentEvent::TextDelta(format!(
1208 "(remembered in {} memory: {})\n",
1209 scope, content
1210 )));
1211 }
1212 Err(e) => {
1213 let _ = self.event_tx.send(AgentEvent::TextDelta(format!(
1214 "(failed to save memory: {})\n",
1215 e
1216 )));
1217 }
1218 }
1219 }
1220 AgentCommand::Forget { keyword } => {
1221 use crate::config::memory::MemoryStore;
1222 let wd = self
1223 .turn_runner
1224 .context
1225 .working_dir
1226 .try_read()
1227 .map(|g| g.clone())
1228 .unwrap_or_default();
1229 let global = MemoryStore::global();
1230 let project = MemoryStore::project(&wd);
1231 let g_matches = global.find_matching(&keyword);
1232 let p_matches = project.find_matching(&keyword);
1233 if g_matches.is_empty() && p_matches.is_empty() {
1234 let _ = self.event_tx.send(AgentEvent::TextDelta(format!(
1235 "(no memory entries matching '{}')\n",
1236 keyword
1237 )));
1238 } else {
1239 let mut msg = String::new();
1240 for entry in &g_matches {
1241 msg.push_str(&format!(" [global] - {}\n", entry));
1242 }
1243 for entry in &p_matches {
1244 msg.push_str(&format!(" [project] - {}\n", entry));
1245 }
1246 let g_result = global.remove_matching(&keyword);
1247 let p_result = project.remove_matching(&keyword);
1248 if g_result.is_err() || p_result.is_err() {
1249 msg.push_str(
1250 "(warning: some entries could not be removed from disk)\n",
1251 );
1252 }
1253 let total = g_matches.len() + p_matches.len();
1254 msg.push_str(&format!(
1255 "(removed {} matching entr{})\n",
1256 total,
1257 if total == 1 { "y" } else { "ies" }
1258 ));
1259 let _ = self.event_tx.send(AgentEvent::TextDelta(msg));
1260 }
1261 }
1262 AgentCommand::ShowMemory => {
1263 use crate::config::memory::MemoryStore;
1264 let wd = self
1265 .turn_runner
1266 .context
1267 .working_dir
1268 .try_read()
1269 .map(|g| g.clone())
1270 .unwrap_or_default();
1271 let global = MemoryStore::global();
1272 let project = MemoryStore::project(&wd);
1273 let g_entries = global.load();
1274 let p_entries = project.load();
1275 if g_entries.is_empty() && p_entries.is_empty() {
1276 let _ = self.event_tx.send(AgentEvent::TextDelta(
1277 "(no memories saved yet — use /remember <fact> to add one)\n"
1278 .to_string(),
1279 ));
1280 } else {
1281 let mut msg = String::new();
1282 if !g_entries.is_empty() {
1283 msg.push_str(&format!(" [Global] ({})\n", global.path().display()));
1284 for e in &g_entries {
1285 msg.push_str(&format!(" - {}\n", e));
1286 }
1287 }
1288 if !p_entries.is_empty() {
1289 msg.push_str(&format!(" [Project] ({})\n", project.path().display()));
1290 for e in &p_entries {
1291 msg.push_str(&format!(" - {}\n", e));
1292 }
1293 }
1294 let _ = self.event_tx.send(AgentEvent::TextDelta(msg));
1295 }
1296 }
1297 AgentCommand::Background { task } => {
1298 // AcqRel: pair with the spawned task's Release store on
1299 // completion so the next dispatcher sees the cleared flag.
1300 if self.background_running.swap(true, Ordering::AcqRel) {
1301 let _ = self.event_tx.send(AgentEvent::Error {
1302 error: "A background task is already running. Wait for it to finish."
1303 .to_string(),
1304 messages: self.conversation.messages.clone(),
1305 });
1306 } else {
1307 let provider = self.turn_runner.provider.clone();
1308 let tools = self.turn_runner.tools.clone();
1309 let context = self.turn_runner.context.clone();
1310 let context_for_commit = context.clone();
1311 let config = self.config.clone();
1312 let ctx = self.ctx.clone();
1313 let event_tx = self.event_tx.clone();
1314 let flag = self.background_running.clone();
1315 tokio::spawn(async move {
1316 let result = background::run_background_task(
1317 &task,
1318 provider,
1319 tools,
1320 context,
1321 config,
1322 ctx,
1323 event_tx.clone(),
1324 )
1325 .await;
1326 if let AgentEvent::BackgroundComplete {
1327 files_edited,
1328 success: true,
1329 ..
1330 } = &result
1331 {
1332 if !files_edited.is_empty() {
1333 let wd = context_for_commit
1334 .working_dir
1335 .try_read()
1336 .map(|g| g.clone())
1337 .unwrap_or_default();
1338 match git_auto_commit::auto_commit_edited_files(
1339 &wd,
1340 files_edited,
1341 ) {
1342 git_auto_commit::AutoCommitOutcome::Committed {
1343 sha,
1344 message,
1345 } => {
1346 let _ = event_tx.send(AgentEvent::TextDelta(format!(
1347 "\n[auto-commit {sha}] {message}\n"
1348 )));
1349 }
1350 git_auto_commit::AutoCommitOutcome::Failed { reason } => {
1351 let _ = event_tx.send(AgentEvent::TextDelta(format!(
1352 "\n[auto-commit skipped] {reason}\n"
1353 )));
1354 }
1355 git_auto_commit::AutoCommitOutcome::Skipped { .. } => {}
1356 }
1357 }
1358 }
1359 let _ = event_tx.send(result);
1360 flag.store(false, Ordering::Release);
1361 });
1362 }
1363 }
1364 AgentCommand::RefreshContextStats => {
1365 let system_prompt = self.build_system_prompt();
1366 let (msgs, _) = self
1367 .ctx
1368 .build_messages(&self.conversation, &system_prompt, "");
1369 self.emit_rich_context_stats(&self.conversation, &msgs)
1370 .await;
1371 }
1372 AgentCommand::ReloadHooks => {
1373 // Triggered by /plugin install|uninstall in the TUI so
1374 // newly-contributed hooks (especially UserPromptSubmit)
1375 // fire on the very next user message instead of waiting
1376 // for /cd or restart.
1377 let wd = self
1378 .turn_runner
1379 .context
1380 .working_dir
1381 .try_read()
1382 .map(|g| g.clone())
1383 .unwrap_or_else(|_| std::path::PathBuf::from("."));
1384 let hooks = crate::hook::json_config::load_hooks_config(&wd);
1385 self.hook_executor =
1386 std::sync::Arc::new(crate::hook::executor::HookExecutor::new(hooks));
1387 self.turn_runner.hook_executor = self.hook_executor.clone();
1388 }
1389 AgentCommand::SyncMessages => {
1390 let messages = self.conversation.messages.clone();
1391 let _ = self.event_tx.send(AgentEvent::MessagesSync { messages });
1392 }
1393 AgentCommand::Shutdown => {
1394 // --- SessionEnd Hook ---
1395 if self.hook_executor.has_hooks() {
1396 let wd = self
1397 .turn_runner
1398 .context
1399 .working_dir
1400 .try_read()
1401 .map(|g| g.display().to_string())
1402 .unwrap_or_default();
1403 let ctx = crate::hook::HookContext {
1404 event: "session_end".into(),
1405 tool_name: None,
1406 tool_args: None,
1407 tool_result: None,
1408 tool_success: None,
1409 session_id: String::new(),
1410 working_dir: wd,
1411 };
1412 self.hook_executor
1413 .run_session_event(crate::hook::HookEvent::SessionEnd, &ctx)
1414 .await;
1415 }
1416 break;
1417 }
1418 }
1419 }
1420 }
1421
1422 // -------------------------------------------------------------------------
1423 // Core agent logic
1424 // -------------------------------------------------------------------------
1425
1426 async fn handle_send_message(
1427 &mut self,
1428 mut content: String,
1429 images: Vec<crate::conversation::message::ImagePart>,
1430 image_markers: Vec<usize>,
1431 ) {
1432 self.current_task = content.clone();
1433
1434 if let Some(reason) = self.turn_runner.provider.availability_error() {
1435 let _ = self.event_tx.send(AgentEvent::Error {
1436 error: reason.to_string(),
1437 messages: self.conversation.messages.clone(),
1438 });
1439 self.finish_turn(TurnStopReason::Error);
1440 return;
1441 }
1442
1443 // ── UserPromptSubmit hooks ──
1444 // Run before any preprocessing so plugin hooks see the raw user
1445 // input. A hook can either block the turn (CC `decision: "block"`
1446 // or non-zero exit) or inject extra context that we splice into
1447 // the user message before the LLM sees it.
1448 if self.hook_executor.has_hooks() {
1449 let cwd = self
1450 .turn_runner
1451 .context
1452 .working_dir
1453 .try_read()
1454 .map(|g| g.display().to_string())
1455 .unwrap_or_default();
1456 match self
1457 .hook_executor
1458 .run_user_prompt_submit(&content, "", &cwd)
1459 .await
1460 {
1461 crate::hook::UserPromptHookResult::Continue => {}
1462 crate::hook::UserPromptHookResult::Inject(extra) => {
1463 // Append rather than prepend so the user's wording stays
1464 // at the top of the message — the hook context reads as
1465 // supplementary, not as a rewrite.
1466 content.push_str("\n\n");
1467 content.push_str(&extra);
1468 }
1469 crate::hook::UserPromptHookResult::Block(reason) => {
1470 let _ = self.event_tx.send(AgentEvent::Error {
1471 error: format!("hook blocked: {}", reason),
1472 messages: self.conversation.messages.clone(),
1473 });
1474 self.finish_turn(TurnStopReason::Error);
1475 return;
1476 }
1477 }
1478 }
1479
1480 // Detect negative feedback — user is unhappy with previous turn's work.
1481 let lower = content.to_lowercase();
1482 let negative_keywords = [
1483 "改错",
1484 "不对",
1485 "错了",
1486 "还是不行",
1487 "没用",
1488 "不是这样",
1489 "搞错",
1490 "又错",
1491 "白做",
1492 "越改越差",
1493 "恢复",
1494 "回滚",
1495 "撤销",
1496 "不行",
1497 "wrong",
1498 "not right",
1499 "still broken",
1500 "doesn't work",
1501 "undo",
1502 "revert",
1503 "go back",
1504 "that's worse",
1505 "stop",
1506 "broken",
1507 ];
1508 self.discipline_state.is_negative_feedback =
1509 content.chars().count() < 80 && negative_keywords.iter().any(|kw| lower.contains(kw));
1510
1511 // Git checkpoint: snapshot working tree before agent starts editing.
1512 let wd = self
1513 .turn_runner
1514 .context
1515 .working_dir
1516 .try_read()
1517 .map(|g| g.clone())
1518 .unwrap_or_default();
1519 self.last_checkpoint = git_checkpoint::create_checkpoint(&wd);
1520
1521 // Reset ctx_budget_hint to full window at start of each user message.
1522 // Without this, the first tool call in a new turn reads the stale budget
1523 // from the previous turn's last LLM call (when ctx was full), causing
1524 // 670-line files to skeleton when there's plenty of room.
1525 //
1526 // Read from `self.ctx` not `self.config` — ctx applies defensive
1527 // clamps (e.g. OllamaCtx floors at 4K) that config's raw
1528 // `context_window` doesn't reflect. Using config would tell
1529 // read_file "you have 128K" when actual budget is 4K.
1530 self.turn_runner
1531 .context
1532 .ctx_budget_hint
1533 .store(self.ctx.ctx_window(), std::sync::atomic::Ordering::Relaxed);
1534
1535 // Auto-diagnose: if user mentions error keywords, scan logs and attach findings.
1536 // This gives the model the real error from Turn 1, instead of spending 3-5 turns grepping.
1537 let enriched = self.auto_diagnose_errors(&content).await;
1538 // Extract and store exception signature for recurrence detection across turns.
1539 if let Some(pos) = enriched.find("<!-- diag_exception:") {
1540 let rest = &enriched[pos + 20..];
1541 if let Some(end) = rest.find(" -->") {
1542 self.discipline_state.last_diagnosed_error = rest[..end].to_string();
1543 }
1544 }
1545 // Strip the hidden marker before adding to conversation
1546 let clean = if let Some(pos) = enriched.find("\n<!-- diag_exception:") {
1547 enriched[..pos].to_string()
1548 } else {
1549 enriched
1550 };
1551
1552 // ── Task boundary cleanup ──
1553 // New user message = new task. If there's old context from the
1554 // previous task (>12 messages), compress it unconditionally.
1555 // This prevents dirty-start degradation where 20K+ of stale
1556 // conversation dilutes the batch prompt for the new task.
1557 // Unlike maybe_compress_history (which checks the 50% threshold),
1558 // this fires at every task boundary regardless of token count.
1559 if self.conversation.messages.len() > 12 {
1560 // Task-boundary compression goes through the active ctx strategy.
1561 // No LLM call — the compressed content is already
1562 // one-line-per-round summaries (DefaultCtx) compact enough
1563 // for cold zone.
1564 if let Some((content, n_msgs)) = self.ctx.compression_plan(&self.conversation) {
1565 let system_prompt = self.build_system_prompt();
1566 let _ = self.try_apply_compression(&system_prompt, n_msgs, content, false);
1567 }
1568 }
1569
1570 // Vision preprocessing: when the active provider can't accept images
1571 // and the user pasted some, run them through the configured VL model
1572 // first and turn the result into plain text. See
1573 // `vision_preprocessor` module doc for the data-flow contract.
1574 let mut vision_warning: Option<String> = None;
1575 let (clean, images) = if !images.is_empty() {
1576 use crate::vision_preprocessor::{maybe_preprocess, PreprocessOutcome};
1577 match maybe_preprocess(&self.config, &*self.turn_runner.provider, &clean, &images).await {
1578 PreprocessOutcome::Skipped => (clean, images),
1579 PreprocessOutcome::Replaced { text, vl_key } => {
1580 // Surface a one-line success notice (provider key in
1581 // muted gray, char count for sanity-check). The full
1582 // description is intentionally NOT shown in the UI —
1583 // it would either be redundant with what the main
1584 // model proceeds to discuss or, on bad VL output,
1585 // mislead the user that "success" means useful
1586 // content. Description still rides into conversation
1587 // history below.
1588 let _ = self.event_tx.send(AgentEvent::VisionPreprocessSuccess {
1589 vl_key: vl_key.clone(),
1590 char_count: text.chars().count(),
1591 });
1592 let merged = if clean.is_empty() {
1593 format!("[图片内容(由 {vl_key} 识别)]\n{text}")
1594 } else {
1595 format!("{clean}\n\n[图片内容(由 {vl_key} 识别)]\n{text}")
1596 };
1597 (merged, Vec::new())
1598 }
1599 PreprocessOutcome::Failed { reason } => {
1600 vision_warning = Some(format!(
1601 "VL 预处理失败:{reason} · 图片已自动保留,可直接重试",
1602 ));
1603 // Layer-1 retry support: hand the image bytes back to
1604 // TUIX so the user doesn't have to re-paste from
1605 // clipboard. Without this the bytes are gone after
1606 // submit and Ctrl+V is the only way to re-attach.
1607 let _ = self.event_tx.send(AgentEvent::RestorePendingImages {
1608 images: images.clone(),
1609 markers: image_markers.clone(),
1610 });
1611 let merged = if clean.is_empty() {
1612 "[图片识别失败]".to_string()
1613 } else {
1614 format!("{clean}\n\n[图片识别失败]")
1615 };
1616 (merged, Vec::new())
1617 }
1618 }
1619 } else {
1620 (clean, images)
1621 };
1622 if let Some(w) = vision_warning {
1623 let _ = self.event_tx.send(AgentEvent::Warning(w));
1624 }
1625
1626 if images.is_empty() {
1627 self.conversation.add_user_message(&clean);
1628 } else {
1629 use crate::conversation::message::{Message, MessageContent, Role};
1630 let msg = Message {
1631 role: Role::User,
1632 content: MessageContent::MultiPart {
1633 text: if clean.is_empty() { None } else { Some(clean.clone()) },
1634 images,
1635 },
1636 };
1637 let idx = self.conversation.messages.len();
1638 self.conversation.messages.push(msg);
1639 self.conversation.turn_tracker.on_user_message(idx);
1640 }
1641 self.turn_tokens = 0;
1642 self.tool_call_count = 0;
1643 self.turn_count = 0;
1644 self.retry_count = 0;
1645 self.emitted_tool_ids.clear();
1646 self.files_read_this_turn.clear();
1647 self.files_edited_this_turn.clear();
1648 self.turn_runner.recently_edited_files.clear();
1649 // Cross-batch loop guard is scoped to a single user-message
1650 // turn — every new user message = fresh slate. See
1651 // `turn::loop_guard` for why this clear() is the entire
1652 // per-turn-only contract on the caller side.
1653 self.turn_runner.loop_guard.clear();
1654 self.discipline_state.consecutive_reads = 0;
1655 self.discipline_state.verify_injected = false;
1656 self.discipline_state.model_produced_text = false;
1657 self.discipline_state.silent_tool_rounds = 0;
1658 // Note: is_negative_feedback is set above, do not reset here.
1659 self.discipline_state.build_fail_count = 0;
1660 self.discipline_state.scouting_count = 0;
1661 self.discipline_state.api_confirmed_working = false;
1662 self.discipline_state.consecutive_edits_file = None;
1663 self.discipline_state.consecutive_edits_count = 0;
1664 self.discipline_state.sleep_count = 0;
1665 self.discipline_state.consecutive_verify_count = 0;
1666 self.discipline_state.recent_errors.clear();
1667 self.discipline_state.executed_cmds.clear();
1668 self.discipline_state.category_fail_streak.clear();
1669 // Reset stagnation tracking — new user message = fresh turn,
1670 // previous stagnation state must not carry over.
1671 self.discipline_state.stagnant_turns = 0;
1672 self.discipline_state.last_known_files = 0;
1673 self.discipline_state.last_targeted_reads = 0;
1674 self.discipline_state.targeted_read_count = 0;
1675 // Reset subtask driver and plan — previous turn's plan must not
1676 // bleed into the new turn. Without this, a text-only Q&A response
1677 // that mentions file names (e.g. as examples) triggers extract_from_plan,
1678 // and the plan completion guard then forces the loop to continue
1679 // editing files that were never part of the user's actual request.
1680 self.subtask_driver = subtask_driver::SubtaskDriver::new();
1681 self.plan_text = None;
1682 // Clear session_files on each new user message.
1683 // Working Set only tracks files from the CURRENT task.
1684 self.session_files.clear();
1685 self.turn_start = Some(Instant::now());
1686 self.cancel_token = CancellationToken::new();
1687
1688 // Initialize datalog for this turn
1689 {
1690 let model_name = self.turn_runner.provider.model_name().to_string();
1691 // Use ctx's effective window so datalog matches what build_messages
1692 // actually renders with (OllamaCtx 4K floor, etc).
1693 self.datalog
1694 .begin_turn(&content, &model_name, self.ctx.ctx_window());
1695 }
1696
1697 // State-based decisions (replaces keyword-based task_classifier).
1698 // Two facts, not guesses:
1699
1700 // 1. Has the model read any files this session? If not → read-only first turn.
1701 let has_file_context =
1702 !self.files_read_this_turn.is_empty() || !self.files_edited_this_turn.is_empty();
1703 self.diagnosis_read_only_turns = if has_file_context { 0 } else { 1 };
1704 self.planning_phase = !has_file_context;
1705
1706 // Unified prepend — no task classification, no auto-build injection.
1707 // Build command detection deferred to Phase 5 (LLM-inferred project config).
1708 let _content = format!(
1709 "Read the relevant code first, then plan and implement.\n\n{}",
1710 content
1711 );
1712
1713 self.phase = AgentPhase::Thinking;
1714 let _ = self
1715 .event_tx
1716 .send(AgentEvent::PhaseChange(AgentPhase::Thinking));
1717
1718 self.run_turn_loop().await;
1719 }
1720
1721 // needs_planning replaced by task_classifier::TaskType::needs_planning()
1722
1723 // auto_diagnose_errors → diagnose.rs
1724 // find_file_in_project → diagnose.rs
1725
1726 /// Drives one user request to completion: repeatedly runs a single LLM turn via `TurnRunner::run_with_filter`, forwarding streamed `TurnEvent`s to the UI as `AgentEvent`s and servicing UI commands while the turn is in flight.
1727 /// After each turn the `TurnResult` decides the next step: `UsedTools` loops again (unless the step limit is hit), `Responded` finishes (with one retry when truncated), `Failed` retries, compacts, or stops depending on the error class, and `Cancelled` ends without calling `finish_turn`.
1728 /// Returns once `finish_turn` has been called or the turn was cancelled; the turn budget is checked via `check_turn_limit` before every iteration.
1729 async fn run_turn_loop(&mut self) {
1730 loop {
1731 // Turn budget check BEFORE incrementing, so the reported
1732 // turn_count equals the number of turns actually executed
1733 // (not including the "would-be" next turn we refuse to run).
1734 // The stop reason is propagated via TurnComplete.stop_reason;
1735 // the CLI [done] line surfaces it as `stopped=turn_limit`.
1736 if self.check_turn_limit() {
1737 self.finish_turn(TurnStopReason::TurnLimit);
1738 return;
1739 }
1740 self.turn_count += 1;
1741
1742 // Decrement diagnosis read-only counter each turn.
1743 if self.diagnosis_read_only_turns > 0 {
1744 self.diagnosis_read_only_turns -= 1;
1745 }
1746
1747 // Inject any pending user input appended during streaming.
1748 if let Some(input) = self.pending_input.take() {
1749 self.conversation
1750 .add_user_message(&format!("[Additional context from user]: {}", input));
1751 }
1752
1753 // Planning phase injection: REMOVED.
1754 // It used to inject a "[PLAN NOW]" reminder on turn 3 (turns 1-2 were
1755 // left for reading files to understand the task), but that timing
1756 // was arbitrary.
1757 // The system prompt WORKFLOW section already guides planning.
1758
1759 // NOTE: Negative feedback injection disabled — adds a System message that
1760 // confuses weak models and wastes context. The model sees the user's complaint
1761 // directly; no extra injection needed.
1762
1763 // DIAGNOSTIC STRATEGY injection removed — the model decides its own
1764 // debugging approach. System prompt PLAN FIRST section is sufficient.
1765
1766 // Stagnation detection: REMOVED.
1767 // Was injecting "[STAGNATION WARNING]" after 3 turns without edits.
1768 // Bug: triggered after model output a completion summary (pure text,
1769 // no edits), preventing it from stopping. The warning was interpreted
1770 // as "keep working" by the model. Stagnation detection was harmful —
1771 // the prompt guides the model to work efficiently.
1772
1773 let system_prompt = self.build_system_prompt();
1774 // Per-turn reminder removed: verbatim task now rides on the cadence
1775 // reflection checkpoint — see agent::discipline::reflection_prompt.
1776 let turn_reminder = String::new();
1777 let cancel = self.cancel_token.clone();
1778
1779 // Context compression: when > 70% budget, pause and compress
1780 // old turns via LLM call. Keeps last 5 turns full, compressed
1781 // history goes to cold zone (FIFO, max 3 entries).
1782 self.maybe_compress_history(&system_prompt).await;
1783
1784 // Batch reminder: REMOVED.
1785 // Was injecting fake user messages ("[Batch reminder: call MULTIPLE tools...]")
1786 // every turn after turn 3 when last turn was single-tool. In a 24-turn session,
1787 // this injected 19 fake user messages that disrupted model's diagnostic focus.
1788 // The system prompt already contains batch guidance — injecting mid-conversation
1789 // user messages is counterproductive.
1790
1791 // Move conversation out to avoid borrow conflicts with self in select!
1792 let mut conv = std::mem::take(&mut self.conversation);
1793
1794 // Datalog: mark the start of a new LLM round-trip
1795 self.datalog.log_llm_call();
1796
1797 // Rich ContextStats for `/context` + inline datalog dump.
1798 // The file-level request log (`log_llm_request`) now lives
1799 // inside `TurnRunner::run_with_filter`, paired with
1800 // `log_llm_response`, so any caller — AgentLoop or daemon —
1801 // gets symmetric request/response files. This block only
1802 // feeds UI state + datalog md inline debug.
1803 {
1804 let context_window = self.ctx.ctx_window();
1805 // Same `Arc` instance as `self.turn_runner.ctx`, so
1806 // `build_messages` here and in the runner produce
1807 // byte-identical output (same system prompt, same
1808 // per-model directives, same reminder placement).
1809 let (msgs, _) = self
1810 .ctx
1811 .build_messages(&conv, &system_prompt, &turn_reminder);
1812 let tool_defs = self.turn_runner.tools.get_definitions().await;
1813 // Dump request to datalog for inline debugging
1814 self.datalog.log_llm_dump(
1815 &msgs,
1816 tool_defs.len(),
1817 self.turn_runner.provider.model_name(),
1818 context_window,
1819 );
1820
1821 self.emit_rich_context_stats(&conv, &msgs).await;
1822 }
1823
1824 // Run the turn in a scoped block so all borrows of self.turn_runner
1825 // end before we use self.conversation again.
1826 let (result, mut turn_rx, context_collapsed) = {
1827 let (turn_tx, mut turn_rx) = mpsc::unbounded_channel::<TurnEvent>();
1828
1829 // Destructure self to get split borrows — the borrow checker needs to see
1830 // that turn_runner and the other fields are disjoint borrows.
1831 let mut context_collapsed = false;
1832 let context_collapsed = &mut context_collapsed;
1833 let runner = &mut self.turn_runner;
1834 let cmd_rx = &mut self.cmd_rx;
1835 let approval_req_rx = &mut self.approval_req_rx;
1836 let event_tx = &self.event_tx;
1837 let approval_resp_tx = &self.approval_resp_tx;
1838 let permission_store = &self.permission_store;
1839 let cancel_token = &mut self.cancel_token;
1840 let last_approval_request = &mut self.last_approval_request;
1841 let pending_input = &mut self.pending_input;
1842 let phase = &mut self.phase;
1843 let model_produced_text = &mut self.discipline_state.model_produced_text;
1844 let current_tool_name = &mut self.current_tool_name;
1845 let datalog = &mut self.datalog;
1846 let files_edited_this_turn = &mut self.files_edited_this_turn;
1847 let active_file = &mut self.active_file;
1848 let files_read_this_turn = &mut self.files_read_this_turn;
1849 let consecutive_reads = &mut self.discipline_state.consecutive_reads;
1850 let targeted_read_count = &mut self.discipline_state.targeted_read_count;
1851 let last_bash_cmd = &mut self.discipline_state.last_bash_cmd;
1852 let session_files = &mut self.session_files;
1853 let reindex_tx = &self.reindex_tx;
1854 let emitted_tool_ids = &mut self.emitted_tool_ids;
1855
1856 // Tool filtering: plan mode and the diagnosis phase use read-only tools.
1857 // All other turns have full tool access (including edit_file).
1858 // EXECUTE thinking is applied INSIDE edit_file (fresh file read,
1859 // ±5 lines context return, fuzzy match, delta validation) —
1860 // not by blocking tools at the agent loop level.
1861 let read_only_tools: &[&str] = &[
1862 "read_file",
1863 "grep",
1864 "glob",
1865 "list_directory",
1866 "web_search",
1867 "web_fetch",
1868 "trace_callees",
1869 "trace_callers",
1870 "trace_chain",
1871 "file_dependencies",
1872 "blast_radius",
1873 ];
1874 let use_read_only = self.plan_mode || self.diagnosis_read_only_turns > 0;
1875 let tool_filter: Option<&[&str]> = if use_read_only {
1876 Some(read_only_tools)
1877 } else {
1878 None // Full tool access — model can read, edit, bash, search_replace
1879 };
1880 let turn_fut = runner.run_with_filter(
1881 &mut conv,
1882 &system_prompt,
1883 &turn_reminder,
1884 &turn_tx,
1885 cancel,
1886 tool_filter,
1887 );
1888 tokio::pin!(turn_fut);
1889
1890 // Accumulate text deltas for datalog (flushed on tool call or turn end)
1891 let mut datalog_text_accum = String::new();
1892
1893 let result = loop {
1894 tokio::select! {
1895 biased;
1896
1897 result = &mut turn_fut => break result,
1898
1899 Some(event) = turn_rx.recv() => {
1900 // Inline forward_turn_event to avoid borrowing self
1901 match event {
1902 TurnEvent::TextDelta(text) => {
1903 *model_produced_text = true;
1904 datalog_text_accum.push_str(&text);
1905 let _ = event_tx.send(AgentEvent::TextDelta(text));
1906 }
1907 TurnEvent::ReasoningDelta(text) => {
1908 let _ = event_tx.send(AgentEvent::ReasoningDelta(text));
1909 }
1910 TurnEvent::ToolBatchStarted { ref batch_id, ref calls } => {
1911 let _ = event_tx.send(AgentEvent::ToolBatchStarted {
1912 batch_id: batch_id.clone(),
1913 calls: calls.clone(),
1914 });
1915 }
1916 TurnEvent::ToolBatchCompleted { ref batch_id, ok, total, elapsed_ms } => {
1917 let _ = event_tx.send(AgentEvent::ToolBatchCompleted {
1918 batch_id: batch_id.clone(),
1919 ok,
1920 total,
1921 elapsed_ms,
1922 });
1923 }
1924 TurnEvent::ToolCallStarted { ref id, ref name, ref arguments } => {
1925 // Dedupe across retries: the same provider-assigned tool_call_id
1926 // arrives again whenever a 429 / stream-ended attempt is retried.
1927 // Without this guard, every retry paints another `▸ Bash(...)` row.
1928 // Skip ALL downstream side effects (datalog, phase, file tracking,
1929 // event emission) for the duplicate — the first emission has
1930 // already accounted for them.
1931 if !emitted_tool_ids.insert(id.clone()) {
1932 continue;
1933 }
1934 // Forward tool name immediately for UI spinner
1935 let _ = event_tx.send(AgentEvent::ToolCallStreaming { name: name.clone(), hint: String::new() });
1936 // Flush accumulated model text to datalog before logging tool call
1937 if !datalog_text_accum.is_empty() {
1938 datalog.log_model_text(&datalog_text_accum);
1939 datalog_text_accum.clear();
1940 }
1941 datalog.log_tool_call(name, arguments);
1942
1943 *current_tool_name = name.clone();
1944 *phase = AgentPhase::CallingTool(name.clone());
1945 let _ = event_tx.send(AgentEvent::PhaseChange(phase.clone()));
1946
1947 if name == "bash" {
1948 if let Ok(args) = serde_json::from_str::<serde_json::Value>(arguments) {
1949 *last_bash_cmd = args
1950 .get("command")
1951 .and_then(|v| v.as_str())
1952 .unwrap_or("")
1953 .to_string();
1954 }
1955 }
1956
1957 // Track files for Working Set + read counts
1958 if matches!(name.as_str(), "read_file" | "edit_file" | "create_file" | "search_replace" | "glob" | "grep") {
1959 if let Ok(args) = serde_json::from_str::<serde_json::Value>(arguments) {
1960 // Try file_path first, then path (glob/grep use path)
1961 let fp = args.get("file_path").and_then(|v| v.as_str())
1962 .or_else(|| args.get("path").and_then(|v| v.as_str()));
1963 if let Some(fp) = fp {
1964 let short = std::path::Path::new(fp)
1965 .file_name()
1966 .map(|n| n.to_string_lossy().to_string())
1967 .unwrap_or_else(|| fp.to_string());
1968 session_files.insert(short.clone(), std::path::PathBuf::from(fp));
1969 if name == "read_file" {
1970 if !files_read_this_turn.contains(&short) {
1971 files_read_this_turn.push(short);
1972 }
1973 // Targeted reads (offset/limit) are always progress
1974 let has_offset = args.get("offset").is_some() || args.get("limit").is_some();
1975 if has_offset {
1976 *targeted_read_count += 1;
1977 }
1978 }
1979 }
1980 }
1981 }
1982
1983 let _ = event_tx.send(AgentEvent::ToolCallStarted { id: id.clone(), name: name.clone(), arguments: arguments.clone() });
1984 }
1985 TurnEvent::ToolOutputChunk { call_id, chunk } => {
1986 // Forward real-time tool output to UI
1987 let _ = event_tx.send(AgentEvent::ToolOutputChunk { call_id, chunk });
1988 }
1989 TurnEvent::ToolCallResult { call_id, name, output, success, duration } => {
1990 // Track files for discipline
1991 if let Some(pos) = output.find("Edited ") {
1992 let rest = &output[pos + 7..];
1993 let fp_end = rest.find(|c: char| c == ' ' || c == '\n' || c == '(').unwrap_or(rest.len());
1994 let fp = rest[..fp_end].trim();
1995 if !fp.is_empty() {
1996 *active_file = Some(PathBuf::from(fp));
1997 }
1998 if !fp.is_empty() {
1999 let file = fp.to_string();
2000 if !files_edited_this_turn.contains(&file) {
2001 files_edited_this_turn.push(file);
2002 }
2003 }
2004 }
2005 if let Some(pos) = output.find("Wrote ").or_else(|| output.find("Overwrote ")).or_else(|| output.find("Created new file ")) {
2006 let keyword_len = if output[pos..].starts_with("Overwrote ") { 10 }
2007 else if output[pos..].starts_with("Created new file ") { 17 }
2008 else { 6 };
2009 let rest = &output[pos + keyword_len..];
2010 let fp_end = rest.find(|c: char| c == ' ' || c == '\n' || c == '(').unwrap_or(rest.len());
2011 let fp = rest[..fp_end].trim();
2012 if !fp.is_empty() {
2013 *active_file = Some(PathBuf::from(fp));
2014 }
2015 if !fp.is_empty() {
2016 let file = fp.to_string();
2017 if !files_edited_this_turn.contains(&file) {
2018 files_edited_this_turn.push(file);
2019 }
2020 }
2021 }
2022 if success {
2023 track_tool_modified_files(
2024 &name,
2025 last_bash_cmd,
2026 &output,
2027 files_edited_this_turn,
2028 );
2029 }
2030 if matches!(name.as_str(), "read_file" | "list_directory" | "glob" | "grep") {
2031 *consecutive_reads += 1;
2032 } else if matches!(name.as_str(), "edit_file" | "create_file") {
2033 *consecutive_reads = 0;
2034 }
2035 // Notify background indexer to reindex edited/created files
2036 if matches!(name.as_str(), "edit_file" | "create_file") && success {
2037 if let Some(ref tx) = reindex_tx {
2038 let path_str = output.lines().next().unwrap_or("")
2039 .trim_start_matches("Edited ")
2040 .trim_start_matches("Created new file ")
2041 .trim_start_matches("Created ")
2042 .trim_start_matches("Wrote ")
2043 .trim_start_matches("Overwrote ")
2044 .split_whitespace().next().unwrap_or("");
2045 if !path_str.is_empty() {
2046 let _ = tx.send(PathBuf::from(path_str));
2047 }
2048 }
2049 }
2050 datalog.log_tool_result(&output, success);
2051 let _ = event_tx.send(AgentEvent::ToolCallResult {
2052 call_id, name, output, success, duration,
2053 });
2054 }
2055 TurnEvent::TokenUsage { prompt_tokens, completion_tokens, total_tokens: _, cached_tokens } => {
2056 datalog.log_token_usage(prompt_tokens, completion_tokens, cached_tokens);
2057 if cached_tokens > 0 {
2058 datalog.log_cache_hit(prompt_tokens, cached_tokens);
2059 }
2060 let _ = event_tx.send(AgentEvent::TokenUsage(
2061 crate::stream::TokenUsage {
2062 prompt_tokens,
2063 completion_tokens,
2064 cached_tokens,
2065 }
2066 ));
2067 }
2068 TurnEvent::ContextStats { system_tokens, sent_tokens, dropped_tokens, working_set_tokens, total_messages } => {
2069 datalog.log_context_stats(system_tokens, sent_tokens, dropped_tokens, working_set_tokens, total_messages);
2070
2071 // Detect context collapse: fewer than 3000 sent tokens is taken to
2072 // mean the model has lost most history. Flag it so edit tracking is
2073 // reset after the turn and BLOCKED doesn't stop re-reading forgotten files.
2074 if sent_tokens < 3000 {
2075 *context_collapsed = true;
2076 }
2077
2078 // Narrow stats path — rich fields (tool_defs / cold_zone /
2079 // ctx_window / ctx_name) are left zeroed/empty here; the datalog block
2080 // earlier in this loop iteration calls emit_rich_context_stats instead.
2081 // TUI side merges both emissions into a single cache.
2082 let _ = event_tx.send(AgentEvent::ContextStats {
2083 system_tokens, sent_tokens, dropped_tokens, working_set_tokens, total_messages,
2084 tool_defs_tokens: 0,
2085 cold_zone_tokens: 0,
2086 ctx_window: 0,
2087 ctx_name: String::new(),
2088 system_prompt: String::new(),
2089 });
2090 }
2091 TurnEvent::ToolCallStreaming { name, hint } => {
2092 let _ = event_tx.send(AgentEvent::ToolCallStreaming { name, hint });
2093 }
2094 TurnEvent::Error(e) => {
2095 // Streaming-error forwarder: `conv` is borrowed
2096 // by the in-flight `turn_fut`, so we can't snapshot
2097 // `conv.messages` from here. The `TurnResult::Failed`
2098 // branches further down this function fire after
2099 // turn_fut completes and attach the proper snapshot.
2100 let _ = event_tx.send(AgentEvent::Error {
2101 error: e,
2102 messages: Vec::new(),
2103 });
2104 }
2105 TurnEvent::Warning(w) => {
2106 datalog.log_warning(&w);
2107 let _ = event_tx.send(AgentEvent::Warning(w));
2108 }
2109 TurnEvent::WorkingDirChanged(new_dir) => {
2110 // A tool (change_dir / bash cd) mutated the shared
2111 // cwd. Surface it so the TUI footer can update.
2112 // Intentionally does not mirror `services.rs::change_dir`
2113 // (which clears the conversation, reloads the code graph,
2114 // respawns the indexer) — those side effects are right for
2115 // a user-initiated `/cd` but would destroy mid-turn state
2116 // when the LLM is just navigating.
2117 let _ = event_tx.send(AgentEvent::WorkingDirChanged(new_dir));
2118 }
2119 TurnEvent::ApprovalRequested { tool_name, reason, call, messages } => {
2120 // Forward approval request to TUI, including
2121 // a snapshot of conversation.messages so the
2122 // TUI can persist mid-turn session state.
2123 let _ = event_tx.send(AgentEvent::ApprovalNeeded {
2124 tool_name,
2125 reason,
2126 call,
2127 messages,
2128 });
2129 *phase = AgentPhase::WaitingApproval;
2130 let _ = event_tx.send(AgentEvent::PhaseChange(AgentPhase::WaitingApproval));
2131 }
2132 }
2133 }
2134
2135 Some(req) = approval_req_rx.recv() => {
2136 // The ApprovalNeeded event was already sent from the
2137 // TurnEvent::ApprovalRequested handler above (which
2138 // has access to conversation.messages). Here we
2139 // only record the request for later approve/deny.
2140 *last_approval_request = Some(req);
2141 }
2142
2143 Some(cmd) = cmd_rx.recv() => {
2144 match cmd {
2145 AgentCommand::Cancel => {
2146 cancel_token.cancel();
2147 *cancel_token = CancellationToken::new();
2148 }
2149 AgentCommand::ApproveTool => {
2150 *phase = AgentPhase::Thinking;
2151 let _ = event_tx.send(AgentEvent::PhaseChange(AgentPhase::Thinking));
2152 let _ = approval_resp_tx.send(PermissionDecision::Allow);
2153 }
2154 AgentCommand::ApproveToolAlways => {
2155 if let Some(ref req) = last_approval_request {
2156 if let Ok(mut store) = permission_store.write() {
2157 store.grant_session(&req.call.name);
2158 }
2159 }
2160 *phase = AgentPhase::Thinking;
2161 let _ = event_tx.send(AgentEvent::PhaseChange(AgentPhase::Thinking));
2162 let _ = approval_resp_tx.send(PermissionDecision::Allow);
2163 }
2164 AgentCommand::DenyTool => {
2165 *phase = AgentPhase::Thinking;
2166 let _ = event_tx.send(AgentEvent::PhaseChange(AgentPhase::Thinking));
2167 let _ = approval_resp_tx.send(PermissionDecision::Deny);
2168 }
2169 AgentCommand::Shutdown => {
2170 cancel_token.cancel();
2171 }
2172 AgentCommand::AppendInput(text) => {
2173 if let Some(ref mut existing) = pending_input {
2174 existing.push('\n');
2175 existing.push_str(&text);
2176 } else {
2177 *pending_input = Some(text);
2178 }
2179 }
2180 // SyncMessages is handled in the outer loop
2181 // (after turn completes) because `conv` is
2182 // mutably borrowed by `turn_fut` here.
2183 _ => {} // Other commands ignored during turn
2184 }
2185 }
2186 }
2187 };
2188
2189 // Flush any remaining accumulated text to datalog
2190 if !datalog_text_accum.is_empty() {
2191 datalog.log_model_text(&datalog_text_accum);
2192 }
2193
2194 // turn_tx drops here (owned by this block), turn_fut also drops
2195 (result, turn_rx, *context_collapsed)
2196 };
2197 // All borrows of self.turn_runner are now released.
2198
2199 // Handle context collapse: clear edit tracking so model can re-read
2200 if context_collapsed {
2201 self.turn_runner.recently_edited_files.clear();
2202 }
2203
2204 // Restore conversation
2205 self.conversation = conv;
2206
2207 // Drain remaining events
2208 while let Ok(event) = turn_rx.try_recv() {
2209 self.forward_turn_event(event);
2210 }
2211
2212 // Handle result
2213 match result {
2214 TurnResult::Responded {
2215 ref text,
2216 tokens,
2217 truncated,
2218 } => {
2219 self.turn_tokens += tokens;
2220 self.total_tokens += tokens;
2221 // Log the final assistant text to datalog (TUI used to do this —
2222 // absorbed here now that TUI's duplicate TurnLog was removed).
2223 if !text.trim().is_empty() {
2224 self.datalog.log_text(text);
2225 }
2226
2227 // ATLAS subtask extraction: if model just output a plan (FeatureDev,
2228 // first response with text, no tools used yet), extract subtasks
2229 // and drive execution file-by-file.
2230 //
2231 // Guard: only extract when the model was truncated (it wanted to
2232 // continue but hit max_tokens). A Natural stop means the model
2233 // considers its response complete — it may be answering a question,
2234 // discussing design, or giving examples that mention file names.
2235 // Extracting subtasks from such text produces phantom plans
2236 // (e.g. "auth.rs" mentioned as an example gets treated as an
2237 // edit target, and plan-completion-guard then forces the loop
2238 // to keep running).
2239 if self.tool_call_count == 0
2240 && truncated
2241 && !text.trim().is_empty()
2242 && !self.subtask_driver.active
2243 {
2244 self.subtask_driver.extract_from_plan(text);
2245 // Store plan text for adherence reminders
2246 self.plan_text = Some(text.clone());
2247
2248 // Graph: check if plan covers all dependent files.
2249 // If the plan mentions router.rs and weather.rs but both depend
2250 // on types.rs, warn that types.rs might also need changes.
2251 if self.subtask_driver.active {
2252 let graph = self.turn_runner.context.graph.read().await;
2253 if graph.is_ready() {
2254 let plan_files: Vec<&str> = self
2255 .subtask_driver
2256 .subtasks
2257 .iter()
2258 .map(|s| s.file.as_str())
2259 .collect();
2260 let mut missing_deps: Vec<String> = Vec::new();
2261 let mut seen = std::collections::HashSet::new();
2262
2263 for plan_file in &plan_files {
2264 seen.insert(plan_file.to_string());
2265 }
2266
2267 for plan_file in &plan_files {
2268 // Find this file in graph and get its dependencies
2269 for (path, _) in &graph.file_symbols {
2270 let basename = path
2271 .file_name()
2272 .map(|f| f.to_string_lossy().to_string())
2273 .unwrap_or_default();
2274 if basename == *plan_file {
2275 // Check files this file depends on (callees' files)
2276 let sym_ids = graph.symbols_in_file(path);
2277 if let Some(ids) = sym_ids {
2278 for &sid in ids.iter().take(20) {
2279 if let Some(edges) = graph.callees(sid) {
2280 for edge in edges {
2281 if let Some(node) = graph.node(edge.to)
2282 {
2283 let dep_name = node
2284 .file
2285 .file_name()
2286 .map(|f| {
2287 f.to_string_lossy()
2288 .to_string()
2289 })
2290 .unwrap_or_default();
2291 if !dep_name.is_empty()
2292 && !seen.contains(&dep_name)
2293 && dep_name != basename
2294 {
2295 seen.insert(dep_name.clone());
2296 missing_deps.push(dep_name);
2297 }
2298 }
2299 }
2300 }
2301 }
2302 }
2303 break;
2304 }
2305 }
2306 }
2307
2308 // PLAN CHECK injection: REMOVED. Dependency warnings are no longer
2309 // injected; the model discovers deps itself.
2310 let _ = missing_deps; // suppress unused warning
2311 }
2312 drop(graph);
2313 }
2314
2315 // Subtask driver serial execution: REMOVED.
2316 // Was injecting "now edit file X" instructions from regex-extracted
2317 // plan. Batch prompt now lets model handle multi-file work itself.
2318 // Sub-agent dispatch also disabled (try_sub_agent_dispatch returns None).
2319 }
2320
2321 // finish_reason-based termination dispatch (2026-04-22).
2322 //
2323 // The previous code injected `(continuing...)` + `Continue.`
2324 // when the model returned empty text, under the theory that
2325 // empty = "was about to say more". In practice this conflated:
2326 // (a) finish_reason="length" — real max-token cutoff
2327 // mid-generation, retrying does salvage the session
2328 // (b) finish_reason="stop" + no text — model cleanly
2329 // decided to stop after reading tool results
2330 // (e.g. `cargo check` passed, nothing more to say)
2331 // and cycled case (b) into meaningless `Continue.` loops.
2332 //
2333 // CC has no such recovery mechanism — empty-on-stop IS the
2334 // natural termination (`project_cc_prompt_philosophy.md`).
2335 //
2336 // Briefly tried adding an "empty-after-failure" branch
2337 // (2026-04-22 20:44) but the hermes 20-41 session showed
2338 // the real issue was upstream in edit.rs `find_closest_match_inner`
2339 // producing garbage "closest match" hints — the model
2340 // gave up because the framework's hint was actively
2341 // misleading, not because it needed more nudging.
2342 // Reverting to the principled state machine.
2343 if truncated && self.retry_count < 1 {
2344 self.retry_count += 1;
2345 self.conversation.add_user_message(
2346 "Output limit hit. If the task is already complete, just output a \
2347 short summary and stop (no tool calls). Otherwise resume where you left off."
2348 );
2349 continue;
2350 }
2351
2352 self.finish_turn(TurnStopReason::Natural);
2353 return;
2354 }
2355 TurnResult::UsedTools {
2356 tool_count,
2357 tokens,
2358 text,
2359 } => {
2360 self.turn_tokens += tokens;
2361 self.total_tokens += tokens;
2362 self.tool_call_count += tool_count;
2363 // Track silent rounds: model used tools without explaining anything.
2364 let had_text = text.as_ref().map(|t| !t.trim().is_empty()).unwrap_or(false);
2365 if had_text {
2366 self.discipline_state.silent_tool_rounds = 0;
2367 } else {
2368 self.discipline_state.silent_tool_rounds += 1;
2369 }
2370
2371 // Fork sub-agent dispatch is no longer driven by parsing this
2372 // turn's text. The model invokes `parallel_edit_files`
2373 // explicitly when it judges parallel edit is the right move.
2374 // See `crate::tool::parallel_edit` for the active-dispatch
2375 // tool and `agent/mod.rs::run` for its registration.
2376
2377 // Post-process: truncate large outputs + externalize to disk
2378 self.post_process_tool_results(tool_count);
2379
2380 // ATLAS auto-verify: removed along with the verify module.
2381 // Model runs build/lint itself when needed.
2382 // See docs/archive/guardian-auto-compile.md if re-introducing.
2383
2384 // Safety cap at 200 tool calls — only for runaway cost protection.
2385 if self.check_step_limit() {
2386 self.finish_turn(TurnStopReason::StepLimit);
2387 return;
2388 }
2389 // Continue to next turn
2390 self.phase = AgentPhase::Thinking;
2391 let _ = self
2392 .event_tx
2393 .send(AgentEvent::PhaseChange(AgentPhase::Thinking));
2394 continue;
2395 }
2396 TurnResult::Failed(e) => {
2397 // Retry logic for transient errors
2398 let is_rate_limited = is_rate_limited_error(&e);
2399 let is_auth_error = is_auth_error(&e);
2400 let is_messages_illegal = e.contains("illegal") || e.contains("messages");
2401 // Upstream context-length overflow (OpenRouter 400, OpenAI
2402 // context_length_exceeded, Anthropic "prompt is too long").
2403 // Without this, the error fell through to the generic
2404 // retry branch which slept and re-sent the same oversized
2405 // request — guaranteed to fail again.
2406 let is_context_overflow = is_context_overflow_error(&e);
2407 // Open-source build attempted a CodingPlan-signed request.
2408 // The signing module isn't compiled in; retrying is
2409 // guaranteed to fail again. Fail-fast skips the otherwise-
2410 // useless 3-shot retry (3+6+9s of wasted time + 3 spurious
2411 // "[API error 请求失败]" lines hardcoded in Chinese that
2412 // would also display to English-locale users).
2413 let is_official_build_required = is_codingplan_unavailable_error(&e);
2414
2415 if is_official_build_required {
2416 self.datalog.log_error(&e);
2417 let _ = self.event_tx.send(AgentEvent::Error {
2418 error: public_error_message(&e),
2419 messages: self.conversation.messages.clone(),
2420 });
2421 self.finish_turn(TurnStopReason::Error);
2422 return;
2423 } else if (is_messages_illegal || is_context_overflow) && self.retry_count < 2 {
2424 self.retry_count += 1;
2425 let sys_prompt = self.build_system_prompt();
2426 // Auto-discover the proxy's actually-enforced limit
2427 // from the error body. Self-built proxies for
2428 // open-weight models often enforce far less than
2429 // the configured ctx_window — without parsing the
2430 // rejection we'd compact toward the wrong target.
2431 let limit = extract_provider_ctx_limit(&e)
2432 .unwrap_or_else(|| self.ctx.ctx_window());
2433 // 5K safety buffer — leaves room for the streaming
2434 // response and one round of tool results before the
2435 // next compact would be needed.
2436 let target = limit.saturating_sub(5_000);
2437 let recovered = self
2438 .emergency_compact_to_target(target, &sys_prompt)
2439 .await;
2440 let msg = if recovered {
2441 "\n[Context overflow — recovered via layered compact, retrying...]\n"
2442 .to_string()
2443 } else {
2444 format!(
2445 "\n[Context overflow — compacted toward {}T but still over, \
2446 retrying anyway...]\n",
2447 target
2448 )
2449 };
2450 let _ = self.event_tx.send(AgentEvent::TextDelta(msg));
2451 continue;
2452 } else if is_rate_limited && self.retry_count < 5 {
2453 self.retry_count += 1;
2454 let wait = (self.retry_count as u64 * 3).min(30);
2455 let _ = self.event_tx.send(AgentEvent::TextDelta(format!(
2456 "\n[Rate limited — retrying in {}s...]\n",
2457 wait
2458 )));
2459 tokio::time::sleep(Duration::from_secs(wait)).await;
2460 continue;
2461 } else if is_auth_error {
2462 self.datalog.log_error(&e);
2463 let _ = self.event_tx.send(AgentEvent::Error {
2464 error: public_error_message(&e),
2465 messages: self.conversation.messages.clone(),
2466 });
2467 self.finish_turn(TurnStopReason::Error);
2468 return;
2469 } else if self.retry_count < 3 {
2470 self.retry_count += 1;
2471 let wait = (self.retry_count as u64 * 3).min(15);
2472 let reason = public_error_reason(&e);
2473 let _ = self.event_tx.send(AgentEvent::TextDelta(format!(
2474 "\n[API error {},{} 秒后重试({}/3)...]\n",
2475 reason, wait, self.retry_count
2476 )));
2477 tokio::time::sleep(Duration::from_secs(wait)).await;
2478 continue;
2479 } else {
2480 self.datalog.log_error(&e);
2481 let _ = self.event_tx.send(AgentEvent::Error {
2482 error: public_error_message(&e),
2483 messages: self.conversation.messages.clone(),
2484 });
2485 self.finish_turn(TurnStopReason::Error);
2486 return;
2487 }
2488 }
2489 TurnResult::Cancelled => {
2490 // Check if turn was already cancelled by AgentCommand::Cancel
2491 // (which marks the turn as Completed immediately)
2492 if self.conversation.turn_tracker.active_turn().is_none() {
2493 // Already handled by AgentCommand::Cancel - just return
2494 return;
2495 }
2496 // Preserve completed content + backfill (cancelled) for unpaired tool calls
2497 self.conversation.cancel_current_turn();
2498 // Send TurnCancelled event for TUI to sync
2499 let messages = self.conversation.messages.clone();
2500 let _ = self.event_tx.send(AgentEvent::TurnCancelled { messages });
2501 // Do finish_turn's bookkeeping WITHOUT emitting TurnComplete.
2502 // TurnCancelled already tells the TUI the turn ended; emitting
2503 // TurnComplete on top buffers a stale "✓ done · N rounds" line
2504 // that fires the next time the TUI's phase becomes Streaming —
2505 // i.e. right after the user's next submission.
2506 // Note: cancel_current_turn() already marks the turn Completed.
2507 // NOTE(review): no complete_current() call remains in this branch — confirm that is intended.
2508 self.datalog
2509 .end_turn(self.turn_tokens, self.tool_call_count);
2510 self.turn_start = None;
2511 self.phase = AgentPhase::Idle;
2512 let _ = self
2513 .event_tx
2514 .send(AgentEvent::PhaseChange(AgentPhase::Idle));
2515 self.conversation.save(&Conversation::history_path());
2516 return;
2517 }
2518 }
2519 }
2520 }
2521
2522 // forward_turn_event → tool_dispatch.rs
2523 // post_process_tool_results → tool_dispatch.rs
2524
2525 /// Pro-active context compaction. Two-stage:
2526 ///
2527 /// 1. **Tier 1 (cheap, mechanical):** collapse old `ToolResult`
2528 /// bodies into stubs (`compact_old_tool_results_in_place`, the
2529 /// same generic stub format `microcompact` uses at render time;
2530 /// keeps the last 3 turns full). Zero LLM calls. Cheap to fire,
2531 /// easy to revert if model needs the bytes back via re-read.
2532 ///
2533 /// 2. **Tier 2 (expensive, LLM-driven):** if Tier 1 didn't bring
2534 /// the context under threshold, fall through to LLM-summarize
2535 /// older turns into the cold zone (existing path).
2536 ///
2537 /// Buffer was retuned 2026-05-06: small windows (≤100K, e.g.
2538 /// self-hosted GLM 65K) now trigger at 60K instead of 52K, so
2539 /// the 5K runway above the trigger lets Tier 1 absorb hits
2540 /// before the proxy 65K wall. Datalog 2026-05-06_19-06-50: 4
2541 /// reactive emergency compactions, each dropping 18-30K
2542 /// catastrophically. With proactive Tier 1 firing 5K below the
2543 /// wall, expected pattern is 3-4 mild Tier 1 events dropping
2544 /// 5-10K each, model retains skeleton + recent turns.
2545 async fn maybe_compress_history(&mut self, system_prompt: &str) {
2546 let sys_tokens = system_prompt.len() / 4 + 4;
2547 if !self.ctx.needs_compression(&self.conversation, sys_tokens) {
2548 return;
2549 }
2550
2551 // ── Tier 1: collapse old tool_results (no LLM call) ──
2552 // Keep the most recent 3 turns at full fidelity; older
2553 // turns get their tool_result bodies replaced with the same
2554 // generic stub microcompact uses at render time. One stub
2555 // format, one place to maintain.
2556 crate::ctx::render::compact_old_tool_results_in_place(
2557 &mut self.conversation,
2558 /* keep_recent_turns */ 3,
2559 );
2560
2561 // Re-check: if Tier 1 was enough, stop here and skip the
2562 // LLM summarization round-trip. This is the common case for
2563 // sessions where the bulk of context is heavy bash/cargo
2564 // outputs.
2565 if !self.ctx.needs_compression(&self.conversation, sys_tokens) {
2566 return;
2567 }
2568
2569 // ── Tier 2: LLM-summarize oldest turns into cold zone ──
2570 let (content, n_turns) = match self.ctx.compression_plan(&self.conversation) {
2571 Some(plan) => plan,
2572 None => return,
2573 };
2574
2575 let summarize_prompt = Self::default_summarize_prompt(&content);
2576
2577 let summary = self.run_llm_summary(&summarize_prompt).await;
2578 let final_summary = if summary.trim().is_empty() {
2579 content
2580 } else {
2581 summary
2582 };
2583
2584 let _ = self.try_apply_compression(system_prompt, n_turns, final_summary, true);
2585 }
2586
2587 /// Emit a full ContextStats snapshot for the `/context` command.
2588 /// Callers pass the conversation and the already-built `msgs` (from
2589 /// `self.ctx.build_messages`) so the estimate reflects exactly what
2590 /// the model would see on the next turn — directives and all. Used by
2591 /// both `handle_send_message` (once per turn, post-build_messages) and
2592 /// `run_compact` (to refresh the cached stats TUI reads for `/context`
2593 /// after an out-of-turn compaction).
2594 async fn emit_rich_context_stats(
2595 &self,
2596 conv: &Conversation,
2597 msgs: &[crate::conversation::message::Message],
2598 ) {
2599 let tool_defs = self.turn_runner.tools.get_definitions().await;
2600 let tool_defs_tokens: usize = tool_defs
2601 .iter()
2602 .map(|d| {
2603 let params = serde_json::to_string(&d.parameters).unwrap_or_default();
2604 (d.name.len() + d.description.len() + params.len()) / 4
2605 })
2606 .sum();
2607 let cold_zone_tokens: usize = conv.cold_summaries.iter().map(|s| s.len() / 4 + 4).sum();
2608 let actual_system_prompt = msgs
2609 .iter()
2610 .find(|m| matches!(m.role, crate::conversation::message::Role::System))
2611 .and_then(|m| m.text().map(|s| s.to_string()))
2612 .unwrap_or_default();
2613 let system_tokens_local = msgs
2614 .iter()
2615 .find(|m| matches!(m.role, crate::conversation::message::Role::System))
2616 .map(|m| m.estimate_tokens())
2617 .unwrap_or(0);
2618 let sent_tokens_local: usize = msgs
2619 .iter()
2620 .map(|m| m.estimate_tokens())
2621 .sum::<usize>()
2622 .saturating_sub(system_tokens_local);
2623 let total_messages_local = msgs.len();
2624 let _ = self.event_tx.send(AgentEvent::ContextStats {
2625 system_tokens: system_tokens_local,
2626 sent_tokens: sent_tokens_local,
2627 dropped_tokens: 0,
2628 working_set_tokens: 0,
2629 total_messages: total_messages_local,
2630 tool_defs_tokens,
2631 cold_zone_tokens,
2632 ctx_window: self.ctx.ctx_window(),
2633 ctx_name: self.ctx.name().to_string(),
2634 system_prompt: actual_system_prompt,
2635 });
2636 }
2637
2638 /// Post-compression task state restoration. After compression the model
2639 /// loses track of what it was doing — inject a short status so it can
2640 /// resume without re-exploring. Shared by auto-compact (threshold-driven
2641 /// in `maybe_compress_history`) and manual `/compact`.
2642 fn inject_post_compress_state(&mut self) {
2643 if let Some(msg) = build_post_compress_state(
2644 &self.current_task,
2645 &self.files_edited_this_turn,
2646 &self.files_read_this_turn,
2647 ) {
2648 self.conversation.add_user_message(&msg);
2649 }
2650 }
2651
2652 fn rendered_token_count(&self, system_prompt: &str) -> usize {
2653 self.ctx
2654 .build_messages(&self.conversation, system_prompt, "")
2655 .0
2656 .iter()
2657 .map(|m| m.estimate_tokens())
2658 .sum()
2659 }
2660
2661 /// Apply a compression candidate only when it reduces the next request
2662 /// payload. This is the single success criterion for all compression
2663 /// entry points: manual `/compact`, threshold-driven auto-compression,
2664 /// and task-boundary cleanup.
2665 fn try_apply_compression(
2666 &mut self,
2667 system_prompt: &str,
2668 remove_count: usize,
2669 summary: String,
2670 inject_state: bool,
2671 ) -> CompressionOutcome {
2672 let before_msg_count = self.conversation.messages.len();
2673 let before_tokens = self.rendered_token_count(system_prompt);
2674
2675 let msgs_snapshot = self.conversation.messages.clone();
2676 let cold_snapshot = self.conversation.cold_summaries.clone();
2677 let turns_snapshot = self.conversation.turn_tracker.clone();
2678
2679 self.conversation.apply_compression(remove_count, summary);
2680 if inject_state {
2681 self.inject_post_compress_state();
2682 }
2683
2684 let after_tokens = self.rendered_token_count(system_prompt);
2685 let removed_messages = before_msg_count.saturating_sub(self.conversation.messages.len());
2686
2687 if after_tokens >= before_tokens {
2688 self.conversation.messages = msgs_snapshot;
2689 self.conversation.cold_summaries = cold_snapshot;
2690 self.conversation.turn_tracker = turns_snapshot;
2691 CompressionOutcome {
2692 applied: false,
2693 before_tokens,
2694 after_tokens,
2695 removed_messages: 0,
2696 }
2697 } else {
2698 CompressionOutcome {
2699 applied: true,
2700 before_tokens,
2701 after_tokens,
2702 removed_messages,
2703 }
2704 }
2705 }
2706
2707 /// D2 emergency compact — layered, measured, never combines destructive
2708 /// ops. Replaces the previous "LLM-compress + blind truncate(len-4)"
2709 /// path that destroyed last-turn context (datalog atomgr-2d99b47d/
2710 /// 2026-05-06_08-43-12: 65K → 8516 tokens because compression THEN a
2711 /// 4-message truncate ran back-to-back, and the truncate dropped
2712 /// exactly the recent file reads the user needed for "继续").
2713 ///
2714 /// Each tier checks budget against `target` and breaks at the first
2715 /// sufficient tier. Returns true if any tier reached the target.
2716 ///
2717 /// Tiers (least → most destructive):
2718 /// 1. Collapse old tool_results (keep last 3 turns full).
2719 /// 2. LLM-summarize older turns into cold zone.
2720 /// 3. Hard token-driven truncate (drops oldest until under target,
2721 /// snapping to safe boundaries; the last user message is sacred).
2722 async fn emergency_compact_to_target(
2723 &mut self,
2724 target_tokens: usize,
2725 system_prompt: &str,
2726 ) -> bool {
2727 let sys_tokens = system_prompt.len() / 4 + 4;
2728 let estimate = |conv: &Conversation| -> usize {
2729 sys_tokens + conv.messages.iter().map(|m| m.estimate_tokens()).sum::<usize>()
2730 };
2731
2732 if estimate(&self.conversation) <= target_tokens {
2733 return true;
2734 }
2735
2736 // Tier 1: collapse heavy tool results in older turns.
2737 crate::ctx::render::compact_old_tool_results_in_place(
2738 &mut self.conversation,
2739 /* keep_recent_turns */ 3,
2740 );
2741 if estimate(&self.conversation) <= target_tokens {
2742 return true;
2743 }
2744
2745 // Tier 2: LLM-summarize older turns into the cold zone. This is
2746 // the most expensive tier (it makes a network round trip), so
2747 // we only reach it after Tier 1 already failed.
2748 self.maybe_compress_history(system_prompt).await;
2749 if estimate(&self.conversation) <= target_tokens {
2750 return true;
2751 }
2752
2753 // Tier 3: hard truncate to fit. Token-driven, not message-count
2754 // driven. The previous code did `truncate(len - 4)` blindly
2755 // which is what produced the 8516-token catastrophe.
2756 hard_truncate_to_target(&mut self.conversation, target_tokens, sys_tokens);
2757 estimate(&self.conversation) <= target_tokens
2758 }
2759
2760 /// Manual `/compact` entry point. Mechanical only — reuses the active
2761 /// ctx strategy's `compression_plan` (same path as the task-boundary
2762 /// cleanup in `handle_send_message`) so behavior stays consistent with
2763 /// the rest of the codebase. `_prompt` is accepted for forward-compat
2764 /// with a future LLM-guided summarize path and ignored today.
2765 ///
2766 /// Net-savings guard: on terse conversations the cold-zone summary
2767 /// header + `inject_post_compress_state` inject can weigh more than
2768 /// the dropped messages, so compaction would silently inflate the
2769 /// prompt. We measure before/after token totals via `build_messages`
2770 /// (post all render-pipeline effects — `clean_message_pipeline`,
2771 /// microcompact, etc.) and roll the conversation back if the
2772 /// operation didn't actually shrink the wire payload. Analytical
2773 /// projection was tried first but too many render-pipeline branches
2774 /// made it unreliable.
2775 async fn run_compact(&mut self, prompt: Option<String>) {
2776 let system_prompt = self.build_system_prompt();
2777 let Some((mechanical_content, n_msgs)) = self.ctx.compression_plan(&self.conversation)
2778 else {
2779 let _ = self.event_tx.send(AgentEvent::TextDelta(
2780 crate::i18n::t(crate::i18n::Msg::CompactNothingShort).into_owned(),
2781 ));
2782 return;
2783 };
2784
2785 let _ = self.event_tx.send(AgentEvent::TextDelta(
2786 crate::i18n::t(crate::i18n::Msg::CompactStarting).into_owned(),
2787 ));
2788
2789 // Try LLM summarization (with optional custom prompt)
2790 let summarize_prompt = if let Some(ref custom) = prompt {
2791 format!(
2792 "Summarize this conversation history, focusing on: {}.\n\
2793 Keep: file names, what was changed, key decisions, errors encountered.\n\
2794 Drop: exact code content, tool arguments, line numbers.\n\n{}",
2795 custom, mechanical_content
2796 )
2797 } else {
2798 Self::default_summarize_prompt(&mechanical_content)
2799 };
2800
2801 let summary = self.run_llm_summary(&summarize_prompt).await;
2802 let content = if summary.trim().is_empty() {
2803 mechanical_content
2804 } else {
2805 summary
2806 };
2807
2808 let outcome = self.try_apply_compression(&system_prompt, n_msgs, content, true);
2809
2810 if !outcome.applied {
2811 let before = fmt_k_tokens(outcome.before_tokens);
2812 let after = fmt_k_tokens(outcome.after_tokens);
2813 let _ = self.event_tx.send(AgentEvent::TextDelta(
2814 crate::i18n::t(crate::i18n::Msg::CompactNothingNoSavings {
2815 before: &before,
2816 after: &after,
2817 })
2818 .into_owned(),
2819 ));
2820 let (msgs, _) =
2821 self.ctx
2822 .build_messages(&self.conversation, &system_prompt, "");
2823 self.emit_rich_context_stats(&self.conversation, &msgs).await;
2824 return;
2825 }
2826
2827 let before = fmt_k_tokens(outcome.before_tokens);
2828 let after = fmt_k_tokens(outcome.after_tokens);
2829 let _ = self.event_tx.send(AgentEvent::TextDelta(
2830 crate::i18n::t(crate::i18n::Msg::CompactDropped {
2831 messages: outcome.removed_messages,
2832 before: &before,
2833 after: &after,
2834 })
2835 .into_owned(),
2836 ));
2837
2838 let (msgs, _) = self
2839 .ctx
2840 .build_messages(&self.conversation, &system_prompt, "");
2841 self.emit_rich_context_stats(&self.conversation, &msgs)
2842 .await;
2843 }
2844
2845 fn default_summarize_prompt(content: &str) -> String {
2846 format!(
2847 "Summarize this conversation history in 3-5 concise sentences. \
2848 Keep: file names, what was changed, key decisions, errors encountered. \
2849 Drop: exact code content, tool arguments, line numbers.\n\n{}",
2850 content
2851 )
2852 }
2853
2854 /// Run a lightweight LLM call to summarize content. Returns empty string on failure.
2855 async fn run_llm_summary(&self, prompt: &str) -> String {
2856 let mut mini_conv = crate::conversation::Conversation::new();
2857 mini_conv.add_user_message(prompt);
2858 let msgs = mini_conv
2859 .to_provider_messages("You are a conversation summarizer. Output ONLY the summary.");
2860
2861 let mut summary = String::new();
2862 if let Ok(mut stream) = self.turn_runner.provider.chat_stream(&msgs, None) {
2863 use futures::StreamExt;
2864 let first_timeout = std::time::Duration::from_secs(30);
2865 let stream_timeout = std::time::Duration::from_secs(30);
2866 let mut got_token = false;
2867 loop {
2868 let timeout = if got_token {
2869 stream_timeout
2870 } else {
2871 first_timeout
2872 };
2873 match tokio::time::timeout(timeout, stream.next()).await {
2874 Ok(Some(Ok(crate::stream::StreamEvent::Delta(text)))) => {
2875 got_token = true;
2876 let clean = text
2877 .replace("<think>", "")
2878 .replace("</think>", "")
2879 .replace("<|im_start|>", "")
2880 .replace("<|im_end|>", "");
2881 summary.push_str(&clean);
2882 }
2883 Ok(Some(Ok(crate::stream::StreamEvent::Done { .. }))) => break,
2884 Ok(Some(Ok(_))) => continue,
2885 _ => break,
2886 }
2887 }
2888 }
2889 summary
2890 }
2891
2892 fn finish_turn(&mut self, stop_reason: TurnStopReason) {
2893 // Error exits must not leave the user's message in the history
2894 // as an "orphan turn" (user message with no assistant reply).
2895 // The next send_message would then stack another user message
2896 // on top of it — an API call with two consecutive user turns
2897 // and no intervening assistant, which weak models respond to
2898 // with 0 tokens (see test 3 / 4: MiniMax-M2.7 returns empty
2899 // after a failed localhost turn). Cancel the turn instead so
2900 // the next user message starts from a clean transcript.
2901 //
2902 // Counters (turn_count / turn_tokens / tool_call_count) stay
2903 // UNTOUCHED here so the TurnComplete event below still carries
2904 // accurate stats for the UI's "✓ Nailed it · N rounds · M tok"
2905 // line. `start_turn` resets them for the next message.
2906 if matches!(stop_reason, TurnStopReason::Error) {
2907 self.conversation.cancel_current_turn_including_user();
2908 } else {
2909 self.conversation.turn_tracker.complete_current();
2910 }
2911
2912 // Auto-commit edited files if enabled
2913 if self.config.auto_commit
2914 && !matches!(stop_reason, TurnStopReason::Error)
2915 && !self.files_edited_this_turn.is_empty()
2916 {
2917 let wd = self
2918 .turn_runner
2919 .context
2920 .working_dir
2921 .try_read()
2922 .map(|g| g.clone())
2923 .unwrap_or_default();
2924 match git_auto_commit::auto_commit_edited_files(&wd, &self.files_edited_this_turn) {
2925 git_auto_commit::AutoCommitOutcome::Committed { sha, message } => {
2926 let notice = format!("\n[auto-commit {sha}] {message}\n");
2927 self.datalog.log_model_text(¬ice);
2928 let _ = self.event_tx.send(AgentEvent::TextDelta(notice));
2929 }
2930 git_auto_commit::AutoCommitOutcome::Failed { reason } => {
2931 let notice = format!("\n[auto-commit skipped] {reason}\n");
2932 self.datalog.log_error(¬ice);
2933 let _ = self.event_tx.send(AgentEvent::TextDelta(notice));
2934 }
2935 git_auto_commit::AutoCommitOutcome::Skipped { reason } => {
2936 self.datalog
2937 .log_model_text(&format!("[auto-commit skipped] {reason}"));
2938 }
2939 }
2940 }
2941
2942 // Flush datalog with final stats
2943 self.datalog
2944 .end_turn(self.turn_tokens, self.tool_call_count);
2945
2946 let duration = self.turn_start.map(|t| t.elapsed()).unwrap_or_default();
2947 self.turn_start = None;
2948 self.phase = AgentPhase::Idle;
2949 let _ = self.event_tx.send(AgentEvent::TurnComplete {
2950 duration,
2951 total_tokens: self.turn_tokens,
2952 turn_count: self.turn_count,
2953 tool_call_count: self.tool_call_count,
2954 stop_reason,
2955 messages: self.conversation.messages.clone(),
2956 });
2957 let _ = self
2958 .event_tx
2959 .send(AgentEvent::PhaseChange(AgentPhase::Idle));
2960 self.conversation.save(&Conversation::history_path());
2961 }
2962
2963 // store_tool_result → tool_dispatch.rs
2964
2965 // change_dir → services.rs
2966
2967 // try_sub_agent_dispatch → REMOVED. Fork sub-agent dispatch is now
2968 // ACTIVE: the model invokes `parallel_edit_files` (see
2969 // `crate::tool::parallel_edit`) when it judges parallel edit is the
2970 // right move. The framework no longer parses plan text or guesses
2971 // intent — eliminating ~250 lines of heuristics, ~70 hardcoded
2972 // intent-keywords across two iterations of failed gate logic, and
2973 // an entire class of mis-fire failures (read-only turns dispatching
2974 // 6 fork sub-agents that fake edits or no-op).
2975}
2976
2977
2978fn track_tool_modified_files(
2979 tool_name: &str,
2980 bash_command: &str,
2981 output: &str,
2982 edited_files: &mut Vec<String>,
2983) {
2984 if tool_name == "bash" {
2985 track_bash_modified_files(bash_command, output, edited_files);
2986 } else if tool_name == "search_replace" {
2987 track_search_replace_files(output, edited_files);
2988 }
2989}
2990
2991fn track_bash_modified_files(command: &str, output: &str, edited_files: &mut Vec<String>) {
2992 let Some(cwd) = bash_output_cwd(output) else {
2993 return;
2994 };
2995
2996 for file in rm_file_targets(command, &cwd) {
2997 push_edited_file(edited_files, file);
2998 }
2999 for file in bash_workspace_modified_files(output, &cwd) {
3000 push_edited_file(edited_files, file);
3001 }
3002}
3003
/// Extract the working directory from a bash tool output.
///
/// Scans the lines from last to first and returns the path inside the
/// first line that is exactly `[cwd: <path>]` (no leading or trailing
/// extras). Returns `None` when no such line exists.
fn bash_output_cwd(output: &str) -> Option<PathBuf> {
    for line in output.lines().rev() {
        let inner = line
            .strip_prefix("[cwd: ")
            .and_then(|rest| rest.strip_suffix(']'));
        if let Some(dir) = inner {
            return Some(PathBuf::from(dir));
        }
    }
    None
}
3011
/// Parse the `[workspace modified via bash: …` line of a bash tool output
/// into a list of file paths.
///
/// Only the first such line is used. The file list is the text after the
/// marker up to (not including) the first `". If "`; entries are
/// comma-separated. Empty entries and entries starting with `+` are
/// skipped (presumably a "+N more" overflow counter — confirm against the
/// producer). Relative entries are joined onto `cwd`; absolute ones are
/// kept as-is. Returns an empty vector when the marker line is absent.
fn bash_workspace_modified_files(output: &str, cwd: &std::path::Path) -> Vec<String> {
    const MARKER: &str = "[workspace modified via bash: ";

    let Some(listing) = output.lines().find_map(|line| line.strip_prefix(MARKER)) else {
        return Vec::new();
    };
    let files_part = match listing.split_once(". If ") {
        Some((head, _)) => head,
        None => listing,
    };

    let mut resolved = Vec::new();
    for entry in files_part.split(',') {
        let entry = entry.trim();
        if entry.is_empty() || entry.starts_with('+') {
            continue;
        }
        let candidate = std::path::Path::new(entry);
        let full = if candidate.is_absolute() {
            candidate.to_path_buf()
        } else {
            cwd.join(candidate)
        };
        resolved.push(full.to_string_lossy().to_string());
    }
    resolved
}
3039
3040fn track_search_replace_files(output: &str, edited_files: &mut Vec<String>) {
3041 for line in output.lines() {
3042 let trimmed = line.trim_start();
3043 let Some((path, _summary)) = trimmed.split_once(" (") else {
3044 continue;
3045 };
3046 if path.is_empty() {
3047 continue;
3048 }
3049 push_edited_file(edited_files, path.to_string());
3050 }
3051}
3052
3053fn rm_file_targets(command: &str, cwd: &std::path::Path) -> Vec<String> {
3054 let tokens = shell_words(command);
3055 let mut targets = Vec::new();
3056 let mut i = 0;
3057 while i < tokens.len() {
3058 if tokens[i] != "rm" {
3059 i += 1;
3060 continue;
3061 }
3062
3063 i += 1;
3064 let mut rm_targets = Vec::new();
3065 let mut recursive = false;
3066 while i < tokens.len() {
3067 let token = &tokens[i];
3068 if matches!(token.as_str(), "&&" | "||" | ";" | "|") {
3069 break;
3070 }
3071 if token.starts_with('-') {
3072 if token.contains('r') || token.contains('R') {
3073 recursive = true;
3074 }
3075 i += 1;
3076 continue;
3077 }
3078
3079 let path = std::path::Path::new(token);
3080 let full_path = if path.is_absolute() {
3081 path.to_path_buf()
3082 } else {
3083 cwd.join(path)
3084 };
3085 rm_targets.push(full_path.to_string_lossy().to_string());
3086 i += 1;
3087 }
3088
3089 if !recursive {
3090 targets.extend(rm_targets);
3091 }
3092 }
3093 targets
3094}
3095
/// Append `file` to `edited_files` unless an identical entry is already
/// present, preserving first-seen order.
fn push_edited_file(edited_files: &mut Vec<String>, file: String) {
    let already_tracked = edited_files.iter().any(|known| *known == file);
    if already_tracked {
        return;
    }
    edited_files.push(file);
}
3101
/// Crude shell tokenizer: splits `raw` on whitespace, trims quotes,
/// backticks, brackets, braces, parentheses and commas from both ends of
/// each piece, and drops pieces that end up empty. It does not understand
/// quoting, so a quoted argument containing spaces yields several words.
fn shell_words(raw: &str) -> Vec<String> {
    const WRAPPERS: &[char] = &['"', '\'', '`', '(', ')', '[', ']', '{', '}', ','];

    let mut words = Vec::new();
    for chunk in raw.split_whitespace() {
        let bare = chunk.trim_matches(WRAPPERS);
        if !bare.is_empty() {
            words.push(bare.to_owned());
        }
    }
    words
}
3116
/// Decide whether a `ReloadConfig` should wipe the conversation history.
///
/// Earlier behavior cleared whenever the `default_provider` name changed.
/// That was too aggressive: CodingPlan registers one provider entry per
/// model, so swapping Kimi ↔ GLM via `/model` lost all context every
/// time, even though both entries share the `openai` type and the known
/// cross-model differences (reasoning_content echo policy, DeepSeek
/// content-field requirement, tool_call args JSON repair) are handled in
/// the per-provider send path.
///
/// Policy:
/// - Both provider types known and equal → keep history (the common
///   Kimi/GLM/DeepSeek-through-AtomGit swap).
/// - Both known and different (e.g. openai → claude) → clear, because
///   tool_call id formats and tool_use block translation between
///   OpenAI-shaped and Anthropic-shaped messages have not been proven
///   round-trip clean.
/// - Either type unresolved (e.g. the old provider was removed from the
///   config) → clear only when the provider name changed, matching the
///   earlier safe default.
fn reload_should_clear_conversation(
    old_name: &str,
    old_type: Option<&str>,
    new_name: &str,
    new_type: Option<&str>,
) -> bool {
    if let (Some(before), Some(after)) = (old_type, new_type) {
        return before != after;
    }
    old_name != new_name
}
3148
3149/// D2 Tier 1: replace the `output` of every ToolResult in turns older
3150/// than the last `keep_recent_turns` with a one-line stub. Cheapest
3151/// destructive tier — preserves the conversation skeleton (assistant
3152/// text, tool-call shapes, paired result IDs) so the model can still
3153/// reason about *what was attempted*, just not the heavy outputs.
3154///
3155/// The previous emergency path (`truncate(len - 4)`) destroyed the
3156/// skeleton too. Keeping it intact is what lets the model resume
3157/// after compaction without re-exploring.
3158/// D2 Tier 3: drop oldest messages until total tokens (incl. system) <=
3159/// `target_tokens`. Token-driven — never drops a fixed number of
3160/// messages, since that's how `truncate(len - 4)` corrupted state.
3161///
3162/// Sacred invariants (won't violate even if it means staying over budget):
3163/// 1. The last `User` message is kept (current task anchor).
3164/// 2. The drop boundary snaps to a turn boundary so we never split a
3165/// `tool_call` from its paired `tool_result`.
3166fn hard_truncate_to_target(
3167 conv: &mut crate::conversation::Conversation,
3168 target_tokens: usize,
3169 sys_tokens: usize,
3170) {
3171 use crate::conversation::message::{MessageContent, Role};
3172 if conv.messages.is_empty() {
3173 return;
3174 }
3175 let total_budget = target_tokens.saturating_sub(sys_tokens);
3176
3177 // Find the last User message — it must survive.
3178 let last_user_idx = conv
3179 .messages
3180 .iter()
3181 .enumerate()
3182 .rev()
3183 .find(|(_, m)| m.role == Role::User)
3184 .map(|(i, _)| i);
3185
3186 let mut kept_tokens = 0usize;
3187 let mut keep_from = conv.messages.len();
3188 for i in (0..conv.messages.len()).rev() {
3189 let mt = conv.messages[i].estimate_tokens();
3190 // Always keep the last user message regardless of budget.
3191 let is_sacred = Some(i) == last_user_idx;
3192 if !is_sacred && kept_tokens + mt > total_budget && keep_from < conv.messages.len() {
3193 break;
3194 }
3195 kept_tokens += mt;
3196 keep_from = i;
3197 }
3198
3199 // Snap forward: don't start at a ToolResult orphan (its paired
3200 // assistant tool_call would be in the dropped section). Keep
3201 // walking forward until we land on a User or AssistantText message.
3202 while keep_from < conv.messages.len() {
3203 match &conv.messages[keep_from].content {
3204 MessageContent::ToolResult(_) | MessageContent::ToolResultRef(_) => {
3205 keep_from += 1;
3206 }
3207 _ => break,
3208 }
3209 }
3210 // Don't skip past the sacred last-user index even if the boundary
3211 // walker would have taken us there — better to ship one stub
3212 // tool_result than to drop the user msg.
3213 if let Some(lu) = last_user_idx {
3214 keep_from = keep_from.min(lu);
3215 }
3216
3217 if keep_from > 0 {
3218 conv.messages.drain(0..keep_from);
3219 conv.turn_tracker = crate::conversation::turn::TurnTracker::rebuild(&conv.messages);
3220 }
3221}
3222
/// True when an upstream API error string says the request exceeded the
/// model's context-length budget. Covers OpenRouter's verbose 400 message,
/// OpenAI's `context_length_exceeded` code and Anthropic's "prompt is too
/// long". The retry path uses this to route into the compression branch
/// instead of re-sending the same oversized request. Matching is
/// case-sensitive substring search.
fn is_context_overflow_error(e: &str) -> bool {
    const NEEDLES: [&str; 5] = [
        "context length",
        "context_length_exceeded",
        "maximum context",
        "prompt is too long",
        "reduce the length",
    ];
    NEEDLES.iter().any(|needle| e.contains(*needle))
}
3236
3237/// Extract the provider's actually-enforced context limit from a 400/
3238/// overflow error message, if it's discoverable. Used by D2 emergency
3239/// compaction so we compact toward the *real* limit (proxy-enforced)
3240/// rather than the configured ctx_window — which can be much larger
3241/// than what the upstream actually accepts.
3242///
3243/// Self-built proxies for open-weight models are the worst offender:
3244/// the model is nominally 128K but the proxy enforces 64K, and the
3245/// framework can't know that without parsing the rejection.
3246///
3247/// Recognised shapes (case-sensitive, all observed in real datalogs):
3248/// - OpenAI / GLM proxy: `maximum context length is 65536 tokens`
3249/// - OpenRouter: `This endpoint's maximum context length is 200000 tokens`
3250/// - Generic: `context length of 32768`
3251/// - Anthropic-ish: `prompt is too long: 200000 tokens > 200000 maximum`
3252fn extract_provider_ctx_limit(e: &str) -> Option<usize> {
3253 use std::sync::OnceLock;
3254 static RE: OnceLock<regex::Regex> = OnceLock::new();
3255 let re = RE.get_or_init(|| {
3256 // Three anchors, any of them satisfies. Number captured is the
3257 // smallest plausible limit token count (≥ 1024 — drop very small
3258 // numbers that appear in unrelated parts of error bodies).
3259 regex::Regex::new(
3260 r"(?:maximum context length (?:is|of)|context length of|context length limit (?:is|of)|tokens? > (?P<rhs>\d+))\s*(?P<lhs>\d+)?",
3261 )
3262 .expect("valid regex")
3263 });
3264 for caps in re.captures_iter(e) {
3265 let n = caps
3266 .name("lhs")
3267 .or_else(|| caps.name("rhs"))
3268 .and_then(|m| m.as_str().parse::<usize>().ok());
3269 // Filter out tiny numbers that aren't real ctx limits — every
3270 // realistic context window is at least a few thousand tokens.
3271 if let Some(n) = n {
3272 if n >= 1024 {
3273 return Some(n);
3274 }
3275 }
3276 }
3277 None
3278}
3279
/// True when an error string looks like rate limiting or upstream
/// overload. Matching is case-sensitive substring search.
///
/// The first three needles are the English / HTTP patterns. The Chinese
/// ones cover gateway-side messages: GitCode's litellm proxy on glm-5.1
/// reports overload ("…请求负载过高…") through in-stream SSE and then
/// closes the connection without `[DONE]`, which openai.rs surfaces as a
/// `StreamEvent::Error`. Without these needles that error fell through to
/// the generic 3-shot retry instead of the rate-limit handling (5
/// retries, 3-30s exponential backoff).
///
/// NOTE(review): the bare `rate` needle also matches unrelated words such
/// as "generate", and `429` matches any text containing those digits —
/// confirm this breadth is intended.
fn is_rate_limited_error(e: &str) -> bool {
    const NEEDLES: [&str; 7] = [
        "429",
        "rate",
        "Too Many",
        "请求负载过高",
        "请求过于频繁",
        "服务繁忙",
        "限流",
    ];
    NEEDLES.iter().any(|needle| e.contains(*needle))
}
3298
/// True when an error string indicates an authentication or authorization
/// failure: a `401 ` / `403 ` status (note the trailing space), the words
/// `Unauthorized` / `Forbidden`, or an API-key error code. Matching is
/// case-sensitive substring search.
fn is_auth_error(e: &str) -> bool {
    const NEEDLES: [&str; 6] = [
        "401 ",
        "403 ",
        "Unauthorized",
        "Forbidden",
        "invalid_api_key",
        "incorrect_api_key",
    ];
    NEEDLES.iter().any(|needle| e.contains(*needle))
}
3307
/// True when the error came from `build_codingplan_headers` failing with
/// `SignError::Unavailable` — an open-source AtomCode build tried to issue
/// a request that needs the closed-source signing module. This is
/// **terminal**: no retry can produce a valid signature in this binary,
/// and the user must install the official release. The retry classifier
/// fails fast on it instead of running the pointless 3-shot retry cycle.
///
/// Detection keys on the official releases URL fragment. Both the English
/// (`Msg::CpOfficialBuildRequired`) and Chinese messages embed it
/// verbatim and the URL is not localised, so one substring check covers
/// both locales without coupling to translation strings.
fn is_codingplan_unavailable_error(e: &str) -> bool {
    const RELEASES_URL_FRAGMENT: &str = "atomgit_atomcode/atomcode/releases";
    e.contains(RELEASES_URL_FRAGMENT)
}
3323
/// Whether raw upstream error text may be shown to the user.
///
/// Controlled by the `ATOMCODE_SHOW_RAW_API_ERROR` environment variable:
/// raw errors are shown unless it is set to exactly `0`, `false`, `FALSE`,
/// `no` or `NO`. An unset or unreadable variable, or any other value,
/// means "show".
fn should_show_raw_api_error() -> bool {
    const OFF_VALUES: [&str; 5] = ["0", "false", "FALSE", "no", "NO"];
    match std::env::var("ATOMCODE_SHOW_RAW_API_ERROR") {
        Ok(value) => !OFF_VALUES.contains(&value.as_str()),
        Err(_) => true,
    }
}
3330
3331fn public_error_reason(e: &str) -> &'static str {
3332 if is_context_overflow_error(e) {
3333 "上下文过长"
3334 } else if is_auth_error(e) {
3335 "认证失败或无权限"
3336 } else if is_rate_limited_error(e) {
3337 "请求过于频繁或额度已用尽"
3338 } else if e.contains("Stream timeout") || e.contains("no event for") {
3339 "模型响应超时"
3340 } else if e.contains("Connection failed")
3341 || e.contains("dns")
3342 || e.contains("TLS")
3343 || e.contains("certificate")
3344 || e.contains("connect")
3345 {
3346 "网络连接失败"
3347 } else if e.contains("500")
3348 || e.contains("502")
3349 || e.contains("503")
3350 || e.contains("504")
3351 || e.contains("Internal Server Error")
3352 || e.contains("Bad Gateway")
3353 || e.contains("Service Unavailable")
3354 || e.contains("Gateway Timeout")
3355 {
3356 "上游服务暂时不可用"
3357 } else if e.contains("400") {
3358 "请求参数无效"
3359 } else {
3360 "请求失败"
3361 }
3362}
3363
3364fn public_error_message(e: &str) -> String {
3365 if should_show_raw_api_error() {
3366 return e.to_string();
3367 }
3368
3369 match public_error_reason(e) {
3370 "上下文过长" => {
3371 "请求超过了模型上下文长度限制。请减少附加内容或缩短会话历史后重试。".to_string()
3372 }
3373 "认证失败或无权限" => {
3374 "认证失败或当前账号无权限访问该模型。请检查 API Key 和提供方权限配置。".to_string()
3375 }
3376 "请求过于频繁或额度已用尽" => {
3377 "请求过于频繁,或当前额度已用尽。请稍后再试。".to_string()
3378 }
3379 "模型响应超时" => "模型响应超时,请稍后重试。".to_string(),
3380 "网络连接失败" => "连接模型服务失败,请检查网络后重试。".to_string(),
3381 "上游服务暂时不可用" => "模型服务暂时不可用,请稍后重试。".to_string(),
3382 "请求参数无效" => "请求被模型服务拒绝,请调整输入后重试。".to_string(),
3383 _ => e.to_string(),
3384 }
3385}
3386
/// Assemble the status note injected after context compaction so the model
/// can pick up where it left off.
///
/// The note is a fixed header line followed by up to three sections, one per
/// line and in this order:
/// - `TASK:` — `current_task`, limited to its first 200 characters;
/// - `FILES EDITED:` — every entry of `files_edited`, comma-separated;
/// - `RECENTLY READ:` — the last five entries of `files_read`, newest first.
///
/// A section is omitted when its input is empty. Returns `None` when all
/// three inputs are empty.
///
/// Kept as a free function so truncation and formatting can be tested
/// without constructing an `AgentLoop`.
fn build_post_compress_state(
    current_task: &str,
    files_edited: &[String],
    files_read: &[String],
) -> Option<String> {
    const TASK_CHAR_LIMIT: usize = 200;
    const RECENT_READ_LIMIT: usize = 5;

    let mut sections: Vec<String> = Vec::with_capacity(3);

    if !current_task.is_empty() {
        // Cut at the byte offset where character number 201 starts, so the
        // slice always ends on a char boundary even for multi-byte (CJK) text.
        let clipped = match current_task.char_indices().nth(TASK_CHAR_LIMIT) {
            Some((cut, _)) => &current_task[..cut],
            None => current_task,
        };
        sections.push(format!("TASK: {}", clipped));
    }

    if !files_edited.is_empty() {
        sections.push(format!("FILES EDITED: {}", files_edited.join(", ")));
    }

    if !files_read.is_empty() {
        let first_kept = files_read.len().saturating_sub(RECENT_READ_LIMIT);
        let newest_first: Vec<&str> = files_read[first_kept..]
            .iter()
            .rev()
            .map(String::as_str)
            .collect();
        sections.push(format!("RECENTLY READ: {}", newest_first.join(", ")));
    }

    if sections.is_empty() {
        None
    } else {
        let body = sections.join("\n");
        Some(format!(
            "[Context was compressed. Here is your current state:]\n{}",
            body
        ))
    }
}
3426
/// Render a token count for user-facing banners.
///
/// Counts below 1000 are printed as plain integers (`137` → `"137"`);
/// anything from 1000 up is divided by 1000 and printed with one decimal
/// place and a `K` suffix (`9800` → `"9.8K"`). Intended to mirror the
/// `k(...)` closure in the TUI's `format_context_report` so `/compact` and
/// `/context` report in the same units.
fn fmt_k_tokens(t: usize) -> String {
    match t {
        0..=999 => t.to_string(),
        _ => format!("{:.1}K", t as f64 / 1000.0),
    }
}
3438
#[cfg(test)]
mod agent_handle_tests {
    use std::path::PathBuf;
    use std::sync::{Arc, RwLock};

    use tokio::sync::mpsc::unbounded_channel;

    use super::{AgentClient, AgentHandle, AgentRuntimeFactory};
    use crate::config::Config;
    use crate::skill::SkillRegistry;
    use crate::tool::ToolRegistry;

    /// Build a test factory for `config` rooted at `working_dir`, backed by
    /// freshly constructed tool and skill registries.
    fn factory_with(config: Config, working_dir: &str) -> AgentRuntimeFactory {
        AgentRuntimeFactory::new_for_test(
            config,
            PathBuf::from(working_dir),
            Arc::new(ToolRegistry::new()),
            Arc::new(RwLock::new(SkillRegistry::new())),
        )
    }

    /// Cloning an `AgentClient` must share the registries, not copy them.
    #[test]
    fn agent_client_clones_command_sender_and_registries() {
        let (cmd_tx, _cmd_rx) = unbounded_channel();
        let (_event_tx, event_rx) = unbounded_channel();
        let tool_registry = Arc::new(ToolRegistry::new());
        let skill_registry = Arc::new(RwLock::new(SkillRegistry::new()));

        let client = AgentClient {
            cmd_tx,
            tool_registry: Arc::clone(&tool_registry),
            skill_registry: Arc::clone(&skill_registry),
        };
        let handle = AgentHandle {
            client: client.clone(),
            event_rx,
        };

        let same_tools = Arc::ptr_eq(&client.tool_registry, &handle.client.tool_registry);
        let same_skills = Arc::ptr_eq(&client.skill_registry, &handle.client.skill_registry);
        assert!(same_tools);
        assert!(same_skills);
    }

    /// A default provider name with no matching provider entry yields a
    /// provider that reports itself unavailable.
    #[test]
    fn runtime_factory_reports_missing_provider_as_unavailable() {
        let mut config = Config::default();
        config.default_provider = String::from("missing");
        config.providers.clear();

        let factory = factory_with(config, "/tmp/project");
        let provider = factory.build_provider();

        assert_eq!(
            provider.availability_error(),
            Some("未配置 provider。请使用 /provider 添加 provider 后再试。")
        );
    }

    /// The setters replace the factory's stored config and working directory.
    #[test]
    fn runtime_factory_setters_update_snapshots() {
        let mut factory = factory_with(Config::default(), "/tmp/old");

        let mut replacement = Config::default();
        replacement.default_provider = String::from("fresh");
        factory.set_config(replacement);
        factory.set_working_dir(PathBuf::from("/tmp/new"));

        assert_eq!(factory.config.default_provider, "fresh");
        assert_eq!(factory.working_dir, PathBuf::from("/tmp/new"));
    }

    /// A factory and its clone hand out consecutive, non-repeating labels.
    #[test]
    fn cloned_runtime_factories_allocate_unique_labels() {
        let original = factory_with(Config::default(), "/tmp/project");
        let copy = original.clone();

        assert_eq!(original.next_runtime_label(), "runtime-2");
        assert_eq!(copy.next_runtime_label(), "runtime-3");
    }
}
3524
#[cfg(test)]
mod classifier_tests {
    use std::ffi::OsString;
    use std::sync::{Mutex, MutexGuard};

    use super::{
        extract_provider_ctx_limit, is_auth_error, is_codingplan_unavailable_error,
        is_context_overflow_error, is_rate_limited_error, public_error_message,
        public_error_reason, reload_should_clear_conversation,
    };
    use crate::conversation::{Conversation, message::MessageContent};
    use crate::tool::{ToolCall, ToolResult};

    // ── environment isolation for ATOMCODE_SHOW_RAW_API_ERROR ──

    /// Name of the variable that `public_error_message` consults (via
    /// `should_show_raw_api_error`) to choose between raw and summarised text.
    const RAW_ERROR_ENV: &str = "ATOMCODE_SHOW_RAW_API_ERROR";

    /// Serialises the tests in this module that mutate `RAW_ERROR_ENV`.
    /// The test harness runs tests on parallel threads and the environment
    /// is process-global, so without this lock one test could remove the
    /// variable while another relies on it being `"0"`.
    static RAW_ERROR_ENV_LOCK: Mutex<()> = Mutex::new(());

    /// RAII guard: holds `RAW_ERROR_ENV_LOCK`, applies a temporary value to
    /// `RAW_ERROR_ENV`, and restores the previous value on drop — including
    /// when the owning test panics on a failed assertion.
    struct RawErrorEnvGuard {
        previous: Option<OsString>,
        // Declared last so the lock is released only after `drop` restored
        // the variable.
        _serial: MutexGuard<'static, ()>,
    }

    impl RawErrorEnvGuard {
        /// Set `RAW_ERROR_ENV` to `value`, or remove it when `value` is `None`.
        fn apply(value: Option<&str>) -> Self {
            // A poisoned lock only means another env test failed; the guard
            // it held already restored the variable, so continuing is fine.
            let serial = RAW_ERROR_ENV_LOCK
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            let previous = std::env::var_os(RAW_ERROR_ENV);
            // SAFETY: writers of this variable inside this module are
            // serialised by RAW_ERROR_ENV_LOCK, which is held here.
            // NOTE(review): code outside this module touching the process
            // environment concurrently is not covered by the lock.
            unsafe {
                match value {
                    Some(v) => std::env::set_var(RAW_ERROR_ENV, v),
                    None => std::env::remove_var(RAW_ERROR_ENV),
                }
            }
            Self {
                previous,
                _serial: serial,
            }
        }
    }

    impl Drop for RawErrorEnvGuard {
        fn drop(&mut self) {
            // SAFETY: RAW_ERROR_ENV_LOCK is still held (`_serial` is dropped
            // after this body runs), so no other guard is writing the
            // variable concurrently.
            unsafe {
                match self.previous.take() {
                    Some(v) => std::env::set_var(RAW_ERROR_ENV, v),
                    None => std::env::remove_var(RAW_ERROR_ENV),
                }
            }
        }
    }

    // ── reload_should_clear_conversation ──

    #[test]
    fn reload_same_type_different_name_keeps_history() {
        // The common CodingPlan case: one provider entry per model, all
        // `openai`-typed. User swaps Kimi ↔ GLM via `/model` — history MUST
        // survive, otherwise every model switch is a brand-new session.
        assert!(!reload_should_clear_conversation(
            "AtomGit-kimi-k2.6",
            Some("openai"),
            "AtomGit-glm5",
            Some("openai"),
        ));
    }

    #[test]
    fn reload_different_type_clears() {
        // Cross-type (openai → claude) is not proven round-trip clean:
        // tool_call id formats differ, tool_use block translation is
        // non-trivial. Stay safe and clear.
        assert!(reload_should_clear_conversation(
            "kimi",
            Some("openai"),
            "claude-sonnet",
            Some("claude"),
        ));
    }

    #[test]
    fn reload_missing_old_type_falls_back_to_name_change() {
        // Old provider was removed from new_config (rename, delete, config
        // rewritten by wizard). We can't tell whether types match, so fall
        // back to the historical safe default: clear when the name flips.
        assert!(reload_should_clear_conversation(
            "old-gone",
            None,
            "new-arrival",
            Some("openai"),
        ));
        assert!(!reload_should_clear_conversation(
            "same",
            None,
            "same",
            Some("openai"),
        ));
    }

    #[test]
    fn reload_same_name_never_clears() {
        // A no-op ReloadConfig (same default, same type) must not
        // accidentally wipe history.
        assert!(!reload_should_clear_conversation(
            "kimi",
            Some("openai"),
            "kimi",
            Some("openai"),
        ));
    }

    // ── context-overflow classification ──

    #[test]
    fn openrouter_400_is_overflow() {
        let msg = "API error (400 Bad Request): This endpoint's maximum context \
                   length is 204800 tokens. However, you requested about 745279 \
                   tokens... Please reduce the length of either one.";
        assert!(is_context_overflow_error(msg));
    }

    #[test]
    fn openai_context_length_exceeded_is_overflow() {
        assert!(is_context_overflow_error(
            "{\"error\":{\"code\":\"context_length_exceeded\"}}"
        ));
    }

    #[test]
    fn anthropic_prompt_too_long_is_overflow() {
        assert!(is_context_overflow_error(
            "prompt is too long: 250000 tokens"
        ));
    }

    #[test]
    fn generic_rate_limit_is_not_overflow() {
        assert!(!is_context_overflow_error("429 Too Many Requests"));
    }

    #[test]
    fn auth_error_is_not_overflow() {
        assert!(!is_context_overflow_error("401 Unauthorized"));
    }

    // ── extract_provider_ctx_limit ──

    #[test]
    fn extract_glm_proxy_ctx_limit() {
        // From the actual datalog that motivated D2.
        let msg = "API error (400 Bad Request) at `http://115.120.18.212:18005/v1/chat/completions`: \
                   {\"error\":{\"message\":\"This model's maximum context length is 65536 tokens. \
                   However, you requested 15210 output tokens and your prompt contains at least \
                   50327 input tokens, for a total of at least 65537 tokens.\"}}";
        assert_eq!(extract_provider_ctx_limit(msg), Some(65536));
    }

    #[test]
    fn extract_openrouter_ctx_limit() {
        let msg = "API error (400): This endpoint's maximum context length is 204800 tokens. \
                   However, you requested about 745279 tokens";
        assert_eq!(extract_provider_ctx_limit(msg), Some(204800));
    }

    #[test]
    fn extract_anthropic_prompt_too_long() {
        let msg = "prompt is too long: 200000 tokens > 200000 maximum";
        assert_eq!(extract_provider_ctx_limit(msg), Some(200000));
    }

    #[test]
    fn extract_no_limit_returns_none_for_non_overflow_errors() {
        assert_eq!(extract_provider_ctx_limit("429 Too Many Requests"), None);
        assert_eq!(extract_provider_ctx_limit("401 Unauthorized"), None);
        assert_eq!(extract_provider_ctx_limit(""), None);
    }

    #[test]
    fn extract_filters_out_implausibly_small_numbers() {
        // Status codes and small ints in error bodies must not be
        // mistaken for context limits.
        let msg = "Error 400: maximum context length is 200 tokens";
        assert_eq!(extract_provider_ctx_limit(msg), None);
    }

    // ── D2 emergency compact tier helpers ──

    /// Build a synthetic conversation with `n_turns` turns, each carrying
    /// one user message + one assistant tool_call + one tool_result whose
    /// output is `result_size` repetitions of `'x'`.
    fn build_conv(n_turns: usize, result_size: usize) -> Conversation {
        let mut conv = Conversation::new();
        for t in 0..n_turns {
            conv.add_user_message(&format!("turn {} request", t));
            conv.add_assistant_tool_calls(
                None,
                vec![ToolCall {
                    id: format!("call_{}", t),
                    name: "read_file".into(),
                    arguments: r#"{"file_path":"/x"}"#.into(),
                }],
                None,
            );
            conv.add_tool_result(ToolResult {
                call_id: format!("call_{}", t),
                output: "x".repeat(result_size),
                success: true,
            });
        }
        conv
    }

    /// Count tool results whose output has been replaced by a stub.
    fn count_collapsed_results(conv: &Conversation) -> usize {
        // New unified stub format: `[<tool> <ok|FAILED>: N lines, first: …]`.
        // The substring " lines, first:" is unique to the stub shape and
        // robust whether the tool name is "bash", "grep", or "tool".
        conv.messages
            .iter()
            .filter(|m| match &m.content {
                MessageContent::ToolResult(tr) => tr.output.contains(" lines, first:"),
                _ => false,
            })
            .count()
    }

    /// Phase 1 proactive compact: Tier 1 (collapse) is enough for the
    /// common case — heavy old tool_result bodies become stubs and
    /// the conversation token total drops below threshold without
    /// invoking the LLM-summary round trip. Tier 2 only fires when
    /// Tier 1 wasn't enough. This test pins the contract that Tier 1
    /// is invoked first; Tier 2 path is covered separately by the
    /// existing emergency-compact tests.
    #[test]
    fn proactive_tier1_collapses_old_tool_results_only() {
        // Build a conversation heavy with old, large tool_results
        // (typical bash/cargo session shape). After Tier 1 with
        // keep_recent_turns=3, the 3 OLDEST turns' tool_results
        // should be stubs while the 3 RECENT turns retain full
        // payload. Pins the "older=collapsed, newer=intact" split.
        let mut conv = build_conv(/* n_turns */ 6, /* result_size */ 4_000);
        crate::ctx::render::compact_old_tool_results_in_place(&mut conv, 3);

        // Walk the messages: each turn pushes (User, AssistantToolCall,
        // ToolResult). 6 turns × 3 msgs = 18 msgs. The first 3 turns
        // are "old"; turns 4-6 are "recent".
        let mut tr_sizes: Vec<usize> = Vec::new();
        for m in &conv.messages {
            if let MessageContent::ToolResult(tr) = &m.content {
                tr_sizes.push(tr.output.len());
            }
        }
        assert_eq!(tr_sizes.len(), 6, "expected 6 tool_results");
        // Old: index 0, 1, 2 — must be stubs (small).
        for &s in &tr_sizes[..3] {
            assert!(
                s < 200,
                "old tool_result must collapse to stub; got len={}",
                s
            );
        }
        // Recent: index 3, 4, 5 — must keep the original payload, i.e. the
        // 4_000 single-byte 'x' chars written by `build_conv`.
        for &s in &tr_sizes[3..] {
            assert!(
                s >= 4_000,
                "recent tool_result must remain full; got len={}",
                s
            );
        }
    }

    #[test]
    fn collapse_keeps_last_n_turns_full() {
        let mut conv = build_conv(5, 1024);
        crate::ctx::render::compact_old_tool_results_in_place(&mut conv, 2);
        // 5 turns, keep last 2 → first 3 should have stubbed tool_results.
        assert_eq!(count_collapsed_results(&conv), 3);
    }

    #[test]
    fn collapse_skips_already_tiny_results() {
        // Tool results under 200 chars aren't worth collapsing — the stub
        // would weigh more than the original.
        let mut conv = build_conv(5, 50);
        crate::ctx::render::compact_old_tool_results_in_place(&mut conv, 2);
        assert_eq!(count_collapsed_results(&conv), 0);
    }

    #[test]
    fn collapse_no_op_when_under_keep_threshold() {
        let mut conv = build_conv(2, 1024);
        crate::ctx::render::compact_old_tool_results_in_place(&mut conv, 3);
        // Only 2 turns total, keep 3 — nothing to collapse.
        assert_eq!(count_collapsed_results(&conv), 0);
    }

    #[test]
    fn collapse_preserves_call_id_and_success_flag() {
        let mut conv = build_conv(3, 1024);
        crate::ctx::render::compact_old_tool_results_in_place(&mut conv, 1);
        // Verify call_0's tool_result still has the right call_id even
        // though its body was stubbed — preserves tool_call/tool_result
        // pairing for OpenAI-style providers.
        let tr = conv
            .messages
            .iter()
            .find_map(|m| match &m.content {
                MessageContent::ToolResult(tr) if tr.call_id == "call_0" => Some(tr),
                _ => None,
            })
            .expect("call_0 result must still exist");
        // New unified stub format: `[<tool> <ok|FAILED>: N lines, first: …]`.
        assert!(tr.output.contains(" lines, first:"));
        assert!(tr.output.starts_with("["));
        assert!(tr.success);
    }

    // ── hard_truncate_to_target ──

    #[test]
    fn hard_truncate_keeps_last_user_message_even_under_budget() {
        // Tight budget that forces aggressive drops; sacred invariant
        // says the last user msg must survive regardless. This is the
        // structural guarantee the previous `truncate(len-4)` code
        // *violated*, producing the 8516-token catastrophe.
        // What is actually asserted: at least one User-role message remains.
        let mut conv = build_conv(10, 2048);
        super::hard_truncate_to_target(&mut conv, /* target */ 100, /* sys */ 50);
        let has_user = conv
            .messages
            .iter()
            .any(|m| matches!(m.role, crate::conversation::message::Role::User));
        assert!(has_user, "last user message must survive even at tight budget");
    }

    #[test]
    fn hard_truncate_does_not_start_with_orphan_tool_result() {
        // After truncate the first surviving message must NOT be a
        // ToolResult — that would orphan it from its paired assistant
        // tool_call, which OpenAI-style APIs reject with 400.
        let mut conv = build_conv(8, 1024);
        super::hard_truncate_to_target(&mut conv, /* target */ 2000, /* sys */ 100);
        if let Some(first) = conv.messages.first() {
            assert!(
                !matches!(
                    first.content,
                    MessageContent::ToolResult(_) | MessageContent::ToolResultRef(_)
                ),
                "first surviving message must not be an orphan tool_result"
            );
        }
    }

    #[test]
    fn hard_truncate_no_op_when_already_under_target() {
        let mut conv = build_conv(3, 100);
        let before = conv.messages.len();
        super::hard_truncate_to_target(&mut conv, /* target */ 100_000, /* sys */ 100);
        assert_eq!(conv.messages.len(), before);
    }

    #[test]
    fn hard_truncate_rebuilds_turn_tracker() {
        // After draining messages from the front, the turn_tracker must
        // be rebuilt so its Turn entries stay within the message list.
        // Without this, the next build_messages crashes or silently emits
        // wrong boundaries (the bug the old `truncate(len-4)` path also
        // patched, but inconsistently — see the rebuild call there).
        let mut conv = build_conv(10, 2048);
        super::hard_truncate_to_target(&mut conv, /* target */ 1000, /* sys */ 100);
        // Every turn's start_idx must not exceed messages.len().
        // NOTE(review): `<=` also accepts start_idx == len, which is one
        // past the last valid index — confirm whether an empty trailing
        // turn is legal or whether this should be a strict `<`.
        for t in &conv.turn_tracker.turns {
            assert!(
                t.start_idx <= conv.messages.len(),
                "turn start_idx {} out of bounds (messages.len()={})",
                t.start_idx,
                conv.messages.len()
            );
        }
    }

    // ── public error routing ──

    #[test]
    fn stream_timeout_is_summarized() {
        // public_error_message defers to ATOMCODE_SHOW_RAW_API_ERROR (raw by
        // default), so its output depends on process-global state.
        // public_error_reason covers the routing logic regardless of env
        // state, so the category is asserted directly.
        assert_eq!(
            public_error_reason("Stream timeout: no event for 300s"),
            "模型响应超时"
        );
    }

    #[test]
    fn upstream_5xx_is_summarized() {
        assert_eq!(
            public_error_reason(
                "API error (503 Service Unavailable) at `https://x`:\nbackend trace"
            ),
            "上游服务暂时不可用"
        );
    }

    #[test]
    fn auth_error_is_detected() {
        assert!(is_auth_error(
            "API error (401 Unauthorized): invalid_api_key"
        ));
    }

    /// CpOfficialBuildRequired (English variant) — surfaces from
    /// build_codingplan_headers in open-source builds when an
    /// AtomGit-bound request is attempted.
    #[test]
    fn codingplan_unavailable_detected_in_english_message() {
        let en = "This feature requires the official AtomCode build. \
                  Download it from https://atomgit.com/atomgit_atomcode/atomcode/releases.";
        assert!(is_codingplan_unavailable_error(en));
    }

    /// Same error, Chinese locale. The Releases URL is the substring
    /// match — it's not localised, so the same classifier handles
    /// both en and zh-CN without coupling to translation text.
    #[test]
    fn codingplan_unavailable_detected_in_chinese_message() {
        let zh = "此功能需要官方 AtomCode 构建,请前往 \
                  https://atomgit.com/atomgit_atomcode/atomcode/releases 下载安装。";
        assert!(is_codingplan_unavailable_error(zh));
    }

    /// Negative: an unrelated network error must NOT trip the
    /// classifier. Verifies the URL anchor is narrow enough to avoid
    /// false positives.
    #[test]
    fn codingplan_unavailable_does_not_match_unrelated_errors() {
        assert!(!is_codingplan_unavailable_error(
            "API error (500 Internal Server Error) at `https://api.openai.com/v1/chat/completions`"
        ));
        assert!(!is_codingplan_unavailable_error("Stream timeout: no event for 300s"));
        assert!(!is_codingplan_unavailable_error(""));
    }

    #[test]
    fn rate_limit_error_is_detected() {
        assert!(is_rate_limited_error("API error (429 Too Many Requests)"));
    }

    /// Chinese gateway-side rate-limit blobs streamed in-band by
    /// GitCode litellm (and similar proxies) must route to the
    /// proper rate-limit retry path (5 attempts × 3-30s backoff),
    /// not the generic 3-shot fallback. Without this the
    /// abrupt-close discriminator in openai.rs converts the SSE
    /// blob to StreamEvent::Error but the agent then mis-retries
    /// it.
    #[test]
    fn rate_limit_error_detects_chinese_gateway_patterns() {
        assert!(is_rate_limited_error("模型「GLM-5.1」的请求负载过高,请稍后再试。"));
        assert!(is_rate_limited_error("请求过于频繁,请稍后再试"));
        assert!(is_rate_limited_error("服务繁忙"));
        assert!(is_rate_limited_error("当前已被限流"));
        // Negative: a vanilla error must NOT be classified as rate
        // limit just because it mentions "请稍后再试" alone
        // (which is generic Chinese "try again later").
        assert!(!is_rate_limited_error("请稍后再试"));
        assert!(!is_rate_limited_error("API error (500 Internal Server Error)"));
    }

    /// With raw display switched off, a 400 response is replaced by its
    /// summary and the raw body must not leak into the message.
    #[test]
    fn invalid_request_is_summarized_without_raw_body() {
        let _env = RawErrorEnvGuard::apply(Some("0"));
        let raw = "API error (400 Bad Request) at `https://x`:\nstack=secret detail";
        assert_eq!(public_error_reason(raw), "请求参数无效");
        assert!(!public_error_message(raw).contains("secret detail"));
    }

    /// With the variable unset, the raw error text is passed through as-is.
    #[test]
    fn raw_error_is_shown_by_default() {
        let _env = RawErrorEnvGuard::apply(None);
        let raw = "API error (400 Bad Request) at `https://x`:\nstack=secret detail";
        assert_eq!(public_error_message(raw), raw);
    }
}
3969
#[cfg(test)]
mod post_compress_state_tests {
    use super::build_post_compress_state;

    /// Return whatever follows `TASK: ` on the note's task line.
    /// Panics when the note has no such line.
    fn task_payload(note: &str) -> &str {
        note.lines()
            .find_map(|line| line.strip_prefix("TASK: "))
            .expect("note must contain a TASK line")
    }

    #[test]
    fn empty_inputs_return_none() {
        let note = build_post_compress_state("", &[], &[]);
        assert!(note.is_none());
    }

    #[test]
    fn task_only() {
        let note = build_post_compress_state("fix login bug", &[], &[]).unwrap();
        assert!(note.starts_with("[Context was compressed. Here is your current state:]\n"));
        assert!(note.contains("TASK: fix login bug"));
        // Sections with empty inputs must be left out entirely.
        for absent in ["FILES EDITED", "RECENTLY READ"] {
            assert!(!note.contains(absent));
        }
    }

    #[test]
    fn task_exact_200_is_unchanged() {
        // An input of exactly 200 chars sits on the limit and passes through.
        let exact = "字".repeat(200);
        let note = build_post_compress_state(&exact, &[], &[]).unwrap();
        let payload = task_payload(&note);
        assert_eq!(payload.chars().count(), 200);
        assert_eq!(payload, exact);
    }

    #[test]
    fn task_201_drops_exactly_one_char() {
        // Boundary: 201 → 200, and the cut must land on a char boundary
        // rather than inside the final 3-byte "字".
        let over = "字".repeat(201);
        let note = build_post_compress_state(&over, &[], &[]).unwrap();
        let payload = task_payload(&note);
        assert_eq!(payload.chars().count(), 200);
        assert!(payload.is_char_boundary(payload.len()));
    }

    #[test]
    fn task_long_multibyte_truncates_safely() {
        // Regression guard: a byte-offset slice would panic mid-codepoint.
        let long = "字".repeat(500);
        let note = build_post_compress_state(&long, &[], &[]).unwrap();
        assert_eq!(task_payload(&note).chars().count(), 200);
    }

    #[test]
    fn files_edited_comma_joined() {
        let edited = [String::from("a.rs"), String::from("b.rs")];
        let note = build_post_compress_state("", &edited, &[]).unwrap();
        assert!(note.contains("FILES EDITED: a.rs, b.rs"));
    }

    #[test]
    fn files_read_last_five_reversed() {
        // Newest first, capped at five entries.
        let read: Vec<String> = (1..=8).map(|i| format!("f{}.rs", i)).collect();
        let note = build_post_compress_state("", &[], &read).unwrap();
        let recent_line = note
            .lines()
            .find(|line| line.starts_with("RECENTLY READ: "))
            .unwrap();
        assert_eq!(
            recent_line,
            "RECENTLY READ: f8.rs, f7.rs, f6.rs, f5.rs, f4.rs"
        );
    }

    #[test]
    fn all_three_parts_combined() {
        let edited = [String::from("a.rs")];
        let read = [String::from("b.rs")];
        let note = build_post_compress_state("task x", &edited, &read).unwrap();
        for expected in ["TASK: task x", "FILES EDITED: a.rs", "RECENTLY READ: b.rs"] {
            assert!(note.contains(expected));
        }
    }
}
4049
#[cfg(test)]
mod fmt_k_tokens_tests {
    use super::fmt_k_tokens;

    /// Check every `(token_count, expected_text)` pair in `cases`.
    fn assert_formats(cases: &[(usize, &str)]) {
        for &(tokens, expected) in cases {
            assert_eq!(fmt_k_tokens(tokens), expected);
        }
    }

    #[test]
    fn under_1000_no_suffix() {
        assert_formats(&[(0, "0"), (137, "137"), (999, "999")]);
    }

    #[test]
    fn one_thousand_and_above_use_k_suffix_with_one_decimal() {
        assert_formats(&[
            (1000, "1.0K"),
            (3700, "3.7K"),
            (9800, "9.8K"),
            (64000, "64.0K"),
        ]);
    }
}
4069
#[cfg(test)]
mod bash_deleted_file_tracking_tests {
    use super::{
        bash_workspace_modified_files, rm_file_targets, track_search_replace_files,
        track_tool_modified_files,
    };
    use std::path::Path;

    /// Working directory used by every case in this module.
    fn project_dir() -> &'static Path {
        Path::new("/tmp/project")
    }

    /// A bare `rm <file>` resolves the target against the given cwd.
    #[test]
    fn tracks_simple_rm_target_from_cwd() {
        let resolved = rm_file_targets("rm numbers.txt", project_dir());
        assert_eq!(resolved, vec!["/tmp/project/numbers.txt"]);
    }

    /// `rm -rf <dir>` yields no tracked targets.
    #[test]
    fn skips_recursive_rm_targets() {
        let resolved = rm_file_targets("rm -rf dist", project_dir());
        assert!(resolved.is_empty());
    }

    /// For the `bash` tool, the cwd is taken from the `[cwd: …]` line of the
    /// tool output and used to resolve the `rm` target.
    #[test]
    fn tracks_successful_bash_rm_from_output_cwd() {
        let tool_output = "[elapsed: 0.0s, exit: 0]\n[cwd: /tmp/project]";
        let mut tracked = Vec::new();
        track_tool_modified_files("bash", "rm numbers.txt", tool_output, &mut tracked);
        assert_eq!(tracked, vec!["/tmp/project/numbers.txt"]);
    }

    /// Paths listed in the "workspace modified via bash" banner are returned
    /// as absolute paths: the relative one is joined onto the cwd, and the
    /// one already under the cwd comes back unchanged.
    #[test]
    fn tracks_workspace_modified_bash_output() {
        let tool_output = "[workspace modified via bash: src/a.rs, /tmp/project/b.txt. If you meant to edit source, use edit_file next time]\n[cwd: /tmp/project]";
        let reported = bash_workspace_modified_files(tool_output, project_dir());
        let expected = vec![
            String::from("/tmp/project/src/a.rs"),
            String::from("/tmp/project/b.txt"),
        ];
        assert_eq!(reported, expected);
    }

    /// Each per-file line of a search/replace report contributes its path.
    #[test]
    fn tracks_search_replace_output_files() {
        let tool_output = "Replaced 'old' -> 'new': 2 replacements across 2 files.\n /tmp/project/a.rs (1 replacements)\n /tmp/project/b.rs (1 replacements)";
        let mut tracked = Vec::new();
        track_search_replace_files(tool_output, &mut tracked);
        let expected = vec![
            String::from("/tmp/project/a.rs"),
            String::from("/tmp/project/b.rs"),
        ];
        assert_eq!(tracked, expected);
    }
}
4133