Skip to main content

a3s_code_core/
agent.rs

1//! Agent Loop Implementation
2//!
3//! The agent loop handles the core conversation cycle:
4//! 1. User sends a prompt
5//! 2. LLM generates a response (possibly with tool calls)
6//! 3. If tool calls present, execute them and send results back
7//! 4. Repeat until LLM returns without tool calls
8//!
//! This gives the LLM agentic behavior: it can call tools across
//! multiple turns until the task is accomplished.
11
12use crate::context::{ContextProvider, ContextQuery, ContextResult};
13use crate::hitl::ConfirmationProvider;
14use crate::hooks::{
15    ErrorType, GenerateEndEvent, GenerateStartEvent, HookEvent, HookExecutor, HookResult,
16    OnErrorEvent, PostResponseEvent, PostToolUseEvent, PrePromptEvent, PreToolUseEvent,
17    TokenUsageInfo, ToolCallInfo, ToolResultData,
18};
19use crate::llm::{LlmClient, LlmResponse, Message, TokenUsage, ToolDefinition};
20use crate::permissions::{PermissionChecker, PermissionDecision};
21use crate::planning::{AgentGoal, ExecutionPlan, TaskStatus};
22use crate::prompts::{AgentStyle, DetectionConfidence, PlanningMode, SystemPromptSlots};
23use crate::queue::SessionCommand;
24use crate::session_lane_queue::SessionLaneQueue;
25use crate::text::truncate_utf8;
26use crate::tool_search::ToolIndex;
27use crate::tools::{ToolContext, ToolExecutor, ToolStreamEvent};
28use anyhow::{Context, Result};
29use async_trait::async_trait;
30use futures::future::join_all;
31use serde::{Deserialize, Serialize};
32use serde_json::Value;
33use std::sync::Arc;
34use std::time::Duration;
35use tokio::sync::{mpsc, RwLock};
36
/// Maximum number of tool execution rounds before stopping.
///
/// Used as the default value of [`AgentConfig::max_tool_rounds`].
const MAX_TOOL_ROUNDS: usize = 50;
39
/// Agent configuration
///
/// Bundles the prompt customization, tool list, safety limits and optional
/// collaborators (security, permissions, HITL, hooks, memory, ...) used by
/// the agent loop. See the `Default` impl for the default value of each field.
#[derive(Clone)]
pub struct AgentConfig {
    /// Slot-based system prompt customization.
    ///
    /// Users can customize specific parts (role, guidelines, response style, extra)
    /// without overriding the core agentic capabilities. The default agentic core
    /// (tool usage, autonomous behavior, completion criteria) is always preserved.
    pub prompt_slots: SystemPromptSlots,
    /// Tool definitions carried by this configuration.
    ///
    /// Empty by default; the `Default` impl notes that tools are provided by
    /// the `ToolExecutor`.
    pub tools: Vec<ToolDefinition>,
    /// Maximum number of tool execution rounds before stopping
    /// (default: `MAX_TOOL_ROUNDS`).
    pub max_tool_rounds: usize,
    /// Optional security provider for input taint tracking and output sanitization
    pub security_provider: Option<Arc<dyn crate::security::SecurityProvider>>,
    /// Optional permission checker for tool execution control
    pub permission_checker: Option<Arc<dyn PermissionChecker>>,
    /// Optional confirmation manager for HITL (Human-in-the-Loop)
    pub confirmation_manager: Option<Arc<dyn ConfirmationProvider>>,
    /// Context providers for augmenting prompts with external context
    pub context_providers: Vec<Arc<dyn ContextProvider>>,
    /// Planning mode — Auto (detect from message), Enabled, or Disabled.
    pub planning_mode: PlanningMode,
    /// Enable goal tracking
    pub goal_tracking: bool,
    /// Optional hook engine for firing lifecycle events (PreToolUse, PostToolUse, etc.)
    pub hook_engine: Option<Arc<dyn HookExecutor>>,
    /// Optional skill registry for tool permission enforcement
    pub skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    /// Max consecutive malformed-tool-args errors before aborting (default: 2).
    ///
    /// When the LLM returns tool arguments with `__parse_error`, the error is
    /// fed back as a tool result. After this many consecutive parse errors the
    /// loop bails instead of retrying indefinitely.
    pub max_parse_retries: u32,
    /// Per-tool execution timeout in milliseconds (`None` = no timeout).
    ///
    /// When set, each tool execution is wrapped in `tokio::time::timeout`.
    /// A timeout produces an error result sent back to the LLM rather than
    /// crashing the session.
    pub tool_timeout_ms: Option<u64>,
    /// Circuit-breaker threshold: max consecutive LLM API failures before
    /// aborting (default: 3).
    ///
    /// In non-streaming mode, transient LLM failures are retried up to this
    /// many times (with short exponential backoff) before the loop bails.
    /// In streaming mode, any failure is fatal (events cannot be replayed).
    pub circuit_breaker_threshold: u32,
    /// Max consecutive identical tool signatures before aborting (default: 3).
    ///
    /// A tool signature is the exact combination of tool name + compact JSON
    /// arguments. This prevents the agent from getting stuck repeating the same
    /// tool call in a loop, for example repeatedly fetching the same URL.
    pub duplicate_tool_call_threshold: u32,
    /// Enable auto-compaction when context usage exceeds threshold
    /// (default: `false`).
    pub auto_compact: bool,
    /// Context usage percentage threshold to trigger auto-compaction (0.0 - 1.0).
    /// Default: 0.80 (80%).
    pub auto_compact_threshold: f32,
    /// Maximum context window size in tokens (used for auto-compact calculation).
    /// Default: 200_000.
    pub max_context_tokens: usize,
    /// LLM client reference for auto-compaction (needs to call LLM for summarization).
    pub llm_client: Option<Arc<dyn LlmClient>>,
    /// Optional agent memory for auto-remember after tool execution and recall before prompts.
    pub memory: Option<Arc<crate::memory::AgentMemory>>,
    /// Inject a continuation message when the LLM stops calling tools before the
    /// task is complete. Enabled by default. Set to `false` to disable.
    ///
    /// When enabled, if the LLM produces a response with no tool calls but the
    /// response text looks like an intermediate step (not a final answer), the
    /// loop injects [`crate::prompts::CONTINUATION`] as a user message and
    /// continues for up to `max_continuation_turns` additional turns.
    pub continuation_enabled: bool,
    /// Maximum number of continuation injections per execution (default: 3).
    ///
    /// Prevents infinite loops when the LLM repeatedly stops without completing.
    pub max_continuation_turns: u32,
    /// Optional tool search index for filtering tools per-turn.
    ///
    /// When set, only tools matching the user prompt are sent to the LLM,
    /// reducing context usage when many MCP tools are registered.
    pub tool_index: Option<ToolIndex>,
}
122
123impl std::fmt::Debug for AgentConfig {
124    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125        f.debug_struct("AgentConfig")
126            .field("prompt_slots", &self.prompt_slots)
127            .field("tools", &self.tools)
128            .field("max_tool_rounds", &self.max_tool_rounds)
129            .field("security_provider", &self.security_provider.is_some())
130            .field("permission_checker", &self.permission_checker.is_some())
131            .field("confirmation_manager", &self.confirmation_manager.is_some())
132            .field("context_providers", &self.context_providers.len())
133            .field("planning_mode", &self.planning_mode)
134            .field("goal_tracking", &self.goal_tracking)
135            .field("hook_engine", &self.hook_engine.is_some())
136            .field(
137                "skill_registry",
138                &self.skill_registry.as_ref().map(|r| r.len()),
139            )
140            .field("max_parse_retries", &self.max_parse_retries)
141            .field("tool_timeout_ms", &self.tool_timeout_ms)
142            .field("circuit_breaker_threshold", &self.circuit_breaker_threshold)
143            .field(
144                "duplicate_tool_call_threshold",
145                &self.duplicate_tool_call_threshold,
146            )
147            .field("auto_compact", &self.auto_compact)
148            .field("auto_compact_threshold", &self.auto_compact_threshold)
149            .field("max_context_tokens", &self.max_context_tokens)
150            .field("continuation_enabled", &self.continuation_enabled)
151            .field("max_continuation_turns", &self.max_continuation_turns)
152            .field("memory", &self.memory.is_some())
153            .field("tool_index", &self.tool_index.as_ref().map(|i| i.len()))
154            .finish()
155    }
156}
157
/// Default configuration.
///
/// All optional collaborators are `None` except `skill_registry`, which is
/// populated with `SkillRegistry::with_builtins()`. Planning mode uses
/// `PlanningMode::default()`, goal tracking and auto-compaction are off, and
/// continuation injection is on.
impl Default for AgentConfig {
    fn default() -> Self {
        Self {
            prompt_slots: SystemPromptSlots::default(),
            tools: Vec::new(), // Tools are provided by ToolExecutor
            max_tool_rounds: MAX_TOOL_ROUNDS,
            security_provider: None,
            permission_checker: None,
            confirmation_manager: None,
            context_providers: Vec::new(),
            planning_mode: PlanningMode::default(),
            goal_tracking: false,
            hook_engine: None,
            // The only collaborator that is set by default.
            skill_registry: Some(Arc::new(crate::skills::SkillRegistry::with_builtins())),
            max_parse_retries: 2,
            tool_timeout_ms: None,
            circuit_breaker_threshold: 3,
            duplicate_tool_call_threshold: 3,
            auto_compact: false,
            auto_compact_threshold: 0.80,
            max_context_tokens: 200_000,
            llm_client: None,
            memory: None,
            continuation_enabled: true,
            max_continuation_turns: 3,
            tool_index: None,
        }
    }
}
187
/// Events emitted during agent execution
///
/// Subscribe via [`Session::subscribe_events()`](crate::session::Session::subscribe_events).
/// New variants may be added in minor releases — always include a wildcard arm
/// (`_ => {}`) when matching.
///
/// Serialized with serde as an internally tagged enum: the variant is carried
/// in a `type` field whose value is the variant's `#[serde(rename = "...")]`
/// string (for example `"agent_start"` for [`AgentEvent::Start`]).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum AgentEvent {
    /// Agent started processing
    #[serde(rename = "agent_start")]
    Start { prompt: String },

    /// Runtime agent style/mode selected for the current execution.
    #[serde(rename = "agent_mode_changed")]
    AgentModeChanged {
        /// Stable UI/runtime mode label, e.g. "general", "planning", "explore".
        mode: String,
        /// Canonical built-in agent name associated with this mode.
        agent: String,
        /// Human-readable explanation of the selected style.
        description: String,
    },

    /// LLM turn started
    #[serde(rename = "turn_start")]
    TurnStart { turn: usize },

    /// Text delta from streaming
    #[serde(rename = "text_delta")]
    TextDelta { text: String },

    /// Tool execution started
    #[serde(rename = "tool_start")]
    ToolStart { id: String, name: String },

    /// Tool input delta from streaming (partial JSON arguments)
    #[serde(rename = "tool_input_delta")]
    ToolInputDelta { delta: String },

    /// Tool execution completed
    #[serde(rename = "tool_end")]
    ToolEnd {
        id: String,
        name: String,
        output: String,
        exit_code: i32,
        /// Omitted from the serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        metadata: Option<serde_json::Value>,
    },

    /// Intermediate tool output (streaming delta)
    #[serde(rename = "tool_output_delta")]
    ToolOutputDelta {
        id: String,
        name: String,
        delta: String,
    },

    /// LLM turn completed
    #[serde(rename = "turn_end")]
    TurnEnd { turn: usize, usage: TokenUsage },

    /// Agent completed
    #[serde(rename = "agent_end")]
    End {
        text: String,
        usage: TokenUsage,
        /// Omitted from the serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        meta: Option<crate::llm::LlmResponseMeta>,
    },

    /// Error occurred
    #[serde(rename = "error")]
    Error { message: String },

    /// Tool execution requires confirmation (HITL)
    #[serde(rename = "confirmation_required")]
    ConfirmationRequired {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        timeout_ms: u64,
    },

    /// Confirmation received from user (HITL)
    #[serde(rename = "confirmation_received")]
    ConfirmationReceived {
        tool_id: String,
        approved: bool,
        reason: Option<String>,
    },

    /// Confirmation timed out (HITL)
    #[serde(rename = "confirmation_timeout")]
    ConfirmationTimeout {
        tool_id: String,
        action_taken: String, // "rejected" or "auto_approved"
    },

    /// External task pending (needs SDK processing)
    #[serde(rename = "external_task_pending")]
    ExternalTaskPending {
        task_id: String,
        session_id: String,
        lane: crate::hitl::SessionLane,
        command_type: String,
        payload: serde_json::Value,
        timeout_ms: u64,
    },

    /// External task completed
    #[serde(rename = "external_task_completed")]
    ExternalTaskCompleted {
        task_id: String,
        session_id: String,
        success: bool,
    },

    /// Tool execution denied by permission policy
    #[serde(rename = "permission_denied")]
    PermissionDenied {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        reason: String,
    },

    /// Context resolution started
    #[serde(rename = "context_resolving")]
    ContextResolving { providers: Vec<String> },

    /// Context resolution completed
    #[serde(rename = "context_resolved")]
    ContextResolved {
        total_items: usize,
        total_tokens: usize,
    },

    // ========================================================================
    // a3s-lane integration events
    // ========================================================================
    /// Command moved to dead letter queue after exhausting retries
    #[serde(rename = "command_dead_lettered")]
    CommandDeadLettered {
        command_id: String,
        command_type: String,
        lane: String,
        error: String,
        attempts: u32,
    },

    /// Command retry attempt
    #[serde(rename = "command_retry")]
    CommandRetry {
        command_id: String,
        command_type: String,
        lane: String,
        attempt: u32,
        delay_ms: u64,
    },

    /// Queue alert (depth warning, latency alert, etc.)
    #[serde(rename = "queue_alert")]
    QueueAlert {
        level: String,
        alert_type: String,
        message: String,
    },

    // ========================================================================
    // Task tracking events
    // ========================================================================
    /// Task list updated
    #[serde(rename = "task_updated")]
    TaskUpdated {
        session_id: String,
        tasks: Vec<crate::planning::Task>,
    },

    // ========================================================================
    // Memory System events (Phase 3)
    // ========================================================================
    /// Memory stored
    #[serde(rename = "memory_stored")]
    MemoryStored {
        memory_id: String,
        memory_type: String,
        importance: f32,
        tags: Vec<String>,
    },

    /// Memory recalled
    #[serde(rename = "memory_recalled")]
    MemoryRecalled {
        memory_id: String,
        content: String,
        relevance: f32,
    },

    /// Memories searched
    #[serde(rename = "memories_searched")]
    MemoriesSearched {
        query: Option<String>,
        tags: Vec<String>,
        result_count: usize,
    },

    /// Memory cleared
    #[serde(rename = "memory_cleared")]
    MemoryCleared {
        tier: String, // "long_term", "short_term", "working"
        count: u64,
    },

    // ========================================================================
    // Subagent events
    // ========================================================================
    /// Subagent task started
    #[serde(rename = "subagent_start")]
    SubagentStart {
        /// Unique task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Parent session ID
        parent_session_id: String,
        /// Agent type (e.g., "explore", "general")
        agent: String,
        /// Short description of the task
        description: String,
    },

    /// Subagent task progress update
    #[serde(rename = "subagent_progress")]
    SubagentProgress {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Progress status message
        status: String,
        /// Additional metadata
        metadata: serde_json::Value,
    },

    /// Subagent task completed
    #[serde(rename = "subagent_end")]
    SubagentEnd {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Agent type
        agent: String,
        /// Task output/result
        output: String,
        /// Whether the task succeeded
        success: bool,
    },

    // ========================================================================
    // Planning and Goal Tracking Events (Phase 1)
    // ========================================================================
    /// Planning phase started
    #[serde(rename = "planning_start")]
    PlanningStart { prompt: String },

    /// Planning phase completed
    #[serde(rename = "planning_end")]
    PlanningEnd {
        plan: ExecutionPlan,
        estimated_steps: usize,
    },

    /// Step execution started
    #[serde(rename = "step_start")]
    StepStart {
        step_id: String,
        description: String,
        step_number: usize,
        total_steps: usize,
    },

    /// Step execution completed
    #[serde(rename = "step_end")]
    StepEnd {
        step_id: String,
        status: TaskStatus,
        step_number: usize,
        total_steps: usize,
    },

    /// Goal extracted from prompt
    #[serde(rename = "goal_extracted")]
    GoalExtracted { goal: AgentGoal },

    /// Goal progress update
    #[serde(rename = "goal_progress")]
    GoalProgress {
        goal: String,
        progress: f32,
        completed_steps: usize,
        total_steps: usize,
    },

    /// Goal achieved
    #[serde(rename = "goal_achieved")]
    GoalAchieved {
        goal: String,
        total_steps: usize,
        duration_ms: i64,
    },

    // ========================================================================
    // Context Compaction events
    // ========================================================================
    /// Context automatically compacted due to high usage
    #[serde(rename = "context_compacted")]
    ContextCompacted {
        session_id: String,
        before_messages: usize,
        after_messages: usize,
        percent_before: f32,
    },

    // ========================================================================
    // Persistence events
    // ========================================================================
    /// Session persistence failed — SDK clients should handle this
    #[serde(rename = "persistence_failed")]
    PersistenceFailed {
        session_id: String,
        operation: String,
        error: String,
    },

    // ========================================================================
    // Side question (btw)
    // ========================================================================
    /// Ephemeral side question answered.
    ///
    /// Emitted by [`crate::agent_api::AgentSession::btw()`] in streaming mode.
    /// The answer is never added to conversation history.
    #[serde(rename = "btw_answer")]
    BtwAnswer {
        question: String,
        answer: String,
        usage: TokenUsage,
    },
}
539
/// Result of agent execution
#[derive(Debug, Clone)]
pub struct AgentResult {
    /// Response text (presumably the final assistant text — confirm against the producer).
    pub text: String,
    /// Conversation messages associated with this execution.
    pub messages: Vec<Message>,
    /// Token usage reported for this execution.
    pub usage: TokenUsage,
    /// Number of tool calls counted during this execution.
    pub tool_calls_count: usize,
}
548
549// ============================================================================
550// ToolCommand — bridges ToolExecutor to SessionCommand for queue submission
551// ============================================================================
552
/// Adapter that implements `SessionCommand` for tool execution via the queue.
///
/// Wraps a `ToolExecutor` call so it can be submitted to `SessionLaneQueue`.
pub struct ToolCommand {
    /// Executor that actually runs the tool.
    tool_executor: Arc<ToolExecutor>,
    /// Name of the tool to run; also reported as the command type.
    tool_name: String,
    /// JSON arguments passed to the tool; also reported as the command payload.
    tool_args: Value,
    /// Context handed to the executor alongside the arguments.
    tool_context: ToolContext,
    /// Optional registry consulted for skill-based tool restrictions before execution.
    skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
}
563
impl ToolCommand {
    /// Create a new ToolCommand
    ///
    /// Stores the given values unchanged; nothing is executed or validated
    /// until `execute` is called.
    ///
    /// * `tool_executor` - executor used to run the tool
    /// * `tool_name` - name of the tool to invoke
    /// * `tool_args` - JSON arguments for the tool
    /// * `tool_context` - context passed to the executor
    /// * `skill_registry` - optional registry used for skill-based tool
    ///   permission checks
    pub fn new(
        tool_executor: Arc<ToolExecutor>,
        tool_name: String,
        tool_args: Value,
        tool_context: ToolContext,
        skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    ) -> Self {
        Self {
            tool_executor,
            tool_name,
            tool_args,
            tool_context,
            skill_registry,
        }
    }
}
582
583#[async_trait]
584impl SessionCommand for ToolCommand {
585    async fn execute(&self) -> Result<Value> {
586        // Check skill-based tool permissions
587        if let Some(registry) = &self.skill_registry {
588            let instruction_skills = registry.by_kind(crate::skills::SkillKind::Instruction);
589
590            // If there are instruction skills with tool restrictions, check permissions
591            let has_restrictions = instruction_skills.iter().any(|s| s.allowed_tools.is_some());
592
593            if has_restrictions {
594                let mut allowed = false;
595
596                for skill in &instruction_skills {
597                    if skill.is_tool_allowed(&self.tool_name) {
598                        allowed = true;
599                        break;
600                    }
601                }
602
603                if !allowed {
604                    return Err(anyhow::anyhow!(
605                        "Tool '{}' is not allowed by any active skill. Active skills restrict tools to their allowed-tools lists.",
606                        self.tool_name
607                    ));
608                }
609            }
610        }
611
612        // Execute the tool
613        let result = self
614            .tool_executor
615            .execute_with_context(&self.tool_name, &self.tool_args, &self.tool_context)
616            .await?;
617        Ok(serde_json::json!({
618            "output": result.output,
619            "exit_code": result.exit_code,
620            "metadata": result.metadata,
621        }))
622    }
623
624    fn command_type(&self) -> &str {
625        &self.tool_name
626    }
627
628    fn payload(&self) -> Value {
629        self.tool_args.clone()
630    }
631}
632
633// ============================================================================
634// AgentLoop
635// ============================================================================
636
/// Agent loop executor
///
/// Holds the LLM client, tool executor, tool context and configuration,
/// plus optional collaborators that are attached through the `with_*`
/// builder methods.
#[derive(Clone)]
pub struct AgentLoop {
    /// LLM client for this loop.
    llm_client: Arc<dyn LlmClient>,
    /// Executor used to run tools.
    tool_executor: Arc<ToolExecutor>,
    /// Tool context supplied at construction.
    tool_context: ToolContext,
    /// Loop configuration (limits, providers, feature switches).
    config: AgentConfig,
    /// Optional per-session tool metrics collector
    tool_metrics: Option<Arc<RwLock<crate::telemetry::ToolMetrics>>>,
    /// Optional lane queue for priority-based tool execution
    command_queue: Option<Arc<SessionLaneQueue>>,
    /// Optional progress tracker for real-time tool/token usage tracking
    progress_tracker: Option<Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>>,
    /// Optional task manager for centralized task lifecycle tracking
    task_manager: Option<Arc<crate::task::TaskManager>>,
}
653
654impl AgentLoop {
    /// Create an agent loop from its required parts.
    ///
    /// All optional collaborators (tool metrics, lane queue, progress tracker,
    /// task manager) start as `None`; attach them with the `with_*` builder
    /// methods.
    pub fn new(
        llm_client: Arc<dyn LlmClient>,
        tool_executor: Arc<ToolExecutor>,
        tool_context: ToolContext,
        config: AgentConfig,
    ) -> Self {
        Self {
            llm_client,
            tool_executor,
            tool_context,
            config,
            tool_metrics: None,
            command_queue: None,
            progress_tracker: None,
            task_manager: None,
        }
    }
672
673    /// Set the progress tracker for real-time tool/token usage tracking.
674    pub fn with_progress_tracker(
675        mut self,
676        tracker: Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>,
677    ) -> Self {
678        self.progress_tracker = Some(tracker);
679        self
680    }
681
682    /// Set the task manager for centralized task lifecycle tracking.
683    pub fn with_task_manager(mut self, manager: Arc<crate::task::TaskManager>) -> Self {
684        self.task_manager = Some(manager);
685        self
686    }
687
688    /// Set the tool metrics collector for this agent loop
689    pub fn with_tool_metrics(
690        mut self,
691        metrics: Arc<RwLock<crate::telemetry::ToolMetrics>>,
692    ) -> Self {
693        self.tool_metrics = Some(metrics);
694        self
695    }
696
697    /// Set the lane queue for priority-based tool execution.
698    ///
699    /// When set, tools are routed through the lane queue which supports
700    /// External task handling for multi-machine parallel processing.
701    pub fn with_queue(mut self, queue: Arc<SessionLaneQueue>) -> Self {
702        self.command_queue = Some(queue);
703        self
704    }
705
706    /// Track a tool call result in the progress tracker.
707    fn track_tool_result(&self, tool_name: &str, args: &serde_json::Value, exit_code: i32) {
708        if let Some(ref tracker) = self.progress_tracker {
709            let args_summary = Self::compact_json_args(args);
710            let success = exit_code == 0;
711            if let Ok(mut guard) = tracker.try_write() {
712                guard.track_tool_call(tool_name, args_summary, success);
713            }
714        }
715    }
716
717    /// Compact JSON arguments to a short summary string for tracking.
718    fn compact_json_args(args: &serde_json::Value) -> String {
719        let raw = match args {
720            serde_json::Value::Null => String::new(),
721            serde_json::Value::String(s) => s.clone(),
722            _ => serde_json::to_string(args).unwrap_or_default(),
723        };
724        let compact = raw.split_whitespace().collect::<Vec<_>>().join(" ");
725        if compact.len() > 180 {
726            format!("{}...", truncate_utf8(&compact, 180))
727        } else {
728            compact
729        }
730    }
731
732    /// Execute a tool, applying the configured timeout if set.
733    ///
734    /// On timeout, returns an error describing which tool timed out and after
735    /// how many milliseconds. The caller converts this to a tool-result error
736    /// message that is fed back to the LLM.
737    async fn execute_tool_timed(
738        &self,
739        name: &str,
740        args: &serde_json::Value,
741        ctx: &ToolContext,
742    ) -> anyhow::Result<crate::tools::ToolResult> {
743        let fut = self.tool_executor.execute_with_context(name, args, ctx);
744        if let Some(timeout_ms) = self.config.tool_timeout_ms {
745            match tokio::time::timeout(Duration::from_millis(timeout_ms), fut).await {
746                Ok(result) => result,
747                Err(_) => Err(anyhow::anyhow!(
748                    "Tool '{}' timed out after {}ms",
749                    name,
750                    timeout_ms
751                )),
752            }
753        } else {
754            fut.await
755        }
756    }
757
758    /// Convert a tool execution result into the (output, exit_code, is_error, metadata, images) tuple.
759    fn tool_result_to_tuple(
760        result: anyhow::Result<crate::tools::ToolResult>,
761    ) -> (
762        String,
763        i32,
764        bool,
765        Option<serde_json::Value>,
766        Vec<crate::llm::Attachment>,
767    ) {
768        match result {
769            Ok(r) => (
770                r.output,
771                r.exit_code,
772                r.exit_code != 0,
773                r.metadata,
774                r.images,
775            ),
776            Err(e) => {
777                let msg = e.to_string();
778                // Classify the error so the LLM knows whether retrying makes sense.
779                let hint = if Self::is_transient_error(&msg) {
780                    " [transient — you may retry this tool call]"
781                } else {
782                    " [permanent — do not retry without changing the arguments]"
783                };
784                (
785                    format!("Tool execution error: {}{}", msg, hint),
786                    1,
787                    true,
788                    None,
789                    Vec::new(),
790                )
791            }
792        }
793    }
794
795    /// Inspect the workspace for well-known project marker files and return a short
796    /// `## Project Context` section that the agent can use without any manual configuration.
797    /// Returns an empty string when the workspace type cannot be determined.
fn detect_project_hint(workspace: &std::path::Path) -> String {
    // A well-known root-level file for one ecosystem, plus the guidance to
    // inject into the prompt when that file is present.
    struct Marker {
        file: &'static str,
        lang: &'static str,
        tip: &'static str,
    }

    // Order matters: the first marker whose file exists wins, so more specific
    // manifests are listed ahead of generic ones such as `Makefile`.
    let markers = [
        Marker {
            file: "Cargo.toml",
            lang: "Rust",
            tip: "Use `cargo build`, `cargo test`, `cargo clippy`, and `cargo fmt`. \
                  Prefer `anyhow` / `thiserror` for error handling. \
                  Follow the Microsoft Rust Guidelines (no panics in library code, \
                  async-first with Tokio).",
        },
        Marker {
            file: "package.json",
            lang: "Node.js / TypeScript",
            tip: "Check `package.json` for the package manager (npm/yarn/pnpm/bun) \
                  and available scripts. Prefer TypeScript with strict mode. \
                  Use ESM imports unless the project is CommonJS.",
        },
        Marker {
            file: "pyproject.toml",
            lang: "Python",
            tip: "Use the package manager declared in `pyproject.toml` \
                  (uv, poetry, hatch, etc.). Prefer type hints and async/await for I/O.",
        },
        Marker {
            file: "setup.py",
            lang: "Python",
            tip: "Legacy Python project. Prefer type hints and async/await for I/O.",
        },
        Marker {
            file: "requirements.txt",
            lang: "Python",
            tip: "Python project with pip-style dependencies. \
                  Prefer type hints and async/await for I/O.",
        },
        Marker {
            file: "go.mod",
            lang: "Go",
            tip: "Use `go build ./...` and `go test ./...`. \
                  Follow standard Go project layout. Use `gofmt` for formatting.",
        },
        Marker {
            file: "pom.xml",
            lang: "Java / Maven",
            tip: "Use `mvn compile`, `mvn test`, `mvn package`. \
                  Follow standard Maven project structure.",
        },
        Marker {
            file: "build.gradle",
            lang: "Java / Gradle",
            tip: "Use `./gradlew build` and `./gradlew test`. \
                  Follow standard Gradle project structure.",
        },
        Marker {
            file: "build.gradle.kts",
            lang: "Kotlin / Gradle",
            tip: "Use `./gradlew build` and `./gradlew test`. \
                  Prefer Kotlin coroutines for async work.",
        },
        Marker {
            file: "CMakeLists.txt",
            lang: "C / C++",
            tip: "Use `cmake -B build && cmake --build build`. \
                  Check for `compile_commands.json` for IDE tooling.",
        },
        Marker {
            file: "Makefile",
            lang: "C / C++ (or generic)",
            tip: "Use `make` or `make <target>`. \
                  Check available targets with `make help` or by reading the Makefile.",
        },
    ];

    // C# / .NET has no single fixed marker filename, so scan the workspace root
    // for any entry ending in `.csproj` or `.sln`. `Path::join` does not expand
    // glob patterns, so a directory scan is the only check that can match.
    // A missing or unreadable workspace simply counts as "not .NET".
    let is_dotnet = std::fs::read_dir(workspace)
        .map(|entries| {
            entries.flatten().any(|entry| {
                let name = entry.file_name();
                let name = name.to_string_lossy();
                name.ends_with(".csproj") || name.ends_with(".sln")
            })
        })
        .unwrap_or(false);

    // .NET is checked first and therefore takes precedence over the fixed markers.
    if is_dotnet {
        return "## Project Context\n\nThis is a **C# / .NET** project. \
             Use `dotnet build`, `dotnet test`, and `dotnet run`. \
             Follow C# coding conventions and async/await patterns."
            .to_string();
    }

    markers
        .iter()
        .find(|marker| workspace.join(marker.file).exists())
        .map(|marker| {
            format!(
                "## Project Context\n\nThis is a **{}** project. {}",
                marker.lang, marker.tip
            )
        })
        .unwrap_or_default()
}
908
/// Returns `true` for errors that are likely transient (network, timeout, I/O contention).
/// Used to annotate tool error messages so the LLM knows whether retrying is safe.
///
/// Matching is case-insensitive. An error counts as transient when the message
/// contains one of the known phrases, or an `os error 11` / `os error 35` code
/// that is not followed by further digits (so `os error 117` does not match).
fn is_transient_error(msg: &str) -> bool {
    // Substrings that indicate a retryable condition.
    const TRANSIENT_PHRASES: [&str; 10] = [
        "timeout",
        "timed out",
        "connection refused",
        "connection reset",
        "broken pipe",
        // Also covers "resource temporarily unavailable".
        "temporarily unavailable",
        "rate limit",
        "too many requests",
        "service unavailable",
        "network unreachable",
    ];
    // EAGAIN as rendered in OS error messages: 11, and 35 on macOS.
    const TRANSIENT_OS_ERRORS: [&str; 2] = ["os error 11", "os error 35"];

    let lower = msg.to_lowercase();

    if TRANSIENT_PHRASES.iter().any(|phrase| lower.contains(*phrase)) {
        return true;
    }

    // Require the numeric code to end where the pattern ends; a plain substring
    // test would also accept unrelated codes that merely share the prefix.
    TRANSIENT_OS_ERRORS.iter().any(|code| {
        lower.match_indices(*code).any(|(start, matched)| {
            !lower[start + matched.len()..].starts_with(|c: char| c.is_ascii_digit())
        })
    })
}
927
928    /// Returns `true` when a tool writes a file and is safe to run concurrently with other
929    /// independent writes (no ordering dependencies, no side-channel output).
930    fn is_parallel_safe_write(name: &str, _args: &serde_json::Value) -> bool {
931        matches!(
932            name,
933            "write_file" | "edit_file" | "create_file" | "append_to_file" | "replace_in_file"
934        )
935    }
936
937    /// Extract the target file path from write-tool arguments so we can check for conflicts.
938    fn extract_write_path(args: &serde_json::Value) -> Option<String> {
939        // write_file / create_file / append_to_file / replace_in_file use "path"
940        // edit_file uses "path" as well
941        args.get("path")
942            .and_then(|v| v.as_str())
943            .map(|s| s.to_string())
944    }
945
946    /// Execute a tool through the lane queue (if configured) or directly.
947    /// Wraps execution in task lifecycle if task_manager is configured.
948    async fn execute_tool_queued_or_direct(
949        &self,
950        name: &str,
951        args: &serde_json::Value,
952        ctx: &ToolContext,
953    ) -> anyhow::Result<crate::tools::ToolResult> {
954        // Create task for this tool execution if task_manager is available
955        let task_id = if let Some(ref tm) = self.task_manager {
956            let task = crate::task::Task::tool(name, args.clone());
957            let id = task.id;
958            tm.spawn(task);
959            // Start the task immediately
960            let _ = tm.start(id);
961            Some(id)
962        } else {
963            None
964        };
965
966        let result = self
967            .execute_tool_queued_or_direct_inner(name, args, ctx)
968            .await;
969
970        // Complete or fail the task based on result
971        if let Some(ref tm) = self.task_manager {
972            if let Some(tid) = task_id {
973                match &result {
974                    Ok(r) => {
975                        let output = serde_json::json!({
976                            "output": r.output.clone(),
977                            "exit_code": r.exit_code,
978                        });
979                        let _ = tm.complete(tid, Some(output));
980                    }
981                    Err(e) => {
982                        let _ = tm.fail(tid, e.to_string());
983                    }
984                }
985            }
986        }
987
988        result
989    }
990
991    /// Inner execution without task lifecycle wrapping.
992    async fn execute_tool_queued_or_direct_inner(
993        &self,
994        name: &str,
995        args: &serde_json::Value,
996        ctx: &ToolContext,
997    ) -> anyhow::Result<crate::tools::ToolResult> {
998        if let Some(ref queue) = self.command_queue {
999            let command = ToolCommand::new(
1000                Arc::clone(&self.tool_executor),
1001                name.to_string(),
1002                args.clone(),
1003                ctx.clone(),
1004                self.config.skill_registry.clone(),
1005            );
1006            let rx = queue.submit_by_tool(name, Box::new(command)).await;
1007            match rx.await {
1008                Ok(Ok(value)) => {
1009                    let output = value["output"]
1010                        .as_str()
1011                        .ok_or_else(|| {
1012                            anyhow::anyhow!(
1013                                "Queue result missing 'output' field for tool '{}'",
1014                                name
1015                            )
1016                        })?
1017                        .to_string();
1018                    let exit_code = value["exit_code"].as_i64().unwrap_or(0) as i32;
1019                    return Ok(crate::tools::ToolResult {
1020                        name: name.to_string(),
1021                        output,
1022                        exit_code,
1023                        metadata: None,
1024                        images: Vec::new(),
1025                    });
1026                }
1027                Ok(Err(e)) => {
1028                    tracing::warn!(
1029                        "Queue execution failed for tool '{}', falling back to direct: {}",
1030                        name,
1031                        e
1032                    );
1033                }
1034                Err(_) => {
1035                    tracing::warn!(
1036                        "Queue channel closed for tool '{}', falling back to direct",
1037                        name
1038                    );
1039                }
1040            }
1041        }
1042        self.execute_tool_timed(name, args, ctx).await
1043    }
1044
1045    /// Call the LLM, handling streaming vs non-streaming internally.
1046    ///
1047    /// Streaming events (`TextDelta`, `ToolStart`) are forwarded to `event_tx`
1048    /// as they arrive. Non-streaming mode simply awaits the complete response.
1049    ///
1050    /// When a `ToolIndex` is configured, tools are filtered per-turn based on
1051    /// the last user message, reducing context usage with large tool sets.
1052    ///
1053    /// Returns `Err` on any LLM API failure. The circuit breaker in
1054    /// `execute_loop` wraps this call with retry logic for non-streaming mode.
1055    async fn call_llm(
1056        &self,
1057        messages: &[Message],
1058        system: Option<&str>,
1059        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1060        cancel_token: &tokio_util::sync::CancellationToken,
1061    ) -> anyhow::Result<LlmResponse> {
1062        // Filter tools through ToolIndex if configured
1063        let tools = if let Some(ref index) = self.config.tool_index {
1064            let query = messages
1065                .iter()
1066                .rev()
1067                .find(|m| m.role == "user")
1068                .and_then(|m| {
1069                    m.content.iter().find_map(|b| match b {
1070                        crate::llm::ContentBlock::Text { text } => Some(text.as_str()),
1071                        _ => None,
1072                    })
1073                })
1074                .unwrap_or("");
1075            let matches = index.search(query, index.len());
1076            let matched_names: std::collections::HashSet<&str> =
1077                matches.iter().map(|m| m.name.as_str()).collect();
1078            self.config
1079                .tools
1080                .iter()
1081                .filter(|t| matched_names.contains(t.name.as_str()))
1082                .cloned()
1083                .collect::<Vec<_>>()
1084        } else {
1085            self.config.tools.clone()
1086        };
1087
1088        if event_tx.is_some() {
1089            let mut stream_rx = match self
1090                .llm_client
1091                .complete_streaming(messages, system, &tools)
1092                .await
1093            {
1094                Ok(rx) => rx,
1095                Err(stream_error) => {
1096                    tracing::warn!(
1097                        error = %stream_error,
1098                        "LLM streaming setup failed; falling back to non-streaming completion"
1099                    );
1100                    return self
1101                        .llm_client
1102                        .complete(messages, system, &tools)
1103                        .await
1104                        .with_context(|| {
1105                            format!(
1106                                "LLM streaming call failed ({stream_error}); non-streaming fallback also failed"
1107                            )
1108                        });
1109                }
1110            };
1111
1112            let mut final_response: Option<LlmResponse> = None;
1113            loop {
1114                tokio::select! {
1115                    _ = cancel_token.cancelled() => {
1116                        tracing::info!("🛑 LLM streaming cancelled by CancellationToken");
1117                        anyhow::bail!("Operation cancelled by user");
1118                    }
1119                    event = stream_rx.recv() => {
1120                        match event {
1121                            Some(crate::llm::StreamEvent::TextDelta(text)) => {
1122                                if let Some(tx) = event_tx {
1123                                    tx.send(AgentEvent::TextDelta { text }).await.ok();
1124                                }
1125                            }
1126                            Some(crate::llm::StreamEvent::ToolUseStart { id, name }) => {
1127                                if let Some(tx) = event_tx {
1128                                    tx.send(AgentEvent::ToolStart { id, name }).await.ok();
1129                                }
1130                            }
1131                            Some(crate::llm::StreamEvent::ToolUseInputDelta(delta)) => {
1132                                if let Some(tx) = event_tx {
1133                                    tx.send(AgentEvent::ToolInputDelta { delta }).await.ok();
1134                                }
1135                            }
1136                            Some(crate::llm::StreamEvent::Done(resp)) => {
1137                                final_response = Some(resp);
1138                                break;
1139                            }
1140                            None => break,
1141                        }
1142                    }
1143                }
1144            }
1145            final_response.context("Stream ended without final response")
1146        } else {
1147            self.llm_client
1148                .complete(messages, system, &tools)
1149                .await
1150                .context("LLM call failed")
1151        }
1152    }
1153
1154    /// Create a tool context with streaming support.
1155    ///
1156    /// When `event_tx` is Some, spawns a forwarder task that converts
1157    /// `ToolStreamEvent::OutputDelta` into `AgentEvent::ToolOutputDelta`
1158    /// and sends them to the agent event channel.
1159    ///
1160    /// Returns the augmented `ToolContext`. The forwarder task runs until
1161    /// the tool-side sender is dropped (i.e., tool execution finishes).
1162    fn streaming_tool_context(
1163        &self,
1164        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1165        tool_id: &str,
1166        tool_name: &str,
1167    ) -> ToolContext {
1168        let mut ctx = self.tool_context.clone();
1169        if let Some(agent_tx) = event_tx {
1170            let (tool_tx, mut tool_rx) = mpsc::channel::<ToolStreamEvent>(64);
1171            ctx.event_tx = Some(tool_tx);
1172
1173            let agent_tx = agent_tx.clone();
1174            let tool_id = tool_id.to_string();
1175            let tool_name = tool_name.to_string();
1176            tokio::spawn(async move {
1177                while let Some(event) = tool_rx.recv().await {
1178                    match event {
1179                        ToolStreamEvent::OutputDelta(delta) => {
1180                            agent_tx
1181                                .send(AgentEvent::ToolOutputDelta {
1182                                    id: tool_id.clone(),
1183                                    name: tool_name.clone(),
1184                                    delta,
1185                                })
1186                                .await
1187                                .ok();
1188                        }
1189                    }
1190                }
1191            });
1192        }
1193        ctx
1194    }
1195
1196    /// Resolve context from all providers for a given prompt
1197    ///
1198    /// Returns aggregated context results from all configured providers.
1199    async fn resolve_context(&self, prompt: &str, session_id: Option<&str>) -> Vec<ContextResult> {
1200        if self.config.context_providers.is_empty() {
1201            return Vec::new();
1202        }
1203
1204        let query = ContextQuery::new(prompt).with_session_id(session_id.unwrap_or(""));
1205
1206        let futures = self
1207            .config
1208            .context_providers
1209            .iter()
1210            .map(|p| p.query(&query));
1211        let outcomes = join_all(futures).await;
1212
1213        outcomes
1214            .into_iter()
1215            .enumerate()
1216            .filter_map(|(i, r)| match r {
1217                Ok(result) if !result.is_empty() => Some(result),
1218                Ok(_) => None,
1219                Err(e) => {
1220                    tracing::warn!(
1221                        "Context provider '{}' failed: {}",
1222                        self.config.context_providers[i].name(),
1223                        e
1224                    );
1225                    None
1226                }
1227            })
1228            .collect()
1229    }
1230
/// Detect whether the LLM's no-tool-call response looks like an intermediate
/// step rather than a genuine final answer.
///
/// Returns `true` when continuation should be injected. Heuristics:
/// - Response is empty after trimming
/// - Response is a short single line (< 80 chars) ending with a colon,
///   ellipsis, or comma (mid-thought)
/// - Response contains a phrase that signals planned-but-undone work
///   (e.g. "let me ", "i'll "), matched case-insensitively at the start of a
///   word so that e.g. "know it" does not trigger the "now i" phrase
fn looks_incomplete(text: &str) -> bool {
    let t = text.trim();
    if t.is_empty() {
        return true;
    }

    // Count characters, not bytes, so non-ASCII responses are judged by the
    // same length threshold; `take(80)` bounds the scan for long responses.
    let is_short_single_line = !t.contains('\n') && t.chars().take(80).count() < 80;
    if is_short_single_line {
        // A short one-liner may be a genuine answer; only treat it as unfinished
        // when its final punctuation signals that more was meant to follow.
        let ends_continuation =
            t.ends_with(':') || t.ends_with("...") || t.ends_with('…') || t.ends_with(',');
        if ends_continuation {
            return true;
        }
    }

    // Phrases that signal the LLM is describing what it will do rather than doing it
    let incomplete_phrases = [
        "i'll ",
        "i will ",
        "let me ",
        "i need to ",
        "i should ",
        "next, i",
        "first, i",
        "now i",
        "i'll start",
        "i'll begin",
        "i'll now",
        "let's start",
        "let's begin",
        "to do this",
        "i'm going to",
    ];
    let lower = t.to_lowercase();

    // True when `phrase` occurs in `lower` at the very start or right after a
    // non-alphanumeric character, i.e. not in the middle of another word.
    let occurs_at_word_start = |phrase: &str| -> bool {
        let mut previous: Option<char> = None;
        for (idx, ch) in lower.char_indices() {
            let at_boundary = previous.map_or(true, |p| !p.is_alphanumeric());
            if at_boundary && lower[idx..].starts_with(phrase) {
                return true;
            }
            previous = Some(ch);
        }
        false
    };

    incomplete_phrases
        .iter()
        .any(|phrase| occurs_at_word_start(phrase))
}
1279
1280    /// Get the assembled system prompt from slots.
1281    #[allow(dead_code)]
1282    fn system_prompt(&self) -> String {
1283        self.config.prompt_slots.build()
1284    }
1285
1286    /// Get the assembled system prompt from slots with an explicit style.
1287    fn system_prompt_for_style(&self, style: AgentStyle) -> String {
1288        let mut slots = self.config.prompt_slots.clone();
1289        slots.style = Some(style);
1290        slots.build()
1291    }
1292
1293    async fn resolve_effective_style(&self, prompt: &str) -> AgentStyle {
1294        if let Some(style) = self.config.prompt_slots.style {
1295            return style;
1296        }
1297
1298        let (style, confidence) = AgentStyle::detect_with_confidence(prompt);
1299        if confidence != DetectionConfidence::Low {
1300            return style;
1301        }
1302
1303        match AgentStyle::detect_with_llm(self.llm_client.as_ref(), prompt).await {
1304            Ok(classified_style) => {
1305                tracing::debug!(
1306                    intent.classification = ?classified_style,
1307                    intent.source = "llm",
1308                    "Intent classified via LLM"
1309                );
1310                classified_style
1311            }
1312            Err(e) => {
1313                tracing::warn!(error = %e, "LLM intent classification failed, using keyword detection");
1314                style
1315            }
1316        }
1317    }
1318
1319    /// Build augmented system prompt with context
1320    #[allow(dead_code)]
1321    fn build_augmented_system_prompt(&self, context_results: &[ContextResult]) -> Option<String> {
1322        let base = self.system_prompt();
1323        self.build_augmented_system_prompt_with_base(&base, context_results)
1324    }
1325
1326    fn build_augmented_system_prompt_with_base(
1327        &self,
1328        base: &str,
1329        context_results: &[ContextResult],
1330    ) -> Option<String> {
1331        let base = base.to_string();
1332
1333        // Use live tool executor definitions so tools added via add_mcp_server() are included
1334        let live_tools = self.tool_executor.definitions();
1335        let mcp_tools: Vec<&ToolDefinition> = live_tools
1336            .iter()
1337            .filter(|t| t.name.starts_with("mcp__"))
1338            .collect();
1339
1340        let mcp_section = if mcp_tools.is_empty() {
1341            String::new()
1342        } else {
1343            let mut lines = vec![
1344                "## MCP Tools".to_string(),
1345                String::new(),
1346                "The following MCP (Model Context Protocol) tools are available. Use them when the task requires external capabilities beyond the built-in tools:".to_string(),
1347                String::new(),
1348            ];
1349            for tool in &mcp_tools {
1350                let display = format!("- `{}` — {}", tool.name, tool.description);
1351                lines.push(display);
1352            }
1353            lines.join("\n")
1354        };
1355
1356        let parts: Vec<&str> = [base.as_str(), mcp_section.as_str()]
1357            .iter()
1358            .filter(|s| !s.is_empty())
1359            .copied()
1360            .collect();
1361
1362        // Auto-detect project type from workspace and inject language-specific guidelines,
1363        // but only when the user hasn't already set a custom `guidelines` slot.
1364        let project_hint = if self.config.prompt_slots.guidelines.is_none() {
1365            Self::detect_project_hint(&self.tool_context.workspace)
1366        } else {
1367            String::new()
1368        };
1369
1370        if context_results.is_empty() {
1371            if project_hint.is_empty() {
1372                return Some(parts.join("\n\n"));
1373            }
1374            return Some(format!("{}\n\n{}", parts.join("\n\n"), project_hint));
1375        }
1376
1377        // Build context XML block
1378        let context_xml: String = context_results
1379            .iter()
1380            .map(|r| r.to_xml())
1381            .collect::<Vec<_>>()
1382            .join("\n\n");
1383
1384        if project_hint.is_empty() {
1385            Some(format!("{}\n\n{}", parts.join("\n\n"), context_xml))
1386        } else {
1387            Some(format!(
1388                "{}\n\n{}\n\n{}",
1389                parts.join("\n\n"),
1390                project_hint,
1391                context_xml
1392            ))
1393        }
1394    }
1395
1396    /// Notify providers of turn completion for memory extraction
1397    async fn notify_turn_complete(&self, session_id: &str, prompt: &str, response: &str) {
1398        let futures = self
1399            .config
1400            .context_providers
1401            .iter()
1402            .map(|p| p.on_turn_complete(session_id, prompt, response));
1403        let outcomes = join_all(futures).await;
1404
1405        for (i, result) in outcomes.into_iter().enumerate() {
1406            if let Err(e) = result {
1407                tracing::warn!(
1408                    "Context provider '{}' on_turn_complete failed: {}",
1409                    self.config.context_providers[i].name(),
1410                    e
1411                );
1412            }
1413        }
1414    }
1415
1416    /// Fire PreToolUse hook event before tool execution.
1417    /// Returns the HookResult which may block the tool call.
1418    async fn fire_pre_tool_use(
1419        &self,
1420        session_id: &str,
1421        tool_name: &str,
1422        args: &serde_json::Value,
1423        recent_tools: Vec<String>,
1424    ) -> Option<HookResult> {
1425        if let Some(he) = &self.config.hook_engine {
1426            // Convert null args to empty object so JS callbacks don't get null.input errors
1427            let safe_args = if args.is_null() {
1428                serde_json::Value::Object(Default::default())
1429            } else {
1430                args.clone()
1431            };
1432            let event = HookEvent::PreToolUse(PreToolUseEvent {
1433                session_id: session_id.to_string(),
1434                tool: tool_name.to_string(),
1435                args: safe_args,
1436                working_directory: self.tool_context.workspace.to_string_lossy().to_string(),
1437                recent_tools,
1438            });
1439            let result = he.fire(&event).await;
1440            if result.is_block() {
1441                return Some(result);
1442            }
1443        }
1444        None
1445    }
1446
1447    /// Fire PostToolUse hook event after tool execution (fire-and-forget).
1448    async fn fire_post_tool_use(
1449        &self,
1450        session_id: &str,
1451        tool_name: &str,
1452        args: &serde_json::Value,
1453        output: &str,
1454        success: bool,
1455        duration_ms: u64,
1456    ) {
1457        if let Some(he) = &self.config.hook_engine {
1458            // Convert null args to empty object so JS callbacks don't get null.input errors
1459            let safe_args = if args.is_null() {
1460                serde_json::Value::Object(Default::default())
1461            } else {
1462                args.clone()
1463            };
1464            let event = HookEvent::PostToolUse(PostToolUseEvent {
1465                session_id: session_id.to_string(),
1466                tool: tool_name.to_string(),
1467                args: safe_args,
1468                result: ToolResultData {
1469                    success,
1470                    output: output.to_string(),
1471                    exit_code: if success { Some(0) } else { Some(1) },
1472                    duration_ms,
1473                },
1474            });
1475            let he = Arc::clone(he);
1476            tokio::spawn(async move {
1477                let _ = he.fire(&event).await;
1478            });
1479        }
1480    }
1481
1482    /// Fire GenerateStart hook event before an LLM call.
1483    async fn fire_generate_start(
1484        &self,
1485        session_id: &str,
1486        prompt: &str,
1487        system_prompt: &Option<String>,
1488    ) {
1489        if let Some(he) = &self.config.hook_engine {
1490            let event = HookEvent::GenerateStart(GenerateStartEvent {
1491                session_id: session_id.to_string(),
1492                prompt: prompt.to_string(),
1493                system_prompt: system_prompt.clone(),
1494                model_provider: String::new(),
1495                model_name: String::new(),
1496                available_tools: self.config.tools.iter().map(|t| t.name.clone()).collect(),
1497            });
1498            let _ = he.fire(&event).await;
1499        }
1500    }
1501
1502    /// Fire GenerateEnd hook event after an LLM call.
1503    async fn fire_generate_end(
1504        &self,
1505        session_id: &str,
1506        prompt: &str,
1507        response: &LlmResponse,
1508        duration_ms: u64,
1509    ) {
1510        if let Some(he) = &self.config.hook_engine {
1511            let tool_calls: Vec<ToolCallInfo> = response
1512                .tool_calls()
1513                .iter()
1514                .map(|tc| {
1515                    let args = if tc.args.is_null() {
1516                        serde_json::Value::Object(Default::default())
1517                    } else {
1518                        tc.args.clone()
1519                    };
1520                    ToolCallInfo {
1521                        name: tc.name.clone(),
1522                        args,
1523                    }
1524                })
1525                .collect();
1526
1527            let event = HookEvent::GenerateEnd(GenerateEndEvent {
1528                session_id: session_id.to_string(),
1529                prompt: prompt.to_string(),
1530                response_text: response.text().to_string(),
1531                tool_calls,
1532                usage: TokenUsageInfo {
1533                    prompt_tokens: response.usage.prompt_tokens as i32,
1534                    completion_tokens: response.usage.completion_tokens as i32,
1535                    total_tokens: response.usage.total_tokens as i32,
1536                },
1537                duration_ms,
1538            });
1539            let _ = he.fire(&event).await;
1540        }
1541    }
1542
1543    /// Fire PrePrompt hook event before prompt augmentation.
1544    /// Returns optional modified prompt text from the hook.
1545    async fn fire_pre_prompt(
1546        &self,
1547        session_id: &str,
1548        prompt: &str,
1549        system_prompt: &Option<String>,
1550        message_count: usize,
1551    ) -> Option<String> {
1552        if let Some(he) = &self.config.hook_engine {
1553            let event = HookEvent::PrePrompt(PrePromptEvent {
1554                session_id: session_id.to_string(),
1555                prompt: prompt.to_string(),
1556                system_prompt: system_prompt.clone(),
1557                message_count,
1558            });
1559            let result = he.fire(&event).await;
1560            if let HookResult::Continue(Some(modified)) = result {
1561                // Extract modified prompt from hook response
1562                if let Some(new_prompt) = modified.get("prompt").and_then(|v| v.as_str()) {
1563                    return Some(new_prompt.to_string());
1564                }
1565            }
1566        }
1567        None
1568    }
1569
1570    /// Fire PostResponse hook event after the agent loop completes.
1571    async fn fire_post_response(
1572        &self,
1573        session_id: &str,
1574        response_text: &str,
1575        tool_calls_count: usize,
1576        usage: &TokenUsage,
1577        duration_ms: u64,
1578    ) {
1579        if let Some(he) = &self.config.hook_engine {
1580            let event = HookEvent::PostResponse(PostResponseEvent {
1581                session_id: session_id.to_string(),
1582                response_text: response_text.to_string(),
1583                tool_calls_count,
1584                usage: TokenUsageInfo {
1585                    prompt_tokens: usage.prompt_tokens as i32,
1586                    completion_tokens: usage.completion_tokens as i32,
1587                    total_tokens: usage.total_tokens as i32,
1588                },
1589                duration_ms,
1590            });
1591            let he = Arc::clone(he);
1592            tokio::spawn(async move {
1593                let _ = he.fire(&event).await;
1594            });
1595        }
1596    }
1597
1598    /// Fire OnError hook event when an error occurs.
1599    async fn fire_on_error(
1600        &self,
1601        session_id: &str,
1602        error_type: ErrorType,
1603        error_message: &str,
1604        context: serde_json::Value,
1605    ) {
1606        if let Some(he) = &self.config.hook_engine {
1607            let event = HookEvent::OnError(OnErrorEvent {
1608                session_id: session_id.to_string(),
1609                error_type,
1610                error_message: error_message.to_string(),
1611                context,
1612            });
1613            let he = Arc::clone(he);
1614            tokio::spawn(async move {
1615                let _ = he.fire(&event).await;
1616            });
1617        }
1618    }
1619
1620    /// Execute the agent loop for a prompt
1621    ///
1622    /// Takes the conversation history and a new user prompt.
1623    /// Returns the agent result and updated message history.
1624    /// When event_tx is provided, uses streaming LLM API for real-time text output.
1625    pub async fn execute(
1626        &self,
1627        history: &[Message],
1628        prompt: &str,
1629        event_tx: Option<mpsc::Sender<AgentEvent>>,
1630    ) -> Result<AgentResult> {
1631        self.execute_with_session(history, prompt, None, event_tx, None)
1632            .await
1633    }
1634
1635    /// Execute the agent loop with pre-built messages (user message already included).
1636    ///
1637    /// Used by `send_with_attachments` / `stream_with_attachments` where the
1638    /// user message contains multi-modal content and is already appended to
1639    /// the messages vec.
1640    pub async fn execute_from_messages(
1641        &self,
1642        messages: Vec<Message>,
1643        session_id: Option<&str>,
1644        event_tx: Option<mpsc::Sender<AgentEvent>>,
1645        cancel_token: Option<&tokio_util::sync::CancellationToken>,
1646    ) -> Result<AgentResult> {
1647        let default_token = tokio_util::sync::CancellationToken::new();
1648        let token = cancel_token.unwrap_or(&default_token);
1649        tracing::info!(
1650            a3s.session.id = session_id.unwrap_or("none"),
1651            a3s.agent.max_turns = self.config.max_tool_rounds,
1652            "a3s.agent.execute_from_messages started"
1653        );
1654
1655        // Extract the last user message text for hooks, memory recall, and events.
1656        // Pass empty prompt so execute_loop skips adding a duplicate user message,
1657        // but provide effective_prompt for hook/memory/event purposes.
1658        let effective_prompt = messages
1659            .iter()
1660            .rev()
1661            .find(|m| m.role == "user")
1662            .map(|m| m.text())
1663            .unwrap_or_default();
1664
1665        let result = self
1666            .execute_loop_inner(
1667                &messages,
1668                "",
1669                &effective_prompt,
1670                session_id,
1671                event_tx,
1672                token,
1673                true, // emit_end: this is a standalone execution
1674            )
1675            .await;
1676
1677        match &result {
1678            Ok(r) => tracing::info!(
1679                a3s.agent.tool_calls_count = r.tool_calls_count,
1680                a3s.llm.total_tokens = r.usage.total_tokens,
1681                "a3s.agent.execute_from_messages completed"
1682            ),
1683            Err(e) => tracing::warn!(
1684                error = %e,
1685                "a3s.agent.execute_from_messages failed"
1686            ),
1687        }
1688
1689        result
1690    }
1691
1692    /// Execute the agent loop for a prompt with session context
1693    ///
1694    /// Takes the conversation history, user prompt, and optional session ID.
1695    /// When session_id is provided, context providers can use it for session-specific context.
1696    pub async fn execute_with_session(
1697        &self,
1698        history: &[Message],
1699        prompt: &str,
1700        session_id: Option<&str>,
1701        event_tx: Option<mpsc::Sender<AgentEvent>>,
1702        cancel_token: Option<&tokio_util::sync::CancellationToken>,
1703    ) -> Result<AgentResult> {
1704        let default_token = tokio_util::sync::CancellationToken::new();
1705        let token = cancel_token.unwrap_or(&default_token);
1706        tracing::info!(
1707            a3s.session.id = session_id.unwrap_or("none"),
1708            a3s.agent.max_turns = self.config.max_tool_rounds,
1709            "a3s.agent.execute started"
1710        );
1711
1712        let effective_style = self.resolve_effective_style(prompt).await;
1713
1714        // Determine whether to use planning mode
1715        let use_planning = if self.config.planning_mode == PlanningMode::Auto {
1716            effective_style.requires_planning()
1717        } else {
1718            // Explicit mode: Enabled or Disabled
1719            self.config.planning_mode.should_plan(prompt)
1720        };
1721
1722        // Create agent task if task_manager is available
1723        let task_id = if let Some(ref tm) = self.task_manager {
1724            let workspace = self.tool_context.workspace.display().to_string();
1725            let task = crate::task::Task::agent("agent", &workspace, prompt);
1726            let id = task.id;
1727            tm.spawn(task);
1728            let _ = tm.start(id);
1729            Some(id)
1730        } else {
1731            None
1732        };
1733
1734        let result = if use_planning {
1735            self.execute_with_planning(history, prompt, event_tx).await
1736        } else {
1737            self.execute_loop(history, prompt, session_id, event_tx, token, true)
1738                .await
1739        };
1740
1741        // Complete or fail agent task based on result
1742        if let Some(ref tm) = self.task_manager {
1743            if let Some(tid) = task_id {
1744                match &result {
1745                    Ok(r) => {
1746                        let output = serde_json::json!({
1747                            "text": r.text,
1748                            "tool_calls_count": r.tool_calls_count,
1749                            "usage": r.usage,
1750                        });
1751                        let _ = tm.complete(tid, Some(output));
1752                    }
1753                    Err(e) => {
1754                        let _ = tm.fail(tid, e.to_string());
1755                    }
1756                }
1757            }
1758        }
1759
1760        match &result {
1761            Ok(r) => {
1762                tracing::info!(
1763                    a3s.agent.tool_calls_count = r.tool_calls_count,
1764                    a3s.llm.total_tokens = r.usage.total_tokens,
1765                    "a3s.agent.execute completed"
1766                );
1767                // Fire PostResponse hook
1768                self.fire_post_response(
1769                    session_id.unwrap_or(""),
1770                    &r.text,
1771                    r.tool_calls_count,
1772                    &r.usage,
1773                    0, // duration tracked externally
1774                )
1775                .await;
1776            }
1777            Err(e) => {
1778                tracing::warn!(
1779                    error = %e,
1780                    "a3s.agent.execute failed"
1781                );
1782                // Fire OnError hook
1783                self.fire_on_error(
1784                    session_id.unwrap_or(""),
1785                    ErrorType::Other,
1786                    &e.to_string(),
1787                    serde_json::json!({"phase": "execute"}),
1788                )
1789                .await;
1790            }
1791        }
1792
1793        result
1794    }
1795
1796    /// Core execution loop (without planning routing).
1797    ///
1798    /// This is the inner loop that runs LLM calls and tool executions.
1799    /// Called directly by `execute_with_session` (after planning check)
1800    /// and by `execute_plan` (for individual steps, bypassing planning).
1801    async fn execute_loop(
1802        &self,
1803        history: &[Message],
1804        prompt: &str,
1805        session_id: Option<&str>,
1806        event_tx: Option<mpsc::Sender<AgentEvent>>,
1807        cancel_token: &tokio_util::sync::CancellationToken,
1808        emit_end: bool,
1809    ) -> Result<AgentResult> {
1810        // When called via execute_loop, the prompt is used for both
1811        // message-adding and hook/memory/event purposes.
1812        self.execute_loop_inner(
1813            history,
1814            prompt,
1815            prompt,
1816            session_id,
1817            event_tx,
1818            cancel_token,
1819            emit_end,
1820        )
1821        .await
1822    }
1823
1824    /// Inner execution loop.
1825    ///
1826    /// `msg_prompt` controls whether a user message is appended (empty = skip).
1827    /// `effective_prompt` is used for hooks, memory recall, taint tracking, and events.
1828    /// `emit_end` controls whether to send `AgentEvent::End` when the loop completes
1829    /// (should be false when called from `execute_plan` to avoid duplicate End events).
1830    #[allow(clippy::too_many_arguments)]
1831    async fn execute_loop_inner(
1832        &self,
1833        history: &[Message],
1834        msg_prompt: &str,
1835        effective_prompt: &str,
1836        session_id: Option<&str>,
1837        event_tx: Option<mpsc::Sender<AgentEvent>>,
1838        cancel_token: &tokio_util::sync::CancellationToken,
1839        emit_end: bool,
1840    ) -> Result<AgentResult> {
1841        let mut messages = history.to_vec();
1842        let mut total_usage = TokenUsage::default();
1843        let mut tool_calls_count = 0;
1844        let mut turn = 0;
1845        // Consecutive malformed-tool-args errors (4.1 parse error recovery)
1846        let mut parse_error_count: u32 = 0;
1847        // Continuation injection counter
1848        let mut continuation_count: u32 = 0;
1849        let mut recent_tool_signatures: Vec<String> = Vec::new();
1850        let style_prompt = if effective_prompt.is_empty() {
1851            msg_prompt
1852        } else {
1853            effective_prompt
1854        };
1855        let effective_style = self.resolve_effective_style(style_prompt).await;
1856        let effective_system_prompt = self.system_prompt_for_style(effective_style);
1857        if let Some(tx) = &event_tx {
1858            tx.send(AgentEvent::AgentModeChanged {
1859                mode: effective_style.runtime_mode().to_string(),
1860                agent: effective_style.builtin_agent_name().to_string(),
1861                description: effective_style.description().to_string(),
1862            })
1863            .await
1864            .ok();
1865        }
1866
1867        // Send start event
1868        if let Some(tx) = &event_tx {
1869            tx.send(AgentEvent::Start {
1870                prompt: effective_prompt.to_string(),
1871            })
1872            .await
1873            .ok();
1874        }
1875
1876        // Forward queue events (CommandDeadLettered, CommandRetry, QueueAlert) to event stream
1877        let _queue_forward_handle =
1878            if let (Some(ref queue), Some(ref tx)) = (&self.command_queue, &event_tx) {
1879                let mut rx = queue.subscribe();
1880                let tx = tx.clone();
1881                Some(tokio::spawn(async move {
1882                    while let Ok(event) = rx.recv().await {
1883                        if tx.send(event).await.is_err() {
1884                            break;
1885                        }
1886                    }
1887                }))
1888            } else {
1889                None
1890            };
1891
1892        // Fire PrePrompt hook (may modify the prompt)
1893        let built_system_prompt = Some(effective_system_prompt.clone());
1894        let hooked_prompt = if let Some(modified) = self
1895            .fire_pre_prompt(
1896                session_id.unwrap_or(""),
1897                effective_prompt,
1898                &built_system_prompt,
1899                messages.len(),
1900            )
1901            .await
1902        {
1903            modified
1904        } else {
1905            effective_prompt.to_string()
1906        };
1907        let effective_prompt = hooked_prompt.as_str();
1908
1909        // Taint-track the incoming prompt for sensitive data detection
1910        if let Some(ref sp) = self.config.security_provider {
1911            sp.taint_input(effective_prompt);
1912        }
1913
1914        // Recall relevant memories and inject into system prompt
1915        let system_with_memory = if let Some(ref memory) = self.config.memory {
1916            match memory.recall_similar(effective_prompt, 5).await {
1917                Ok(items) if !items.is_empty() => {
1918                    if let Some(tx) = &event_tx {
1919                        for item in &items {
1920                            tx.send(AgentEvent::MemoryRecalled {
1921                                memory_id: item.id.clone(),
1922                                content: item.content.clone(),
1923                                relevance: item.relevance_score(),
1924                            })
1925                            .await
1926                            .ok();
1927                        }
1928                        tx.send(AgentEvent::MemoriesSearched {
1929                            query: Some(effective_prompt.to_string()),
1930                            tags: Vec::new(),
1931                            result_count: items.len(),
1932                        })
1933                        .await
1934                        .ok();
1935                    }
1936                    let memory_context = items
1937                        .iter()
1938                        .map(|i| format!("- {}", i.content))
1939                        .collect::<Vec<_>>()
1940                        .join(
1941                            "
1942",
1943                        );
1944                    let base = effective_system_prompt.clone();
1945                    Some(format!(
1946                        "{}
1947
1948## Relevant past experience
1949{}",
1950                        base, memory_context
1951                    ))
1952                }
1953                _ => Some(effective_system_prompt.clone()),
1954            }
1955        } else {
1956            Some(effective_system_prompt.clone())
1957        };
1958
1959        // Resolve context from providers on first turn (before adding user message)
1960        let augmented_system = if !self.config.context_providers.is_empty() {
1961            // Send context resolving event
1962            if let Some(tx) = &event_tx {
1963                let provider_names: Vec<String> = self
1964                    .config
1965                    .context_providers
1966                    .iter()
1967                    .map(|p| p.name().to_string())
1968                    .collect();
1969                tx.send(AgentEvent::ContextResolving {
1970                    providers: provider_names,
1971                })
1972                .await
1973                .ok();
1974            }
1975
1976            tracing::info!(
1977                a3s.context.providers = self.config.context_providers.len() as i64,
1978                "Context resolution started"
1979            );
1980            let context_results = self.resolve_context(effective_prompt, session_id).await;
1981
1982            // Send context resolved event
1983            if let Some(tx) = &event_tx {
1984                let total_items: usize = context_results.iter().map(|r| r.items.len()).sum();
1985                let total_tokens: usize = context_results.iter().map(|r| r.total_tokens).sum();
1986
1987                tracing::info!(
1988                    context_items = total_items,
1989                    context_tokens = total_tokens,
1990                    "Context resolution completed"
1991                );
1992
1993                tx.send(AgentEvent::ContextResolved {
1994                    total_items,
1995                    total_tokens,
1996                })
1997                .await
1998                .ok();
1999            }
2000
2001            self.build_augmented_system_prompt_with_base(&effective_system_prompt, &context_results)
2002        } else {
2003            Some(effective_system_prompt.clone())
2004        };
2005
2006        // Merge memory context into system prompt
2007        let base_prompt = effective_system_prompt.clone();
2008        let augmented_system = match (augmented_system, system_with_memory) {
2009            (Some(ctx), Some(mem)) if ctx != mem => Some(ctx.replacen(&base_prompt, &mem, 1)),
2010            (Some(ctx), _) => Some(ctx),
2011            (None, mem) => mem,
2012        };
2013
2014        // Add user message
2015        if !msg_prompt.is_empty() {
2016            messages.push(Message::user(msg_prompt));
2017        }
2018
2019        loop {
2020            turn += 1;
2021
2022            if turn > self.config.max_tool_rounds {
2023                let error = format!("Max tool rounds ({}) exceeded", self.config.max_tool_rounds);
2024                if let Some(tx) = &event_tx {
2025                    tx.send(AgentEvent::Error {
2026                        message: error.clone(),
2027                    })
2028                    .await
2029                    .ok();
2030                }
2031                anyhow::bail!(error);
2032            }
2033
2034            // Send turn start event
2035            if let Some(tx) = &event_tx {
2036                tx.send(AgentEvent::TurnStart { turn }).await.ok();
2037            }
2038
2039            tracing::info!(
2040                turn = turn,
2041                max_turns = self.config.max_tool_rounds,
2042                "Agent turn started"
2043            );
2044
2045            // Call LLM - use streaming if we have an event channel
2046            tracing::info!(
2047                a3s.llm.streaming = event_tx.is_some(),
2048                "LLM completion started"
2049            );
2050
2051            // Fire GenerateStart hook
2052            self.fire_generate_start(
2053                session_id.unwrap_or(""),
2054                effective_prompt,
2055                &augmented_system,
2056            )
2057            .await;
2058
2059            let llm_start = std::time::Instant::now();
2060            // Circuit breaker (4.3): retry non-streaming LLM calls on transient failures.
2061            // Each failure increments `consecutive_llm_errors`; on success it resets to 0.
2062            // Streaming mode bails immediately on failure (events can't be replayed).
2063            let response = {
2064                let threshold = self.config.circuit_breaker_threshold.max(1);
2065                let mut attempt = 0u32;
2066                loop {
2067                    attempt += 1;
2068                    let result = self
2069                        .call_llm(
2070                            &messages,
2071                            augmented_system.as_deref(),
2072                            &event_tx,
2073                            cancel_token,
2074                        )
2075                        .await;
2076                    match result {
2077                        Ok(r) => {
2078                            break r;
2079                        }
2080                        // Never retry if cancelled
2081                        Err(e) if cancel_token.is_cancelled() => {
2082                            anyhow::bail!(e);
2083                        }
2084                        // Retry when: non-streaming under threshold, OR first streaming attempt
2085                        Err(e) if attempt < threshold && (event_tx.is_none() || attempt == 1) => {
2086                            tracing::warn!(
2087                                turn = turn,
2088                                attempt = attempt,
2089                                threshold = threshold,
2090                                error = %e,
2091                                "LLM call failed, will retry"
2092                            );
2093                            tokio::time::sleep(Duration::from_millis(100 * attempt as u64)).await;
2094                        }
2095                        // Threshold exceeded or streaming mid-stream: bail
2096                        Err(e) => {
2097                            let msg = if attempt > 1 {
2098                                format!(
2099                                    "LLM circuit breaker triggered: failed after {} attempt(s): {}",
2100                                    attempt, e
2101                                )
2102                            } else {
2103                                format!("LLM call failed: {}", e)
2104                            };
2105                            tracing::error!(turn = turn, attempt = attempt, "{}", msg);
2106                            // Fire OnError hook for LLM failure
2107                            self.fire_on_error(
2108                                session_id.unwrap_or(""),
2109                                ErrorType::LlmFailure,
2110                                &msg,
2111                                serde_json::json!({"turn": turn, "attempt": attempt}),
2112                            )
2113                            .await;
2114                            if let Some(tx) = &event_tx {
2115                                tx.send(AgentEvent::Error {
2116                                    message: msg.clone(),
2117                                })
2118                                .await
2119                                .ok();
2120                            }
2121                            anyhow::bail!(msg);
2122                        }
2123                    }
2124                }
2125            };
2126
2127            // Update usage
2128            total_usage.prompt_tokens += response.usage.prompt_tokens;
2129            total_usage.completion_tokens += response.usage.completion_tokens;
2130            total_usage.total_tokens += response.usage.total_tokens;
2131
2132            // Track token usage in progress tracker
2133            if let Some(ref tracker) = self.progress_tracker {
2134                let token_usage = crate::task::TaskTokenUsage {
2135                    input_tokens: response.usage.prompt_tokens as u64,
2136                    output_tokens: response.usage.completion_tokens as u64,
2137                    cache_read_tokens: response.usage.cache_read_tokens.unwrap_or(0) as u64,
2138                    cache_write_tokens: response.usage.cache_write_tokens.unwrap_or(0) as u64,
2139                };
2140                if let Ok(mut guard) = tracker.try_write() {
2141                    guard.track_tokens(token_usage);
2142                }
2143            }
2144
2145            // Record LLM completion telemetry
2146            let llm_duration = llm_start.elapsed();
2147            tracing::info!(
2148                turn = turn,
2149                streaming = event_tx.is_some(),
2150                prompt_tokens = response.usage.prompt_tokens,
2151                completion_tokens = response.usage.completion_tokens,
2152                total_tokens = response.usage.total_tokens,
2153                stop_reason = response.stop_reason.as_deref().unwrap_or("unknown"),
2154                duration_ms = llm_duration.as_millis() as u64,
2155                "LLM completion finished"
2156            );
2157
2158            // Fire GenerateEnd hook
2159            self.fire_generate_end(
2160                session_id.unwrap_or(""),
2161                effective_prompt,
2162                &response,
2163                llm_duration.as_millis() as u64,
2164            )
2165            .await;
2166
2167            // Record LLM usage on the llm span
2168            crate::telemetry::record_llm_usage(
2169                response.usage.prompt_tokens,
2170                response.usage.completion_tokens,
2171                response.usage.total_tokens,
2172                response.stop_reason.as_deref(),
2173            );
2174            // Log turn token usage
2175            tracing::info!(
2176                turn = turn,
2177                a3s.llm.total_tokens = response.usage.total_tokens,
2178                "Turn token usage"
2179            );
2180
2181            // Add assistant message to history
2182            messages.push(response.message.clone());
2183
2184            // Check for tool calls
2185            let tool_calls = response.tool_calls();
2186
2187            // Send turn end event
2188            if let Some(tx) = &event_tx {
2189                tx.send(AgentEvent::TurnEnd {
2190                    turn,
2191                    usage: response.usage.clone(),
2192                })
2193                .await
2194                .ok();
2195            }
2196
2197            // Auto-compact: check if context usage exceeds threshold
2198            if self.config.auto_compact {
2199                let used = response.usage.prompt_tokens;
2200                let max = self.config.max_context_tokens;
2201                let threshold = self.config.auto_compact_threshold;
2202
2203                if crate::session::compaction::should_auto_compact(used, max, threshold) {
2204                    let before_len = messages.len();
2205                    let percent_before = used as f32 / max as f32;
2206
2207                    tracing::info!(
2208                        used_tokens = used,
2209                        max_tokens = max,
2210                        percent = percent_before,
2211                        threshold = threshold,
2212                        "Auto-compact triggered"
2213                    );
2214
2215                    // Step 1: Prune large tool outputs first (cheap, no LLM call)
2216                    if let Some(pruned) = crate::session::compaction::prune_tool_outputs(&messages)
2217                    {
2218                        messages = pruned;
2219                        tracing::info!("Tool output pruning applied");
2220                    }
2221
2222                    // Step 2: Full summarization using the agent's LLM client
2223                    if let Ok(Some(compacted)) = crate::session::compaction::compact_messages(
2224                        session_id.unwrap_or(""),
2225                        &messages,
2226                        &self.llm_client,
2227                    )
2228                    .await
2229                    {
2230                        messages = compacted;
2231                    }
2232
2233                    // Emit compaction event
2234                    if let Some(tx) = &event_tx {
2235                        tx.send(AgentEvent::ContextCompacted {
2236                            session_id: session_id.unwrap_or("").to_string(),
2237                            before_messages: before_len,
2238                            after_messages: messages.len(),
2239                            percent_before,
2240                        })
2241                        .await
2242                        .ok();
2243                    }
2244                }
2245            }
2246
2247            if tool_calls.is_empty() {
2248                // No tool calls — check if we should inject a continuation message
2249                // before treating this as a final answer.
2250                let final_text = response.text();
2251
2252                if self.config.continuation_enabled
2253                    && continuation_count < self.config.max_continuation_turns
2254                    && turn < self.config.max_tool_rounds  // never inject past the turn limit
2255                    && Self::looks_incomplete(&final_text)
2256                {
2257                    continuation_count += 1;
2258                    tracing::info!(
2259                        turn = turn,
2260                        continuation = continuation_count,
2261                        max_continuation = self.config.max_continuation_turns,
2262                        "Injecting continuation message — response looks incomplete"
2263                    );
2264                    // Inject continuation as a user message and keep looping
2265                    messages.push(Message::user(crate::prompts::CONTINUATION));
2266                    continue;
2267                }
2268
2269                // Sanitize output to redact any sensitive data before returning
2270                let final_text = if let Some(ref sp) = self.config.security_provider {
2271                    sp.sanitize_output(&final_text)
2272                } else {
2273                    final_text
2274                };
2275
2276                // Record final totals
2277                tracing::info!(
2278                    tool_calls_count = tool_calls_count,
2279                    total_prompt_tokens = total_usage.prompt_tokens,
2280                    total_completion_tokens = total_usage.completion_tokens,
2281                    total_tokens = total_usage.total_tokens,
2282                    turns = turn,
2283                    "Agent execution completed"
2284                );
2285
2286                if emit_end {
2287                    if let Some(tx) = &event_tx {
2288                        tx.send(AgentEvent::End {
2289                            text: final_text.clone(),
2290                            usage: total_usage.clone(),
2291                            meta: response.meta.clone(),
2292                        })
2293                        .await
2294                        .ok();
2295                    }
2296                }
2297
2298                // Notify context providers of turn completion for memory extraction
2299                if let Some(sid) = session_id {
2300                    self.notify_turn_complete(sid, effective_prompt, &final_text)
2301                        .await;
2302                }
2303
2304                return Ok(AgentResult {
2305                    text: final_text,
2306                    messages,
2307                    usage: total_usage,
2308                    tool_calls_count,
2309                });
2310            }
2311
2312            // Execute tools sequentially
2313            // Fast path: when all tool calls are independent file writes and no hooks/HITL
2314            // are configured, execute them concurrently to avoid serial I/O bottleneck.
2315            let tool_calls = if self.config.hook_engine.is_none()
2316                && self.config.confirmation_manager.is_none()
2317                && tool_calls.len() > 1
2318                && tool_calls
2319                    .iter()
2320                    .all(|tc| Self::is_parallel_safe_write(&tc.name, &tc.args))
2321                && {
2322                    // All target paths must be distinct (no write-write conflicts)
2323                    let paths: Vec<_> = tool_calls
2324                        .iter()
2325                        .filter_map(|tc| Self::extract_write_path(&tc.args))
2326                        .collect();
2327                    paths.len() == tool_calls.len()
2328                        && paths.iter().collect::<std::collections::HashSet<_>>().len()
2329                            == paths.len()
2330                } {
2331                tracing::info!(
2332                    count = tool_calls.len(),
2333                    "Parallel write batch: executing {} independent file writes concurrently",
2334                    tool_calls.len()
2335                );
2336
2337                let futures: Vec<_> = tool_calls
2338                    .iter()
2339                    .map(|tc| {
2340                        let ctx = self.tool_context.clone();
2341                        let executor = Arc::clone(&self.tool_executor);
2342                        let name = tc.name.clone();
2343                        let args = tc.args.clone();
2344                        async move { executor.execute_with_context(&name, &args, &ctx).await }
2345                    })
2346                    .collect();
2347
2348                let results = join_all(futures).await;
2349
2350                // Post-process results in original order (sequential, preserves message ordering)
2351                for (tc, result) in tool_calls.iter().zip(results) {
2352                    tool_calls_count += 1;
2353                    let (output, exit_code, is_error, metadata, images) =
2354                        Self::tool_result_to_tuple(result);
2355
2356                    // Track tool call in progress tracker
2357                    self.track_tool_result(&tc.name, &tc.args, exit_code);
2358
2359                    let output = if let Some(ref sp) = self.config.security_provider {
2360                        sp.sanitize_output(&output)
2361                    } else {
2362                        output
2363                    };
2364
2365                    if let Some(tx) = &event_tx {
2366                        tx.send(AgentEvent::ToolEnd {
2367                            id: tc.id.clone(),
2368                            name: tc.name.clone(),
2369                            output: output.clone(),
2370                            exit_code,
2371                            metadata,
2372                        })
2373                        .await
2374                        .ok();
2375                    }
2376
2377                    if images.is_empty() {
2378                        messages.push(Message::tool_result(&tc.id, &output, is_error));
2379                    } else {
2380                        messages.push(Message::tool_result_with_images(
2381                            &tc.id, &output, &images, is_error,
2382                        ));
2383                    }
2384                }
2385
2386                // Skip the sequential loop below
2387                continue;
2388            } else {
2389                tool_calls
2390            };
2391
2392            for tool_call in tool_calls {
2393                tool_calls_count += 1;
2394
2395                let tool_start = std::time::Instant::now();
2396
2397                tracing::info!(
2398                    tool_name = tool_call.name.as_str(),
2399                    tool_id = tool_call.id.as_str(),
2400                    "Tool execution started"
2401                );
2402
2403                // Send tool start event (only if not already sent during streaming)
2404                // In streaming mode, ToolStart is sent when we receive ToolUseStart from LLM
2405                // But we still need to send ToolEnd after execution
2406
2407                // Check for malformed tool arguments from LLM (4.1 parse error recovery)
2408                if let Some(parse_error) =
2409                    tool_call.args.get("__parse_error").and_then(|v| v.as_str())
2410                {
2411                    parse_error_count += 1;
2412                    let error_msg = format!("Error: {}", parse_error);
2413                    tracing::warn!(
2414                        tool = tool_call.name.as_str(),
2415                        parse_error_count = parse_error_count,
2416                        max_parse_retries = self.config.max_parse_retries,
2417                        "Malformed tool arguments from LLM"
2418                    );
2419
2420                    if let Some(tx) = &event_tx {
2421                        tx.send(AgentEvent::ToolEnd {
2422                            id: tool_call.id.clone(),
2423                            name: tool_call.name.clone(),
2424                            output: error_msg.clone(),
2425                            exit_code: 1,
2426                            metadata: None,
2427                        })
2428                        .await
2429                        .ok();
2430                    }
2431
2432                    messages.push(Message::tool_result(&tool_call.id, &error_msg, true));
2433
2434                    if parse_error_count > self.config.max_parse_retries {
2435                        let msg = format!(
2436                            "LLM produced malformed tool arguments {} time(s) in a row \
2437                             (max_parse_retries={}); giving up",
2438                            parse_error_count, self.config.max_parse_retries
2439                        );
2440                        tracing::error!("{}", msg);
2441                        if let Some(tx) = &event_tx {
2442                            tx.send(AgentEvent::Error {
2443                                message: msg.clone(),
2444                            })
2445                            .await
2446                            .ok();
2447                        }
2448                        anyhow::bail!(msg);
2449                    }
2450                    continue;
2451                }
2452
2453                // Tool args are valid — reset parse error counter
2454                parse_error_count = 0;
2455
2456                // Check skill-based tool permissions
2457                if let Some(ref registry) = self.config.skill_registry {
2458                    let instruction_skills =
2459                        registry.by_kind(crate::skills::SkillKind::Instruction);
2460                    let has_restrictions =
2461                        instruction_skills.iter().any(|s| s.allowed_tools.is_some());
2462                    if has_restrictions {
2463                        let allowed = instruction_skills
2464                            .iter()
2465                            .any(|s| s.is_tool_allowed(&tool_call.name));
2466                        if !allowed {
2467                            let msg = format!(
2468                                "Tool '{}' is not allowed by any active skill.",
2469                                tool_call.name
2470                            );
2471                            tracing::info!(
2472                                tool_name = tool_call.name.as_str(),
2473                                "Tool blocked by skill registry"
2474                            );
2475                            if let Some(tx) = &event_tx {
2476                                tx.send(AgentEvent::PermissionDenied {
2477                                    tool_id: tool_call.id.clone(),
2478                                    tool_name: tool_call.name.clone(),
2479                                    args: tool_call.args.clone(),
2480                                    reason: msg.clone(),
2481                                })
2482                                .await
2483                                .ok();
2484                            }
2485                            messages.push(Message::tool_result(&tool_call.id, &msg, true));
2486                            continue;
2487                        }
2488                    }
2489                }
2490
2491                // Fire PreToolUse hook (may block the tool call)
2492                if let Some(HookResult::Block(reason)) = self
2493                    .fire_pre_tool_use(
2494                        session_id.unwrap_or(""),
2495                        &tool_call.name,
2496                        &tool_call.args,
2497                        recent_tool_signatures.clone(),
2498                    )
2499                    .await
2500                {
2501                    let msg = format!("Tool '{}' blocked by hook: {}", tool_call.name, reason);
2502                    tracing::info!(
2503                        tool_name = tool_call.name.as_str(),
2504                        "Tool blocked by PreToolUse hook"
2505                    );
2506
2507                    if let Some(tx) = &event_tx {
2508                        tx.send(AgentEvent::PermissionDenied {
2509                            tool_id: tool_call.id.clone(),
2510                            tool_name: tool_call.name.clone(),
2511                            args: tool_call.args.clone(),
2512                            reason: reason.clone(),
2513                        })
2514                        .await
2515                        .ok();
2516                    }
2517
2518                    messages.push(Message::tool_result(&tool_call.id, &msg, true));
2519                    continue;
2520                }
2521
2522                // Check permission before executing tool
2523                let permission_decision = if let Some(checker) = &self.config.permission_checker {
2524                    checker.check(&tool_call.name, &tool_call.args)
2525                } else {
2526                    // No policy configured — default to Ask so HITL can still intervene
2527                    PermissionDecision::Ask
2528                };
2529
2530                let (output, exit_code, is_error, metadata, images) = match permission_decision {
2531                    PermissionDecision::Deny => {
2532                        tracing::info!(
2533                            tool_name = tool_call.name.as_str(),
2534                            permission = "deny",
2535                            "Tool permission denied"
2536                        );
2537                        // Tool execution denied by permission policy
2538                        let denial_msg = format!(
2539                            "Permission denied: Tool '{}' is blocked by permission policy.",
2540                            tool_call.name
2541                        );
2542
2543                        // Send permission denied event
2544                        if let Some(tx) = &event_tx {
2545                            tx.send(AgentEvent::PermissionDenied {
2546                                tool_id: tool_call.id.clone(),
2547                                tool_name: tool_call.name.clone(),
2548                                args: tool_call.args.clone(),
2549                                reason: "Blocked by deny rule in permission policy".to_string(),
2550                            })
2551                            .await
2552                            .ok();
2553                        }
2554
2555                        (denial_msg, 1, true, None, Vec::new())
2556                    }
2557                    PermissionDecision::Allow => {
2558                        tracing::info!(
2559                            tool_name = tool_call.name.as_str(),
2560                            permission = "allow",
2561                            "Tool permission: allow"
2562                        );
2563                        // Permission explicitly allows — execute directly, no HITL
2564                        let stream_ctx =
2565                            self.streaming_tool_context(&event_tx, &tool_call.id, &tool_call.name);
2566                        let result = self
2567                            .execute_tool_queued_or_direct(
2568                                &tool_call.name,
2569                                &tool_call.args,
2570                                &stream_ctx,
2571                            )
2572                            .await;
2573
2574                        let tuple = Self::tool_result_to_tuple(result);
2575                        // Track tool call in progress tracker
2576                        let (_, exit_code, _, _, _) = tuple;
2577                        self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
2578                        tuple
2579                    }
2580                    PermissionDecision::Ask => {
2581                        tracing::info!(
2582                            tool_name = tool_call.name.as_str(),
2583                            permission = "ask",
2584                            "Tool permission: ask"
2585                        );
2586                        // Permission says Ask — delegate to HITL confirmation manager
2587                        if let Some(cm) = &self.config.confirmation_manager {
2588                            // Check YOLO lanes: if the tool's lane is in YOLO mode, skip confirmation
2589                            if !cm.requires_confirmation(&tool_call.name).await {
2590                                let stream_ctx = self.streaming_tool_context(
2591                                    &event_tx,
2592                                    &tool_call.id,
2593                                    &tool_call.name,
2594                                );
2595                                let result = self
2596                                    .execute_tool_queued_or_direct(
2597                                        &tool_call.name,
2598                                        &tool_call.args,
2599                                        &stream_ctx,
2600                                    )
2601                                    .await;
2602
2603                                let (output, exit_code, is_error, metadata, images) =
2604                                    Self::tool_result_to_tuple(result);
2605
2606                                // Track tool call in progress tracker
2607                                self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
2608
2609                                // Add tool result to messages
2610                                if images.is_empty() {
2611                                    messages.push(Message::tool_result(
2612                                        &tool_call.id,
2613                                        &output,
2614                                        is_error,
2615                                    ));
2616                                } else {
2617                                    messages.push(Message::tool_result_with_images(
2618                                        &tool_call.id,
2619                                        &output,
2620                                        &images,
2621                                        is_error,
2622                                    ));
2623                                }
2624
2625                                // Record tool result on the tool span for early exit
2626                                let tool_duration = tool_start.elapsed();
2627                                crate::telemetry::record_tool_result(exit_code, tool_duration);
2628
2629                                // Send ToolEnd event
2630                                if let Some(tx) = &event_tx {
2631                                    tx.send(AgentEvent::ToolEnd {
2632                                        id: tool_call.id.clone(),
2633                                        name: tool_call.name.clone(),
2634                                        output: output.clone(),
2635                                        exit_code,
2636                                        metadata,
2637                                    })
2638                                    .await
2639                                    .ok();
2640                                }
2641
2642                                // Fire PostToolUse hook (fire-and-forget)
2643                                self.fire_post_tool_use(
2644                                    session_id.unwrap_or(""),
2645                                    &tool_call.name,
2646                                    &tool_call.args,
2647                                    &output,
2648                                    exit_code == 0,
2649                                    tool_duration.as_millis() as u64,
2650                                )
2651                                .await;
2652
2653                                continue; // Skip the rest, move to next tool call
2654                            }
2655
2656                            // Get timeout from policy
2657                            let policy = cm.policy().await;
2658                            let timeout_ms = policy.default_timeout_ms;
2659                            let timeout_action = policy.timeout_action;
2660
2661                            // Request confirmation (this emits ConfirmationRequired event)
2662                            let rx = cm
2663                                .request_confirmation(
2664                                    &tool_call.id,
2665                                    &tool_call.name,
2666                                    &tool_call.args,
2667                                )
2668                                .await;
2669
2670                            // Forward ConfirmationRequired to the streaming event channel
2671                            // so external consumers (e.g. SafeClaw engine) can relay it
2672                            // to the browser UI.
2673                            if let Some(tx) = &event_tx {
2674                                tx.send(AgentEvent::ConfirmationRequired {
2675                                    tool_id: tool_call.id.clone(),
2676                                    tool_name: tool_call.name.clone(),
2677                                    args: tool_call.args.clone(),
2678                                    timeout_ms,
2679                                })
2680                                .await
2681                                .ok();
2682                            }
2683
2684                            // Wait for confirmation with timeout
2685                            let confirmation_result =
2686                                tokio::time::timeout(Duration::from_millis(timeout_ms), rx).await;
2687
2688                            match confirmation_result {
2689                                Ok(Ok(response)) => {
2690                                    // Forward ConfirmationReceived
2691                                    if let Some(tx) = &event_tx {
2692                                        tx.send(AgentEvent::ConfirmationReceived {
2693                                            tool_id: tool_call.id.clone(),
2694                                            approved: response.approved,
2695                                            reason: response.reason.clone(),
2696                                        })
2697                                        .await
2698                                        .ok();
2699                                    }
2700                                    if response.approved {
2701                                        let stream_ctx = self.streaming_tool_context(
2702                                            &event_tx,
2703                                            &tool_call.id,
2704                                            &tool_call.name,
2705                                        );
2706                                        let result = self
2707                                            .execute_tool_queued_or_direct(
2708                                                &tool_call.name,
2709                                                &tool_call.args,
2710                                                &stream_ctx,
2711                                            )
2712                                            .await;
2713
2714                                        let tuple = Self::tool_result_to_tuple(result);
2715                                        // Track tool call in progress tracker
2716                                        let (_, exit_code, _, _, _) = tuple;
2717                                        self.track_tool_result(
2718                                            &tool_call.name,
2719                                            &tool_call.args,
2720                                            exit_code,
2721                                        );
2722                                        tuple
2723                                    } else {
2724                                        let rejection_msg = format!(
2725                                            "Tool '{}' execution was REJECTED by the user. Reason: {}. \
2726                                             DO NOT retry this tool call unless the user explicitly asks you to.",
2727                                            tool_call.name,
2728                                            response.reason.unwrap_or_else(|| "No reason provided".to_string())
2729                                        );
2730                                        (rejection_msg, 1, true, None, Vec::new())
2731                                    }
2732                                }
2733                                Ok(Err(_)) => {
2734                                    // Forward ConfirmationTimeout (channel closed = effectively timed out)
2735                                    if let Some(tx) = &event_tx {
2736                                        tx.send(AgentEvent::ConfirmationTimeout {
2737                                            tool_id: tool_call.id.clone(),
2738                                            action_taken: "rejected".to_string(),
2739                                        })
2740                                        .await
2741                                        .ok();
2742                                    }
2743                                    let msg = format!(
2744                                        "Tool '{}' confirmation failed: confirmation channel closed",
2745                                        tool_call.name
2746                                    );
2747                                    (msg, 1, true, None, Vec::new())
2748                                }
2749                                Err(_) => {
2750                                    cm.check_timeouts().await;
2751
2752                                    // Forward ConfirmationTimeout
2753                                    if let Some(tx) = &event_tx {
2754                                        tx.send(AgentEvent::ConfirmationTimeout {
2755                                            tool_id: tool_call.id.clone(),
2756                                            action_taken: match timeout_action {
2757                                                crate::hitl::TimeoutAction::Reject => {
2758                                                    "rejected".to_string()
2759                                                }
2760                                                crate::hitl::TimeoutAction::AutoApprove => {
2761                                                    "auto_approved".to_string()
2762                                                }
2763                                            },
2764                                        })
2765                                        .await
2766                                        .ok();
2767                                    }
2768
2769                                    match timeout_action {
2770                                        crate::hitl::TimeoutAction::Reject => {
2771                                            let msg = format!(
2772                                                "Tool '{}' execution was REJECTED: user confirmation timed out after {}ms. \
2773                                                 DO NOT retry this tool call — the user did not approve it. \
2774                                                 Inform the user that the operation requires their approval and ask them to try again.",
2775                                                tool_call.name, timeout_ms
2776                                            );
2777                                            (msg, 1, true, None, Vec::new())
2778                                        }
2779                                        crate::hitl::TimeoutAction::AutoApprove => {
2780                                            let stream_ctx = self.streaming_tool_context(
2781                                                &event_tx,
2782                                                &tool_call.id,
2783                                                &tool_call.name,
2784                                            );
2785                                            let result = self
2786                                                .execute_tool_queued_or_direct(
2787                                                    &tool_call.name,
2788                                                    &tool_call.args,
2789                                                    &stream_ctx,
2790                                                )
2791                                                .await;
2792
2793                                            let tuple = Self::tool_result_to_tuple(result);
2794                                            // Track tool call in progress tracker
2795                                            let (_, exit_code, _, _, _) = tuple;
2796                                            self.track_tool_result(
2797                                                &tool_call.name,
2798                                                &tool_call.args,
2799                                                exit_code,
2800                                            );
2801                                            tuple
2802                                        }
2803                                    }
2804                                }
2805                            }
2806                        } else {
2807                            // Ask without confirmation manager — safe deny
2808                            let msg = format!(
2809                                "Tool '{}' requires confirmation but no HITL confirmation manager is configured. \
2810                                 Configure a confirmation policy to enable tool execution.",
2811                                tool_call.name
2812                            );
2813                            tracing::warn!(
2814                                tool_name = tool_call.name.as_str(),
2815                                "Tool requires confirmation but no HITL manager configured"
2816                            );
2817                            (msg, 1, true, None, Vec::new())
2818                        }
2819                    }
2820                };
2821
2822                let tool_duration = tool_start.elapsed();
2823                crate::telemetry::record_tool_result(exit_code, tool_duration);
2824
2825                // Sanitize tool output for sensitive data before it enters the message history
2826                let output = if let Some(ref sp) = self.config.security_provider {
2827                    sp.sanitize_output(&output)
2828                } else {
2829                    output
2830                };
2831
2832                recent_tool_signatures.push(format!(
2833                    "{}:{} => {}",
2834                    tool_call.name,
2835                    serde_json::to_string(&tool_call.args).unwrap_or_default(),
2836                    if is_error { "error" } else { "ok" }
2837                ));
2838                if recent_tool_signatures.len() > 8 {
2839                    let overflow = recent_tool_signatures.len() - 8;
2840                    recent_tool_signatures.drain(0..overflow);
2841                }
2842
2843                // Fire PostToolUse hook (fire-and-forget)
2844                self.fire_post_tool_use(
2845                    session_id.unwrap_or(""),
2846                    &tool_call.name,
2847                    &tool_call.args,
2848                    &output,
2849                    exit_code == 0,
2850                    tool_duration.as_millis() as u64,
2851                )
2852                .await;
2853
2854                // Auto-remember tool result in long-term memory
2855                if let Some(ref memory) = self.config.memory {
2856                    let tools_used = [tool_call.name.clone()];
2857                    let remember_result = if exit_code == 0 {
2858                        memory
2859                            .remember_success(effective_prompt, &tools_used, &output)
2860                            .await
2861                    } else {
2862                        memory
2863                            .remember_failure(effective_prompt, &output, &tools_used)
2864                            .await
2865                    };
2866                    match remember_result {
2867                        Ok(()) => {
2868                            if let Some(tx) = &event_tx {
2869                                let item_type = if exit_code == 0 { "success" } else { "failure" };
2870                                tx.send(AgentEvent::MemoryStored {
2871                                    memory_id: uuid::Uuid::new_v4().to_string(),
2872                                    memory_type: item_type.to_string(),
2873                                    importance: if exit_code == 0 { 0.8 } else { 0.9 },
2874                                    tags: vec![item_type.to_string(), tool_call.name.clone()],
2875                                })
2876                                .await
2877                                .ok();
2878                            }
2879                        }
2880                        Err(e) => {
2881                            tracing::warn!("Failed to store memory after tool execution: {}", e);
2882                        }
2883                    }
2884                }
2885
2886                // Send tool end event
2887                if let Some(tx) = &event_tx {
2888                    tx.send(AgentEvent::ToolEnd {
2889                        id: tool_call.id.clone(),
2890                        name: tool_call.name.clone(),
2891                        output: output.clone(),
2892                        exit_code,
2893                        metadata,
2894                    })
2895                    .await
2896                    .ok();
2897                }
2898
2899                // Add tool result to messages
2900                if images.is_empty() {
2901                    messages.push(Message::tool_result(&tool_call.id, &output, is_error));
2902                } else {
2903                    messages.push(Message::tool_result_with_images(
2904                        &tool_call.id,
2905                        &output,
2906                        &images,
2907                        is_error,
2908                    ));
2909                }
2910            }
2911        }
2912    }
2913
2914    /// Execute with streaming events
2915    pub async fn execute_streaming(
2916        &self,
2917        history: &[Message],
2918        prompt: &str,
2919    ) -> Result<(
2920        mpsc::Receiver<AgentEvent>,
2921        tokio::task::JoinHandle<Result<AgentResult>>,
2922        tokio_util::sync::CancellationToken,
2923    )> {
2924        let (tx, rx) = mpsc::channel(100);
2925        let cancel_token = tokio_util::sync::CancellationToken::new();
2926
2927        let llm_client = self.llm_client.clone();
2928        let tool_executor = self.tool_executor.clone();
2929        let tool_context = self.tool_context.clone();
2930        let config = self.config.clone();
2931        let tool_metrics = self.tool_metrics.clone();
2932        let command_queue = self.command_queue.clone();
2933        let history = history.to_vec();
2934        let prompt = prompt.to_string();
2935        let token_clone = cancel_token.clone();
2936
2937        let handle = tokio::spawn(async move {
2938            let mut agent = AgentLoop::new(llm_client, tool_executor, tool_context, config);
2939            if let Some(metrics) = tool_metrics {
2940                agent = agent.with_tool_metrics(metrics);
2941            }
2942            if let Some(queue) = command_queue {
2943                agent = agent.with_queue(queue);
2944            }
2945            agent
2946                .execute_with_session(&history, &prompt, None, Some(tx), Some(&token_clone))
2947                .await
2948        });
2949
2950        Ok((rx, handle, cancel_token))
2951    }
2952
2953    /// Create an execution plan for a prompt
2954    ///
2955    /// Delegates to [`LlmPlanner`] for structured JSON plan generation,
2956    /// falling back to heuristic planning if the LLM call fails.
2957    pub async fn plan(&self, prompt: &str, _context: Option<&str>) -> Result<ExecutionPlan> {
2958        use crate::planning::LlmPlanner;
2959
2960        match LlmPlanner::create_plan(&self.llm_client, prompt).await {
2961            Ok(plan) => Ok(plan),
2962            Err(e) => {
2963                tracing::warn!("LLM plan creation failed, using fallback: {}", e);
2964                Ok(LlmPlanner::fallback_plan(prompt))
2965            }
2966        }
2967    }
2968
2969    /// Execute with planning phase
2970    pub async fn execute_with_planning(
2971        &self,
2972        history: &[Message],
2973        prompt: &str,
2974        event_tx: Option<mpsc::Sender<AgentEvent>>,
2975    ) -> Result<AgentResult> {
2976        // Send planning start event
2977        if let Some(tx) = &event_tx {
2978            tx.send(AgentEvent::PlanningStart {
2979                prompt: prompt.to_string(),
2980            })
2981            .await
2982            .ok();
2983        }
2984
2985        // Extract goal when goal_tracking is enabled
2986        let goal = if self.config.goal_tracking {
2987            let g = self.extract_goal(prompt).await?;
2988            if let Some(tx) = &event_tx {
2989                tx.send(AgentEvent::GoalExtracted { goal: g.clone() })
2990                    .await
2991                    .ok();
2992            }
2993            Some(g)
2994        } else {
2995            None
2996        };
2997
2998        // Create execution plan
2999        let plan = self.plan(prompt, None).await?;
3000
3001        // Send planning end event
3002        if let Some(tx) = &event_tx {
3003            tx.send(AgentEvent::PlanningEnd {
3004                estimated_steps: plan.steps.len(),
3005                plan: plan.clone(),
3006            })
3007            .await
3008            .ok();
3009        }
3010
3011        let plan_start = std::time::Instant::now();
3012
3013        // Execute the plan step by step
3014        let result = self.execute_plan(history, &plan, event_tx.clone()).await?;
3015
3016        // Emit the final End event (execute_loop_inner does not emit End in planning mode)
3017        if let Some(tx) = &event_tx {
3018            tx.send(AgentEvent::End {
3019                text: result.text.clone(),
3020                usage: result.usage.clone(),
3021                meta: None,
3022            })
3023            .await
3024            .ok();
3025        }
3026
3027        // Check goal achievement when goal_tracking is enabled
3028        if self.config.goal_tracking {
3029            if let Some(ref g) = goal {
3030                let achieved = self.check_goal_achievement(g, &result.text).await?;
3031                if achieved {
3032                    if let Some(tx) = &event_tx {
3033                        tx.send(AgentEvent::GoalAchieved {
3034                            goal: g.description.clone(),
3035                            total_steps: result.messages.len(),
3036                            duration_ms: plan_start.elapsed().as_millis() as i64,
3037                        })
3038                        .await
3039                        .ok();
3040                    }
3041                }
3042            }
3043        }
3044
3045        Ok(result)
3046    }
3047
3048    /// Execute an execution plan using wave-based dependency-aware scheduling.
3049    ///
3050    /// Steps with no unmet dependencies are grouped into "waves". A wave with
3051    /// a single step executes sequentially (preserving the history chain). A
3052    /// wave with multiple independent steps executes them in parallel via
3053    /// `JoinSet`, then merges their results back into the shared history.
3054    async fn execute_plan(
3055        &self,
3056        history: &[Message],
3057        plan: &ExecutionPlan,
3058        event_tx: Option<mpsc::Sender<AgentEvent>>,
3059    ) -> Result<AgentResult> {
3060        let mut plan = plan.clone();
3061        let mut current_history = history.to_vec();
3062        let mut total_usage = TokenUsage::default();
3063        let mut tool_calls_count = 0;
3064        let total_steps = plan.steps.len();
3065
3066        // Add initial user message with the goal
3067        let steps_text = plan
3068            .steps
3069            .iter()
3070            .enumerate()
3071            .map(|(i, step)| format!("{}. {}", i + 1, step.content))
3072            .collect::<Vec<_>>()
3073            .join("\n");
3074        current_history.push(Message::user(&crate::prompts::render(
3075            crate::prompts::PLAN_EXECUTE_GOAL,
3076            &[("goal", &plan.goal), ("steps", &steps_text)],
3077        )));
3078
3079        loop {
3080            let ready: Vec<String> = plan
3081                .get_ready_steps()
3082                .iter()
3083                .map(|s| s.id.clone())
3084                .collect();
3085
3086            if ready.is_empty() {
3087                // All done or deadlock
3088                if plan.has_deadlock() {
3089                    tracing::warn!(
3090                        "Plan deadlock detected: {} pending steps with unresolvable dependencies",
3091                        plan.pending_count()
3092                    );
3093                }
3094                break;
3095            }
3096
3097            if ready.len() == 1 {
3098                // === Single step: sequential execution (preserves history chain) ===
3099                let step_id = &ready[0];
3100                let step = plan
3101                    .steps
3102                    .iter()
3103                    .find(|s| s.id == *step_id)
3104                    .ok_or_else(|| anyhow::anyhow!("step '{}' not found in plan", step_id))?
3105                    .clone();
3106                let step_number = plan
3107                    .steps
3108                    .iter()
3109                    .position(|s| s.id == *step_id)
3110                    .unwrap_or(0)
3111                    + 1;
3112
3113                // Send step start event
3114                if let Some(tx) = &event_tx {
3115                    tx.send(AgentEvent::StepStart {
3116                        step_id: step.id.clone(),
3117                        description: step.content.clone(),
3118                        step_number,
3119                        total_steps,
3120                    })
3121                    .await
3122                    .ok();
3123                }
3124
3125                plan.mark_status(&step.id, TaskStatus::InProgress);
3126
3127                let step_prompt = crate::prompts::render(
3128                    crate::prompts::PLAN_EXECUTE_STEP,
3129                    &[
3130                        ("step_num", &step_number.to_string()),
3131                        ("description", &step.content),
3132                    ],
3133                );
3134
3135                match self
3136                    .execute_loop(
3137                        &current_history,
3138                        &step_prompt,
3139                        None,
3140                        event_tx.clone(),
3141                        &tokio_util::sync::CancellationToken::new(),
3142                        false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
3143                    )
3144                    .await
3145                {
3146                    Ok(result) => {
3147                        current_history = result.messages.clone();
3148                        total_usage.prompt_tokens += result.usage.prompt_tokens;
3149                        total_usage.completion_tokens += result.usage.completion_tokens;
3150                        total_usage.total_tokens += result.usage.total_tokens;
3151                        tool_calls_count += result.tool_calls_count;
3152                        plan.mark_status(&step.id, TaskStatus::Completed);
3153
3154                        if let Some(tx) = &event_tx {
3155                            tx.send(AgentEvent::StepEnd {
3156                                step_id: step.id.clone(),
3157                                status: TaskStatus::Completed,
3158                                step_number,
3159                                total_steps,
3160                            })
3161                            .await
3162                            .ok();
3163                        }
3164                    }
3165                    Err(e) => {
3166                        tracing::error!("Plan step '{}' failed: {}", step.id, e);
3167                        plan.mark_status(&step.id, TaskStatus::Failed);
3168
3169                        if let Some(tx) = &event_tx {
3170                            tx.send(AgentEvent::StepEnd {
3171                                step_id: step.id.clone(),
3172                                status: TaskStatus::Failed,
3173                                step_number,
3174                                total_steps,
3175                            })
3176                            .await
3177                            .ok();
3178                        }
3179                    }
3180                }
3181            } else {
3182                // === Multiple steps: parallel execution via JoinSet ===
3183                // NOTE: Each parallel branch gets a clone of the base history.
3184                // Individual branch histories (tool calls, LLM turns) are NOT merged
3185                // back — only a summary message is appended. This is a deliberate
3186                // trade-off: merging divergent histories in a deterministic order is
3187                // complex and the summary approach keeps the context window manageable.
3188                let ready_steps: Vec<_> = ready
3189                    .iter()
3190                    .filter_map(|id| {
3191                        let step = plan.steps.iter().find(|s| s.id == *id)?.clone();
3192                        let step_number =
3193                            plan.steps.iter().position(|s| s.id == *id).unwrap_or(0) + 1;
3194                        Some((step, step_number))
3195                    })
3196                    .collect();
3197
3198                // Mark all as InProgress and emit StepStart events
3199                for (step, step_number) in &ready_steps {
3200                    plan.mark_status(&step.id, TaskStatus::InProgress);
3201                    if let Some(tx) = &event_tx {
3202                        tx.send(AgentEvent::StepStart {
3203                            step_id: step.id.clone(),
3204                            description: step.content.clone(),
3205                            step_number: *step_number,
3206                            total_steps,
3207                        })
3208                        .await
3209                        .ok();
3210                    }
3211                }
3212
3213                // Spawn all into JoinSet, each with a clone of the base history
3214                let mut join_set = tokio::task::JoinSet::new();
3215                for (step, step_number) in &ready_steps {
3216                    let base_history = current_history.clone();
3217                    let agent_clone = self.clone();
3218                    let tx = event_tx.clone();
3219                    let step_clone = step.clone();
3220                    let sn = *step_number;
3221
3222                    join_set.spawn(async move {
3223                        let prompt = crate::prompts::render(
3224                            crate::prompts::PLAN_EXECUTE_STEP,
3225                            &[
3226                                ("step_num", &sn.to_string()),
3227                                ("description", &step_clone.content),
3228                            ],
3229                        );
3230                        let result = agent_clone
3231                            .execute_loop(
3232                                &base_history,
3233                                &prompt,
3234                                None,
3235                                tx,
3236                                &tokio_util::sync::CancellationToken::new(),
3237                                false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
3238                            )
3239                            .await;
3240                        (step_clone.id, sn, result)
3241                    });
3242                }
3243
3244                // Collect results
3245                let mut parallel_summaries = Vec::new();
3246                while let Some(join_result) = join_set.join_next().await {
3247                    match join_result {
3248                        Ok((step_id, step_number, step_result)) => match step_result {
3249                            Ok(result) => {
3250                                total_usage.prompt_tokens += result.usage.prompt_tokens;
3251                                total_usage.completion_tokens += result.usage.completion_tokens;
3252                                total_usage.total_tokens += result.usage.total_tokens;
3253                                tool_calls_count += result.tool_calls_count;
3254                                plan.mark_status(&step_id, TaskStatus::Completed);
3255
3256                                // Collect the final assistant text for context merging
3257                                parallel_summaries.push(format!(
3258                                    "- Step {} ({}): {}",
3259                                    step_number, step_id, result.text
3260                                ));
3261
3262                                if let Some(tx) = &event_tx {
3263                                    tx.send(AgentEvent::StepEnd {
3264                                        step_id,
3265                                        status: TaskStatus::Completed,
3266                                        step_number,
3267                                        total_steps,
3268                                    })
3269                                    .await
3270                                    .ok();
3271                                }
3272                            }
3273                            Err(e) => {
3274                                tracing::error!("Plan step '{}' failed: {}", step_id, e);
3275                                plan.mark_status(&step_id, TaskStatus::Failed);
3276
3277                                if let Some(tx) = &event_tx {
3278                                    tx.send(AgentEvent::StepEnd {
3279                                        step_id,
3280                                        status: TaskStatus::Failed,
3281                                        step_number,
3282                                        total_steps,
3283                                    })
3284                                    .await
3285                                    .ok();
3286                                }
3287                            }
3288                        },
3289                        Err(e) => {
3290                            tracing::error!("JoinSet task panicked: {}", e);
3291                        }
3292                    }
3293                }
3294
3295                // Merge parallel results into history for subsequent steps
3296                if !parallel_summaries.is_empty() {
3297                    parallel_summaries.sort(); // Deterministic ordering
3298                    let results_text = parallel_summaries.join("\n");
3299                    current_history.push(Message::user(&crate::prompts::render(
3300                        crate::prompts::PLAN_PARALLEL_RESULTS,
3301                        &[("results", &results_text)],
3302                    )));
3303                }
3304            }
3305
3306            // Emit GoalProgress after each wave
3307            if self.config.goal_tracking {
3308                let completed = plan
3309                    .steps
3310                    .iter()
3311                    .filter(|s| s.status == TaskStatus::Completed)
3312                    .count();
3313                if let Some(tx) = &event_tx {
3314                    tx.send(AgentEvent::GoalProgress {
3315                        goal: plan.goal.clone(),
3316                        progress: plan.progress(),
3317                        completed_steps: completed,
3318                        total_steps,
3319                    })
3320                    .await
3321                    .ok();
3322                }
3323            }
3324        }
3325
3326        // Get final response
3327        let final_text = current_history
3328            .last()
3329            .map(|m| {
3330                m.content
3331                    .iter()
3332                    .filter_map(|block| {
3333                        if let crate::llm::ContentBlock::Text { text } = block {
3334                            Some(text.as_str())
3335                        } else {
3336                            None
3337                        }
3338                    })
3339                    .collect::<Vec<_>>()
3340                    .join("\n")
3341            })
3342            .unwrap_or_default();
3343
3344        Ok(AgentResult {
3345            text: final_text,
3346            messages: current_history,
3347            usage: total_usage,
3348            tool_calls_count,
3349        })
3350    }
3351
3352    /// Extract goal from prompt
3353    ///
3354    /// Delegates to [`LlmPlanner`] for structured JSON goal extraction,
3355    /// falling back to heuristic logic if the LLM call fails.
3356    pub async fn extract_goal(&self, prompt: &str) -> Result<AgentGoal> {
3357        use crate::planning::LlmPlanner;
3358
3359        match LlmPlanner::extract_goal(&self.llm_client, prompt).await {
3360            Ok(goal) => Ok(goal),
3361            Err(e) => {
3362                tracing::warn!("LLM goal extraction failed, using fallback: {}", e);
3363                Ok(LlmPlanner::fallback_goal(prompt))
3364            }
3365        }
3366    }
3367
3368    /// Check if goal is achieved
3369    ///
3370    /// Delegates to [`LlmPlanner`] for structured JSON achievement check,
3371    /// falling back to heuristic logic if the LLM call fails.
3372    pub async fn check_goal_achievement(
3373        &self,
3374        goal: &AgentGoal,
3375        current_state: &str,
3376    ) -> Result<bool> {
3377        use crate::planning::LlmPlanner;
3378
3379        match LlmPlanner::check_achievement(&self.llm_client, goal, current_state).await {
3380            Ok(result) => Ok(result.achieved),
3381            Err(e) => {
3382                tracing::warn!("LLM achievement check failed, using fallback: {}", e);
3383                let result = LlmPlanner::fallback_check_achievement(goal, current_state);
3384                Ok(result.achieved)
3385            }
3386        }
3387    }
3388}
3389
3390#[cfg(test)]
3391mod tests {
3392    use super::*;
3393    use crate::llm::{ContentBlock, StreamEvent};
3394    use crate::permissions::PermissionPolicy;
3395    use crate::tools::ToolExecutor;
3396    use std::path::PathBuf;
3397    use std::sync::atomic::{AtomicUsize, Ordering};
3398
3399    /// Create a default ToolContext for tests
3400    fn test_tool_context() -> ToolContext {
3401        ToolContext::new(PathBuf::from("/tmp"))
3402    }
3403
3404    #[test]
3405    fn test_agent_config_default() {
3406        let config = AgentConfig::default();
3407        assert!(config.prompt_slots.is_empty());
3408        assert!(config.tools.is_empty()); // Tools are provided externally
3409        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
3410        assert!(config.permission_checker.is_none());
3411        assert!(config.context_providers.is_empty());
3412        // Built-in skills are always present by default
3413        let registry = config
3414            .skill_registry
3415            .expect("skill_registry must be Some by default");
3416        assert!(registry.len() >= 7, "expected at least 7 built-in skills");
3417        assert!(registry.get("code-search").is_some());
3418        assert!(registry.get("find-bugs").is_some());
3419    }
3420
3421    // ========================================================================
3422    // Mock LLM Client for Testing
3423    // ========================================================================
3424
3425    /// Mock LLM client that returns predefined responses
3426    pub(crate) struct MockLlmClient {
3427        /// Responses to return (consumed in order)
3428        responses: std::sync::Mutex<Vec<LlmResponse>>,
3429        /// Number of calls made
3430        pub(crate) call_count: AtomicUsize,
3431    }
3432
3433    impl MockLlmClient {
3434        pub(crate) fn new(responses: Vec<LlmResponse>) -> Self {
3435            Self {
3436                responses: std::sync::Mutex::new(responses),
3437                call_count: AtomicUsize::new(0),
3438            }
3439        }
3440
3441        /// Create a response with text only (no tool calls)
3442        pub(crate) fn text_response(text: &str) -> LlmResponse {
3443            LlmResponse {
3444                message: Message {
3445                    role: "assistant".to_string(),
3446                    content: vec![ContentBlock::Text {
3447                        text: text.to_string(),
3448                    }],
3449                    reasoning_content: None,
3450                },
3451                usage: TokenUsage {
3452                    prompt_tokens: 10,
3453                    completion_tokens: 5,
3454                    total_tokens: 15,
3455                    cache_read_tokens: None,
3456                    cache_write_tokens: None,
3457                },
3458                stop_reason: Some("end_turn".to_string()),
3459                meta: None,
3460            }
3461        }
3462
3463        /// Create a response with a tool call
3464        pub(crate) fn tool_call_response(
3465            tool_id: &str,
3466            tool_name: &str,
3467            args: serde_json::Value,
3468        ) -> LlmResponse {
3469            LlmResponse {
3470                message: Message {
3471                    role: "assistant".to_string(),
3472                    content: vec![ContentBlock::ToolUse {
3473                        id: tool_id.to_string(),
3474                        name: tool_name.to_string(),
3475                        input: args,
3476                    }],
3477                    reasoning_content: None,
3478                },
3479                usage: TokenUsage {
3480                    prompt_tokens: 10,
3481                    completion_tokens: 5,
3482                    total_tokens: 15,
3483                    cache_read_tokens: None,
3484                    cache_write_tokens: None,
3485                },
3486                stop_reason: Some("tool_use".to_string()),
3487                meta: None,
3488            }
3489        }
3490    }
3491
3492    #[async_trait::async_trait]
3493    impl LlmClient for MockLlmClient {
3494        async fn complete(
3495            &self,
3496            _messages: &[Message],
3497            _system: Option<&str>,
3498            _tools: &[ToolDefinition],
3499        ) -> Result<LlmResponse> {
3500            self.call_count.fetch_add(1, Ordering::SeqCst);
3501            let mut responses = self.responses.lock().unwrap();
3502            if responses.is_empty() {
3503                anyhow::bail!("No more mock responses available");
3504            }
3505            Ok(responses.remove(0))
3506        }
3507
3508        async fn complete_streaming(
3509            &self,
3510            _messages: &[Message],
3511            _system: Option<&str>,
3512            _tools: &[ToolDefinition],
3513        ) -> Result<mpsc::Receiver<StreamEvent>> {
3514            self.call_count.fetch_add(1, Ordering::SeqCst);
3515            let mut responses = self.responses.lock().unwrap();
3516            if responses.is_empty() {
3517                anyhow::bail!("No more mock responses available");
3518            }
3519            let response = responses.remove(0);
3520
3521            let (tx, rx) = mpsc::channel(10);
3522            tokio::spawn(async move {
3523                // Send text deltas if any
3524                for block in &response.message.content {
3525                    if let ContentBlock::Text { text } = block {
3526                        tx.send(StreamEvent::TextDelta(text.clone())).await.ok();
3527                    }
3528                }
3529                tx.send(StreamEvent::Done(response)).await.ok();
3530            });
3531
3532            Ok(rx)
3533        }
3534    }
3535
3536    // ========================================================================
3537    // Agent Loop Tests
3538    // ========================================================================
3539
3540    #[tokio::test]
3541    async fn test_agent_simple_response() {
3542        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
3543            "Hello, I'm an AI assistant.",
3544        )]));
3545
3546        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3547        let config = AgentConfig::default();
3548
3549        let agent = AgentLoop::new(
3550            mock_client.clone(),
3551            tool_executor,
3552            test_tool_context(),
3553            config,
3554        );
3555        let result = agent.execute(&[], "Hello", None).await.unwrap();
3556
3557        assert_eq!(result.text, "Hello, I'm an AI assistant.");
3558        assert_eq!(result.tool_calls_count, 0);
3559        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
3560    }
3561
3562    #[tokio::test]
3563    async fn test_agent_with_tool_call() {
3564        let mock_client = Arc::new(MockLlmClient::new(vec![
3565            // First response: tool call
3566            MockLlmClient::tool_call_response(
3567                "tool-1",
3568                "bash",
3569                serde_json::json!({"command": "echo hello"}),
3570            ),
3571            // Second response: final text
3572            MockLlmClient::text_response("The command output was: hello"),
3573        ]));
3574
3575        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3576        let config = AgentConfig::default();
3577
3578        let agent = AgentLoop::new(
3579            mock_client.clone(),
3580            tool_executor,
3581            test_tool_context(),
3582            config,
3583        );
3584        let result = agent.execute(&[], "Run echo hello", None).await.unwrap();
3585
3586        assert_eq!(result.text, "The command output was: hello");
3587        assert_eq!(result.tool_calls_count, 1);
3588        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
3589    }
3590
3591    #[tokio::test]
3592    async fn test_agent_permission_deny() {
3593        let mock_client = Arc::new(MockLlmClient::new(vec![
3594            // First response: tool call that will be denied
3595            MockLlmClient::tool_call_response(
3596                "tool-1",
3597                "bash",
3598                serde_json::json!({"command": "rm -rf /tmp/test"}),
3599            ),
3600            // Second response: LLM responds to the denial
3601            MockLlmClient::text_response(
3602                "I cannot execute that command due to permission restrictions.",
3603            ),
3604        ]));
3605
3606        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3607
3608        // Create permission policy that denies rm commands
3609        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
3610
3611        let config = AgentConfig {
3612            permission_checker: Some(Arc::new(permission_policy)),
3613            ..Default::default()
3614        };
3615
3616        let (tx, mut rx) = mpsc::channel(100);
3617        let agent = AgentLoop::new(
3618            mock_client.clone(),
3619            tool_executor,
3620            test_tool_context(),
3621            config,
3622        );
3623        let result = agent.execute(&[], "Delete files", Some(tx)).await.unwrap();
3624
3625        // Check that we received a PermissionDenied event
3626        let mut found_permission_denied = false;
3627        while let Ok(event) = rx.try_recv() {
3628            if let AgentEvent::PermissionDenied { tool_name, .. } = event {
3629                assert_eq!(tool_name, "bash");
3630                found_permission_denied = true;
3631            }
3632        }
3633        assert!(
3634            found_permission_denied,
3635            "Should have received PermissionDenied event"
3636        );
3637
3638        assert_eq!(result.tool_calls_count, 1);
3639    }
3640
3641    #[tokio::test]
3642    async fn test_agent_permission_allow() {
3643        let mock_client = Arc::new(MockLlmClient::new(vec![
3644            // First response: tool call that will be allowed
3645            MockLlmClient::tool_call_response(
3646                "tool-1",
3647                "bash",
3648                serde_json::json!({"command": "echo hello"}),
3649            ),
3650            // Second response: final text
3651            MockLlmClient::text_response("Done!"),
3652        ]));
3653
3654        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3655
3656        // Create permission policy that allows echo commands
3657        let permission_policy = PermissionPolicy::new()
3658            .allow("bash(echo:*)")
3659            .deny("bash(rm:*)");
3660
3661        let config = AgentConfig {
3662            permission_checker: Some(Arc::new(permission_policy)),
3663            ..Default::default()
3664        };
3665
3666        let agent = AgentLoop::new(
3667            mock_client.clone(),
3668            tool_executor,
3669            test_tool_context(),
3670            config,
3671        );
3672        let result = agent.execute(&[], "Echo hello", None).await.unwrap();
3673
3674        assert_eq!(result.text, "Done!");
3675        assert_eq!(result.tool_calls_count, 1);
3676    }
3677
3678    #[tokio::test]
3679    async fn test_agent_streaming_events() {
3680        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
3681            "Hello!",
3682        )]));
3683
3684        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3685        let config = AgentConfig::default();
3686
3687        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3688        let (mut rx, handle, _cancel_token) = agent.execute_streaming(&[], "Hi").await.unwrap();
3689
3690        // Collect events
3691        let mut events = Vec::new();
3692        while let Some(event) = rx.recv().await {
3693            events.push(event);
3694        }
3695
3696        let result = handle.await.unwrap().unwrap();
3697        assert_eq!(result.text, "Hello!");
3698
3699        // Check we received Start and End events
3700        assert!(events.iter().any(|e| matches!(e, AgentEvent::Start { .. })));
3701        assert!(events.iter().any(|e| matches!(e, AgentEvent::End { .. })));
3702    }
3703
3704    #[tokio::test]
3705    async fn test_agent_max_tool_rounds() {
3706        // Create a mock that always returns tool calls (infinite loop)
3707        let responses: Vec<LlmResponse> = (0..100)
3708            .map(|i| {
3709                MockLlmClient::tool_call_response(
3710                    &format!("tool-{}", i),
3711                    "bash",
3712                    serde_json::json!({"command": "echo loop"}),
3713                )
3714            })
3715            .collect();
3716
3717        let mock_client = Arc::new(MockLlmClient::new(responses));
3718        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3719
3720        let config = AgentConfig {
3721            max_tool_rounds: 3,
3722            ..Default::default()
3723        };
3724
3725        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3726        let result = agent.execute(&[], "Loop forever", None).await;
3727
3728        // Should fail due to max tool rounds exceeded
3729        assert!(result.is_err());
3730        assert!(result.unwrap_err().to_string().contains("Max tool rounds"));
3731    }
3732
3733    #[tokio::test]
3734    async fn test_agent_no_permission_policy_defaults_to_ask() {
3735        // When no permission policy is set, tools default to Ask.
3736        // Without a confirmation manager, Ask = safe deny.
3737        let mock_client = Arc::new(MockLlmClient::new(vec![
3738            MockLlmClient::tool_call_response(
3739                "tool-1",
3740                "bash",
3741                serde_json::json!({"command": "rm -rf /tmp/test"}),
3742            ),
3743            MockLlmClient::text_response("Denied!"),
3744        ]));
3745
3746        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3747        let config = AgentConfig {
3748            permission_checker: None, // No policy → defaults to Ask
3749            // No confirmation_manager → safe deny
3750            ..Default::default()
3751        };
3752
3753        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3754        let result = agent.execute(&[], "Delete", None).await.unwrap();
3755
3756        // Should be denied (no policy + no CM = safe deny)
3757        assert_eq!(result.text, "Denied!");
3758        assert_eq!(result.tool_calls_count, 1);
3759    }
3760
3761    #[tokio::test]
3762    async fn test_agent_permission_ask_without_cm_denies() {
3763        // When permission is Ask and no confirmation manager configured,
3764        // tool execution should be denied (safe default).
3765        let mock_client = Arc::new(MockLlmClient::new(vec![
3766            MockLlmClient::tool_call_response(
3767                "tool-1",
3768                "bash",
3769                serde_json::json!({"command": "echo test"}),
3770            ),
3771            MockLlmClient::text_response("Denied!"),
3772        ]));
3773
3774        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3775
3776        // Create policy where bash falls through to Ask (default)
3777        let permission_policy = PermissionPolicy::new(); // Default decision is Ask
3778
3779        let config = AgentConfig {
3780            permission_checker: Some(Arc::new(permission_policy)),
3781            // No confirmation_manager — safe deny
3782            ..Default::default()
3783        };
3784
3785        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3786        let result = agent.execute(&[], "Echo", None).await.unwrap();
3787
3788        // Should deny (Ask without CM = safe deny)
3789        assert_eq!(result.text, "Denied!");
3790        // The tool result should contain the denial message
3791        assert!(result.tool_calls_count >= 1);
3792    }
3793
3794    // ========================================================================
3795    // HITL (Human-in-the-Loop) Tests
3796    // ========================================================================
3797
3798    #[tokio::test]
3799    async fn test_agent_hitl_approved() {
3800        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3801        use tokio::sync::broadcast;
3802
3803        let mock_client = Arc::new(MockLlmClient::new(vec![
3804            MockLlmClient::tool_call_response(
3805                "tool-1",
3806                "bash",
3807                serde_json::json!({"command": "echo hello"}),
3808            ),
3809            MockLlmClient::text_response("Command executed!"),
3810        ]));
3811
3812        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3813
3814        // Create HITL confirmation manager with policy enabled
3815        let (event_tx, _event_rx) = broadcast::channel(100);
3816        let hitl_policy = ConfirmationPolicy {
3817            enabled: true,
3818            ..Default::default()
3819        };
3820        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3821
3822        // Create permission policy that returns Ask for bash
3823        let permission_policy = PermissionPolicy::new(); // Default is Ask
3824
3825        let config = AgentConfig {
3826            permission_checker: Some(Arc::new(permission_policy)),
3827            confirmation_manager: Some(confirmation_manager.clone()),
3828            ..Default::default()
3829        };
3830
3831        // Spawn a task to approve the confirmation
3832        let cm_clone = confirmation_manager.clone();
3833        tokio::spawn(async move {
3834            // Wait a bit for the confirmation request to be created
3835            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
3836            // Approve it
3837            cm_clone.confirm("tool-1", true, None).await.ok();
3838        });
3839
3840        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3841        let result = agent.execute(&[], "Run echo", None).await.unwrap();
3842
3843        assert_eq!(result.text, "Command executed!");
3844        assert_eq!(result.tool_calls_count, 1);
3845    }
3846
3847    #[tokio::test]
3848    async fn test_agent_hitl_rejected() {
3849        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3850        use tokio::sync::broadcast;
3851
3852        let mock_client = Arc::new(MockLlmClient::new(vec![
3853            MockLlmClient::tool_call_response(
3854                "tool-1",
3855                "bash",
3856                serde_json::json!({"command": "rm -rf /"}),
3857            ),
3858            MockLlmClient::text_response("Understood, I won't do that."),
3859        ]));
3860
3861        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3862
3863        // Create HITL confirmation manager
3864        let (event_tx, _event_rx) = broadcast::channel(100);
3865        let hitl_policy = ConfirmationPolicy {
3866            enabled: true,
3867            ..Default::default()
3868        };
3869        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3870
3871        // Permission policy returns Ask
3872        let permission_policy = PermissionPolicy::new();
3873
3874        let config = AgentConfig {
3875            permission_checker: Some(Arc::new(permission_policy)),
3876            confirmation_manager: Some(confirmation_manager.clone()),
3877            ..Default::default()
3878        };
3879
3880        // Spawn a task to reject the confirmation
3881        let cm_clone = confirmation_manager.clone();
3882        tokio::spawn(async move {
3883            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
3884            cm_clone
3885                .confirm("tool-1", false, Some("Too dangerous".to_string()))
3886                .await
3887                .ok();
3888        });
3889
3890        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3891        let result = agent.execute(&[], "Delete everything", None).await.unwrap();
3892
3893        // LLM should respond to the rejection
3894        assert_eq!(result.text, "Understood, I won't do that.");
3895    }
3896
3897    #[tokio::test]
3898    async fn test_agent_hitl_timeout_reject() {
3899        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
3900        use tokio::sync::broadcast;
3901
3902        let mock_client = Arc::new(MockLlmClient::new(vec![
3903            MockLlmClient::tool_call_response(
3904                "tool-1",
3905                "bash",
3906                serde_json::json!({"command": "echo test"}),
3907            ),
3908            MockLlmClient::text_response("Timed out, I understand."),
3909        ]));
3910
3911        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3912
3913        // Create HITL with very short timeout and Reject action
3914        let (event_tx, _event_rx) = broadcast::channel(100);
3915        let hitl_policy = ConfirmationPolicy {
3916            enabled: true,
3917            default_timeout_ms: 50, // Very short timeout
3918            timeout_action: TimeoutAction::Reject,
3919            ..Default::default()
3920        };
3921        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3922
3923        let permission_policy = PermissionPolicy::new();
3924
3925        let config = AgentConfig {
3926            permission_checker: Some(Arc::new(permission_policy)),
3927            confirmation_manager: Some(confirmation_manager),
3928            ..Default::default()
3929        };
3930
3931        // Don't approve - let it timeout
3932        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3933        let result = agent.execute(&[], "Echo", None).await.unwrap();
3934
3935        // Should get timeout rejection response from LLM
3936        assert_eq!(result.text, "Timed out, I understand.");
3937    }
3938
3939    #[tokio::test]
3940    async fn test_agent_hitl_timeout_auto_approve() {
3941        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
3942        use tokio::sync::broadcast;
3943
3944        let mock_client = Arc::new(MockLlmClient::new(vec![
3945            MockLlmClient::tool_call_response(
3946                "tool-1",
3947                "bash",
3948                serde_json::json!({"command": "echo hello"}),
3949            ),
3950            MockLlmClient::text_response("Auto-approved and executed!"),
3951        ]));
3952
3953        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3954
3955        // Create HITL with very short timeout and AutoApprove action
3956        let (event_tx, _event_rx) = broadcast::channel(100);
3957        let hitl_policy = ConfirmationPolicy {
3958            enabled: true,
3959            default_timeout_ms: 50, // Very short timeout
3960            timeout_action: TimeoutAction::AutoApprove,
3961            ..Default::default()
3962        };
3963        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3964
3965        let permission_policy = PermissionPolicy::new();
3966
3967        let config = AgentConfig {
3968            permission_checker: Some(Arc::new(permission_policy)),
3969            confirmation_manager: Some(confirmation_manager),
3970            ..Default::default()
3971        };
3972
3973        // Don't approve - let it timeout and auto-approve
3974        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3975        let result = agent.execute(&[], "Echo", None).await.unwrap();
3976
3977        // Should auto-approve on timeout and execute
3978        assert_eq!(result.text, "Auto-approved and executed!");
3979        assert_eq!(result.tool_calls_count, 1);
3980    }
3981
3982    #[tokio::test]
3983    async fn test_agent_hitl_confirmation_events() {
3984        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3985        use tokio::sync::broadcast;
3986
3987        let mock_client = Arc::new(MockLlmClient::new(vec![
3988            MockLlmClient::tool_call_response(
3989                "tool-1",
3990                "bash",
3991                serde_json::json!({"command": "echo test"}),
3992            ),
3993            MockLlmClient::text_response("Done!"),
3994        ]));
3995
3996        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3997
3998        // Create HITL confirmation manager
3999        let (event_tx, mut event_rx) = broadcast::channel(100);
4000        let hitl_policy = ConfirmationPolicy {
4001            enabled: true,
4002            default_timeout_ms: 5000, // Long enough timeout
4003            ..Default::default()
4004        };
4005        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4006
4007        let permission_policy = PermissionPolicy::new();
4008
4009        let config = AgentConfig {
4010            permission_checker: Some(Arc::new(permission_policy)),
4011            confirmation_manager: Some(confirmation_manager.clone()),
4012            ..Default::default()
4013        };
4014
4015        // Spawn task to approve and collect events
4016        let cm_clone = confirmation_manager.clone();
4017        let event_handle = tokio::spawn(async move {
4018            let mut events = Vec::new();
4019            // Wait for ConfirmationRequired event
4020            while let Ok(event) = event_rx.recv().await {
4021                events.push(event.clone());
4022                if let AgentEvent::ConfirmationRequired { tool_id, .. } = event {
4023                    // Approve it
4024                    cm_clone.confirm(&tool_id, true, None).await.ok();
4025                    // Wait for ConfirmationReceived
4026                    if let Ok(recv_event) = event_rx.recv().await {
4027                        events.push(recv_event);
4028                    }
4029                    break;
4030                }
4031            }
4032            events
4033        });
4034
4035        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4036        let _result = agent.execute(&[], "Echo", None).await.unwrap();
4037
4038        // Check events
4039        let events = event_handle.await.unwrap();
4040        assert!(
4041            events
4042                .iter()
4043                .any(|e| matches!(e, AgentEvent::ConfirmationRequired { .. })),
4044            "Should have ConfirmationRequired event"
4045        );
4046        assert!(
4047            events
4048                .iter()
4049                .any(|e| matches!(e, AgentEvent::ConfirmationReceived { approved: true, .. })),
4050            "Should have ConfirmationReceived event with approved=true"
4051        );
4052    }
4053
4054    #[tokio::test]
4055    async fn test_agent_hitl_disabled_auto_executes() {
4056        // When HITL is disabled, tools should execute automatically even with Ask permission
4057        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4058        use tokio::sync::broadcast;
4059
4060        let mock_client = Arc::new(MockLlmClient::new(vec![
4061            MockLlmClient::tool_call_response(
4062                "tool-1",
4063                "bash",
4064                serde_json::json!({"command": "echo auto"}),
4065            ),
4066            MockLlmClient::text_response("Auto executed!"),
4067        ]));
4068
4069        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4070
4071        // Create HITL with enabled=false
4072        let (event_tx, _event_rx) = broadcast::channel(100);
4073        let hitl_policy = ConfirmationPolicy {
4074            enabled: false, // HITL disabled
4075            ..Default::default()
4076        };
4077        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4078
4079        let permission_policy = PermissionPolicy::new(); // Default is Ask
4080
4081        let config = AgentConfig {
4082            permission_checker: Some(Arc::new(permission_policy)),
4083            confirmation_manager: Some(confirmation_manager),
4084            ..Default::default()
4085        };
4086
4087        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4088        let result = agent.execute(&[], "Echo", None).await.unwrap();
4089
4090        // Should execute without waiting for confirmation
4091        assert_eq!(result.text, "Auto executed!");
4092        assert_eq!(result.tool_calls_count, 1);
4093    }
4094
4095    #[tokio::test]
4096    async fn test_agent_hitl_with_permission_deny_skips_hitl() {
4097        // When permission is Deny, HITL should not be triggered
4098        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4099        use tokio::sync::broadcast;
4100
4101        let mock_client = Arc::new(MockLlmClient::new(vec![
4102            MockLlmClient::tool_call_response(
4103                "tool-1",
4104                "bash",
4105                serde_json::json!({"command": "rm -rf /"}),
4106            ),
4107            MockLlmClient::text_response("Blocked by permission."),
4108        ]));
4109
4110        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4111
4112        // Create HITL enabled
4113        let (event_tx, mut event_rx) = broadcast::channel(100);
4114        let hitl_policy = ConfirmationPolicy {
4115            enabled: true,
4116            ..Default::default()
4117        };
4118        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4119
4120        // Permission policy denies rm commands
4121        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4122
4123        let config = AgentConfig {
4124            permission_checker: Some(Arc::new(permission_policy)),
4125            confirmation_manager: Some(confirmation_manager),
4126            ..Default::default()
4127        };
4128
4129        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4130        let result = agent.execute(&[], "Delete", None).await.unwrap();
4131
4132        // Should be denied without HITL
4133        assert_eq!(result.text, "Blocked by permission.");
4134
4135        // Should NOT have any ConfirmationRequired events
4136        let mut found_confirmation = false;
4137        while let Ok(event) = event_rx.try_recv() {
4138            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4139                found_confirmation = true;
4140            }
4141        }
4142        assert!(
4143            !found_confirmation,
4144            "HITL should not be triggered when permission is Deny"
4145        );
4146    }
4147
4148    #[tokio::test]
4149    async fn test_agent_hitl_with_permission_allow_skips_hitl() {
4150        // When permission is Allow, HITL confirmation is skipped entirely.
4151        // PermissionPolicy is the declarative rule engine; Allow = execute directly.
4152        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4153        use tokio::sync::broadcast;
4154
4155        let mock_client = Arc::new(MockLlmClient::new(vec![
4156            MockLlmClient::tool_call_response(
4157                "tool-1",
4158                "bash",
4159                serde_json::json!({"command": "echo hello"}),
4160            ),
4161            MockLlmClient::text_response("Allowed!"),
4162        ]));
4163
4164        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4165
4166        // Create HITL enabled
4167        let (event_tx, mut event_rx) = broadcast::channel(100);
4168        let hitl_policy = ConfirmationPolicy {
4169            enabled: true,
4170            ..Default::default()
4171        };
4172        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4173
4174        // Permission policy allows echo commands
4175        let permission_policy = PermissionPolicy::new().allow("bash(echo:*)");
4176
4177        let config = AgentConfig {
4178            permission_checker: Some(Arc::new(permission_policy)),
4179            confirmation_manager: Some(confirmation_manager.clone()),
4180            ..Default::default()
4181        };
4182
4183        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4184        let result = agent.execute(&[], "Echo", None).await.unwrap();
4185
4186        // Should execute directly without HITL (permission Allow skips confirmation)
4187        assert_eq!(result.text, "Allowed!");
4188
4189        // Should NOT have ConfirmationRequired event (Allow bypasses HITL)
4190        let mut found_confirmation = false;
4191        while let Ok(event) = event_rx.try_recv() {
4192            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4193                found_confirmation = true;
4194            }
4195        }
4196        assert!(
4197            !found_confirmation,
4198            "Permission Allow should skip HITL confirmation"
4199        );
4200    }
4201
4202    #[tokio::test]
4203    async fn test_agent_hitl_multiple_tool_calls() {
4204        // Test multiple tool calls in sequence with HITL
4205        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4206        use tokio::sync::broadcast;
4207
4208        let mock_client = Arc::new(MockLlmClient::new(vec![
4209            // First response: two tool calls
4210            LlmResponse {
4211                message: Message {
4212                    role: "assistant".to_string(),
4213                    content: vec![
4214                        ContentBlock::ToolUse {
4215                            id: "tool-1".to_string(),
4216                            name: "bash".to_string(),
4217                            input: serde_json::json!({"command": "echo first"}),
4218                        },
4219                        ContentBlock::ToolUse {
4220                            id: "tool-2".to_string(),
4221                            name: "bash".to_string(),
4222                            input: serde_json::json!({"command": "echo second"}),
4223                        },
4224                    ],
4225                    reasoning_content: None,
4226                },
4227                usage: TokenUsage {
4228                    prompt_tokens: 10,
4229                    completion_tokens: 5,
4230                    total_tokens: 15,
4231                    cache_read_tokens: None,
4232                    cache_write_tokens: None,
4233                },
4234                stop_reason: Some("tool_use".to_string()),
4235                meta: None,
4236            },
4237            MockLlmClient::text_response("Both executed!"),
4238        ]));
4239
4240        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4241
4242        // Create HITL
4243        let (event_tx, _event_rx) = broadcast::channel(100);
4244        let hitl_policy = ConfirmationPolicy {
4245            enabled: true,
4246            default_timeout_ms: 5000,
4247            ..Default::default()
4248        };
4249        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4250
4251        let permission_policy = PermissionPolicy::new(); // Default Ask
4252
4253        let config = AgentConfig {
4254            permission_checker: Some(Arc::new(permission_policy)),
4255            confirmation_manager: Some(confirmation_manager.clone()),
4256            ..Default::default()
4257        };
4258
4259        // Spawn task to approve both tools
4260        let cm_clone = confirmation_manager.clone();
4261        tokio::spawn(async move {
4262            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4263            cm_clone.confirm("tool-1", true, None).await.ok();
4264            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4265            cm_clone.confirm("tool-2", true, None).await.ok();
4266        });
4267
4268        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4269        let result = agent.execute(&[], "Run both", None).await.unwrap();
4270
4271        assert_eq!(result.text, "Both executed!");
4272        assert_eq!(result.tool_calls_count, 2);
4273    }
4274
4275    #[tokio::test]
4276    async fn test_agent_hitl_partial_approval() {
4277        // Test: first tool approved, second rejected
4278        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4279        use tokio::sync::broadcast;
4280
4281        let mock_client = Arc::new(MockLlmClient::new(vec![
4282            // First response: two tool calls
4283            LlmResponse {
4284                message: Message {
4285                    role: "assistant".to_string(),
4286                    content: vec![
4287                        ContentBlock::ToolUse {
4288                            id: "tool-1".to_string(),
4289                            name: "bash".to_string(),
4290                            input: serde_json::json!({"command": "echo safe"}),
4291                        },
4292                        ContentBlock::ToolUse {
4293                            id: "tool-2".to_string(),
4294                            name: "bash".to_string(),
4295                            input: serde_json::json!({"command": "rm -rf /"}),
4296                        },
4297                    ],
4298                    reasoning_content: None,
4299                },
4300                usage: TokenUsage {
4301                    prompt_tokens: 10,
4302                    completion_tokens: 5,
4303                    total_tokens: 15,
4304                    cache_read_tokens: None,
4305                    cache_write_tokens: None,
4306                },
4307                stop_reason: Some("tool_use".to_string()),
4308                meta: None,
4309            },
4310            MockLlmClient::text_response("First worked, second rejected."),
4311        ]));
4312
4313        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4314
4315        let (event_tx, _event_rx) = broadcast::channel(100);
4316        let hitl_policy = ConfirmationPolicy {
4317            enabled: true,
4318            default_timeout_ms: 5000,
4319            ..Default::default()
4320        };
4321        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4322
4323        let permission_policy = PermissionPolicy::new();
4324
4325        let config = AgentConfig {
4326            permission_checker: Some(Arc::new(permission_policy)),
4327            confirmation_manager: Some(confirmation_manager.clone()),
4328            ..Default::default()
4329        };
4330
4331        // Approve first, reject second
4332        let cm_clone = confirmation_manager.clone();
4333        tokio::spawn(async move {
4334            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4335            cm_clone.confirm("tool-1", true, None).await.ok();
4336            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4337            cm_clone
4338                .confirm("tool-2", false, Some("Dangerous".to_string()))
4339                .await
4340                .ok();
4341        });
4342
4343        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4344        let result = agent.execute(&[], "Run both", None).await.unwrap();
4345
4346        assert_eq!(result.text, "First worked, second rejected.");
4347        assert_eq!(result.tool_calls_count, 2);
4348    }
4349
4350    #[tokio::test]
4351    async fn test_agent_hitl_yolo_mode_auto_approves() {
4352        // YOLO mode: specific lanes auto-approve without confirmation
4353        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, SessionLane};
4354        use tokio::sync::broadcast;
4355
4356        let mock_client = Arc::new(MockLlmClient::new(vec![
4357            MockLlmClient::tool_call_response(
4358                "tool-1",
4359                "read", // Query lane tool
4360                serde_json::json!({"path": "/tmp/test.txt"}),
4361            ),
4362            MockLlmClient::text_response("File read!"),
4363        ]));
4364
4365        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4366
4367        // YOLO mode for Query lane (read, glob, ls, grep)
4368        let (event_tx, mut event_rx) = broadcast::channel(100);
4369        let mut yolo_lanes = std::collections::HashSet::new();
4370        yolo_lanes.insert(SessionLane::Query);
4371        let hitl_policy = ConfirmationPolicy {
4372            enabled: true,
4373            yolo_lanes, // Auto-approve query operations
4374            ..Default::default()
4375        };
4376        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4377
4378        let permission_policy = PermissionPolicy::new();
4379
4380        let config = AgentConfig {
4381            permission_checker: Some(Arc::new(permission_policy)),
4382            confirmation_manager: Some(confirmation_manager),
4383            ..Default::default()
4384        };
4385
4386        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4387        let result = agent.execute(&[], "Read file", None).await.unwrap();
4388
4389        // Should auto-execute without confirmation (YOLO mode)
4390        assert_eq!(result.text, "File read!");
4391
4392        // Should NOT have ConfirmationRequired for yolo lane
4393        let mut found_confirmation = false;
4394        while let Ok(event) = event_rx.try_recv() {
4395            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4396                found_confirmation = true;
4397            }
4398        }
4399        assert!(
4400            !found_confirmation,
4401            "YOLO mode should not trigger confirmation"
4402        );
4403    }
4404
4405    #[tokio::test]
4406    async fn test_agent_config_with_all_options() {
4407        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4408        use tokio::sync::broadcast;
4409
4410        let (event_tx, _) = broadcast::channel(100);
4411        let hitl_policy = ConfirmationPolicy::default();
4412        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4413
4414        let permission_policy = PermissionPolicy::new().allow("bash(*)");
4415
4416        let config = AgentConfig {
4417            prompt_slots: SystemPromptSlots {
4418                extra: Some("Test system prompt".to_string()),
4419                ..Default::default()
4420            },
4421            tools: vec![],
4422            max_tool_rounds: 10,
4423            permission_checker: Some(Arc::new(permission_policy)),
4424            confirmation_manager: Some(confirmation_manager),
4425            context_providers: vec![],
4426            planning_mode: PlanningMode::default(),
4427            goal_tracking: false,
4428            hook_engine: None,
4429            skill_registry: None,
4430            ..AgentConfig::default()
4431        };
4432
4433        assert!(config.prompt_slots.build().contains("Test system prompt"));
4434        assert_eq!(config.max_tool_rounds, 10);
4435        assert!(config.permission_checker.is_some());
4436        assert!(config.confirmation_manager.is_some());
4437        assert!(config.context_providers.is_empty());
4438
4439        // Test Debug trait
4440        let debug_str = format!("{:?}", config);
4441        assert!(debug_str.contains("AgentConfig"));
4442        assert!(debug_str.contains("permission_checker: true"));
4443        assert!(debug_str.contains("confirmation_manager: true"));
4444        assert!(debug_str.contains("context_providers: 0"));
4445    }
4446
4447    // ========================================================================
4448    // Context Provider Tests
4449    // ========================================================================
4450
4451    use crate::context::{ContextItem, ContextType};
4452
4453    /// Mock context provider for testing
4454    struct MockContextProvider {
4455        name: String,
4456        items: Vec<ContextItem>,
4457        on_turn_calls: std::sync::Arc<tokio::sync::RwLock<Vec<(String, String, String)>>>,
4458    }
4459
4460    impl MockContextProvider {
4461        fn new(name: &str) -> Self {
4462            Self {
4463                name: name.to_string(),
4464                items: Vec::new(),
4465                on_turn_calls: std::sync::Arc::new(tokio::sync::RwLock::new(Vec::new())),
4466            }
4467        }
4468
4469        fn with_items(mut self, items: Vec<ContextItem>) -> Self {
4470            self.items = items;
4471            self
4472        }
4473    }
4474
4475    #[async_trait::async_trait]
4476    impl ContextProvider for MockContextProvider {
4477        fn name(&self) -> &str {
4478            &self.name
4479        }
4480
4481        async fn query(&self, _query: &ContextQuery) -> anyhow::Result<ContextResult> {
4482            let mut result = ContextResult::new(&self.name);
4483            for item in &self.items {
4484                result.add_item(item.clone());
4485            }
4486            Ok(result)
4487        }
4488
4489        async fn on_turn_complete(
4490            &self,
4491            session_id: &str,
4492            prompt: &str,
4493            response: &str,
4494        ) -> anyhow::Result<()> {
4495            let mut calls = self.on_turn_calls.write().await;
4496            calls.push((
4497                session_id.to_string(),
4498                prompt.to_string(),
4499                response.to_string(),
4500            ));
4501            Ok(())
4502        }
4503    }
4504
4505    #[tokio::test]
4506    async fn test_agent_with_context_provider() {
4507        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4508            "Response using context",
4509        )]));
4510
4511        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4512
4513        let provider =
4514            MockContextProvider::new("test-provider").with_items(vec![ContextItem::new(
4515                "ctx-1",
4516                ContextType::Resource,
4517                "Relevant context here",
4518            )
4519            .with_source("test://docs/example")]);
4520
4521        let config = AgentConfig {
4522            prompt_slots: SystemPromptSlots {
4523                extra: Some("You are helpful.".to_string()),
4524                ..Default::default()
4525            },
4526            context_providers: vec![Arc::new(provider)],
4527            ..Default::default()
4528        };
4529
4530        let agent = AgentLoop::new(
4531            mock_client.clone(),
4532            tool_executor,
4533            test_tool_context(),
4534            config,
4535        );
4536        let result = agent.execute(&[], "What is X?", None).await.unwrap();
4537
4538        assert_eq!(result.text, "Response using context");
4539        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
4540    }
4541
4542    #[tokio::test]
4543    async fn test_agent_context_provider_events() {
4544        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4545            "Answer",
4546        )]));
4547
4548        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4549
4550        let provider =
4551            MockContextProvider::new("event-provider").with_items(vec![ContextItem::new(
4552                "item-1",
4553                ContextType::Memory,
4554                "Memory content",
4555            )
4556            .with_token_count(50)]);
4557
4558        let config = AgentConfig {
4559            context_providers: vec![Arc::new(provider)],
4560            ..Default::default()
4561        };
4562
4563        let (tx, mut rx) = mpsc::channel(100);
4564        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4565        let _result = agent.execute(&[], "Test prompt", Some(tx)).await.unwrap();
4566
4567        // Collect events
4568        let mut events = Vec::new();
4569        while let Ok(event) = rx.try_recv() {
4570            events.push(event);
4571        }
4572
4573        // Should have ContextResolving and ContextResolved events
4574        assert!(
4575            events
4576                .iter()
4577                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
4578            "Should have ContextResolving event"
4579        );
4580        assert!(
4581            events
4582                .iter()
4583                .any(|e| matches!(e, AgentEvent::ContextResolved { .. })),
4584            "Should have ContextResolved event"
4585        );
4586
4587        // Check context resolved values
4588        for event in &events {
4589            if let AgentEvent::ContextResolved {
4590                total_items,
4591                total_tokens,
4592            } = event
4593            {
4594                assert_eq!(*total_items, 1);
4595                assert_eq!(*total_tokens, 50);
4596            }
4597        }
4598    }
4599
4600    #[tokio::test]
4601    async fn test_agent_multiple_context_providers() {
4602        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4603            "Combined response",
4604        )]));
4605
4606        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4607
4608        let provider1 = MockContextProvider::new("provider-1").with_items(vec![ContextItem::new(
4609            "p1-1",
4610            ContextType::Resource,
4611            "Resource from P1",
4612        )
4613        .with_token_count(100)]);
4614
4615        let provider2 = MockContextProvider::new("provider-2").with_items(vec![
4616            ContextItem::new("p2-1", ContextType::Memory, "Memory from P2").with_token_count(50),
4617            ContextItem::new("p2-2", ContextType::Skill, "Skill from P2").with_token_count(75),
4618        ]);
4619
4620        let config = AgentConfig {
4621            prompt_slots: SystemPromptSlots {
4622                extra: Some("Base system prompt.".to_string()),
4623                ..Default::default()
4624            },
4625            context_providers: vec![Arc::new(provider1), Arc::new(provider2)],
4626            ..Default::default()
4627        };
4628
4629        let (tx, mut rx) = mpsc::channel(100);
4630        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4631        let result = agent.execute(&[], "Query", Some(tx)).await.unwrap();
4632
4633        assert_eq!(result.text, "Combined response");
4634
4635        // Check context resolved event has combined totals
4636        while let Ok(event) = rx.try_recv() {
4637            if let AgentEvent::ContextResolved {
4638                total_items,
4639                total_tokens,
4640            } = event
4641            {
4642                assert_eq!(total_items, 3); // 1 + 2
4643                assert_eq!(total_tokens, 225); // 100 + 50 + 75
4644            }
4645        }
4646    }
4647
4648    #[tokio::test]
4649    async fn test_agent_no_context_providers() {
4650        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4651            "No context",
4652        )]));
4653
4654        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4655
4656        // No context providers
4657        let config = AgentConfig::default();
4658
4659        let (tx, mut rx) = mpsc::channel(100);
4660        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4661        let result = agent.execute(&[], "Simple prompt", Some(tx)).await.unwrap();
4662
4663        assert_eq!(result.text, "No context");
4664
4665        // Should NOT have context events when no providers
4666        let mut events = Vec::new();
4667        while let Ok(event) = rx.try_recv() {
4668            events.push(event);
4669        }
4670
4671        assert!(
4672            !events
4673                .iter()
4674                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
4675            "Should NOT have ContextResolving event"
4676        );
4677    }
4678
4679    #[tokio::test]
4680    async fn test_agent_context_on_turn_complete() {
4681        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4682            "Final response",
4683        )]));
4684
4685        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4686
4687        let provider = Arc::new(MockContextProvider::new("memory-provider"));
4688        let on_turn_calls = provider.on_turn_calls.clone();
4689
4690        let config = AgentConfig {
4691            context_providers: vec![provider],
4692            ..Default::default()
4693        };
4694
4695        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4696
4697        // Execute with session ID
4698        let result = agent
4699            .execute_with_session(&[], "User prompt", Some("sess-123"), None, None)
4700            .await
4701            .unwrap();
4702
4703        assert_eq!(result.text, "Final response");
4704
4705        // Check on_turn_complete was called
4706        let calls = on_turn_calls.read().await;
4707        assert_eq!(calls.len(), 1);
4708        assert_eq!(calls[0].0, "sess-123");
4709        assert_eq!(calls[0].1, "User prompt");
4710        assert_eq!(calls[0].2, "Final response");
4711    }
4712
4713    #[tokio::test]
4714    async fn test_agent_context_on_turn_complete_no_session() {
4715        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4716            "Response",
4717        )]));
4718
4719        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4720
4721        let provider = Arc::new(MockContextProvider::new("memory-provider"));
4722        let on_turn_calls = provider.on_turn_calls.clone();
4723
4724        let config = AgentConfig {
4725            context_providers: vec![provider],
4726            ..Default::default()
4727        };
4728
4729        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4730
4731        // Execute without session ID (uses execute() which passes None)
4732        let _result = agent.execute(&[], "Prompt", None).await.unwrap();
4733
4734        // on_turn_complete should NOT be called when session_id is None
4735        let calls = on_turn_calls.read().await;
4736        assert!(calls.is_empty());
4737    }
4738
4739    #[tokio::test]
4740    async fn test_agent_build_augmented_system_prompt() {
4741        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response("OK")]));
4742
4743        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4744
4745        let provider = MockContextProvider::new("test").with_items(vec![ContextItem::new(
4746            "doc-1",
4747            ContextType::Resource,
4748            "Auth uses JWT tokens.",
4749        )
4750        .with_source("viking://docs/auth")]);
4751
4752        let config = AgentConfig {
4753            prompt_slots: SystemPromptSlots {
4754                extra: Some("You are helpful.".to_string()),
4755                ..Default::default()
4756            },
4757            context_providers: vec![Arc::new(provider)],
4758            ..Default::default()
4759        };
4760
4761        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4762
4763        // Test building augmented prompt
4764        let context_results = agent.resolve_context("test", None).await;
4765        let augmented = agent.build_augmented_system_prompt(&context_results);
4766
4767        let augmented_str = augmented.unwrap();
4768        assert!(augmented_str.contains("You are helpful."));
4769        assert!(augmented_str.contains("<context source=\"viking://docs/auth\" type=\"Resource\">"));
4770        assert!(augmented_str.contains("Auth uses JWT tokens."));
4771    }
4772
4773    // ========================================================================
4774    // Agentic Loop Integration Tests
4775    // ========================================================================
4776
4777    /// Helper: collect all events from a channel
4778    async fn collect_events(mut rx: mpsc::Receiver<AgentEvent>) -> Vec<AgentEvent> {
4779        let mut events = Vec::new();
4780        while let Ok(event) = rx.try_recv() {
4781            events.push(event);
4782        }
4783        // Drain remaining
4784        while let Some(event) = rx.recv().await {
4785            events.push(event);
4786        }
4787        events
4788    }
4789
4790    #[tokio::test]
4791    async fn test_agent_multi_turn_tool_chain() {
4792        // LLM calls tool A → sees result → calls tool B → sees result → final answer
4793        let mock_client = Arc::new(MockLlmClient::new(vec![
4794            // Turn 1: call ls
4795            MockLlmClient::tool_call_response(
4796                "t1",
4797                "bash",
4798                serde_json::json!({"command": "echo step1"}),
4799            ),
4800            // Turn 2: call another tool based on first result
4801            MockLlmClient::tool_call_response(
4802                "t2",
4803                "bash",
4804                serde_json::json!({"command": "echo step2"}),
4805            ),
4806            // Turn 3: final answer
4807            MockLlmClient::text_response("Completed both steps: step1 then step2"),
4808        ]));
4809
4810        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4811        let config = AgentConfig::default();
4812
4813        let agent = AgentLoop::new(
4814            mock_client.clone(),
4815            tool_executor,
4816            test_tool_context(),
4817            config,
4818        );
4819        let result = agent.execute(&[], "Run two steps", None).await.unwrap();
4820
4821        assert_eq!(result.text, "Completed both steps: step1 then step2");
4822        assert_eq!(result.tool_calls_count, 2);
4823        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 3);
4824
4825        // Verify message history: user → assistant(tool_use) → user(tool_result) → assistant(tool_use) → user(tool_result) → assistant(text)
4826        assert_eq!(result.messages[0].role, "user");
4827        assert_eq!(result.messages[1].role, "assistant"); // tool call 1
4828        assert_eq!(result.messages[2].role, "user"); // tool result 1 (Anthropic convention)
4829        assert_eq!(result.messages[3].role, "assistant"); // tool call 2
4830        assert_eq!(result.messages[4].role, "user"); // tool result 2
4831        assert_eq!(result.messages[5].role, "assistant"); // final text
4832        assert_eq!(result.messages.len(), 6);
4833    }
4834
4835    #[tokio::test]
4836    async fn test_agent_conversation_history_preserved() {
4837        // Pass existing history, verify it's preserved in output
4838        let existing_history = vec![
4839            Message::user("What is Rust?"),
4840            Message {
4841                role: "assistant".to_string(),
4842                content: vec![ContentBlock::Text {
4843                    text: "Rust is a systems programming language.".to_string(),
4844                }],
4845                reasoning_content: None,
4846            },
4847        ];
4848
4849        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4850            "Rust was created by Graydon Hoare at Mozilla.",
4851        )]));
4852
4853        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4854        let agent = AgentLoop::new(
4855            mock_client.clone(),
4856            tool_executor,
4857            test_tool_context(),
4858            AgentConfig::default(),
4859        );
4860
4861        let result = agent
4862            .execute(&existing_history, "Who created it?", None)
4863            .await
4864            .unwrap();
4865
4866        // History should contain: old user + old assistant + new user + new assistant
4867        assert_eq!(result.messages.len(), 4);
4868        assert_eq!(result.messages[0].text(), "What is Rust?");
4869        assert_eq!(
4870            result.messages[1].text(),
4871            "Rust is a systems programming language."
4872        );
4873        assert_eq!(result.messages[2].text(), "Who created it?");
4874        assert_eq!(
4875            result.messages[3].text(),
4876            "Rust was created by Graydon Hoare at Mozilla."
4877        );
4878    }
4879
4880    #[tokio::test]
4881    async fn test_agent_event_stream_completeness() {
4882        // Verify full event sequence for a single tool call loop
4883        let mock_client = Arc::new(MockLlmClient::new(vec![
4884            MockLlmClient::tool_call_response(
4885                "t1",
4886                "bash",
4887                serde_json::json!({"command": "echo hi"}),
4888            ),
4889            MockLlmClient::text_response("Done"),
4890        ]));
4891
4892        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4893        let agent = AgentLoop::new(
4894            mock_client,
4895            tool_executor,
4896            test_tool_context(),
4897            AgentConfig::default(),
4898        );
4899
4900        let (tx, rx) = mpsc::channel(100);
4901        let result = agent.execute(&[], "Say hi", Some(tx)).await.unwrap();
4902        assert_eq!(result.text, "Done");
4903
4904        let events = collect_events(rx).await;
4905
4906        // Verify event sequence
4907        let event_types: Vec<&str> = events
4908            .iter()
4909            .map(|e| match e {
4910                AgentEvent::Start { .. } => "Start",
4911                AgentEvent::TurnStart { .. } => "TurnStart",
4912                AgentEvent::TurnEnd { .. } => "TurnEnd",
4913                AgentEvent::ToolEnd { .. } => "ToolEnd",
4914                AgentEvent::End { .. } => "End",
4915                _ => "Other",
4916            })
4917            .collect();
4918
4919        // Must start with Start, end with End
4920        assert_eq!(event_types.first(), Some(&"Start"));
4921        assert_eq!(event_types.last(), Some(&"End"));
4922
4923        // Must have 2 TurnStarts (tool call turn + final answer turn)
4924        let turn_starts = event_types.iter().filter(|&&t| t == "TurnStart").count();
4925        assert_eq!(turn_starts, 2);
4926
4927        // Must have 1 ToolEnd
4928        let tool_ends = event_types.iter().filter(|&&t| t == "ToolEnd").count();
4929        assert_eq!(tool_ends, 1);
4930    }
4931
4932    #[tokio::test]
4933    async fn test_agent_multiple_tools_single_turn() {
4934        // LLM returns 2 tool calls in one response
4935        let mock_client = Arc::new(MockLlmClient::new(vec![
4936            LlmResponse {
4937                message: Message {
4938                    role: "assistant".to_string(),
4939                    content: vec![
4940                        ContentBlock::ToolUse {
4941                            id: "t1".to_string(),
4942                            name: "bash".to_string(),
4943                            input: serde_json::json!({"command": "echo first"}),
4944                        },
4945                        ContentBlock::ToolUse {
4946                            id: "t2".to_string(),
4947                            name: "bash".to_string(),
4948                            input: serde_json::json!({"command": "echo second"}),
4949                        },
4950                    ],
4951                    reasoning_content: None,
4952                },
4953                usage: TokenUsage {
4954                    prompt_tokens: 10,
4955                    completion_tokens: 5,
4956                    total_tokens: 15,
4957                    cache_read_tokens: None,
4958                    cache_write_tokens: None,
4959                },
4960                stop_reason: Some("tool_use".to_string()),
4961                meta: None,
4962            },
4963            MockLlmClient::text_response("Both commands ran"),
4964        ]));
4965
4966        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4967        let agent = AgentLoop::new(
4968            mock_client.clone(),
4969            tool_executor,
4970            test_tool_context(),
4971            AgentConfig::default(),
4972        );
4973
4974        let result = agent.execute(&[], "Run both", None).await.unwrap();
4975
4976        assert_eq!(result.text, "Both commands ran");
4977        assert_eq!(result.tool_calls_count, 2);
4978        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2); // Only 2 LLM calls
4979
4980        // Messages: user → assistant(2 tools) → user(tool_result) → user(tool_result) → assistant(text)
4981        assert_eq!(result.messages[0].role, "user");
4982        assert_eq!(result.messages[1].role, "assistant");
4983        assert_eq!(result.messages[2].role, "user"); // tool result 1
4984        assert_eq!(result.messages[3].role, "user"); // tool result 2
4985        assert_eq!(result.messages[4].role, "assistant");
4986    }
4987
4988    #[tokio::test]
4989    async fn test_agent_token_usage_accumulation() {
4990        // Verify usage sums across multiple turns
4991        let mock_client = Arc::new(MockLlmClient::new(vec![
4992            MockLlmClient::tool_call_response(
4993                "t1",
4994                "bash",
4995                serde_json::json!({"command": "echo x"}),
4996            ),
4997            MockLlmClient::text_response("Done"),
4998        ]));
4999
5000        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5001        let agent = AgentLoop::new(
5002            mock_client,
5003            tool_executor,
5004            test_tool_context(),
5005            AgentConfig::default(),
5006        );
5007
5008        let result = agent.execute(&[], "test", None).await.unwrap();
5009
5010        // Each mock response has prompt=10, completion=5, total=15
5011        // 2 LLM calls → 20 prompt, 10 completion, 30 total
5012        assert_eq!(result.usage.prompt_tokens, 20);
5013        assert_eq!(result.usage.completion_tokens, 10);
5014        assert_eq!(result.usage.total_tokens, 30);
5015    }
5016
5017    #[tokio::test]
5018    async fn test_agent_system_prompt_passed() {
5019        // Verify system prompt is used (MockLlmClient captures calls)
5020        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5021            "I am a coding assistant.",
5022        )]));
5023
5024        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5025        let config = AgentConfig {
5026            prompt_slots: SystemPromptSlots {
5027                extra: Some("You are a coding assistant.".to_string()),
5028                ..Default::default()
5029            },
5030            ..Default::default()
5031        };
5032
5033        let agent = AgentLoop::new(
5034            mock_client.clone(),
5035            tool_executor,
5036            test_tool_context(),
5037            config,
5038        );
5039        let result = agent.execute(&[], "What are you?", None).await.unwrap();
5040
5041        assert_eq!(result.text, "I am a coding assistant.");
5042        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
5043    }
5044
5045    #[tokio::test]
5046    async fn test_agent_max_rounds_with_persistent_tool_calls() {
5047        // LLM keeps calling tools forever — should hit max_tool_rounds
5048        let mut responses = Vec::new();
5049        for i in 0..15 {
5050            responses.push(MockLlmClient::tool_call_response(
5051                &format!("t{}", i),
5052                "bash",
5053                serde_json::json!({"command": format!("echo round{}", i)}),
5054            ));
5055        }
5056
5057        let mock_client = Arc::new(MockLlmClient::new(responses));
5058        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5059        let config = AgentConfig {
5060            max_tool_rounds: 5,
5061            ..Default::default()
5062        };
5063
5064        let agent = AgentLoop::new(
5065            mock_client.clone(),
5066            tool_executor,
5067            test_tool_context(),
5068            config,
5069        );
5070        let result = agent.execute(&[], "Loop forever", None).await;
5071
5072        assert!(result.is_err());
5073        let err = result.unwrap_err().to_string();
5074        assert!(err.contains("Max tool rounds (5) exceeded"));
5075    }
5076
5077    #[tokio::test]
5078    async fn test_agent_end_event_contains_final_text() {
5079        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5080            "Final answer here",
5081        )]));
5082
5083        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5084        let agent = AgentLoop::new(
5085            mock_client,
5086            tool_executor,
5087            test_tool_context(),
5088            AgentConfig::default(),
5089        );
5090
5091        let (tx, rx) = mpsc::channel(100);
5092        agent.execute(&[], "test", Some(tx)).await.unwrap();
5093
5094        let events = collect_events(rx).await;
5095        let end_event = events.iter().find(|e| matches!(e, AgentEvent::End { .. }));
5096        assert!(end_event.is_some());
5097
5098        if let AgentEvent::End { text, usage, .. } = end_event.unwrap() {
5099            assert_eq!(text, "Final answer here");
5100            assert_eq!(usage.total_tokens, 15);
5101        }
5102    }
5103}
5104
5105#[cfg(test)]
5106mod extra_agent_tests {
5107    use super::*;
5108    use crate::agent::tests::MockLlmClient;
5109    use crate::queue::SessionQueueConfig;
5110    use crate::tools::ToolExecutor;
5111    use std::path::PathBuf;
5112    use std::sync::atomic::{AtomicUsize, Ordering};
5113
5114    fn test_tool_context() -> ToolContext {
5115        ToolContext::new(PathBuf::from("/tmp"))
5116    }
5117
5118    // ========================================================================
5119    // AgentConfig
5120    // ========================================================================
5121
5122    #[test]
5123    fn test_agent_config_debug() {
5124        let config = AgentConfig {
5125            prompt_slots: SystemPromptSlots {
5126                extra: Some("You are helpful".to_string()),
5127                ..Default::default()
5128            },
5129            tools: vec![],
5130            max_tool_rounds: 10,
5131            permission_checker: None,
5132            confirmation_manager: None,
5133            context_providers: vec![],
5134            planning_mode: PlanningMode::Enabled,
5135            goal_tracking: false,
5136            hook_engine: None,
5137            skill_registry: None,
5138            ..AgentConfig::default()
5139        };
5140        let debug = format!("{:?}", config);
5141        assert!(debug.contains("AgentConfig"));
5142        assert!(debug.contains("planning_mode"));
5143    }
5144
5145    #[test]
5146    fn test_agent_config_default_values() {
5147        let config = AgentConfig::default();
5148        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
5149        assert_eq!(config.planning_mode, PlanningMode::Auto);
5150        assert!(!config.goal_tracking);
5151        assert!(config.context_providers.is_empty());
5152    }
5153
5154    // ========================================================================
5155    // AgentEvent serialization
5156    // ========================================================================
5157
5158    #[test]
5159    fn test_agent_event_serialize_start() {
5160        let event = AgentEvent::Start {
5161            prompt: "Hello".to_string(),
5162        };
5163        let json = serde_json::to_string(&event).unwrap();
5164        assert!(json.contains("agent_start"));
5165        assert!(json.contains("Hello"));
5166    }
5167
5168    #[test]
5169    fn test_agent_event_serialize_text_delta() {
5170        let event = AgentEvent::TextDelta {
5171            text: "chunk".to_string(),
5172        };
5173        let json = serde_json::to_string(&event).unwrap();
5174        assert!(json.contains("text_delta"));
5175    }
5176
5177    #[test]
5178    fn test_agent_event_serialize_tool_start() {
5179        let event = AgentEvent::ToolStart {
5180            id: "t1".to_string(),
5181            name: "bash".to_string(),
5182        };
5183        let json = serde_json::to_string(&event).unwrap();
5184        assert!(json.contains("tool_start"));
5185        assert!(json.contains("bash"));
5186    }
5187
5188    #[test]
5189    fn test_agent_event_serialize_tool_end() {
5190        let event = AgentEvent::ToolEnd {
5191            id: "t1".to_string(),
5192            name: "bash".to_string(),
5193            output: "hello".to_string(),
5194            exit_code: 0,
5195            metadata: None,
5196        };
5197        let json = serde_json::to_string(&event).unwrap();
5198        assert!(json.contains("tool_end"));
5199    }
5200
5201    #[test]
5202    fn test_agent_event_tool_end_has_metadata_field() {
5203        let event = AgentEvent::ToolEnd {
5204            id: "t1".to_string(),
5205            name: "write".to_string(),
5206            output: "Wrote 5 bytes".to_string(),
5207            exit_code: 0,
5208            metadata: Some(
5209                serde_json::json!({ "before": "old", "after": "new", "file_path": "f.txt" }),
5210            ),
5211        };
5212        let json = serde_json::to_string(&event).unwrap();
5213        assert!(json.contains("\"before\""));
5214    }
5215
5216    #[test]
5217    fn test_agent_event_serialize_error() {
5218        let event = AgentEvent::Error {
5219            message: "oops".to_string(),
5220        };
5221        let json = serde_json::to_string(&event).unwrap();
5222        assert!(json.contains("error"));
5223        assert!(json.contains("oops"));
5224    }
5225
5226    #[test]
5227    fn test_agent_event_serialize_confirmation_required() {
5228        let event = AgentEvent::ConfirmationRequired {
5229            tool_id: "t1".to_string(),
5230            tool_name: "bash".to_string(),
5231            args: serde_json::json!({"cmd": "rm"}),
5232            timeout_ms: 30000,
5233        };
5234        let json = serde_json::to_string(&event).unwrap();
5235        assert!(json.contains("confirmation_required"));
5236    }
5237
5238    #[test]
5239    fn test_agent_event_serialize_confirmation_received() {
5240        let event = AgentEvent::ConfirmationReceived {
5241            tool_id: "t1".to_string(),
5242            approved: true,
5243            reason: Some("safe".to_string()),
5244        };
5245        let json = serde_json::to_string(&event).unwrap();
5246        assert!(json.contains("confirmation_received"));
5247    }
5248
5249    #[test]
5250    fn test_agent_event_serialize_confirmation_timeout() {
5251        let event = AgentEvent::ConfirmationTimeout {
5252            tool_id: "t1".to_string(),
5253            action_taken: "rejected".to_string(),
5254        };
5255        let json = serde_json::to_string(&event).unwrap();
5256        assert!(json.contains("confirmation_timeout"));
5257    }
5258
5259    #[test]
5260    fn test_agent_event_serialize_external_task_pending() {
5261        let event = AgentEvent::ExternalTaskPending {
5262            task_id: "task-1".to_string(),
5263            session_id: "sess-1".to_string(),
5264            lane: crate::hitl::SessionLane::Execute,
5265            command_type: "bash".to_string(),
5266            payload: serde_json::json!({}),
5267            timeout_ms: 60000,
5268        };
5269        let json = serde_json::to_string(&event).unwrap();
5270        assert!(json.contains("external_task_pending"));
5271    }
5272
5273    #[test]
5274    fn test_agent_event_serialize_external_task_completed() {
5275        let event = AgentEvent::ExternalTaskCompleted {
5276            task_id: "task-1".to_string(),
5277            session_id: "sess-1".to_string(),
5278            success: false,
5279        };
5280        let json = serde_json::to_string(&event).unwrap();
5281        assert!(json.contains("external_task_completed"));
5282    }
5283
5284    #[test]
5285    fn test_agent_event_serialize_permission_denied() {
5286        let event = AgentEvent::PermissionDenied {
5287            tool_id: "t1".to_string(),
5288            tool_name: "bash".to_string(),
5289            args: serde_json::json!({}),
5290            reason: "denied".to_string(),
5291        };
5292        let json = serde_json::to_string(&event).unwrap();
5293        assert!(json.contains("permission_denied"));
5294    }
5295
5296    #[test]
5297    fn test_agent_event_serialize_context_compacted() {
5298        let event = AgentEvent::ContextCompacted {
5299            session_id: "sess-1".to_string(),
5300            before_messages: 100,
5301            after_messages: 20,
5302            percent_before: 0.85,
5303        };
5304        let json = serde_json::to_string(&event).unwrap();
5305        assert!(json.contains("context_compacted"));
5306    }
5307
5308    #[test]
5309    fn test_agent_event_serialize_turn_start() {
5310        let event = AgentEvent::TurnStart { turn: 3 };
5311        let json = serde_json::to_string(&event).unwrap();
5312        assert!(json.contains("turn_start"));
5313    }
5314
5315    #[test]
5316    fn test_agent_event_serialize_turn_end() {
5317        let event = AgentEvent::TurnEnd {
5318            turn: 3,
5319            usage: TokenUsage::default(),
5320        };
5321        let json = serde_json::to_string(&event).unwrap();
5322        assert!(json.contains("turn_end"));
5323    }
5324
5325    #[test]
5326    fn test_agent_event_serialize_end() {
5327        let event = AgentEvent::End {
5328            text: "Done".to_string(),
5329            usage: TokenUsage {
5330                prompt_tokens: 100,
5331                completion_tokens: 50,
5332                total_tokens: 150,
5333                cache_read_tokens: None,
5334                cache_write_tokens: None,
5335            },
5336            meta: None,
5337        };
5338        let json = serde_json::to_string(&event).unwrap();
5339        assert!(json.contains("agent_end"));
5340    }
5341
5342    // ========================================================================
5343    // AgentResult
5344    // ========================================================================
5345
5346    #[test]
5347    fn test_agent_result_fields() {
5348        let result = AgentResult {
5349            text: "output".to_string(),
5350            messages: vec![Message::user("hello")],
5351            usage: TokenUsage::default(),
5352            tool_calls_count: 3,
5353        };
5354        assert_eq!(result.text, "output");
5355        assert_eq!(result.messages.len(), 1);
5356        assert_eq!(result.tool_calls_count, 3);
5357    }
5358
    // ========================================================================
    // AgentEvent serialization tests for the remaining variants
    // ========================================================================
5362
5363    #[test]
5364    fn test_agent_event_serialize_context_resolving() {
5365        let event = AgentEvent::ContextResolving {
5366            providers: vec!["provider1".to_string(), "provider2".to_string()],
5367        };
5368        let json = serde_json::to_string(&event).unwrap();
5369        assert!(json.contains("context_resolving"));
5370        assert!(json.contains("provider1"));
5371    }
5372
5373    #[test]
5374    fn test_agent_event_serialize_context_resolved() {
5375        let event = AgentEvent::ContextResolved {
5376            total_items: 5,
5377            total_tokens: 1000,
5378        };
5379        let json = serde_json::to_string(&event).unwrap();
5380        assert!(json.contains("context_resolved"));
5381        assert!(json.contains("1000"));
5382    }
5383
5384    #[test]
5385    fn test_agent_event_serialize_command_dead_lettered() {
5386        let event = AgentEvent::CommandDeadLettered {
5387            command_id: "cmd-1".to_string(),
5388            command_type: "bash".to_string(),
5389            lane: "execute".to_string(),
5390            error: "timeout".to_string(),
5391            attempts: 3,
5392        };
5393        let json = serde_json::to_string(&event).unwrap();
5394        assert!(json.contains("command_dead_lettered"));
5395        assert!(json.contains("cmd-1"));
5396    }
5397
5398    #[test]
5399    fn test_agent_event_serialize_command_retry() {
5400        let event = AgentEvent::CommandRetry {
5401            command_id: "cmd-2".to_string(),
5402            command_type: "read".to_string(),
5403            lane: "query".to_string(),
5404            attempt: 2,
5405            delay_ms: 1000,
5406        };
5407        let json = serde_json::to_string(&event).unwrap();
5408        assert!(json.contains("command_retry"));
5409        assert!(json.contains("cmd-2"));
5410    }
5411
5412    #[test]
5413    fn test_agent_event_serialize_queue_alert() {
5414        let event = AgentEvent::QueueAlert {
5415            level: "warning".to_string(),
5416            alert_type: "depth".to_string(),
5417            message: "Queue depth exceeded".to_string(),
5418        };
5419        let json = serde_json::to_string(&event).unwrap();
5420        assert!(json.contains("queue_alert"));
5421        assert!(json.contains("warning"));
5422    }
5423
5424    #[test]
5425    fn test_agent_event_serialize_task_updated() {
5426        let event = AgentEvent::TaskUpdated {
5427            session_id: "sess-1".to_string(),
5428            tasks: vec![],
5429        };
5430        let json = serde_json::to_string(&event).unwrap();
5431        assert!(json.contains("task_updated"));
5432        assert!(json.contains("sess-1"));
5433    }
5434
5435    #[test]
5436    fn test_agent_event_serialize_memory_stored() {
5437        let event = AgentEvent::MemoryStored {
5438            memory_id: "mem-1".to_string(),
5439            memory_type: "conversation".to_string(),
5440            importance: 0.8,
5441            tags: vec!["important".to_string()],
5442        };
5443        let json = serde_json::to_string(&event).unwrap();
5444        assert!(json.contains("memory_stored"));
5445        assert!(json.contains("mem-1"));
5446    }
5447
5448    #[test]
5449    fn test_agent_event_serialize_memory_recalled() {
5450        let event = AgentEvent::MemoryRecalled {
5451            memory_id: "mem-2".to_string(),
5452            content: "Previous conversation".to_string(),
5453            relevance: 0.9,
5454        };
5455        let json = serde_json::to_string(&event).unwrap();
5456        assert!(json.contains("memory_recalled"));
5457        assert!(json.contains("mem-2"));
5458    }
5459
5460    #[test]
5461    fn test_agent_event_serialize_memories_searched() {
5462        let event = AgentEvent::MemoriesSearched {
5463            query: Some("search term".to_string()),
5464            tags: vec!["tag1".to_string()],
5465            result_count: 5,
5466        };
5467        let json = serde_json::to_string(&event).unwrap();
5468        assert!(json.contains("memories_searched"));
5469        assert!(json.contains("search term"));
5470    }
5471
5472    #[test]
5473    fn test_agent_event_serialize_memory_cleared() {
5474        let event = AgentEvent::MemoryCleared {
5475            tier: "short_term".to_string(),
5476            count: 10,
5477        };
5478        let json = serde_json::to_string(&event).unwrap();
5479        assert!(json.contains("memory_cleared"));
5480        assert!(json.contains("short_term"));
5481    }
5482
5483    #[test]
5484    fn test_agent_event_serialize_subagent_start() {
5485        let event = AgentEvent::SubagentStart {
5486            task_id: "task-1".to_string(),
5487            session_id: "child-sess".to_string(),
5488            parent_session_id: "parent-sess".to_string(),
5489            agent: "explore".to_string(),
5490            description: "Explore codebase".to_string(),
5491        };
5492        let json = serde_json::to_string(&event).unwrap();
5493        assert!(json.contains("subagent_start"));
5494        assert!(json.contains("explore"));
5495    }
5496
5497    #[test]
5498    fn test_agent_event_serialize_subagent_progress() {
5499        let event = AgentEvent::SubagentProgress {
5500            task_id: "task-1".to_string(),
5501            session_id: "child-sess".to_string(),
5502            status: "processing".to_string(),
5503            metadata: serde_json::json!({"progress": 50}),
5504        };
5505        let json = serde_json::to_string(&event).unwrap();
5506        assert!(json.contains("subagent_progress"));
5507        assert!(json.contains("processing"));
5508    }
5509
5510    #[test]
5511    fn test_agent_event_serialize_subagent_end() {
5512        let event = AgentEvent::SubagentEnd {
5513            task_id: "task-1".to_string(),
5514            session_id: "child-sess".to_string(),
5515            agent: "explore".to_string(),
5516            output: "Found 10 files".to_string(),
5517            success: true,
5518        };
5519        let json = serde_json::to_string(&event).unwrap();
5520        assert!(json.contains("subagent_end"));
5521        assert!(json.contains("Found 10 files"));
5522    }
5523
5524    #[test]
5525    fn test_agent_event_serialize_planning_start() {
5526        let event = AgentEvent::PlanningStart {
5527            prompt: "Build a web app".to_string(),
5528        };
5529        let json = serde_json::to_string(&event).unwrap();
5530        assert!(json.contains("planning_start"));
5531        assert!(json.contains("Build a web app"));
5532    }
5533
5534    #[test]
5535    fn test_agent_event_serialize_planning_end() {
5536        use crate::planning::{Complexity, ExecutionPlan};
5537        let plan = ExecutionPlan::new("Test goal".to_string(), Complexity::Simple);
5538        let event = AgentEvent::PlanningEnd {
5539            plan,
5540            estimated_steps: 3,
5541        };
5542        let json = serde_json::to_string(&event).unwrap();
5543        assert!(json.contains("planning_end"));
5544        assert!(json.contains("estimated_steps"));
5545    }
5546
5547    #[test]
5548    fn test_agent_event_serialize_step_start() {
5549        let event = AgentEvent::StepStart {
5550            step_id: "step-1".to_string(),
5551            description: "Initialize project".to_string(),
5552            step_number: 1,
5553            total_steps: 5,
5554        };
5555        let json = serde_json::to_string(&event).unwrap();
5556        assert!(json.contains("step_start"));
5557        assert!(json.contains("Initialize project"));
5558    }
5559
5560    #[test]
5561    fn test_agent_event_serialize_step_end() {
5562        let event = AgentEvent::StepEnd {
5563            step_id: "step-1".to_string(),
5564            status: TaskStatus::Completed,
5565            step_number: 1,
5566            total_steps: 5,
5567        };
5568        let json = serde_json::to_string(&event).unwrap();
5569        assert!(json.contains("step_end"));
5570        assert!(json.contains("step-1"));
5571    }
5572
5573    #[test]
5574    fn test_agent_event_serialize_goal_extracted() {
5575        use crate::planning::AgentGoal;
5576        let goal = AgentGoal::new("Complete the task".to_string());
5577        let event = AgentEvent::GoalExtracted { goal };
5578        let json = serde_json::to_string(&event).unwrap();
5579        assert!(json.contains("goal_extracted"));
5580    }
5581
5582    #[test]
5583    fn test_agent_event_serialize_goal_progress() {
5584        let event = AgentEvent::GoalProgress {
5585            goal: "Build app".to_string(),
5586            progress: 0.5,
5587            completed_steps: 2,
5588            total_steps: 4,
5589        };
5590        let json = serde_json::to_string(&event).unwrap();
5591        assert!(json.contains("goal_progress"));
5592        assert!(json.contains("0.5"));
5593    }
5594
5595    #[test]
5596    fn test_agent_event_serialize_goal_achieved() {
5597        let event = AgentEvent::GoalAchieved {
5598            goal: "Build app".to_string(),
5599            total_steps: 4,
5600            duration_ms: 5000,
5601        };
5602        let json = serde_json::to_string(&event).unwrap();
5603        assert!(json.contains("goal_achieved"));
5604        assert!(json.contains("5000"));
5605    }
5606
5607    #[tokio::test]
5608    async fn test_extract_goal_with_json_response() {
5609        // LlmPlanner expects JSON with "description" and "success_criteria" fields
5610        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5611            r#"{"description": "Build web app", "success_criteria": ["App runs on port 3000", "Has login page"]}"#,
5612        )]));
5613        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5614        let agent = AgentLoop::new(
5615            mock_client,
5616            tool_executor,
5617            test_tool_context(),
5618            AgentConfig::default(),
5619        );
5620
5621        let goal = agent.extract_goal("Build a web app").await.unwrap();
5622        assert_eq!(goal.description, "Build web app");
5623        assert_eq!(goal.success_criteria.len(), 2);
5624        assert_eq!(goal.success_criteria[0], "App runs on port 3000");
5625    }
5626
5627    #[tokio::test]
5628    async fn test_extract_goal_fallback_on_non_json() {
5629        // Non-JSON response triggers fallback: returns the original prompt as goal
5630        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5631            "Some non-JSON response",
5632        )]));
5633        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5634        let agent = AgentLoop::new(
5635            mock_client,
5636            tool_executor,
5637            test_tool_context(),
5638            AgentConfig::default(),
5639        );
5640
5641        let goal = agent.extract_goal("Do something").await.unwrap();
5642        // Fallback uses the original prompt as description
5643        assert_eq!(goal.description, "Do something");
5644        // Fallback adds 2 generic criteria
5645        assert_eq!(goal.success_criteria.len(), 2);
5646    }
5647
5648    #[tokio::test]
5649    async fn test_check_goal_achievement_json_yes() {
5650        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5651            r#"{"achieved": true, "progress": 1.0, "remaining_criteria": []}"#,
5652        )]));
5653        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5654        let agent = AgentLoop::new(
5655            mock_client,
5656            tool_executor,
5657            test_tool_context(),
5658            AgentConfig::default(),
5659        );
5660
5661        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
5662        let achieved = agent
5663            .check_goal_achievement(&goal, "All done")
5664            .await
5665            .unwrap();
5666        assert!(achieved);
5667    }
5668
5669    #[tokio::test]
5670    async fn test_check_goal_achievement_fallback_not_done() {
5671        // Non-JSON response triggers heuristic fallback
5672        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5673            "invalid json",
5674        )]));
5675        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5676        let agent = AgentLoop::new(
5677            mock_client,
5678            tool_executor,
5679            test_tool_context(),
5680            AgentConfig::default(),
5681        );
5682
5683        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
5684        // "still working" doesn't contain "complete"/"done"/"finished"
5685        let achieved = agent
5686            .check_goal_achievement(&goal, "still working")
5687            .await
5688            .unwrap();
5689        assert!(!achieved);
5690    }
5691
5692    // ========================================================================
5693    // build_augmented_system_prompt Tests
5694    // ========================================================================
5695
5696    #[test]
5697    fn test_build_augmented_system_prompt_empty_context() {
5698        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5699        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5700        let config = AgentConfig {
5701            prompt_slots: SystemPromptSlots {
5702                extra: Some("Base prompt".to_string()),
5703                ..Default::default()
5704            },
5705            ..Default::default()
5706        };
5707        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5708
5709        let result = agent.build_augmented_system_prompt(&[]);
5710        assert!(result.unwrap().contains("Base prompt"));
5711    }
5712
5713    #[test]
5714    fn test_build_augmented_system_prompt_no_custom_slots() {
5715        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5716        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5717        let agent = AgentLoop::new(
5718            mock_client,
5719            tool_executor,
5720            test_tool_context(),
5721            AgentConfig::default(),
5722        );
5723
5724        let result = agent.build_augmented_system_prompt(&[]);
5725        // Default slots still produce the default agentic prompt
5726        assert!(result.is_some());
5727        assert!(result.unwrap().contains("Core Behaviour"));
5728    }
5729
5730    #[test]
5731    fn test_build_augmented_system_prompt_with_context_no_base() {
5732        use crate::context::{ContextItem, ContextResult, ContextType};
5733
5734        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5735        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5736        let agent = AgentLoop::new(
5737            mock_client,
5738            tool_executor,
5739            test_tool_context(),
5740            AgentConfig::default(),
5741        );
5742
5743        let context = vec![ContextResult {
5744            provider: "test".to_string(),
5745            items: vec![ContextItem::new("id1", ContextType::Resource, "Content")],
5746            total_tokens: 10,
5747            truncated: false,
5748        }];
5749
5750        let result = agent.build_augmented_system_prompt(&context);
5751        assert!(result.is_some());
5752        let text = result.unwrap();
5753        assert!(text.contains("<context"));
5754        assert!(text.contains("Content"));
5755    }
5756
5757    // ========================================================================
5758    // AgentResult Clone and Debug
5759    // ========================================================================
5760
5761    #[test]
5762    fn test_agent_result_clone() {
5763        let result = AgentResult {
5764            text: "output".to_string(),
5765            messages: vec![Message::user("hello")],
5766            usage: TokenUsage::default(),
5767            tool_calls_count: 3,
5768        };
5769        let cloned = result.clone();
5770        assert_eq!(cloned.text, result.text);
5771        assert_eq!(cloned.tool_calls_count, result.tool_calls_count);
5772    }
5773
5774    #[test]
5775    fn test_agent_result_debug() {
5776        let result = AgentResult {
5777            text: "output".to_string(),
5778            messages: vec![Message::user("hello")],
5779            usage: TokenUsage::default(),
5780            tool_calls_count: 3,
5781        };
5782        let debug = format!("{:?}", result);
5783        assert!(debug.contains("AgentResult"));
5784        assert!(debug.contains("output"));
5785    }
5786
5787    // ========================================================================
5788    // handle_post_execution_metadata Tests
5789    // ========================================================================
5790
5791    // ========================================================================
5792    // ToolCommand adapter tests
5793    // ========================================================================
5794
5795    #[tokio::test]
5796    async fn test_tool_command_command_type() {
5797        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5798        let cmd = ToolCommand {
5799            tool_executor: executor,
5800            tool_name: "read".to_string(),
5801            tool_args: serde_json::json!({"file": "test.rs"}),
5802            skill_registry: None,
5803            tool_context: test_tool_context(),
5804        };
5805        assert_eq!(cmd.command_type(), "read");
5806    }
5807
5808    #[tokio::test]
5809    async fn test_tool_command_payload() {
5810        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5811        let args = serde_json::json!({"file": "test.rs", "offset": 10});
5812        let cmd = ToolCommand {
5813            tool_executor: executor,
5814            tool_name: "read".to_string(),
5815            tool_args: args.clone(),
5816            skill_registry: None,
5817            tool_context: test_tool_context(),
5818        };
5819        assert_eq!(cmd.payload(), args);
5820    }
5821
5822    // ========================================================================
5823    // AgentLoop with queue builder tests
5824    // ========================================================================
5825
5826    #[tokio::test(flavor = "multi_thread")]
5827    async fn test_agent_loop_with_queue() {
5828        use tokio::sync::broadcast;
5829
5830        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5831            "Hello",
5832        )]));
5833        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5834        let config = AgentConfig::default();
5835
5836        let (event_tx, _) = broadcast::channel(100);
5837        let queue = SessionLaneQueue::new("test-session", SessionQueueConfig::default(), event_tx)
5838            .await
5839            .unwrap();
5840
5841        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config)
5842            .with_queue(Arc::new(queue));
5843
5844        assert!(agent.command_queue.is_some());
5845    }
5846
5847    #[tokio::test]
5848    async fn test_agent_loop_without_queue() {
5849        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5850            "Hello",
5851        )]));
5852        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5853        let config = AgentConfig::default();
5854
5855        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5856
5857        assert!(agent.command_queue.is_none());
5858    }
5859
5860    // ========================================================================
5861    // Parallel Plan Execution Tests
5862    // ========================================================================
5863
5864    #[tokio::test]
5865    async fn test_execute_plan_parallel_independent() {
5866        use crate::planning::{Complexity, ExecutionPlan, Task};
5867
5868        // 3 independent steps (no dependencies) — should all execute.
5869        // MockLlmClient needs one response per execute_loop call per step.
5870        let mock_client = Arc::new(MockLlmClient::new(vec![
5871            MockLlmClient::text_response("Step 1 done"),
5872            MockLlmClient::text_response("Step 2 done"),
5873            MockLlmClient::text_response("Step 3 done"),
5874        ]));
5875
5876        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5877        let config = AgentConfig::default();
5878        let agent = AgentLoop::new(
5879            mock_client.clone(),
5880            tool_executor,
5881            test_tool_context(),
5882            config,
5883        );
5884
5885        let mut plan = ExecutionPlan::new("Test parallel", Complexity::Simple);
5886        plan.add_step(Task::new("s1", "First step"));
5887        plan.add_step(Task::new("s2", "Second step"));
5888        plan.add_step(Task::new("s3", "Third step"));
5889
5890        let (tx, mut rx) = mpsc::channel(100);
5891        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
5892
5893        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
5894        assert_eq!(result.usage.total_tokens, 45);
5895
5896        // Verify we received StepStart and StepEnd events for all 3 steps
5897        let mut step_starts = Vec::new();
5898        let mut step_ends = Vec::new();
5899        rx.close();
5900        while let Some(event) = rx.recv().await {
5901            match event {
5902                AgentEvent::StepStart { step_id, .. } => step_starts.push(step_id),
5903                AgentEvent::StepEnd {
5904                    step_id, status, ..
5905                } => {
5906                    assert_eq!(status, TaskStatus::Completed);
5907                    step_ends.push(step_id);
5908                }
5909                _ => {}
5910            }
5911        }
5912        assert_eq!(step_starts.len(), 3);
5913        assert_eq!(step_ends.len(), 3);
5914    }
5915
5916    #[tokio::test]
5917    async fn test_execute_plan_respects_dependencies() {
5918        use crate::planning::{Complexity, ExecutionPlan, Task};
5919
5920        // s1 and s2 are independent (wave 1), s3 depends on both (wave 2).
5921        // This requires 3 responses total.
5922        let mock_client = Arc::new(MockLlmClient::new(vec![
5923            MockLlmClient::text_response("Step 1 done"),
5924            MockLlmClient::text_response("Step 2 done"),
5925            MockLlmClient::text_response("Step 3 done"),
5926        ]));
5927
5928        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5929        let config = AgentConfig::default();
5930        let agent = AgentLoop::new(
5931            mock_client.clone(),
5932            tool_executor,
5933            test_tool_context(),
5934            config,
5935        );
5936
5937        let mut plan = ExecutionPlan::new("Test deps", Complexity::Medium);
5938        plan.add_step(Task::new("s1", "Independent A"));
5939        plan.add_step(Task::new("s2", "Independent B"));
5940        plan.add_step(
5941            Task::new("s3", "Depends on A+B")
5942                .with_dependencies(vec!["s1".to_string(), "s2".to_string()]),
5943        );
5944
5945        let (tx, mut rx) = mpsc::channel(100);
5946        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
5947
5948        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
5949        assert_eq!(result.usage.total_tokens, 45);
5950
5951        // Verify ordering: s3's StepStart must come after s1 and s2's StepEnd
5952        let mut events = Vec::new();
5953        rx.close();
5954        while let Some(event) = rx.recv().await {
5955            match &event {
5956                AgentEvent::StepStart { step_id, .. } => {
5957                    events.push(format!("start:{}", step_id));
5958                }
5959                AgentEvent::StepEnd { step_id, .. } => {
5960                    events.push(format!("end:{}", step_id));
5961                }
5962                _ => {}
5963            }
5964        }
5965
5966        // s3 start must occur after both s1 end and s2 end
5967        let s1_end = events.iter().position(|e| e == "end:s1").unwrap();
5968        let s2_end = events.iter().position(|e| e == "end:s2").unwrap();
5969        let s3_start = events.iter().position(|e| e == "start:s3").unwrap();
5970        assert!(
5971            s3_start > s1_end,
5972            "s3 started before s1 ended: {:?}",
5973            events
5974        );
5975        assert!(
5976            s3_start > s2_end,
5977            "s3 started before s2 ended: {:?}",
5978            events
5979        );
5980
5981        // Final result should reflect step 3 (last sequential step)
5982        assert!(result.text.contains("Step 3 done") || !result.text.is_empty());
5983    }
5984
5985    #[tokio::test]
5986    async fn test_execute_plan_handles_step_failure() {
5987        use crate::planning::{Complexity, ExecutionPlan, Task};
5988
5989        // s1 succeeds, s2 depends on s1 (succeeds), s3 depends on nothing (succeeds),
5990        // s4 depends on a step that will fail (s_fail).
5991        // We simulate failure by providing no responses for s_fail's execute_loop.
5992        //
5993        // Simpler approach: s1 succeeds, s2 depends on s1 (will fail because no
5994        // mock response left), s3 is independent.
5995        // Layout: s1 (independent), s3 (independent) → wave 1 parallel
5996        //         s2 depends on s1 → wave 2
5997        //         s4 depends on s2 → wave 3 (should deadlock since s2 fails)
5998        let mock_client = Arc::new(MockLlmClient::new(vec![
5999            // Wave 1: s1 and s3 execute in parallel
6000            MockLlmClient::text_response("s1 done"),
6001            MockLlmClient::text_response("s3 done"),
6002            // Wave 2: s2 executes — but we give it no response, causing failure
6003            // Actually the MockLlmClient will fail with "No more mock responses"
6004        ]));
6005
6006        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6007        let config = AgentConfig::default();
6008        let agent = AgentLoop::new(
6009            mock_client.clone(),
6010            tool_executor,
6011            test_tool_context(),
6012            config,
6013        );
6014
6015        let mut plan = ExecutionPlan::new("Test failure", Complexity::Medium);
6016        plan.add_step(Task::new("s1", "Independent step"));
6017        plan.add_step(Task::new("s2", "Depends on s1").with_dependencies(vec!["s1".to_string()]));
6018        plan.add_step(Task::new("s3", "Another independent"));
6019        plan.add_step(Task::new("s4", "Depends on s2").with_dependencies(vec!["s2".to_string()]));
6020
6021        let (tx, mut rx) = mpsc::channel(100);
6022        let _result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
6023
6024        // s1 and s3 should succeed (wave 1), s2 should fail (wave 2),
6025        // s4 should never execute (deadlock — dep s2 failed, not completed)
6026        let mut completed_steps = Vec::new();
6027        let mut failed_steps = Vec::new();
6028        rx.close();
6029        while let Some(event) = rx.recv().await {
6030            if let AgentEvent::StepEnd {
6031                step_id, status, ..
6032            } = event
6033            {
6034                match status {
6035                    TaskStatus::Completed => completed_steps.push(step_id),
6036                    TaskStatus::Failed => failed_steps.push(step_id),
6037                    _ => {}
6038                }
6039            }
6040        }
6041
6042        assert!(
6043            completed_steps.contains(&"s1".to_string()),
6044            "s1 should complete"
6045        );
6046        assert!(
6047            completed_steps.contains(&"s3".to_string()),
6048            "s3 should complete"
6049        );
6050        assert!(failed_steps.contains(&"s2".to_string()), "s2 should fail");
6051        // s4 should NOT appear in either list — it was never started
6052        assert!(
6053            !completed_steps.contains(&"s4".to_string()),
6054            "s4 should not complete"
6055        );
6056        assert!(
6057            !failed_steps.contains(&"s4".to_string()),
6058            "s4 should not fail (never started)"
6059        );
6060    }
6061
6062    // ========================================================================
6063    // Phase 4: Error Recovery & Resilience Tests
6064    // ========================================================================
6065
/// Pins the resilience-related fields of `AgentConfig::default()`.
#[test]
fn test_agent_config_resilience_defaults() {
    let defaults = AgentConfig::default();
    let (parse_retries, tool_timeout, breaker_threshold) = (
        defaults.max_parse_retries,
        defaults.tool_timeout_ms,
        defaults.circuit_breaker_threshold,
    );

    assert_eq!(parse_retries, 2);
    assert_eq!(tool_timeout, None);
    assert_eq!(breaker_threshold, 3);
}
6073
6074    /// 4.1 — Parse error recovery: bails after max_parse_retries exceeded
6075    #[tokio::test]
6076    async fn test_parse_error_recovery_bails_after_threshold() {
6077        // 3 parse errors with max_parse_retries=2: count reaches 3 > 2 → bail
6078        let mock_client = Arc::new(MockLlmClient::new(vec![
6079            MockLlmClient::tool_call_response(
6080                "c1",
6081                "bash",
6082                serde_json::json!({"__parse_error": "unexpected token at position 5"}),
6083            ),
6084            MockLlmClient::tool_call_response(
6085                "c2",
6086                "bash",
6087                serde_json::json!({"__parse_error": "missing closing brace"}),
6088            ),
6089            MockLlmClient::tool_call_response(
6090                "c3",
6091                "bash",
6092                serde_json::json!({"__parse_error": "still broken"}),
6093            ),
6094            MockLlmClient::text_response("Done"), // never reached
6095        ]));
6096
6097        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6098        let config = AgentConfig {
6099            max_parse_retries: 2,
6100            ..AgentConfig::default()
6101        };
6102        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6103        let result = agent.execute(&[], "Do something", None).await;
6104        assert!(result.is_err(), "should bail after parse error threshold");
6105        let err = result.unwrap_err().to_string();
6106        assert!(
6107            err.contains("malformed tool arguments"),
6108            "error should mention malformed tool arguments, got: {}",
6109            err
6110        );
6111    }
6112
6113    /// 4.1 — Parse error recovery: counter resets after a valid tool execution
6114    #[tokio::test]
6115    async fn test_parse_error_counter_resets_on_success() {
6116        // 2 parse errors (= max_parse_retries, not yet exceeded)
6117        // Then a valid tool call (resets counter)
6118        // Then final text — should NOT bail
6119        let mock_client = Arc::new(MockLlmClient::new(vec![
6120            MockLlmClient::tool_call_response(
6121                "c1",
6122                "bash",
6123                serde_json::json!({"__parse_error": "bad args"}),
6124            ),
6125            MockLlmClient::tool_call_response(
6126                "c2",
6127                "bash",
6128                serde_json::json!({"__parse_error": "bad args again"}),
6129            ),
6130            // Valid call — resets parse_error_count to 0
6131            MockLlmClient::tool_call_response(
6132                "c3",
6133                "bash",
6134                serde_json::json!({"command": "echo ok"}),
6135            ),
6136            MockLlmClient::text_response("All done"),
6137        ]));
6138
6139        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6140        let config = AgentConfig {
6141            max_parse_retries: 2,
6142            ..AgentConfig::default()
6143        };
6144        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6145        let result = agent.execute(&[], "Do something", None).await;
6146        assert!(
6147            result.is_ok(),
6148            "should not bail — counter reset after successful tool, got: {:?}",
6149            result.err()
6150        );
6151        assert_eq!(result.unwrap().text, "All done");
6152    }
6153
6154    /// 4.2 — Tool timeout: slow tool produces a timeout error result; session continues
6155    #[tokio::test]
6156    async fn test_tool_timeout_produces_error_result() {
6157        let mock_client = Arc::new(MockLlmClient::new(vec![
6158            MockLlmClient::tool_call_response(
6159                "t1",
6160                "bash",
6161                serde_json::json!({"command": "sleep 10"}),
6162            ),
6163            MockLlmClient::text_response("The command timed out."),
6164        ]));
6165
6166        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6167        let config = AgentConfig {
6168            // 50ms — sleep 10 will never finish
6169            tool_timeout_ms: Some(50),
6170            ..AgentConfig::default()
6171        };
6172        let agent = AgentLoop::new(
6173            mock_client.clone(),
6174            tool_executor,
6175            test_tool_context(),
6176            config,
6177        );
6178        let result = agent.execute(&[], "Run sleep", None).await;
6179        assert!(
6180            result.is_ok(),
6181            "session should continue after tool timeout: {:?}",
6182            result.err()
6183        );
6184        assert_eq!(result.unwrap().text, "The command timed out.");
6185        // LLM called twice: initial request + response after timeout error
6186        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
6187    }
6188
6189    /// 4.2 — Tool timeout: tool that finishes before the deadline succeeds normally
6190    #[tokio::test]
6191    async fn test_tool_within_timeout_succeeds() {
6192        let mock_client = Arc::new(MockLlmClient::new(vec![
6193            MockLlmClient::tool_call_response(
6194                "t1",
6195                "bash",
6196                serde_json::json!({"command": "echo fast"}),
6197            ),
6198            MockLlmClient::text_response("Command succeeded."),
6199        ]));
6200
6201        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6202        let config = AgentConfig {
6203            tool_timeout_ms: Some(5_000), // 5 s — echo completes in <100ms
6204            ..AgentConfig::default()
6205        };
6206        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6207        let result = agent.execute(&[], "Run something fast", None).await;
6208        assert!(
6209            result.is_ok(),
6210            "fast tool should succeed: {:?}",
6211            result.err()
6212        );
6213        assert_eq!(result.unwrap().text, "Command succeeded.");
6214    }
6215
6216    /// 4.3 — Circuit breaker: retries non-streaming LLM failures up to threshold
6217    #[tokio::test]
6218    async fn test_circuit_breaker_retries_non_streaming() {
6219        // Empty response list → every call bails with "No more mock responses"
6220        // threshold=2 → tries twice, then bails with circuit-breaker message
6221        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6222
6223        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6224        let config = AgentConfig {
6225            circuit_breaker_threshold: 2,
6226            ..AgentConfig::default()
6227        };
6228        let agent = AgentLoop::new(
6229            mock_client.clone(),
6230            tool_executor,
6231            test_tool_context(),
6232            config,
6233        );
6234        let result = agent.execute(&[], "Hello", None).await;
6235        assert!(result.is_err(), "should fail when LLM always errors");
6236        let err = result.unwrap_err().to_string();
6237        assert!(
6238            err.contains("circuit breaker"),
6239            "error should mention circuit breaker, got: {}",
6240            err
6241        );
6242        assert_eq!(
6243            mock_client.call_count.load(Ordering::SeqCst),
6244            2,
6245            "should make exactly threshold=2 LLM calls"
6246        );
6247    }
6248
6249    /// 4.3 — Circuit breaker: threshold=1 bails on the very first failure
6250    #[tokio::test]
6251    async fn test_circuit_breaker_threshold_one_no_retry() {
6252        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6253
6254        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6255        let config = AgentConfig {
6256            circuit_breaker_threshold: 1,
6257            ..AgentConfig::default()
6258        };
6259        let agent = AgentLoop::new(
6260            mock_client.clone(),
6261            tool_executor,
6262            test_tool_context(),
6263            config,
6264        );
6265        let result = agent.execute(&[], "Hello", None).await;
6266        assert!(result.is_err());
6267        assert_eq!(
6268            mock_client.call_count.load(Ordering::SeqCst),
6269            1,
6270            "with threshold=1 exactly one attempt should be made"
6271        );
6272    }
6273
6274    /// 4.3 — Circuit breaker: succeeds when LLM recovers before hitting threshold
6275    #[tokio::test]
6276    async fn test_circuit_breaker_succeeds_if_llm_recovers() {
6277        // First call fails, second call succeeds; threshold=3 — recovery within threshold
6278        struct FailOnceThenSucceed {
6279            inner: MockLlmClient,
6280            failed_once: std::sync::atomic::AtomicBool,
6281            call_count: AtomicUsize,
6282        }
6283
6284        #[async_trait::async_trait]
6285        impl LlmClient for FailOnceThenSucceed {
6286            async fn complete(
6287                &self,
6288                messages: &[Message],
6289                system: Option<&str>,
6290                tools: &[ToolDefinition],
6291            ) -> Result<LlmResponse> {
6292                self.call_count.fetch_add(1, Ordering::SeqCst);
6293                let already_failed = self
6294                    .failed_once
6295                    .swap(true, std::sync::atomic::Ordering::SeqCst);
6296                if !already_failed {
6297                    anyhow::bail!("transient network error");
6298                }
6299                self.inner.complete(messages, system, tools).await
6300            }
6301
6302            async fn complete_streaming(
6303                &self,
6304                messages: &[Message],
6305                system: Option<&str>,
6306                tools: &[ToolDefinition],
6307            ) -> Result<tokio::sync::mpsc::Receiver<crate::llm::StreamEvent>> {
6308                self.inner.complete_streaming(messages, system, tools).await
6309            }
6310        }
6311
6312        let mock = Arc::new(FailOnceThenSucceed {
6313            inner: MockLlmClient::new(vec![MockLlmClient::text_response("Recovered!")]),
6314            failed_once: std::sync::atomic::AtomicBool::new(false),
6315            call_count: AtomicUsize::new(0),
6316        });
6317
6318        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6319        let config = AgentConfig {
6320            circuit_breaker_threshold: 3,
6321            ..AgentConfig::default()
6322        };
6323        let agent = AgentLoop::new(mock.clone(), tool_executor, test_tool_context(), config);
6324        let result = agent.execute(&[], "Hello", None).await;
6325        assert!(
6326            result.is_ok(),
6327            "should succeed when LLM recovers within threshold: {:?}",
6328            result.err()
6329        );
6330        assert_eq!(result.unwrap().text, "Recovered!");
6331        assert_eq!(
6332            mock.call_count.load(Ordering::SeqCst),
6333            2,
6334            "should have made exactly 2 calls (1 fail + 1 success)"
6335        );
6336    }
6337
6338    // ── Continuation detection tests ─────────────────────────────────────────
6339
/// Empty and whitespace-only text must be reported as incomplete.
#[test]
fn test_looks_incomplete_empty() {
    for blank in ["", "   "] {
        assert!(
            AgentLoop::looks_incomplete(blank),
            "expected {:?} to look incomplete",
            blank
        );
    }
}
6345
/// Text that ends with a colon must be reported as incomplete.
#[test]
fn test_looks_incomplete_trailing_colon() {
    for dangling in ["Let me check the file:", "Next steps:"] {
        assert!(
            AgentLoop::looks_incomplete(dangling),
            "expected {:?} to look incomplete",
            dangling
        );
    }
}
6351
/// Text that trails off with an ellipsis (three ASCII dots or the single `…`
/// character) must be reported as incomplete.
#[test]
fn test_looks_incomplete_ellipsis() {
    for trailing_off in ["Working on it...", "Processing…"] {
        assert!(
            AgentLoop::looks_incomplete(trailing_off),
            "expected {:?} to look incomplete",
            trailing_off
        );
    }
}
6357
/// Sentences that announce an upcoming action must be reported as incomplete.
#[test]
fn test_looks_incomplete_intent_phrases() {
    let announcements = [
        "I'll start by reading the file.",
        "Let me check the configuration.",
        "I will now run the tests.",
        "I need to update the Cargo.toml.",
    ];
    for announcement in announcements {
        assert!(
            AgentLoop::looks_incomplete(announcement),
            "expected {:?} to look incomplete",
            announcement
        );
    }
}
6371
/// Clear final answers must NOT be reported as incomplete, so they do not
/// trigger continuation.
#[test]
fn test_looks_complete_final_answer() {
    let final_answers = [
        "The tests pass. All changes have been applied successfully.",
        "Done. I've updated the three files and verified the build succeeds.",
        "42",
        "Yes.",
    ];
    for answer in final_answers {
        assert!(
            !AgentLoop::looks_incomplete(answer),
            "expected {:?} to look complete",
            answer
        );
    }
}
6384
/// A multi-line summary whose last line is a finished bullet must not be
/// reported as incomplete, even though its first line ends with a colon.
#[test]
fn test_looks_incomplete_multiline_complete() {
    let summary = [
        "Here is the summary:",
        "",
        "- Fixed the bug in agent.rs",
        "- All tests pass",
        "- Build succeeds",
    ]
    .join("\n");
    assert!(!AgentLoop::looks_incomplete(&summary));
}
6390}