Skip to main content

a3s_code_core/
agent.rs

//! Agent Loop Implementation
//!
//! The agent loop handles the core conversation cycle:
//! 1. User sends a prompt
//! 2. LLM generates a response (possibly with tool calls)
//! 3. If tool calls are present, execute them and send the results back
//! 4. Repeat until the LLM returns without tool calls
//!
//! This is what makes the agent "agentic": the LLM can chain tool calls
//! across multiple rounds to accomplish a task.
11
12use crate::context::{ContextProvider, ContextQuery, ContextResult};
13use crate::hitl::ConfirmationProvider;
14use crate::hooks::{
15    ErrorType, GenerateEndEvent, GenerateStartEvent, HookEvent, HookExecutor, HookResult,
16    OnErrorEvent, PostResponseEvent, PostToolUseEvent, PrePromptEvent, PreToolUseEvent,
17    TokenUsageInfo, ToolCallInfo, ToolResultData,
18};
19use crate::llm::{LlmClient, LlmResponse, Message, TokenUsage, ToolDefinition};
20use crate::permissions::{PermissionChecker, PermissionDecision};
21use crate::planning::{AgentGoal, ExecutionPlan, TaskStatus};
22use crate::prompts::{AgentStyle, DetectionConfidence, PlanningMode, SystemPromptSlots};
23use crate::queue::SessionCommand;
24use crate::session_lane_queue::SessionLaneQueue;
25use crate::text::truncate_utf8;
26use crate::tool_search::ToolIndex;
27use crate::tools::{ToolContext, ToolExecutor, ToolStreamEvent};
28use anyhow::{Context, Result};
29use async_trait::async_trait;
30use futures::future::join_all;
31use serde::{Deserialize, Serialize};
32use serde_json::Value;
33use std::sync::Arc;
34use std::time::Duration;
35use tokio::sync::{mpsc, RwLock};
36
/// Maximum number of tool execution rounds before stopping.
///
/// Used as the default value of [`AgentConfig::max_tool_rounds`].
const MAX_TOOL_ROUNDS: usize = 50;
39
/// Agent configuration
///
/// Bundles the prompt customization, tool list, optional providers
/// (security, permissions, HITL confirmation, context, hooks, skills, memory)
/// and the numeric safety limits used by the agent loop. See the
/// [`Default`] impl for the default value of each field.
#[derive(Clone)]
pub struct AgentConfig {
    /// Slot-based system prompt customization.
    ///
    /// Users can customize specific parts (role, guidelines, response style, extra)
    /// without overriding the core agentic capabilities. The default agentic core
    /// (tool usage, autonomous behavior, completion criteria) is always preserved.
    pub prompt_slots: SystemPromptSlots,
    /// Tool definitions carried by this configuration.
    ///
    /// Empty by default; the default impl notes that tools are provided by
    /// `ToolExecutor`.
    pub tools: Vec<ToolDefinition>,
    /// Maximum number of tool execution rounds before stopping
    /// (default: `MAX_TOOL_ROUNDS`).
    pub max_tool_rounds: usize,
    /// Optional security provider for input taint tracking and output sanitization
    pub security_provider: Option<Arc<dyn crate::security::SecurityProvider>>,
    /// Optional permission checker for tool execution control
    pub permission_checker: Option<Arc<dyn PermissionChecker>>,
    /// Optional confirmation manager for HITL (Human-in-the-Loop)
    pub confirmation_manager: Option<Arc<dyn ConfirmationProvider>>,
    /// Context providers for augmenting prompts with external context
    pub context_providers: Vec<Arc<dyn ContextProvider>>,
    /// Planning mode — Auto (detect from message), Enabled, or Disabled.
    pub planning_mode: PlanningMode,
    /// Enable goal tracking
    pub goal_tracking: bool,
    /// Optional hook engine for firing lifecycle events (PreToolUse, PostToolUse, etc.)
    pub hook_engine: Option<Arc<dyn HookExecutor>>,
    /// Optional skill registry for tool permission enforcement
    pub skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    /// Max consecutive malformed-tool-args errors before aborting (default: 2).
    ///
    /// When the LLM returns tool arguments with `__parse_error`, the error is
    /// fed back as a tool result. After this many consecutive parse errors the
    /// loop bails instead of retrying indefinitely.
    pub max_parse_retries: u32,
    /// Per-tool execution timeout in milliseconds (`None` = no timeout).
    ///
    /// When set, each tool execution is wrapped in `tokio::time::timeout`.
    /// A timeout produces an error result sent back to the LLM rather than
    /// crashing the session.
    pub tool_timeout_ms: Option<u64>,
    /// Circuit-breaker threshold: max consecutive LLM API failures before
    /// aborting (default: 3).
    ///
    /// In non-streaming mode, transient LLM failures are retried up to this
    /// many times (with short exponential backoff) before the loop bails.
    /// In streaming mode, any failure is fatal (events cannot be replayed).
    pub circuit_breaker_threshold: u32,
    /// Max consecutive identical tool signatures before aborting (default: 3).
    ///
    /// A tool signature is the exact combination of tool name + compact JSON
    /// arguments. This prevents the agent from getting stuck repeating the same
    /// tool call in a loop, for example repeatedly fetching the same URL.
    pub duplicate_tool_call_threshold: u32,
    /// Enable auto-compaction when context usage exceeds threshold.
    pub auto_compact: bool,
    /// Context usage percentage threshold to trigger auto-compaction (0.0 - 1.0).
    /// Default: 0.80 (80%).
    pub auto_compact_threshold: f32,
    /// Maximum context window size in tokens (used for auto-compact calculation).
    /// Default: 200_000.
    pub max_context_tokens: usize,
    /// LLM client reference for auto-compaction (needs to call LLM for summarization).
    pub llm_client: Option<Arc<dyn LlmClient>>,
    /// Optional agent memory for auto-remember after tool execution and recall before prompts.
    pub memory: Option<Arc<crate::memory::AgentMemory>>,
    /// Inject a continuation message when the LLM stops calling tools before the
    /// task is complete. Enabled by default. Set to `false` to disable.
    ///
    /// When enabled, if the LLM produces a response with no tool calls but the
    /// response text looks like an intermediate step (not a final answer), the
    /// loop injects [`crate::prompts::CONTINUATION`] as a user message and
    /// continues for up to `max_continuation_turns` additional turns.
    pub continuation_enabled: bool,
    /// Maximum number of continuation injections per execution (default: 3).
    ///
    /// Prevents infinite loops when the LLM repeatedly stops without completing.
    pub max_continuation_turns: u32,
    /// Optional tool search index for filtering tools per-turn.
    ///
    /// When set, only tools matching the user prompt are sent to the LLM,
    /// reducing context usage when many MCP tools are registered.
    pub tool_index: Option<ToolIndex>,
}
122
123impl std::fmt::Debug for AgentConfig {
124    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
125        f.debug_struct("AgentConfig")
126            .field("prompt_slots", &self.prompt_slots)
127            .field("tools", &self.tools)
128            .field("max_tool_rounds", &self.max_tool_rounds)
129            .field("security_provider", &self.security_provider.is_some())
130            .field("permission_checker", &self.permission_checker.is_some())
131            .field("confirmation_manager", &self.confirmation_manager.is_some())
132            .field("context_providers", &self.context_providers.len())
133            .field("planning_mode", &self.planning_mode)
134            .field("goal_tracking", &self.goal_tracking)
135            .field("hook_engine", &self.hook_engine.is_some())
136            .field(
137                "skill_registry",
138                &self.skill_registry.as_ref().map(|r| r.len()),
139            )
140            .field("max_parse_retries", &self.max_parse_retries)
141            .field("tool_timeout_ms", &self.tool_timeout_ms)
142            .field("circuit_breaker_threshold", &self.circuit_breaker_threshold)
143            .field(
144                "duplicate_tool_call_threshold",
145                &self.duplicate_tool_call_threshold,
146            )
147            .field("auto_compact", &self.auto_compact)
148            .field("auto_compact_threshold", &self.auto_compact_threshold)
149            .field("max_context_tokens", &self.max_context_tokens)
150            .field("continuation_enabled", &self.continuation_enabled)
151            .field("max_continuation_turns", &self.max_continuation_turns)
152            .field("memory", &self.memory.is_some())
153            .field("tool_index", &self.tool_index.as_ref().map(|i| i.len()))
154            .finish()
155    }
156}
157
158impl Default for AgentConfig {
159    fn default() -> Self {
160        Self {
161            prompt_slots: SystemPromptSlots::default(),
162            tools: Vec::new(), // Tools are provided by ToolExecutor
163            max_tool_rounds: MAX_TOOL_ROUNDS,
164            security_provider: None,
165            permission_checker: None,
166            confirmation_manager: None,
167            context_providers: Vec::new(),
168            planning_mode: PlanningMode::default(),
169            goal_tracking: false,
170            hook_engine: None,
171            skill_registry: Some(Arc::new(crate::skills::SkillRegistry::with_builtins())),
172            max_parse_retries: 2,
173            tool_timeout_ms: None,
174            circuit_breaker_threshold: 3,
175            duplicate_tool_call_threshold: 3,
176            auto_compact: false,
177            auto_compact_threshold: 0.80,
178            max_context_tokens: 200_000,
179            llm_client: None,
180            memory: None,
181            continuation_enabled: true,
182            max_continuation_turns: 3,
183            tool_index: None,
184        }
185    }
186}
187
/// Events emitted during agent execution
///
/// Subscribe via [`Session::subscribe_events()`](crate::session::Session::subscribe_events).
/// New variants may be added in minor releases — always include a wildcard arm
/// (`_ => {}`) when matching.
///
/// Serialization is internally tagged: each event is an object whose `type`
/// field holds the variant's `#[serde(rename = "...")]` value, with the
/// variant's fields alongside it.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum AgentEvent {
    /// Agent started processing
    #[serde(rename = "agent_start")]
    Start { prompt: String },

    /// Runtime agent style/mode selected for the current execution.
    #[serde(rename = "agent_mode_changed")]
    AgentModeChanged {
        /// Stable UI/runtime mode label, e.g. "general", "planning", "explore".
        mode: String,
        /// Canonical built-in agent name associated with this mode.
        agent: String,
        /// Human-readable explanation of the selected style.
        description: String,
    },

    /// LLM turn started
    #[serde(rename = "turn_start")]
    TurnStart { turn: usize },

    /// Text delta from streaming
    #[serde(rename = "text_delta")]
    TextDelta { text: String },

    /// Tool execution started
    #[serde(rename = "tool_start")]
    ToolStart { id: String, name: String },

    /// Tool input delta from streaming (partial JSON arguments)
    #[serde(rename = "tool_input_delta")]
    ToolInputDelta { delta: String },

    /// Tool execution completed
    #[serde(rename = "tool_end")]
    ToolEnd {
        id: String,
        name: String,
        output: String,
        exit_code: i32,
        /// Omitted from the serialized event when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        metadata: Option<serde_json::Value>,
    },

    /// Intermediate tool output (streaming delta)
    #[serde(rename = "tool_output_delta")]
    ToolOutputDelta {
        id: String,
        name: String,
        delta: String,
    },

    /// LLM turn completed
    #[serde(rename = "turn_end")]
    TurnEnd { turn: usize, usage: TokenUsage },

    /// Agent completed
    #[serde(rename = "agent_end")]
    End {
        text: String,
        usage: TokenUsage,
        /// Omitted from the serialized event when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        meta: Option<crate::llm::LlmResponseMeta>,
    },

    /// Error occurred
    #[serde(rename = "error")]
    Error { message: String },

    /// Tool execution requires confirmation (HITL)
    #[serde(rename = "confirmation_required")]
    ConfirmationRequired {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        timeout_ms: u64,
    },

    /// Confirmation received from user (HITL)
    #[serde(rename = "confirmation_received")]
    ConfirmationReceived {
        tool_id: String,
        approved: bool,
        reason: Option<String>,
    },

    /// Confirmation timed out (HITL)
    #[serde(rename = "confirmation_timeout")]
    ConfirmationTimeout {
        tool_id: String,
        action_taken: String, // "rejected" or "auto_approved"
    },

    /// External task pending (needs SDK processing)
    #[serde(rename = "external_task_pending")]
    ExternalTaskPending {
        task_id: String,
        session_id: String,
        lane: crate::hitl::SessionLane,
        command_type: String,
        payload: serde_json::Value,
        timeout_ms: u64,
    },

    /// External task completed
    #[serde(rename = "external_task_completed")]
    ExternalTaskCompleted {
        task_id: String,
        session_id: String,
        success: bool,
    },

    /// Tool execution denied by permission policy
    #[serde(rename = "permission_denied")]
    PermissionDenied {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        reason: String,
    },

    /// Context resolution started
    #[serde(rename = "context_resolving")]
    ContextResolving { providers: Vec<String> },

    /// Context resolution completed
    #[serde(rename = "context_resolved")]
    ContextResolved {
        total_items: usize,
        total_tokens: usize,
    },

    // ========================================================================
    // a3s-lane integration events
    // ========================================================================
    /// Command moved to dead letter queue after exhausting retries
    #[serde(rename = "command_dead_lettered")]
    CommandDeadLettered {
        command_id: String,
        command_type: String,
        lane: String,
        error: String,
        attempts: u32,
    },

    /// Command retry attempt
    #[serde(rename = "command_retry")]
    CommandRetry {
        command_id: String,
        command_type: String,
        lane: String,
        attempt: u32,
        delay_ms: u64,
    },

    /// Queue alert (depth warning, latency alert, etc.)
    #[serde(rename = "queue_alert")]
    QueueAlert {
        level: String,
        alert_type: String,
        message: String,
    },

    // ========================================================================
    // Task tracking events
    // ========================================================================
    /// Task list updated
    #[serde(rename = "task_updated")]
    TaskUpdated {
        session_id: String,
        tasks: Vec<crate::planning::Task>,
    },

    // ========================================================================
    // Memory System events (Phase 3)
    // ========================================================================
    /// Memory stored
    #[serde(rename = "memory_stored")]
    MemoryStored {
        memory_id: String,
        memory_type: String,
        importance: f32,
        tags: Vec<String>,
    },

    /// Memory recalled
    #[serde(rename = "memory_recalled")]
    MemoryRecalled {
        memory_id: String,
        content: String,
        relevance: f32,
    },

    /// Memories searched
    #[serde(rename = "memories_searched")]
    MemoriesSearched {
        query: Option<String>,
        tags: Vec<String>,
        result_count: usize,
    },

    /// Memory cleared
    #[serde(rename = "memory_cleared")]
    MemoryCleared {
        tier: String, // "long_term", "short_term", "working"
        count: u64,
    },

    // ========================================================================
    // Subagent events
    // ========================================================================
    /// Subagent task started
    #[serde(rename = "subagent_start")]
    SubagentStart {
        /// Unique task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Parent session ID
        parent_session_id: String,
        /// Agent type (e.g., "explore", "general")
        agent: String,
        /// Short description of the task
        description: String,
    },

    /// Subagent task progress update
    #[serde(rename = "subagent_progress")]
    SubagentProgress {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Progress status message
        status: String,
        /// Additional metadata
        metadata: serde_json::Value,
    },

    /// Subagent task completed
    #[serde(rename = "subagent_end")]
    SubagentEnd {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Agent type
        agent: String,
        /// Task output/result
        output: String,
        /// Whether the task succeeded
        success: bool,
    },

    // ========================================================================
    // Planning and Goal Tracking Events (Phase 1)
    // ========================================================================
    /// Planning phase started
    #[serde(rename = "planning_start")]
    PlanningStart { prompt: String },

    /// Planning phase completed
    #[serde(rename = "planning_end")]
    PlanningEnd {
        plan: ExecutionPlan,
        estimated_steps: usize,
    },

    /// Step execution started
    #[serde(rename = "step_start")]
    StepStart {
        step_id: String,
        description: String,
        step_number: usize,
        total_steps: usize,
    },

    /// Step execution completed
    #[serde(rename = "step_end")]
    StepEnd {
        step_id: String,
        status: TaskStatus,
        step_number: usize,
        total_steps: usize,
    },

    /// Goal extracted from prompt
    #[serde(rename = "goal_extracted")]
    GoalExtracted { goal: AgentGoal },

    /// Goal progress update
    #[serde(rename = "goal_progress")]
    GoalProgress {
        goal: String,
        progress: f32,
        completed_steps: usize,
        total_steps: usize,
    },

    /// Goal achieved
    #[serde(rename = "goal_achieved")]
    GoalAchieved {
        goal: String,
        total_steps: usize,
        duration_ms: i64,
    },

    // ========================================================================
    // Context Compaction events
    // ========================================================================
    /// Context automatically compacted due to high usage
    #[serde(rename = "context_compacted")]
    ContextCompacted {
        session_id: String,
        before_messages: usize,
        after_messages: usize,
        percent_before: f32,
    },

    // ========================================================================
    // Persistence events
    // ========================================================================
    /// Session persistence failed — SDK clients should handle this
    #[serde(rename = "persistence_failed")]
    PersistenceFailed {
        session_id: String,
        operation: String,
        error: String,
    },

    // ========================================================================
    // Side question (btw)
    // ========================================================================
    /// Ephemeral side question answered.
    ///
    /// Emitted by [`crate::agent_api::AgentSession::btw()`] in streaming mode.
    /// The answer is never added to conversation history.
    #[serde(rename = "btw_answer")]
    BtwAnswer {
        question: String,
        answer: String,
        usage: TokenUsage,
    },
}
539
/// Result of agent execution
///
/// NOTE(review): the field descriptions below are inferred from names and
/// types; the code that populates this struct is outside this view — confirm.
#[derive(Debug, Clone)]
pub struct AgentResult {
    /// Response text (presumably the final LLM answer).
    pub text: String,
    /// Conversation messages (presumably the full history after the run).
    pub messages: Vec<Message>,
    /// Token usage reported for the execution.
    pub usage: TokenUsage,
    /// Number of tool calls counted during the execution.
    pub tool_calls_count: usize,
}
548
549// ============================================================================
550// ToolCommand — bridges ToolExecutor to SessionCommand for queue submission
551// ============================================================================
552
/// Adapter that implements `SessionCommand` for tool execution via the queue.
///
/// Wraps a `ToolExecutor` call so it can be submitted to `SessionLaneQueue`.
pub struct ToolCommand {
    /// Executor that actually runs the tool.
    tool_executor: Arc<ToolExecutor>,
    /// Name of the tool to run; also reported as the command type.
    tool_name: String,
    /// Arguments passed to the tool; also reported as the command payload.
    tool_args: Value,
    /// Context handed to the executor together with the name and arguments.
    tool_context: ToolContext,
    /// When present, instruction skills in this registry can restrict which
    /// tools may run (checked before execution).
    skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
}
563
564impl ToolCommand {
565    /// Create a new ToolCommand
566    pub fn new(
567        tool_executor: Arc<ToolExecutor>,
568        tool_name: String,
569        tool_args: Value,
570        tool_context: ToolContext,
571        skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
572    ) -> Self {
573        Self {
574            tool_executor,
575            tool_name,
576            tool_args,
577            tool_context,
578            skill_registry,
579        }
580    }
581}
582
583#[async_trait]
584impl SessionCommand for ToolCommand {
585    async fn execute(&self) -> Result<Value> {
586        // Check skill-based tool permissions
587        if let Some(registry) = &self.skill_registry {
588            let instruction_skills = registry.by_kind(crate::skills::SkillKind::Instruction);
589
590            // If there are instruction skills with tool restrictions, check permissions
591            let has_restrictions = instruction_skills.iter().any(|s| s.allowed_tools.is_some());
592
593            if has_restrictions {
594                let mut allowed = false;
595
596                for skill in &instruction_skills {
597                    if skill.is_tool_allowed(&self.tool_name) {
598                        allowed = true;
599                        break;
600                    }
601                }
602
603                if !allowed {
604                    return Err(anyhow::anyhow!(
605                        "Tool '{}' is not allowed by any active skill. Active skills restrict tools to their allowed-tools lists.",
606                        self.tool_name
607                    ));
608                }
609            }
610        }
611
612        // Execute the tool
613        let result = self
614            .tool_executor
615            .execute_with_context(&self.tool_name, &self.tool_args, &self.tool_context)
616            .await?;
617        Ok(serde_json::json!({
618            "output": result.output,
619            "exit_code": result.exit_code,
620            "metadata": result.metadata,
621        }))
622    }
623
624    fn command_type(&self) -> &str {
625        &self.tool_name
626    }
627
628    fn payload(&self) -> Value {
629        self.tool_args.clone()
630    }
631}
632
633// ============================================================================
634// AgentLoop
635// ============================================================================
636
/// Agent loop executor
///
/// Holds the LLM client, tool executor, tool context and configuration
/// supplied to `new`, plus optional collaborators attached via the
/// `with_*` builder methods.
#[derive(Clone)]
pub struct AgentLoop {
    /// LLM client supplied at construction.
    llm_client: Arc<dyn LlmClient>,
    /// Executor used to run tools (`execute_with_context`).
    tool_executor: Arc<ToolExecutor>,
    /// Tool context supplied at construction.
    /// NOTE(review): presumably the default context for tool calls — confirm.
    tool_context: ToolContext,
    /// Agent configuration (limits, timeouts, optional providers).
    config: AgentConfig,
    /// Optional per-session tool metrics collector
    tool_metrics: Option<Arc<RwLock<crate::telemetry::ToolMetrics>>>,
    /// Optional lane queue for priority-based tool execution
    command_queue: Option<Arc<SessionLaneQueue>>,
    /// Optional progress tracker for real-time tool/token usage tracking
    progress_tracker: Option<Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>>,
    /// Optional task manager for centralized task lifecycle tracking
    task_manager: Option<Arc<crate::task::TaskManager>>,
}
653
654impl AgentLoop {
655    pub fn new(
656        llm_client: Arc<dyn LlmClient>,
657        tool_executor: Arc<ToolExecutor>,
658        tool_context: ToolContext,
659        config: AgentConfig,
660    ) -> Self {
661        Self {
662            llm_client,
663            tool_executor,
664            tool_context,
665            config,
666            tool_metrics: None,
667            command_queue: None,
668            progress_tracker: None,
669            task_manager: None,
670        }
671    }
672
673    /// Set the progress tracker for real-time tool/token usage tracking.
674    pub fn with_progress_tracker(
675        mut self,
676        tracker: Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>,
677    ) -> Self {
678        self.progress_tracker = Some(tracker);
679        self
680    }
681
682    /// Set the task manager for centralized task lifecycle tracking.
683    pub fn with_task_manager(mut self, manager: Arc<crate::task::TaskManager>) -> Self {
684        self.task_manager = Some(manager);
685        self
686    }
687
688    /// Set the tool metrics collector for this agent loop
689    pub fn with_tool_metrics(
690        mut self,
691        metrics: Arc<RwLock<crate::telemetry::ToolMetrics>>,
692    ) -> Self {
693        self.tool_metrics = Some(metrics);
694        self
695    }
696
697    /// Set the lane queue for priority-based tool execution.
698    ///
699    /// When set, tools are routed through the lane queue which supports
700    /// External task handling for multi-machine parallel processing.
701    pub fn with_queue(mut self, queue: Arc<SessionLaneQueue>) -> Self {
702        self.command_queue = Some(queue);
703        self
704    }
705
706    /// Track a tool call result in the progress tracker.
707    fn track_tool_result(&self, tool_name: &str, args: &serde_json::Value, exit_code: i32) {
708        if let Some(ref tracker) = self.progress_tracker {
709            let args_summary = Self::compact_json_args(args);
710            let success = exit_code == 0;
711            if let Ok(mut guard) = tracker.try_write() {
712                guard.track_tool_call(tool_name, args_summary, success);
713            }
714        }
715    }
716
717    /// Compact JSON arguments to a short summary string for tracking.
718    fn compact_json_args(args: &serde_json::Value) -> String {
719        let raw = match args {
720            serde_json::Value::Null => String::new(),
721            serde_json::Value::String(s) => s.clone(),
722            _ => serde_json::to_string(args).unwrap_or_default(),
723        };
724        let compact = raw.split_whitespace().collect::<Vec<_>>().join(" ");
725        if compact.len() > 180 {
726            format!("{}...", truncate_utf8(&compact, 180))
727        } else {
728            compact
729        }
730    }
731
732    /// Execute a tool, applying the configured timeout if set.
733    ///
734    /// On timeout, returns an error describing which tool timed out and after
735    /// how many milliseconds. The caller converts this to a tool-result error
736    /// message that is fed back to the LLM.
737    async fn execute_tool_timed(
738        &self,
739        name: &str,
740        args: &serde_json::Value,
741        ctx: &ToolContext,
742    ) -> anyhow::Result<crate::tools::ToolResult> {
743        let fut = self.tool_executor.execute_with_context(name, args, ctx);
744        if let Some(timeout_ms) = self.config.tool_timeout_ms {
745            match tokio::time::timeout(Duration::from_millis(timeout_ms), fut).await {
746                Ok(result) => result,
747                Err(_) => Err(anyhow::anyhow!(
748                    "Tool '{}' timed out after {}ms",
749                    name,
750                    timeout_ms
751                )),
752            }
753        } else {
754            fut.await
755        }
756    }
757
758    /// Convert a tool execution result into the (output, exit_code, is_error, metadata, images) tuple.
759    fn tool_result_to_tuple(
760        result: anyhow::Result<crate::tools::ToolResult>,
761    ) -> (
762        String,
763        i32,
764        bool,
765        Option<serde_json::Value>,
766        Vec<crate::llm::Attachment>,
767    ) {
768        match result {
769            Ok(r) => (
770                r.output,
771                r.exit_code,
772                r.exit_code != 0,
773                r.metadata,
774                r.images,
775            ),
776            Err(e) => {
777                let msg = e.to_string();
778                // Classify the error so the LLM knows whether retrying makes sense.
779                let hint = if Self::is_transient_error(&msg) {
780                    " [transient — you may retry this tool call]"
781                } else {
782                    " [permanent — do not retry without changing the arguments]"
783                };
784                (
785                    format!("Tool execution error: {}{}", msg, hint),
786                    1,
787                    true,
788                    None,
789                    Vec::new(),
790                )
791            }
792        }
793    }
794
795    /// Inspect the workspace for well-known project marker files and return a short
796    /// `## Project Context` section that the agent can use without any manual configuration.
797    /// Returns an empty string when the workspace type cannot be determined.
798    fn detect_project_hint(workspace: &std::path::Path) -> String {
799        struct Marker {
800            file: &'static str,
801            lang: &'static str,
802            tip: &'static str,
803        }
804
805        let markers = [
806            Marker {
807                file: "Cargo.toml",
808                lang: "Rust",
809                tip: "Use `cargo build`, `cargo test`, `cargo clippy`, and `cargo fmt`. \
810                  Prefer `anyhow` / `thiserror` for error handling. \
811                  Follow the Microsoft Rust Guidelines (no panics in library code, \
812                  async-first with Tokio).",
813            },
814            Marker {
815                file: "package.json",
816                lang: "Node.js / TypeScript",
817                tip: "Check `package.json` for the package manager (npm/yarn/pnpm/bun) \
818                  and available scripts. Prefer TypeScript with strict mode. \
819                  Use ESM imports unless the project is CommonJS.",
820            },
821            Marker {
822                file: "pyproject.toml",
823                lang: "Python",
824                tip: "Use the package manager declared in `pyproject.toml` \
825                  (uv, poetry, hatch, etc.). Prefer type hints and async/await for I/O.",
826            },
827            Marker {
828                file: "setup.py",
829                lang: "Python",
830                tip: "Legacy Python project. Prefer type hints and async/await for I/O.",
831            },
832            Marker {
833                file: "requirements.txt",
834                lang: "Python",
835                tip: "Python project with pip-style dependencies. \
836                  Prefer type hints and async/await for I/O.",
837            },
838            Marker {
839                file: "go.mod",
840                lang: "Go",
841                tip: "Use `go build ./...` and `go test ./...`. \
842                  Follow standard Go project layout. Use `gofmt` for formatting.",
843            },
844            Marker {
845                file: "pom.xml",
846                lang: "Java / Maven",
847                tip: "Use `mvn compile`, `mvn test`, `mvn package`. \
848                  Follow standard Maven project structure.",
849            },
850            Marker {
851                file: "build.gradle",
852                lang: "Java / Gradle",
853                tip: "Use `./gradlew build` and `./gradlew test`. \
854                  Follow standard Gradle project structure.",
855            },
856            Marker {
857                file: "build.gradle.kts",
858                lang: "Kotlin / Gradle",
859                tip: "Use `./gradlew build` and `./gradlew test`. \
860                  Prefer Kotlin coroutines for async work.",
861            },
862            Marker {
863                file: "CMakeLists.txt",
864                lang: "C / C++",
865                tip: "Use `cmake -B build && cmake --build build`. \
866                  Check for `compile_commands.json` for IDE tooling.",
867            },
868            Marker {
869                file: "Makefile",
870                lang: "C / C++ (or generic)",
871                tip: "Use `make` or `make <target>`. \
872                  Check available targets with `make help` or by reading the Makefile.",
873            },
874        ];
875
876        // Check for C# / .NET — no single fixed filename, so glob for *.csproj / *.sln
877        let is_dotnet = workspace.join("*.csproj").exists() || {
878            // Fast check: look for any .csproj or .sln in the workspace root
879            std::fs::read_dir(workspace)
880                .map(|entries| {
881                    entries.flatten().any(|e| {
882                        let name = e.file_name();
883                        let s = name.to_string_lossy();
884                        s.ends_with(".csproj") || s.ends_with(".sln")
885                    })
886                })
887                .unwrap_or(false)
888        };
889
890        if is_dotnet {
891            return "## Project Context\n\nThis is a **C# / .NET** project. \
892             Use `dotnet build`, `dotnet test`, and `dotnet run`. \
893             Follow C# coding conventions and async/await patterns."
894                .to_string();
895        }
896
897        for marker in &markers {
898            if workspace.join(marker.file).exists() {
899                return format!(
900                    "## Project Context\n\nThis is a **{}** project. {}",
901                    marker.lang, marker.tip
902                );
903            }
904        }
905
906        String::new()
907    }
908
909    /// Returns `true` for errors that are likely transient (network, timeout, I/O contention).
910    /// Used to annotate tool error messages so the LLM knows whether retrying is safe.
911    fn is_transient_error(msg: &str) -> bool {
912        let lower = msg.to_lowercase();
913        lower.contains("timeout")
914        || lower.contains("timed out")
915        || lower.contains("connection refused")
916        || lower.contains("connection reset")
917        || lower.contains("broken pipe")
918        || lower.contains("temporarily unavailable")
919        || lower.contains("resource temporarily unavailable")
920        || lower.contains("os error 11")  // EAGAIN
921        || lower.contains("os error 35")  // EAGAIN on macOS
922        || lower.contains("rate limit")
923        || lower.contains("too many requests")
924        || lower.contains("service unavailable")
925        || lower.contains("network unreachable")
926    }
927
928    /// Returns `true` when a tool writes a file and is safe to run concurrently with other
929    /// independent writes (no ordering dependencies, no side-channel output).
930    fn is_parallel_safe_write(name: &str, _args: &serde_json::Value) -> bool {
931        matches!(
932            name,
933            "write_file" | "edit_file" | "create_file" | "append_to_file" | "replace_in_file"
934        )
935    }
936
937    /// Extract the target file path from write-tool arguments so we can check for conflicts.
938    fn extract_write_path(args: &serde_json::Value) -> Option<String> {
939        // write_file / create_file / append_to_file / replace_in_file use "path"
940        // edit_file uses "path" as well
941        args.get("path")
942            .and_then(|v| v.as_str())
943            .map(|s| s.to_string())
944    }
945
946    /// Execute a tool through the lane queue (if configured) or directly.
947    /// Wraps execution in task lifecycle if task_manager is configured.
948    async fn execute_tool_queued_or_direct(
949        &self,
950        name: &str,
951        args: &serde_json::Value,
952        ctx: &ToolContext,
953    ) -> anyhow::Result<crate::tools::ToolResult> {
954        // Create task for this tool execution if task_manager is available
955        let task_id = if let Some(ref tm) = self.task_manager {
956            let task = crate::task::Task::tool(name, args.clone());
957            let id = task.id;
958            tm.spawn(task);
959            // Start the task immediately
960            let _ = tm.start(id);
961            Some(id)
962        } else {
963            None
964        };
965
966        let result = self
967            .execute_tool_queued_or_direct_inner(name, args, ctx)
968            .await;
969
970        // Complete or fail the task based on result
971        if let Some(ref tm) = self.task_manager {
972            if let Some(tid) = task_id {
973                match &result {
974                    Ok(r) => {
975                        let output = serde_json::json!({
976                            "output": r.output.clone(),
977                            "exit_code": r.exit_code,
978                        });
979                        let _ = tm.complete(tid, Some(output));
980                    }
981                    Err(e) => {
982                        let _ = tm.fail(tid, e.to_string());
983                    }
984                }
985            }
986        }
987
988        result
989    }
990
991    /// Inner execution without task lifecycle wrapping.
992    async fn execute_tool_queued_or_direct_inner(
993        &self,
994        name: &str,
995        args: &serde_json::Value,
996        ctx: &ToolContext,
997    ) -> anyhow::Result<crate::tools::ToolResult> {
998        if let Some(ref queue) = self.command_queue {
999            let command = ToolCommand::new(
1000                Arc::clone(&self.tool_executor),
1001                name.to_string(),
1002                args.clone(),
1003                ctx.clone(),
1004                self.config.skill_registry.clone(),
1005            );
1006            let rx = queue.submit_by_tool(name, Box::new(command)).await;
1007            match rx.await {
1008                Ok(Ok(value)) => {
1009                    let output = value["output"]
1010                        .as_str()
1011                        .ok_or_else(|| {
1012                            anyhow::anyhow!(
1013                                "Queue result missing 'output' field for tool '{}'",
1014                                name
1015                            )
1016                        })?
1017                        .to_string();
1018                    let exit_code = value["exit_code"].as_i64().unwrap_or(0) as i32;
1019                    return Ok(crate::tools::ToolResult {
1020                        name: name.to_string(),
1021                        output,
1022                        exit_code,
1023                        metadata: None,
1024                        images: Vec::new(),
1025                    });
1026                }
1027                Ok(Err(e)) => {
1028                    tracing::warn!(
1029                        "Queue execution failed for tool '{}', falling back to direct: {}",
1030                        name,
1031                        e
1032                    );
1033                }
1034                Err(_) => {
1035                    tracing::warn!(
1036                        "Queue channel closed for tool '{}', falling back to direct",
1037                        name
1038                    );
1039                }
1040            }
1041        }
1042        self.execute_tool_timed(name, args, ctx).await
1043    }
1044
1045    /// Call the LLM, handling streaming vs non-streaming internally.
1046    ///
1047    /// Streaming events (`TextDelta`, `ToolStart`) are forwarded to `event_tx`
1048    /// as they arrive. Non-streaming mode simply awaits the complete response.
1049    ///
1050    /// When a `ToolIndex` is configured, tools are filtered per-turn based on
1051    /// the last user message, reducing context usage with large tool sets.
1052    ///
1053    /// Returns `Err` on any LLM API failure. The circuit breaker in
1054    /// `execute_loop` wraps this call with retry logic for non-streaming mode.
1055    async fn call_llm(
1056        &self,
1057        messages: &[Message],
1058        system: Option<&str>,
1059        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1060        cancel_token: &tokio_util::sync::CancellationToken,
1061    ) -> anyhow::Result<LlmResponse> {
1062        // Filter tools through ToolIndex if configured
1063        let tools = if let Some(ref index) = self.config.tool_index {
1064            let query = messages
1065                .iter()
1066                .rev()
1067                .find(|m| m.role == "user")
1068                .and_then(|m| {
1069                    m.content.iter().find_map(|b| match b {
1070                        crate::llm::ContentBlock::Text { text } => Some(text.as_str()),
1071                        _ => None,
1072                    })
1073                })
1074                .unwrap_or("");
1075            let matches = index.search(query, index.len());
1076            let matched_names: std::collections::HashSet<&str> =
1077                matches.iter().map(|m| m.name.as_str()).collect();
1078            self.config
1079                .tools
1080                .iter()
1081                .filter(|t| matched_names.contains(t.name.as_str()))
1082                .cloned()
1083                .collect::<Vec<_>>()
1084        } else {
1085            self.config.tools.clone()
1086        };
1087
1088        if event_tx.is_some() {
1089            let mut stream_rx = match self
1090                .llm_client
1091                .complete_streaming(messages, system, &tools)
1092                .await
1093            {
1094                Ok(rx) => rx,
1095                Err(stream_error) => {
1096                    tracing::warn!(
1097                        error = %stream_error,
1098                        "LLM streaming setup failed; falling back to non-streaming completion"
1099                    );
1100                    return self
1101                        .llm_client
1102                        .complete(messages, system, &tools)
1103                        .await
1104                        .with_context(|| {
1105                            format!(
1106                                "LLM streaming call failed ({stream_error}); non-streaming fallback also failed"
1107                            )
1108                        });
1109                }
1110            };
1111
1112            let mut final_response: Option<LlmResponse> = None;
1113            loop {
1114                tokio::select! {
1115                    _ = cancel_token.cancelled() => {
1116                        tracing::info!("🛑 LLM streaming cancelled by CancellationToken");
1117                        anyhow::bail!("Operation cancelled by user");
1118                    }
1119                    event = stream_rx.recv() => {
1120                        match event {
1121                            Some(crate::llm::StreamEvent::TextDelta(text)) => {
1122                                if let Some(tx) = event_tx {
1123                                    tx.send(AgentEvent::TextDelta { text }).await.ok();
1124                                }
1125                            }
1126                            Some(crate::llm::StreamEvent::ToolUseStart { id, name }) => {
1127                                if let Some(tx) = event_tx {
1128                                    tx.send(AgentEvent::ToolStart { id, name }).await.ok();
1129                                }
1130                            }
1131                            Some(crate::llm::StreamEvent::ToolUseInputDelta(delta)) => {
1132                                if let Some(tx) = event_tx {
1133                                    tx.send(AgentEvent::ToolInputDelta { delta }).await.ok();
1134                                }
1135                            }
1136                            Some(crate::llm::StreamEvent::Done(resp)) => {
1137                                final_response = Some(resp);
1138                                break;
1139                            }
1140                            None => break,
1141                        }
1142                    }
1143                }
1144            }
1145            final_response.context("Stream ended without final response")
1146        } else {
1147            self.llm_client
1148                .complete(messages, system, &tools)
1149                .await
1150                .context("LLM call failed")
1151        }
1152    }
1153
1154    /// Create a tool context with streaming support.
1155    ///
1156    /// When `event_tx` is Some, spawns a forwarder task that converts
1157    /// `ToolStreamEvent::OutputDelta` into `AgentEvent::ToolOutputDelta`
1158    /// and sends them to the agent event channel.
1159    ///
1160    /// Returns the augmented `ToolContext`. The forwarder task runs until
1161    /// the tool-side sender is dropped (i.e., tool execution finishes).
1162    fn streaming_tool_context(
1163        &self,
1164        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1165        tool_id: &str,
1166        tool_name: &str,
1167    ) -> ToolContext {
1168        let mut ctx = self.tool_context.clone();
1169        if let Some(agent_tx) = event_tx {
1170            let (tool_tx, mut tool_rx) = mpsc::channel::<ToolStreamEvent>(64);
1171            ctx.event_tx = Some(tool_tx);
1172
1173            let agent_tx = agent_tx.clone();
1174            let tool_id = tool_id.to_string();
1175            let tool_name = tool_name.to_string();
1176            tokio::spawn(async move {
1177                while let Some(event) = tool_rx.recv().await {
1178                    match event {
1179                        ToolStreamEvent::OutputDelta(delta) => {
1180                            agent_tx
1181                                .send(AgentEvent::ToolOutputDelta {
1182                                    id: tool_id.clone(),
1183                                    name: tool_name.clone(),
1184                                    delta,
1185                                })
1186                                .await
1187                                .ok();
1188                        }
1189                    }
1190                }
1191            });
1192        }
1193        ctx
1194    }
1195
1196    /// Resolve context from all providers for a given prompt
1197    ///
1198    /// Returns aggregated context results from all configured providers.
1199    async fn resolve_context(&self, prompt: &str, session_id: Option<&str>) -> Vec<ContextResult> {
1200        if self.config.context_providers.is_empty() {
1201            return Vec::new();
1202        }
1203
1204        let query = ContextQuery::new(prompt).with_session_id(session_id.unwrap_or(""));
1205
1206        let futures = self
1207            .config
1208            .context_providers
1209            .iter()
1210            .map(|p| p.query(&query));
1211        let outcomes = join_all(futures).await;
1212
1213        outcomes
1214            .into_iter()
1215            .enumerate()
1216            .filter_map(|(i, r)| match r {
1217                Ok(result) if !result.is_empty() => Some(result),
1218                Ok(_) => None,
1219                Err(e) => {
1220                    tracing::warn!(
1221                        "Context provider '{}' failed: {}",
1222                        self.config.context_providers[i].name(),
1223                        e
1224                    );
1225                    None
1226                }
1227            })
1228            .collect()
1229    }
1230
1231    /// Detect whether the LLM's no-tool-call response looks like an intermediate
1232    /// step rather than a genuine final answer.
1233    ///
1234    /// Returns `true` when continuation should be injected. Heuristics:
1235    /// - Response ends with a colon or ellipsis (mid-thought)
1236    /// - Response contains phrases that signal incomplete work
1237    /// - Response is very short (< 80 chars) and doesn't look like a summary
1238    fn looks_incomplete(text: &str) -> bool {
1239        let t = text.trim();
1240        if t.is_empty() {
1241            return true;
1242        }
1243        // Very short responses that aren't clearly a final answer
1244        if t.len() < 80 && !t.contains('\n') {
1245            // Short single-line — could be a genuine one-liner answer, keep going
1246            // only if it ends with punctuation that signals continuation
1247            let ends_continuation =
1248                t.ends_with(':') || t.ends_with("...") || t.ends_with('…') || t.ends_with(',');
1249            if ends_continuation {
1250                return true;
1251            }
1252        }
1253        // Phrases that signal the LLM is describing what it will do rather than doing it
1254        let incomplete_phrases = [
1255            "i'll ",
1256            "i will ",
1257            "let me ",
1258            "i need to ",
1259            "i should ",
1260            "next, i",
1261            "first, i",
1262            "now i",
1263            "i'll start",
1264            "i'll begin",
1265            "i'll now",
1266            "let's start",
1267            "let's begin",
1268            "to do this",
1269            "i'm going to",
1270        ];
1271        let lower = t.to_lowercase();
1272        for phrase in &incomplete_phrases {
1273            if lower.contains(phrase) {
1274                return true;
1275            }
1276        }
1277        false
1278    }
1279
1280    /// Get the assembled system prompt from slots.
1281    #[allow(dead_code)]
1282    fn system_prompt(&self) -> String {
1283        self.config.prompt_slots.build()
1284    }
1285
1286    /// Get the assembled system prompt from slots with an explicit style.
1287    fn system_prompt_for_style(&self, style: AgentStyle) -> String {
1288        let mut slots = self.config.prompt_slots.clone();
1289        slots.style = Some(style);
1290        slots.build()
1291    }
1292
1293    async fn resolve_effective_style(&self, prompt: &str) -> AgentStyle {
1294        if let Some(style) = self.config.prompt_slots.style {
1295            return style;
1296        }
1297
1298        let (style, confidence) = AgentStyle::detect_with_confidence(prompt);
1299        if confidence != DetectionConfidence::Low {
1300            return style;
1301        }
1302
1303        match AgentStyle::detect_with_llm(self.llm_client.as_ref(), prompt).await {
1304            Ok(classified_style) => {
1305                tracing::debug!(
1306                    intent.classification = ?classified_style,
1307                    intent.source = "llm",
1308                    "Intent classified via LLM"
1309                );
1310                classified_style
1311            }
1312            Err(e) => {
1313                tracing::warn!(error = %e, "LLM intent classification failed, using keyword detection");
1314                style
1315            }
1316        }
1317    }
1318
1319    /// Build augmented system prompt with context
1320    #[allow(dead_code)]
1321    fn build_augmented_system_prompt(&self, context_results: &[ContextResult]) -> Option<String> {
1322        let base = self.system_prompt();
1323        self.build_augmented_system_prompt_with_base(&base, context_results)
1324    }
1325
1326    fn build_augmented_system_prompt_with_base(
1327        &self,
1328        base: &str,
1329        context_results: &[ContextResult],
1330    ) -> Option<String> {
1331        let base = base.to_string();
1332
1333        // Use live tool executor definitions so tools added via add_mcp_server() are included
1334        let live_tools = self.tool_executor.definitions();
1335        let mcp_tools: Vec<&ToolDefinition> = live_tools
1336            .iter()
1337            .filter(|t| t.name.starts_with("mcp__"))
1338            .collect();
1339
1340        let mcp_section = if mcp_tools.is_empty() {
1341            String::new()
1342        } else {
1343            let mut lines = vec![
1344                "## MCP Tools".to_string(),
1345                String::new(),
1346                "The following MCP (Model Context Protocol) tools are available. Use them when the task requires external capabilities beyond the built-in tools:".to_string(),
1347                String::new(),
1348            ];
1349            for tool in &mcp_tools {
1350                let display = format!("- `{}` — {}", tool.name, tool.description);
1351                lines.push(display);
1352            }
1353            lines.join("\n")
1354        };
1355
1356        let parts: Vec<&str> = [base.as_str(), mcp_section.as_str()]
1357            .iter()
1358            .filter(|s| !s.is_empty())
1359            .copied()
1360            .collect();
1361
1362        // Auto-detect project type from workspace and inject language-specific guidelines,
1363        // but only when the user hasn't already set a custom `guidelines` slot.
1364        let project_hint = if self.config.prompt_slots.guidelines.is_none() {
1365            Self::detect_project_hint(&self.tool_context.workspace)
1366        } else {
1367            String::new()
1368        };
1369
1370        if context_results.is_empty() {
1371            if project_hint.is_empty() {
1372                return Some(parts.join("\n\n"));
1373            }
1374            return Some(format!("{}\n\n{}", parts.join("\n\n"), project_hint));
1375        }
1376
1377        // Build context XML block
1378        let context_xml: String = context_results
1379            .iter()
1380            .map(|r| r.to_xml())
1381            .collect::<Vec<_>>()
1382            .join("\n\n");
1383
1384        if project_hint.is_empty() {
1385            Some(format!("{}\n\n{}", parts.join("\n\n"), context_xml))
1386        } else {
1387            Some(format!(
1388                "{}\n\n{}\n\n{}",
1389                parts.join("\n\n"),
1390                project_hint,
1391                context_xml
1392            ))
1393        }
1394    }
1395
1396    /// Notify providers of turn completion for memory extraction
1397    async fn notify_turn_complete(&self, session_id: &str, prompt: &str, response: &str) {
1398        let futures = self
1399            .config
1400            .context_providers
1401            .iter()
1402            .map(|p| p.on_turn_complete(session_id, prompt, response));
1403        let outcomes = join_all(futures).await;
1404
1405        for (i, result) in outcomes.into_iter().enumerate() {
1406            if let Err(e) = result {
1407                tracing::warn!(
1408                    "Context provider '{}' on_turn_complete failed: {}",
1409                    self.config.context_providers[i].name(),
1410                    e
1411                );
1412            }
1413        }
1414    }
1415
1416    /// Fire PreToolUse hook event before tool execution.
1417    /// Returns the HookResult which may block the tool call.
1418    async fn fire_pre_tool_use(
1419        &self,
1420        session_id: &str,
1421        tool_name: &str,
1422        args: &serde_json::Value,
1423        recent_tools: Vec<String>,
1424    ) -> Option<HookResult> {
1425        if let Some(he) = &self.config.hook_engine {
1426            let event = HookEvent::PreToolUse(PreToolUseEvent {
1427                session_id: session_id.to_string(),
1428                tool: tool_name.to_string(),
1429                args: args.clone(),
1430                working_directory: self.tool_context.workspace.to_string_lossy().to_string(),
1431                recent_tools,
1432            });
1433            let result = he.fire(&event).await;
1434            if result.is_block() {
1435                return Some(result);
1436            }
1437        }
1438        None
1439    }
1440
1441    /// Fire PostToolUse hook event after tool execution (fire-and-forget).
1442    async fn fire_post_tool_use(
1443        &self,
1444        session_id: &str,
1445        tool_name: &str,
1446        args: &serde_json::Value,
1447        output: &str,
1448        success: bool,
1449        duration_ms: u64,
1450    ) {
1451        if let Some(he) = &self.config.hook_engine {
1452            let event = HookEvent::PostToolUse(PostToolUseEvent {
1453                session_id: session_id.to_string(),
1454                tool: tool_name.to_string(),
1455                args: args.clone(),
1456                result: ToolResultData {
1457                    success,
1458                    output: output.to_string(),
1459                    exit_code: if success { Some(0) } else { Some(1) },
1460                    duration_ms,
1461                },
1462            });
1463            let he = Arc::clone(he);
1464            tokio::spawn(async move {
1465                let _ = he.fire(&event).await;
1466            });
1467        }
1468    }
1469
1470    /// Fire GenerateStart hook event before an LLM call.
1471    async fn fire_generate_start(
1472        &self,
1473        session_id: &str,
1474        prompt: &str,
1475        system_prompt: &Option<String>,
1476    ) {
1477        if let Some(he) = &self.config.hook_engine {
1478            let event = HookEvent::GenerateStart(GenerateStartEvent {
1479                session_id: session_id.to_string(),
1480                prompt: prompt.to_string(),
1481                system_prompt: system_prompt.clone(),
1482                model_provider: String::new(),
1483                model_name: String::new(),
1484                available_tools: self.config.tools.iter().map(|t| t.name.clone()).collect(),
1485            });
1486            let _ = he.fire(&event).await;
1487        }
1488    }
1489
1490    /// Fire GenerateEnd hook event after an LLM call.
1491    async fn fire_generate_end(
1492        &self,
1493        session_id: &str,
1494        prompt: &str,
1495        response: &LlmResponse,
1496        duration_ms: u64,
1497    ) {
1498        if let Some(he) = &self.config.hook_engine {
1499            let tool_calls: Vec<ToolCallInfo> = response
1500                .tool_calls()
1501                .iter()
1502                .map(|tc| ToolCallInfo {
1503                    name: tc.name.clone(),
1504                    args: tc.args.clone(),
1505                })
1506                .collect();
1507
1508            let event = HookEvent::GenerateEnd(GenerateEndEvent {
1509                session_id: session_id.to_string(),
1510                prompt: prompt.to_string(),
1511                response_text: response.text().to_string(),
1512                tool_calls,
1513                usage: TokenUsageInfo {
1514                    prompt_tokens: response.usage.prompt_tokens as i32,
1515                    completion_tokens: response.usage.completion_tokens as i32,
1516                    total_tokens: response.usage.total_tokens as i32,
1517                },
1518                duration_ms,
1519            });
1520            let _ = he.fire(&event).await;
1521        }
1522    }
1523
1524    /// Fire PrePrompt hook event before prompt augmentation.
1525    /// Returns optional modified prompt text from the hook.
1526    async fn fire_pre_prompt(
1527        &self,
1528        session_id: &str,
1529        prompt: &str,
1530        system_prompt: &Option<String>,
1531        message_count: usize,
1532    ) -> Option<String> {
1533        if let Some(he) = &self.config.hook_engine {
1534            let event = HookEvent::PrePrompt(PrePromptEvent {
1535                session_id: session_id.to_string(),
1536                prompt: prompt.to_string(),
1537                system_prompt: system_prompt.clone(),
1538                message_count,
1539            });
1540            let result = he.fire(&event).await;
1541            if let HookResult::Continue(Some(modified)) = result {
1542                // Extract modified prompt from hook response
1543                if let Some(new_prompt) = modified.get("prompt").and_then(|v| v.as_str()) {
1544                    return Some(new_prompt.to_string());
1545                }
1546            }
1547        }
1548        None
1549    }
1550
1551    /// Fire PostResponse hook event after the agent loop completes.
1552    async fn fire_post_response(
1553        &self,
1554        session_id: &str,
1555        response_text: &str,
1556        tool_calls_count: usize,
1557        usage: &TokenUsage,
1558        duration_ms: u64,
1559    ) {
1560        if let Some(he) = &self.config.hook_engine {
1561            let event = HookEvent::PostResponse(PostResponseEvent {
1562                session_id: session_id.to_string(),
1563                response_text: response_text.to_string(),
1564                tool_calls_count,
1565                usage: TokenUsageInfo {
1566                    prompt_tokens: usage.prompt_tokens as i32,
1567                    completion_tokens: usage.completion_tokens as i32,
1568                    total_tokens: usage.total_tokens as i32,
1569                },
1570                duration_ms,
1571            });
1572            let he = Arc::clone(he);
1573            tokio::spawn(async move {
1574                let _ = he.fire(&event).await;
1575            });
1576        }
1577    }
1578
1579    /// Fire OnError hook event when an error occurs.
1580    async fn fire_on_error(
1581        &self,
1582        session_id: &str,
1583        error_type: ErrorType,
1584        error_message: &str,
1585        context: serde_json::Value,
1586    ) {
1587        if let Some(he) = &self.config.hook_engine {
1588            let event = HookEvent::OnError(OnErrorEvent {
1589                session_id: session_id.to_string(),
1590                error_type,
1591                error_message: error_message.to_string(),
1592                context,
1593            });
1594            let he = Arc::clone(he);
1595            tokio::spawn(async move {
1596                let _ = he.fire(&event).await;
1597            });
1598        }
1599    }
1600
1601    /// Execute the agent loop for a prompt
1602    ///
1603    /// Takes the conversation history and a new user prompt.
1604    /// Returns the agent result and updated message history.
1605    /// When event_tx is provided, uses streaming LLM API for real-time text output.
1606    pub async fn execute(
1607        &self,
1608        history: &[Message],
1609        prompt: &str,
1610        event_tx: Option<mpsc::Sender<AgentEvent>>,
1611    ) -> Result<AgentResult> {
1612        self.execute_with_session(history, prompt, None, event_tx, None)
1613            .await
1614    }
1615
1616    /// Execute the agent loop with pre-built messages (user message already included).
1617    ///
1618    /// Used by `send_with_attachments` / `stream_with_attachments` where the
1619    /// user message contains multi-modal content and is already appended to
1620    /// the messages vec.
1621    pub async fn execute_from_messages(
1622        &self,
1623        messages: Vec<Message>,
1624        session_id: Option<&str>,
1625        event_tx: Option<mpsc::Sender<AgentEvent>>,
1626        cancel_token: Option<&tokio_util::sync::CancellationToken>,
1627    ) -> Result<AgentResult> {
1628        let default_token = tokio_util::sync::CancellationToken::new();
1629        let token = cancel_token.unwrap_or(&default_token);
1630        tracing::info!(
1631            a3s.session.id = session_id.unwrap_or("none"),
1632            a3s.agent.max_turns = self.config.max_tool_rounds,
1633            "a3s.agent.execute_from_messages started"
1634        );
1635
1636        // Extract the last user message text for hooks, memory recall, and events.
1637        // Pass empty prompt so execute_loop skips adding a duplicate user message,
1638        // but provide effective_prompt for hook/memory/event purposes.
1639        let effective_prompt = messages
1640            .iter()
1641            .rev()
1642            .find(|m| m.role == "user")
1643            .map(|m| m.text())
1644            .unwrap_or_default();
1645
1646        let result = self
1647            .execute_loop_inner(
1648                &messages,
1649                "",
1650                &effective_prompt,
1651                session_id,
1652                event_tx,
1653                token,
1654                true, // emit_end: this is a standalone execution
1655            )
1656            .await;
1657
1658        match &result {
1659            Ok(r) => tracing::info!(
1660                a3s.agent.tool_calls_count = r.tool_calls_count,
1661                a3s.llm.total_tokens = r.usage.total_tokens,
1662                "a3s.agent.execute_from_messages completed"
1663            ),
1664            Err(e) => tracing::warn!(
1665                error = %e,
1666                "a3s.agent.execute_from_messages failed"
1667            ),
1668        }
1669
1670        result
1671    }
1672
1673    /// Execute the agent loop for a prompt with session context
1674    ///
1675    /// Takes the conversation history, user prompt, and optional session ID.
1676    /// When session_id is provided, context providers can use it for session-specific context.
1677    pub async fn execute_with_session(
1678        &self,
1679        history: &[Message],
1680        prompt: &str,
1681        session_id: Option<&str>,
1682        event_tx: Option<mpsc::Sender<AgentEvent>>,
1683        cancel_token: Option<&tokio_util::sync::CancellationToken>,
1684    ) -> Result<AgentResult> {
1685        let default_token = tokio_util::sync::CancellationToken::new();
1686        let token = cancel_token.unwrap_or(&default_token);
1687        tracing::info!(
1688            a3s.session.id = session_id.unwrap_or("none"),
1689            a3s.agent.max_turns = self.config.max_tool_rounds,
1690            "a3s.agent.execute started"
1691        );
1692
1693        let effective_style = self.resolve_effective_style(prompt).await;
1694
1695        // Determine whether to use planning mode
1696        let use_planning = if self.config.planning_mode == PlanningMode::Auto {
1697            effective_style.requires_planning()
1698        } else {
1699            // Explicit mode: Enabled or Disabled
1700            self.config.planning_mode.should_plan(prompt)
1701        };
1702
1703        // Create agent task if task_manager is available
1704        let task_id = if let Some(ref tm) = self.task_manager {
1705            let workspace = self.tool_context.workspace.display().to_string();
1706            let task = crate::task::Task::agent("agent", &workspace, prompt);
1707            let id = task.id;
1708            tm.spawn(task);
1709            let _ = tm.start(id);
1710            Some(id)
1711        } else {
1712            None
1713        };
1714
1715        let result = if use_planning {
1716            self.execute_with_planning(history, prompt, event_tx).await
1717        } else {
1718            self.execute_loop(history, prompt, session_id, event_tx, token, true)
1719                .await
1720        };
1721
1722        // Complete or fail agent task based on result
1723        if let Some(ref tm) = self.task_manager {
1724            if let Some(tid) = task_id {
1725                match &result {
1726                    Ok(r) => {
1727                        let output = serde_json::json!({
1728                            "text": r.text,
1729                            "tool_calls_count": r.tool_calls_count,
1730                            "usage": r.usage,
1731                        });
1732                        let _ = tm.complete(tid, Some(output));
1733                    }
1734                    Err(e) => {
1735                        let _ = tm.fail(tid, e.to_string());
1736                    }
1737                }
1738            }
1739        }
1740
1741        match &result {
1742            Ok(r) => {
1743                tracing::info!(
1744                    a3s.agent.tool_calls_count = r.tool_calls_count,
1745                    a3s.llm.total_tokens = r.usage.total_tokens,
1746                    "a3s.agent.execute completed"
1747                );
1748                // Fire PostResponse hook
1749                self.fire_post_response(
1750                    session_id.unwrap_or(""),
1751                    &r.text,
1752                    r.tool_calls_count,
1753                    &r.usage,
1754                    0, // duration tracked externally
1755                )
1756                .await;
1757            }
1758            Err(e) => {
1759                tracing::warn!(
1760                    error = %e,
1761                    "a3s.agent.execute failed"
1762                );
1763                // Fire OnError hook
1764                self.fire_on_error(
1765                    session_id.unwrap_or(""),
1766                    ErrorType::Other,
1767                    &e.to_string(),
1768                    serde_json::json!({"phase": "execute"}),
1769                )
1770                .await;
1771            }
1772        }
1773
1774        result
1775    }
1776
1777    /// Core execution loop (without planning routing).
1778    ///
1779    /// This is the inner loop that runs LLM calls and tool executions.
1780    /// Called directly by `execute_with_session` (after planning check)
1781    /// and by `execute_plan` (for individual steps, bypassing planning).
1782    async fn execute_loop(
1783        &self,
1784        history: &[Message],
1785        prompt: &str,
1786        session_id: Option<&str>,
1787        event_tx: Option<mpsc::Sender<AgentEvent>>,
1788        cancel_token: &tokio_util::sync::CancellationToken,
1789        emit_end: bool,
1790    ) -> Result<AgentResult> {
1791        // When called via execute_loop, the prompt is used for both
1792        // message-adding and hook/memory/event purposes.
1793        self.execute_loop_inner(
1794            history,
1795            prompt,
1796            prompt,
1797            session_id,
1798            event_tx,
1799            cancel_token,
1800            emit_end,
1801        )
1802        .await
1803    }
1804
1805    /// Inner execution loop.
1806    ///
1807    /// `msg_prompt` controls whether a user message is appended (empty = skip).
1808    /// `effective_prompt` is used for hooks, memory recall, taint tracking, and events.
1809    /// `emit_end` controls whether to send `AgentEvent::End` when the loop completes
1810    /// (should be false when called from `execute_plan` to avoid duplicate End events).
1811    #[allow(clippy::too_many_arguments)]
1812    async fn execute_loop_inner(
1813        &self,
1814        history: &[Message],
1815        msg_prompt: &str,
1816        effective_prompt: &str,
1817        session_id: Option<&str>,
1818        event_tx: Option<mpsc::Sender<AgentEvent>>,
1819        cancel_token: &tokio_util::sync::CancellationToken,
1820        emit_end: bool,
1821    ) -> Result<AgentResult> {
1822        let mut messages = history.to_vec();
1823        let mut total_usage = TokenUsage::default();
1824        let mut tool_calls_count = 0;
1825        let mut turn = 0;
1826        // Consecutive malformed-tool-args errors (4.1 parse error recovery)
1827        let mut parse_error_count: u32 = 0;
1828        // Continuation injection counter
1829        let mut continuation_count: u32 = 0;
1830        let mut recent_tool_signatures: Vec<String> = Vec::new();
1831        let style_prompt = if effective_prompt.is_empty() {
1832            msg_prompt
1833        } else {
1834            effective_prompt
1835        };
1836        let effective_style = self.resolve_effective_style(style_prompt).await;
1837        let effective_system_prompt = self.system_prompt_for_style(effective_style);
1838        if let Some(tx) = &event_tx {
1839            tx.send(AgentEvent::AgentModeChanged {
1840                mode: effective_style.runtime_mode().to_string(),
1841                agent: effective_style.builtin_agent_name().to_string(),
1842                description: effective_style.description().to_string(),
1843            })
1844            .await
1845            .ok();
1846        }
1847
1848        // Send start event
1849        if let Some(tx) = &event_tx {
1850            tx.send(AgentEvent::Start {
1851                prompt: effective_prompt.to_string(),
1852            })
1853            .await
1854            .ok();
1855        }
1856
1857        // Forward queue events (CommandDeadLettered, CommandRetry, QueueAlert) to event stream
1858        let _queue_forward_handle =
1859            if let (Some(ref queue), Some(ref tx)) = (&self.command_queue, &event_tx) {
1860                let mut rx = queue.subscribe();
1861                let tx = tx.clone();
1862                Some(tokio::spawn(async move {
1863                    while let Ok(event) = rx.recv().await {
1864                        if tx.send(event).await.is_err() {
1865                            break;
1866                        }
1867                    }
1868                }))
1869            } else {
1870                None
1871            };
1872
1873        // Fire PrePrompt hook (may modify the prompt)
1874        let built_system_prompt = Some(effective_system_prompt.clone());
1875        let hooked_prompt = if let Some(modified) = self
1876            .fire_pre_prompt(
1877                session_id.unwrap_or(""),
1878                effective_prompt,
1879                &built_system_prompt,
1880                messages.len(),
1881            )
1882            .await
1883        {
1884            modified
1885        } else {
1886            effective_prompt.to_string()
1887        };
1888        let effective_prompt = hooked_prompt.as_str();
1889
1890        // Taint-track the incoming prompt for sensitive data detection
1891        if let Some(ref sp) = self.config.security_provider {
1892            sp.taint_input(effective_prompt);
1893        }
1894
1895        // Recall relevant memories and inject into system prompt
1896        let system_with_memory = if let Some(ref memory) = self.config.memory {
1897            match memory.recall_similar(effective_prompt, 5).await {
1898                Ok(items) if !items.is_empty() => {
1899                    if let Some(tx) = &event_tx {
1900                        for item in &items {
1901                            tx.send(AgentEvent::MemoryRecalled {
1902                                memory_id: item.id.clone(),
1903                                content: item.content.clone(),
1904                                relevance: item.relevance_score(),
1905                            })
1906                            .await
1907                            .ok();
1908                        }
1909                        tx.send(AgentEvent::MemoriesSearched {
1910                            query: Some(effective_prompt.to_string()),
1911                            tags: Vec::new(),
1912                            result_count: items.len(),
1913                        })
1914                        .await
1915                        .ok();
1916                    }
1917                    let memory_context = items
1918                        .iter()
1919                        .map(|i| format!("- {}", i.content))
1920                        .collect::<Vec<_>>()
1921                        .join(
1922                            "
1923",
1924                        );
1925                    let base = effective_system_prompt.clone();
1926                    Some(format!(
1927                        "{}
1928
1929## Relevant past experience
1930{}",
1931                        base, memory_context
1932                    ))
1933                }
1934                _ => Some(effective_system_prompt.clone()),
1935            }
1936        } else {
1937            Some(effective_system_prompt.clone())
1938        };
1939
1940        // Resolve context from providers on first turn (before adding user message)
1941        let augmented_system = if !self.config.context_providers.is_empty() {
1942            // Send context resolving event
1943            if let Some(tx) = &event_tx {
1944                let provider_names: Vec<String> = self
1945                    .config
1946                    .context_providers
1947                    .iter()
1948                    .map(|p| p.name().to_string())
1949                    .collect();
1950                tx.send(AgentEvent::ContextResolving {
1951                    providers: provider_names,
1952                })
1953                .await
1954                .ok();
1955            }
1956
1957            tracing::info!(
1958                a3s.context.providers = self.config.context_providers.len() as i64,
1959                "Context resolution started"
1960            );
1961            let context_results = self.resolve_context(effective_prompt, session_id).await;
1962
1963            // Send context resolved event
1964            if let Some(tx) = &event_tx {
1965                let total_items: usize = context_results.iter().map(|r| r.items.len()).sum();
1966                let total_tokens: usize = context_results.iter().map(|r| r.total_tokens).sum();
1967
1968                tracing::info!(
1969                    context_items = total_items,
1970                    context_tokens = total_tokens,
1971                    "Context resolution completed"
1972                );
1973
1974                tx.send(AgentEvent::ContextResolved {
1975                    total_items,
1976                    total_tokens,
1977                })
1978                .await
1979                .ok();
1980            }
1981
1982            self.build_augmented_system_prompt_with_base(&effective_system_prompt, &context_results)
1983        } else {
1984            Some(effective_system_prompt.clone())
1985        };
1986
1987        // Merge memory context into system prompt
1988        let base_prompt = effective_system_prompt.clone();
1989        let augmented_system = match (augmented_system, system_with_memory) {
1990            (Some(ctx), Some(mem)) if ctx != mem => Some(ctx.replacen(&base_prompt, &mem, 1)),
1991            (Some(ctx), _) => Some(ctx),
1992            (None, mem) => mem,
1993        };
1994
1995        // Add user message
1996        if !msg_prompt.is_empty() {
1997            messages.push(Message::user(msg_prompt));
1998        }
1999
2000        loop {
2001            turn += 1;
2002
2003            if turn > self.config.max_tool_rounds {
2004                let error = format!("Max tool rounds ({}) exceeded", self.config.max_tool_rounds);
2005                if let Some(tx) = &event_tx {
2006                    tx.send(AgentEvent::Error {
2007                        message: error.clone(),
2008                    })
2009                    .await
2010                    .ok();
2011                }
2012                anyhow::bail!(error);
2013            }
2014
2015            // Send turn start event
2016            if let Some(tx) = &event_tx {
2017                tx.send(AgentEvent::TurnStart { turn }).await.ok();
2018            }
2019
2020            tracing::info!(
2021                turn = turn,
2022                max_turns = self.config.max_tool_rounds,
2023                "Agent turn started"
2024            );
2025
2026            // Call LLM - use streaming if we have an event channel
2027            tracing::info!(
2028                a3s.llm.streaming = event_tx.is_some(),
2029                "LLM completion started"
2030            );
2031
2032            // Fire GenerateStart hook
2033            self.fire_generate_start(
2034                session_id.unwrap_or(""),
2035                effective_prompt,
2036                &augmented_system,
2037            )
2038            .await;
2039
2040            let llm_start = std::time::Instant::now();
2041            // Circuit breaker (4.3): retry non-streaming LLM calls on transient failures.
2042            // Each failure increments `consecutive_llm_errors`; on success it resets to 0.
2043            // Streaming mode bails immediately on failure (events can't be replayed).
2044            let response = {
2045                let threshold = self.config.circuit_breaker_threshold.max(1);
2046                let mut attempt = 0u32;
2047                loop {
2048                    attempt += 1;
2049                    let result = self
2050                        .call_llm(
2051                            &messages,
2052                            augmented_system.as_deref(),
2053                            &event_tx,
2054                            cancel_token,
2055                        )
2056                        .await;
2057                    match result {
2058                        Ok(r) => {
2059                            break r;
2060                        }
2061                        // Never retry if cancelled
2062                        Err(e) if cancel_token.is_cancelled() => {
2063                            anyhow::bail!(e);
2064                        }
2065                        // Retry when: non-streaming under threshold, OR first streaming attempt
2066                        Err(e) if attempt < threshold && (event_tx.is_none() || attempt == 1) => {
2067                            tracing::warn!(
2068                                turn = turn,
2069                                attempt = attempt,
2070                                threshold = threshold,
2071                                error = %e,
2072                                "LLM call failed, will retry"
2073                            );
2074                            tokio::time::sleep(Duration::from_millis(100 * attempt as u64)).await;
2075                        }
2076                        // Threshold exceeded or streaming mid-stream: bail
2077                        Err(e) => {
2078                            let msg = if attempt > 1 {
2079                                format!(
2080                                    "LLM circuit breaker triggered: failed after {} attempt(s): {}",
2081                                    attempt, e
2082                                )
2083                            } else {
2084                                format!("LLM call failed: {}", e)
2085                            };
2086                            tracing::error!(turn = turn, attempt = attempt, "{}", msg);
2087                            // Fire OnError hook for LLM failure
2088                            self.fire_on_error(
2089                                session_id.unwrap_or(""),
2090                                ErrorType::LlmFailure,
2091                                &msg,
2092                                serde_json::json!({"turn": turn, "attempt": attempt}),
2093                            )
2094                            .await;
2095                            if let Some(tx) = &event_tx {
2096                                tx.send(AgentEvent::Error {
2097                                    message: msg.clone(),
2098                                })
2099                                .await
2100                                .ok();
2101                            }
2102                            anyhow::bail!(msg);
2103                        }
2104                    }
2105                }
2106            };
2107
2108            // Update usage
2109            total_usage.prompt_tokens += response.usage.prompt_tokens;
2110            total_usage.completion_tokens += response.usage.completion_tokens;
2111            total_usage.total_tokens += response.usage.total_tokens;
2112
2113            // Track token usage in progress tracker
2114            if let Some(ref tracker) = self.progress_tracker {
2115                let token_usage = crate::task::TaskTokenUsage {
2116                    input_tokens: response.usage.prompt_tokens as u64,
2117                    output_tokens: response.usage.completion_tokens as u64,
2118                    cache_read_tokens: response.usage.cache_read_tokens.unwrap_or(0) as u64,
2119                    cache_write_tokens: response.usage.cache_write_tokens.unwrap_or(0) as u64,
2120                };
2121                if let Ok(mut guard) = tracker.try_write() {
2122                    guard.track_tokens(token_usage);
2123                }
2124            }
2125
2126            // Record LLM completion telemetry
2127            let llm_duration = llm_start.elapsed();
2128            tracing::info!(
2129                turn = turn,
2130                streaming = event_tx.is_some(),
2131                prompt_tokens = response.usage.prompt_tokens,
2132                completion_tokens = response.usage.completion_tokens,
2133                total_tokens = response.usage.total_tokens,
2134                stop_reason = response.stop_reason.as_deref().unwrap_or("unknown"),
2135                duration_ms = llm_duration.as_millis() as u64,
2136                "LLM completion finished"
2137            );
2138
2139            // Fire GenerateEnd hook
2140            self.fire_generate_end(
2141                session_id.unwrap_or(""),
2142                effective_prompt,
2143                &response,
2144                llm_duration.as_millis() as u64,
2145            )
2146            .await;
2147
2148            // Record LLM usage on the llm span
2149            crate::telemetry::record_llm_usage(
2150                response.usage.prompt_tokens,
2151                response.usage.completion_tokens,
2152                response.usage.total_tokens,
2153                response.stop_reason.as_deref(),
2154            );
2155            // Log turn token usage
2156            tracing::info!(
2157                turn = turn,
2158                a3s.llm.total_tokens = response.usage.total_tokens,
2159                "Turn token usage"
2160            );
2161
2162            // Add assistant message to history
2163            messages.push(response.message.clone());
2164
2165            // Check for tool calls
2166            let tool_calls = response.tool_calls();
2167
2168            // Send turn end event
2169            if let Some(tx) = &event_tx {
2170                tx.send(AgentEvent::TurnEnd {
2171                    turn,
2172                    usage: response.usage.clone(),
2173                })
2174                .await
2175                .ok();
2176            }
2177
2178            // Auto-compact: check if context usage exceeds threshold
2179            if self.config.auto_compact {
2180                let used = response.usage.prompt_tokens;
2181                let max = self.config.max_context_tokens;
2182                let threshold = self.config.auto_compact_threshold;
2183
2184                if crate::session::compaction::should_auto_compact(used, max, threshold) {
2185                    let before_len = messages.len();
2186                    let percent_before = used as f32 / max as f32;
2187
2188                    tracing::info!(
2189                        used_tokens = used,
2190                        max_tokens = max,
2191                        percent = percent_before,
2192                        threshold = threshold,
2193                        "Auto-compact triggered"
2194                    );
2195
2196                    // Step 1: Prune large tool outputs first (cheap, no LLM call)
2197                    if let Some(pruned) = crate::session::compaction::prune_tool_outputs(&messages)
2198                    {
2199                        messages = pruned;
2200                        tracing::info!("Tool output pruning applied");
2201                    }
2202
2203                    // Step 2: Full summarization using the agent's LLM client
2204                    if let Ok(Some(compacted)) = crate::session::compaction::compact_messages(
2205                        session_id.unwrap_or(""),
2206                        &messages,
2207                        &self.llm_client,
2208                    )
2209                    .await
2210                    {
2211                        messages = compacted;
2212                    }
2213
2214                    // Emit compaction event
2215                    if let Some(tx) = &event_tx {
2216                        tx.send(AgentEvent::ContextCompacted {
2217                            session_id: session_id.unwrap_or("").to_string(),
2218                            before_messages: before_len,
2219                            after_messages: messages.len(),
2220                            percent_before,
2221                        })
2222                        .await
2223                        .ok();
2224                    }
2225                }
2226            }
2227
2228            if tool_calls.is_empty() {
2229                // No tool calls — check if we should inject a continuation message
2230                // before treating this as a final answer.
2231                let final_text = response.text();
2232
2233                if self.config.continuation_enabled
2234                    && continuation_count < self.config.max_continuation_turns
2235                    && turn < self.config.max_tool_rounds  // never inject past the turn limit
2236                    && Self::looks_incomplete(&final_text)
2237                {
2238                    continuation_count += 1;
2239                    tracing::info!(
2240                        turn = turn,
2241                        continuation = continuation_count,
2242                        max_continuation = self.config.max_continuation_turns,
2243                        "Injecting continuation message — response looks incomplete"
2244                    );
2245                    // Inject continuation as a user message and keep looping
2246                    messages.push(Message::user(crate::prompts::CONTINUATION));
2247                    continue;
2248                }
2249
2250                // Sanitize output to redact any sensitive data before returning
2251                let final_text = if let Some(ref sp) = self.config.security_provider {
2252                    sp.sanitize_output(&final_text)
2253                } else {
2254                    final_text
2255                };
2256
2257                // Record final totals
2258                tracing::info!(
2259                    tool_calls_count = tool_calls_count,
2260                    total_prompt_tokens = total_usage.prompt_tokens,
2261                    total_completion_tokens = total_usage.completion_tokens,
2262                    total_tokens = total_usage.total_tokens,
2263                    turns = turn,
2264                    "Agent execution completed"
2265                );
2266
2267                if emit_end {
2268                    if let Some(tx) = &event_tx {
2269                        tx.send(AgentEvent::End {
2270                            text: final_text.clone(),
2271                            usage: total_usage.clone(),
2272                            meta: response.meta.clone(),
2273                        })
2274                        .await
2275                        .ok();
2276                    }
2277                }
2278
2279                // Notify context providers of turn completion for memory extraction
2280                if let Some(sid) = session_id {
2281                    self.notify_turn_complete(sid, effective_prompt, &final_text)
2282                        .await;
2283                }
2284
2285                return Ok(AgentResult {
2286                    text: final_text,
2287                    messages,
2288                    usage: total_usage,
2289                    tool_calls_count,
2290                });
2291            }
2292
2293            // Execute tools sequentially
2294            // Fast path: when all tool calls are independent file writes and no hooks/HITL
2295            // are configured, execute them concurrently to avoid serial I/O bottleneck.
2296            let tool_calls = if self.config.hook_engine.is_none()
2297                && self.config.confirmation_manager.is_none()
2298                && tool_calls.len() > 1
2299                && tool_calls
2300                    .iter()
2301                    .all(|tc| Self::is_parallel_safe_write(&tc.name, &tc.args))
2302                && {
2303                    // All target paths must be distinct (no write-write conflicts)
2304                    let paths: Vec<_> = tool_calls
2305                        .iter()
2306                        .filter_map(|tc| Self::extract_write_path(&tc.args))
2307                        .collect();
2308                    paths.len() == tool_calls.len()
2309                        && paths.iter().collect::<std::collections::HashSet<_>>().len()
2310                            == paths.len()
2311                } {
2312                tracing::info!(
2313                    count = tool_calls.len(),
2314                    "Parallel write batch: executing {} independent file writes concurrently",
2315                    tool_calls.len()
2316                );
2317
2318                let futures: Vec<_> = tool_calls
2319                    .iter()
2320                    .map(|tc| {
2321                        let ctx = self.tool_context.clone();
2322                        let executor = Arc::clone(&self.tool_executor);
2323                        let name = tc.name.clone();
2324                        let args = tc.args.clone();
2325                        async move { executor.execute_with_context(&name, &args, &ctx).await }
2326                    })
2327                    .collect();
2328
2329                let results = join_all(futures).await;
2330
2331                // Post-process results in original order (sequential, preserves message ordering)
2332                for (tc, result) in tool_calls.iter().zip(results) {
2333                    tool_calls_count += 1;
2334                    let (output, exit_code, is_error, metadata, images) =
2335                        Self::tool_result_to_tuple(result);
2336
2337                    // Track tool call in progress tracker
2338                    self.track_tool_result(&tc.name, &tc.args, exit_code);
2339
2340                    let output = if let Some(ref sp) = self.config.security_provider {
2341                        sp.sanitize_output(&output)
2342                    } else {
2343                        output
2344                    };
2345
2346                    if let Some(tx) = &event_tx {
2347                        tx.send(AgentEvent::ToolEnd {
2348                            id: tc.id.clone(),
2349                            name: tc.name.clone(),
2350                            output: output.clone(),
2351                            exit_code,
2352                            metadata,
2353                        })
2354                        .await
2355                        .ok();
2356                    }
2357
2358                    if images.is_empty() {
2359                        messages.push(Message::tool_result(&tc.id, &output, is_error));
2360                    } else {
2361                        messages.push(Message::tool_result_with_images(
2362                            &tc.id, &output, &images, is_error,
2363                        ));
2364                    }
2365                }
2366
2367                // Skip the sequential loop below
2368                continue;
2369            } else {
2370                tool_calls
2371            };
2372
2373            for tool_call in tool_calls {
2374                tool_calls_count += 1;
2375
2376                let tool_start = std::time::Instant::now();
2377
2378                tracing::info!(
2379                    tool_name = tool_call.name.as_str(),
2380                    tool_id = tool_call.id.as_str(),
2381                    "Tool execution started"
2382                );
2383
2384                // Send tool start event (only if not already sent during streaming)
2385                // In streaming mode, ToolStart is sent when we receive ToolUseStart from LLM
2386                // But we still need to send ToolEnd after execution
2387
2388                // Check for malformed tool arguments from LLM (4.1 parse error recovery)
2389                if let Some(parse_error) =
2390                    tool_call.args.get("__parse_error").and_then(|v| v.as_str())
2391                {
2392                    parse_error_count += 1;
2393                    let error_msg = format!("Error: {}", parse_error);
2394                    tracing::warn!(
2395                        tool = tool_call.name.as_str(),
2396                        parse_error_count = parse_error_count,
2397                        max_parse_retries = self.config.max_parse_retries,
2398                        "Malformed tool arguments from LLM"
2399                    );
2400
2401                    if let Some(tx) = &event_tx {
2402                        tx.send(AgentEvent::ToolEnd {
2403                            id: tool_call.id.clone(),
2404                            name: tool_call.name.clone(),
2405                            output: error_msg.clone(),
2406                            exit_code: 1,
2407                            metadata: None,
2408                        })
2409                        .await
2410                        .ok();
2411                    }
2412
2413                    messages.push(Message::tool_result(&tool_call.id, &error_msg, true));
2414
2415                    if parse_error_count > self.config.max_parse_retries {
2416                        let msg = format!(
2417                            "LLM produced malformed tool arguments {} time(s) in a row \
2418                             (max_parse_retries={}); giving up",
2419                            parse_error_count, self.config.max_parse_retries
2420                        );
2421                        tracing::error!("{}", msg);
2422                        if let Some(tx) = &event_tx {
2423                            tx.send(AgentEvent::Error {
2424                                message: msg.clone(),
2425                            })
2426                            .await
2427                            .ok();
2428                        }
2429                        anyhow::bail!(msg);
2430                    }
2431                    continue;
2432                }
2433
2434                // Tool args are valid — reset parse error counter
2435                parse_error_count = 0;
2436
2437                // Check skill-based tool permissions
2438                if let Some(ref registry) = self.config.skill_registry {
2439                    let instruction_skills =
2440                        registry.by_kind(crate::skills::SkillKind::Instruction);
2441                    let has_restrictions =
2442                        instruction_skills.iter().any(|s| s.allowed_tools.is_some());
2443                    if has_restrictions {
2444                        let allowed = instruction_skills
2445                            .iter()
2446                            .any(|s| s.is_tool_allowed(&tool_call.name));
2447                        if !allowed {
2448                            let msg = format!(
2449                                "Tool '{}' is not allowed by any active skill.",
2450                                tool_call.name
2451                            );
2452                            tracing::info!(
2453                                tool_name = tool_call.name.as_str(),
2454                                "Tool blocked by skill registry"
2455                            );
2456                            if let Some(tx) = &event_tx {
2457                                tx.send(AgentEvent::PermissionDenied {
2458                                    tool_id: tool_call.id.clone(),
2459                                    tool_name: tool_call.name.clone(),
2460                                    args: tool_call.args.clone(),
2461                                    reason: msg.clone(),
2462                                })
2463                                .await
2464                                .ok();
2465                            }
2466                            messages.push(Message::tool_result(&tool_call.id, &msg, true));
2467                            continue;
2468                        }
2469                    }
2470                }
2471
2472                // Fire PreToolUse hook (may block the tool call)
2473                if let Some(HookResult::Block(reason)) = self
2474                    .fire_pre_tool_use(
2475                        session_id.unwrap_or(""),
2476                        &tool_call.name,
2477                        &tool_call.args,
2478                        recent_tool_signatures.clone(),
2479                    )
2480                    .await
2481                {
2482                    let msg = format!("Tool '{}' blocked by hook: {}", tool_call.name, reason);
2483                    tracing::info!(
2484                        tool_name = tool_call.name.as_str(),
2485                        "Tool blocked by PreToolUse hook"
2486                    );
2487
2488                    if let Some(tx) = &event_tx {
2489                        tx.send(AgentEvent::PermissionDenied {
2490                            tool_id: tool_call.id.clone(),
2491                            tool_name: tool_call.name.clone(),
2492                            args: tool_call.args.clone(),
2493                            reason: reason.clone(),
2494                        })
2495                        .await
2496                        .ok();
2497                    }
2498
2499                    messages.push(Message::tool_result(&tool_call.id, &msg, true));
2500                    continue;
2501                }
2502
2503                // Check permission before executing tool
2504                let permission_decision = if let Some(checker) = &self.config.permission_checker {
2505                    checker.check(&tool_call.name, &tool_call.args)
2506                } else {
2507                    // No policy configured — default to Ask so HITL can still intervene
2508                    PermissionDecision::Ask
2509                };
2510
2511                let (output, exit_code, is_error, metadata, images) = match permission_decision {
2512                    PermissionDecision::Deny => {
2513                        tracing::info!(
2514                            tool_name = tool_call.name.as_str(),
2515                            permission = "deny",
2516                            "Tool permission denied"
2517                        );
2518                        // Tool execution denied by permission policy
2519                        let denial_msg = format!(
2520                            "Permission denied: Tool '{}' is blocked by permission policy.",
2521                            tool_call.name
2522                        );
2523
2524                        // Send permission denied event
2525                        if let Some(tx) = &event_tx {
2526                            tx.send(AgentEvent::PermissionDenied {
2527                                tool_id: tool_call.id.clone(),
2528                                tool_name: tool_call.name.clone(),
2529                                args: tool_call.args.clone(),
2530                                reason: "Blocked by deny rule in permission policy".to_string(),
2531                            })
2532                            .await
2533                            .ok();
2534                        }
2535
2536                        (denial_msg, 1, true, None, Vec::new())
2537                    }
2538                    PermissionDecision::Allow => {
2539                        tracing::info!(
2540                            tool_name = tool_call.name.as_str(),
2541                            permission = "allow",
2542                            "Tool permission: allow"
2543                        );
2544                        // Permission explicitly allows — execute directly, no HITL
2545                        let stream_ctx =
2546                            self.streaming_tool_context(&event_tx, &tool_call.id, &tool_call.name);
2547                        let result = self
2548                            .execute_tool_queued_or_direct(
2549                                &tool_call.name,
2550                                &tool_call.args,
2551                                &stream_ctx,
2552                            )
2553                            .await;
2554
2555                        let tuple = Self::tool_result_to_tuple(result);
2556                        // Track tool call in progress tracker
2557                        let (_, exit_code, _, _, _) = tuple;
2558                        self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
2559                        tuple
2560                    }
2561                    PermissionDecision::Ask => {
2562                        tracing::info!(
2563                            tool_name = tool_call.name.as_str(),
2564                            permission = "ask",
2565                            "Tool permission: ask"
2566                        );
2567                        // Permission says Ask — delegate to HITL confirmation manager
2568                        if let Some(cm) = &self.config.confirmation_manager {
2569                            // Check YOLO lanes: if the tool's lane is in YOLO mode, skip confirmation
2570                            if !cm.requires_confirmation(&tool_call.name).await {
2571                                let stream_ctx = self.streaming_tool_context(
2572                                    &event_tx,
2573                                    &tool_call.id,
2574                                    &tool_call.name,
2575                                );
2576                                let result = self
2577                                    .execute_tool_queued_or_direct(
2578                                        &tool_call.name,
2579                                        &tool_call.args,
2580                                        &stream_ctx,
2581                                    )
2582                                    .await;
2583
2584                                let (output, exit_code, is_error, metadata, images) =
2585                                    Self::tool_result_to_tuple(result);
2586
2587                                // Track tool call in progress tracker
2588                                self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
2589
2590                                // Add tool result to messages
2591                                if images.is_empty() {
2592                                    messages.push(Message::tool_result(
2593                                        &tool_call.id,
2594                                        &output,
2595                                        is_error,
2596                                    ));
2597                                } else {
2598                                    messages.push(Message::tool_result_with_images(
2599                                        &tool_call.id,
2600                                        &output,
2601                                        &images,
2602                                        is_error,
2603                                    ));
2604                                }
2605
2606                                // Record tool result on the tool span for early exit
2607                                let tool_duration = tool_start.elapsed();
2608                                crate::telemetry::record_tool_result(exit_code, tool_duration);
2609
2610                                // Send ToolEnd event
2611                                if let Some(tx) = &event_tx {
2612                                    tx.send(AgentEvent::ToolEnd {
2613                                        id: tool_call.id.clone(),
2614                                        name: tool_call.name.clone(),
2615                                        output: output.clone(),
2616                                        exit_code,
2617                                        metadata,
2618                                    })
2619                                    .await
2620                                    .ok();
2621                                }
2622
2623                                // Fire PostToolUse hook (fire-and-forget)
2624                                self.fire_post_tool_use(
2625                                    session_id.unwrap_or(""),
2626                                    &tool_call.name,
2627                                    &tool_call.args,
2628                                    &output,
2629                                    exit_code == 0,
2630                                    tool_duration.as_millis() as u64,
2631                                )
2632                                .await;
2633
2634                                continue; // Skip the rest, move to next tool call
2635                            }
2636
2637                            // Get timeout from policy
2638                            let policy = cm.policy().await;
2639                            let timeout_ms = policy.default_timeout_ms;
2640                            let timeout_action = policy.timeout_action;
2641
2642                            // Request confirmation (this emits ConfirmationRequired event)
2643                            let rx = cm
2644                                .request_confirmation(
2645                                    &tool_call.id,
2646                                    &tool_call.name,
2647                                    &tool_call.args,
2648                                )
2649                                .await;
2650
2651                            // Forward ConfirmationRequired to the streaming event channel
2652                            // so external consumers (e.g. SafeClaw engine) can relay it
2653                            // to the browser UI.
2654                            if let Some(tx) = &event_tx {
2655                                tx.send(AgentEvent::ConfirmationRequired {
2656                                    tool_id: tool_call.id.clone(),
2657                                    tool_name: tool_call.name.clone(),
2658                                    args: tool_call.args.clone(),
2659                                    timeout_ms,
2660                                })
2661                                .await
2662                                .ok();
2663                            }
2664
2665                            // Wait for confirmation with timeout
2666                            let confirmation_result =
2667                                tokio::time::timeout(Duration::from_millis(timeout_ms), rx).await;
2668
2669                            match confirmation_result {
2670                                Ok(Ok(response)) => {
2671                                    // Forward ConfirmationReceived
2672                                    if let Some(tx) = &event_tx {
2673                                        tx.send(AgentEvent::ConfirmationReceived {
2674                                            tool_id: tool_call.id.clone(),
2675                                            approved: response.approved,
2676                                            reason: response.reason.clone(),
2677                                        })
2678                                        .await
2679                                        .ok();
2680                                    }
2681                                    if response.approved {
2682                                        let stream_ctx = self.streaming_tool_context(
2683                                            &event_tx,
2684                                            &tool_call.id,
2685                                            &tool_call.name,
2686                                        );
2687                                        let result = self
2688                                            .execute_tool_queued_or_direct(
2689                                                &tool_call.name,
2690                                                &tool_call.args,
2691                                                &stream_ctx,
2692                                            )
2693                                            .await;
2694
2695                                        let tuple = Self::tool_result_to_tuple(result);
2696                                        // Track tool call in progress tracker
2697                                        let (_, exit_code, _, _, _) = tuple;
2698                                        self.track_tool_result(
2699                                            &tool_call.name,
2700                                            &tool_call.args,
2701                                            exit_code,
2702                                        );
2703                                        tuple
2704                                    } else {
2705                                        let rejection_msg = format!(
2706                                            "Tool '{}' execution was REJECTED by the user. Reason: {}. \
2707                                             DO NOT retry this tool call unless the user explicitly asks you to.",
2708                                            tool_call.name,
2709                                            response.reason.unwrap_or_else(|| "No reason provided".to_string())
2710                                        );
2711                                        (rejection_msg, 1, true, None, Vec::new())
2712                                    }
2713                                }
2714                                Ok(Err(_)) => {
2715                                    // Forward ConfirmationTimeout (channel closed = effectively timed out)
2716                                    if let Some(tx) = &event_tx {
2717                                        tx.send(AgentEvent::ConfirmationTimeout {
2718                                            tool_id: tool_call.id.clone(),
2719                                            action_taken: "rejected".to_string(),
2720                                        })
2721                                        .await
2722                                        .ok();
2723                                    }
2724                                    let msg = format!(
2725                                        "Tool '{}' confirmation failed: confirmation channel closed",
2726                                        tool_call.name
2727                                    );
2728                                    (msg, 1, true, None, Vec::new())
2729                                }
2730                                Err(_) => {
2731                                    cm.check_timeouts().await;
2732
2733                                    // Forward ConfirmationTimeout
2734                                    if let Some(tx) = &event_tx {
2735                                        tx.send(AgentEvent::ConfirmationTimeout {
2736                                            tool_id: tool_call.id.clone(),
2737                                            action_taken: match timeout_action {
2738                                                crate::hitl::TimeoutAction::Reject => {
2739                                                    "rejected".to_string()
2740                                                }
2741                                                crate::hitl::TimeoutAction::AutoApprove => {
2742                                                    "auto_approved".to_string()
2743                                                }
2744                                            },
2745                                        })
2746                                        .await
2747                                        .ok();
2748                                    }
2749
2750                                    match timeout_action {
2751                                        crate::hitl::TimeoutAction::Reject => {
2752                                            let msg = format!(
2753                                                "Tool '{}' execution was REJECTED: user confirmation timed out after {}ms. \
2754                                                 DO NOT retry this tool call — the user did not approve it. \
2755                                                 Inform the user that the operation requires their approval and ask them to try again.",
2756                                                tool_call.name, timeout_ms
2757                                            );
2758                                            (msg, 1, true, None, Vec::new())
2759                                        }
2760                                        crate::hitl::TimeoutAction::AutoApprove => {
2761                                            let stream_ctx = self.streaming_tool_context(
2762                                                &event_tx,
2763                                                &tool_call.id,
2764                                                &tool_call.name,
2765                                            );
2766                                            let result = self
2767                                                .execute_tool_queued_or_direct(
2768                                                    &tool_call.name,
2769                                                    &tool_call.args,
2770                                                    &stream_ctx,
2771                                                )
2772                                                .await;
2773
2774                                            let tuple = Self::tool_result_to_tuple(result);
2775                                            // Track tool call in progress tracker
2776                                            let (_, exit_code, _, _, _) = tuple;
2777                                            self.track_tool_result(
2778                                                &tool_call.name,
2779                                                &tool_call.args,
2780                                                exit_code,
2781                                            );
2782                                            tuple
2783                                        }
2784                                    }
2785                                }
2786                            }
2787                        } else {
2788                            // Ask without confirmation manager — safe deny
2789                            let msg = format!(
2790                                "Tool '{}' requires confirmation but no HITL confirmation manager is configured. \
2791                                 Configure a confirmation policy to enable tool execution.",
2792                                tool_call.name
2793                            );
2794                            tracing::warn!(
2795                                tool_name = tool_call.name.as_str(),
2796                                "Tool requires confirmation but no HITL manager configured"
2797                            );
2798                            (msg, 1, true, None, Vec::new())
2799                        }
2800                    }
2801                };
2802
2803                let tool_duration = tool_start.elapsed();
2804                crate::telemetry::record_tool_result(exit_code, tool_duration);
2805
2806                // Sanitize tool output for sensitive data before it enters the message history
2807                let output = if let Some(ref sp) = self.config.security_provider {
2808                    sp.sanitize_output(&output)
2809                } else {
2810                    output
2811                };
2812
2813                recent_tool_signatures.push(format!(
2814                    "{}:{} => {}",
2815                    tool_call.name,
2816                    serde_json::to_string(&tool_call.args).unwrap_or_default(),
2817                    if is_error { "error" } else { "ok" }
2818                ));
2819                if recent_tool_signatures.len() > 8 {
2820                    let overflow = recent_tool_signatures.len() - 8;
2821                    recent_tool_signatures.drain(0..overflow);
2822                }
2823
2824                // Fire PostToolUse hook (fire-and-forget)
2825                self.fire_post_tool_use(
2826                    session_id.unwrap_or(""),
2827                    &tool_call.name,
2828                    &tool_call.args,
2829                    &output,
2830                    exit_code == 0,
2831                    tool_duration.as_millis() as u64,
2832                )
2833                .await;
2834
2835                // Auto-remember tool result in long-term memory
2836                if let Some(ref memory) = self.config.memory {
2837                    let tools_used = [tool_call.name.clone()];
2838                    let remember_result = if exit_code == 0 {
2839                        memory
2840                            .remember_success(effective_prompt, &tools_used, &output)
2841                            .await
2842                    } else {
2843                        memory
2844                            .remember_failure(effective_prompt, &output, &tools_used)
2845                            .await
2846                    };
2847                    match remember_result {
2848                        Ok(()) => {
2849                            if let Some(tx) = &event_tx {
2850                                let item_type = if exit_code == 0 { "success" } else { "failure" };
2851                                tx.send(AgentEvent::MemoryStored {
2852                                    memory_id: uuid::Uuid::new_v4().to_string(),
2853                                    memory_type: item_type.to_string(),
2854                                    importance: if exit_code == 0 { 0.8 } else { 0.9 },
2855                                    tags: vec![item_type.to_string(), tool_call.name.clone()],
2856                                })
2857                                .await
2858                                .ok();
2859                            }
2860                        }
2861                        Err(e) => {
2862                            tracing::warn!("Failed to store memory after tool execution: {}", e);
2863                        }
2864                    }
2865                }
2866
2867                // Send tool end event
2868                if let Some(tx) = &event_tx {
2869                    tx.send(AgentEvent::ToolEnd {
2870                        id: tool_call.id.clone(),
2871                        name: tool_call.name.clone(),
2872                        output: output.clone(),
2873                        exit_code,
2874                        metadata,
2875                    })
2876                    .await
2877                    .ok();
2878                }
2879
2880                // Add tool result to messages
2881                if images.is_empty() {
2882                    messages.push(Message::tool_result(&tool_call.id, &output, is_error));
2883                } else {
2884                    messages.push(Message::tool_result_with_images(
2885                        &tool_call.id,
2886                        &output,
2887                        &images,
2888                        is_error,
2889                    ));
2890                }
2891            }
2892        }
2893    }
2894
2895    /// Execute with streaming events
2896    pub async fn execute_streaming(
2897        &self,
2898        history: &[Message],
2899        prompt: &str,
2900    ) -> Result<(
2901        mpsc::Receiver<AgentEvent>,
2902        tokio::task::JoinHandle<Result<AgentResult>>,
2903        tokio_util::sync::CancellationToken,
2904    )> {
2905        let (tx, rx) = mpsc::channel(100);
2906        let cancel_token = tokio_util::sync::CancellationToken::new();
2907
2908        let llm_client = self.llm_client.clone();
2909        let tool_executor = self.tool_executor.clone();
2910        let tool_context = self.tool_context.clone();
2911        let config = self.config.clone();
2912        let tool_metrics = self.tool_metrics.clone();
2913        let command_queue = self.command_queue.clone();
2914        let history = history.to_vec();
2915        let prompt = prompt.to_string();
2916        let token_clone = cancel_token.clone();
2917
2918        let handle = tokio::spawn(async move {
2919            let mut agent = AgentLoop::new(llm_client, tool_executor, tool_context, config);
2920            if let Some(metrics) = tool_metrics {
2921                agent = agent.with_tool_metrics(metrics);
2922            }
2923            if let Some(queue) = command_queue {
2924                agent = agent.with_queue(queue);
2925            }
2926            agent
2927                .execute_with_session(&history, &prompt, None, Some(tx), Some(&token_clone))
2928                .await
2929        });
2930
2931        Ok((rx, handle, cancel_token))
2932    }
2933
2934    /// Create an execution plan for a prompt
2935    ///
2936    /// Delegates to [`LlmPlanner`] for structured JSON plan generation,
2937    /// falling back to heuristic planning if the LLM call fails.
2938    pub async fn plan(&self, prompt: &str, _context: Option<&str>) -> Result<ExecutionPlan> {
2939        use crate::planning::LlmPlanner;
2940
2941        match LlmPlanner::create_plan(&self.llm_client, prompt).await {
2942            Ok(plan) => Ok(plan),
2943            Err(e) => {
2944                tracing::warn!("LLM plan creation failed, using fallback: {}", e);
2945                Ok(LlmPlanner::fallback_plan(prompt))
2946            }
2947        }
2948    }
2949
2950    /// Execute with planning phase
2951    pub async fn execute_with_planning(
2952        &self,
2953        history: &[Message],
2954        prompt: &str,
2955        event_tx: Option<mpsc::Sender<AgentEvent>>,
2956    ) -> Result<AgentResult> {
2957        // Send planning start event
2958        if let Some(tx) = &event_tx {
2959            tx.send(AgentEvent::PlanningStart {
2960                prompt: prompt.to_string(),
2961            })
2962            .await
2963            .ok();
2964        }
2965
2966        // Extract goal when goal_tracking is enabled
2967        let goal = if self.config.goal_tracking {
2968            let g = self.extract_goal(prompt).await?;
2969            if let Some(tx) = &event_tx {
2970                tx.send(AgentEvent::GoalExtracted { goal: g.clone() })
2971                    .await
2972                    .ok();
2973            }
2974            Some(g)
2975        } else {
2976            None
2977        };
2978
2979        // Create execution plan
2980        let plan = self.plan(prompt, None).await?;
2981
2982        // Send planning end event
2983        if let Some(tx) = &event_tx {
2984            tx.send(AgentEvent::PlanningEnd {
2985                estimated_steps: plan.steps.len(),
2986                plan: plan.clone(),
2987            })
2988            .await
2989            .ok();
2990        }
2991
2992        let plan_start = std::time::Instant::now();
2993
2994        // Execute the plan step by step
2995        let result = self.execute_plan(history, &plan, event_tx.clone()).await?;
2996
2997        // Emit the final End event (execute_loop_inner does not emit End in planning mode)
2998        if let Some(tx) = &event_tx {
2999            tx.send(AgentEvent::End {
3000                text: result.text.clone(),
3001                usage: result.usage.clone(),
3002                meta: None,
3003            })
3004            .await
3005            .ok();
3006        }
3007
3008        // Check goal achievement when goal_tracking is enabled
3009        if self.config.goal_tracking {
3010            if let Some(ref g) = goal {
3011                let achieved = self.check_goal_achievement(g, &result.text).await?;
3012                if achieved {
3013                    if let Some(tx) = &event_tx {
3014                        tx.send(AgentEvent::GoalAchieved {
3015                            goal: g.description.clone(),
3016                            total_steps: result.messages.len(),
3017                            duration_ms: plan_start.elapsed().as_millis() as i64,
3018                        })
3019                        .await
3020                        .ok();
3021                    }
3022                }
3023            }
3024        }
3025
3026        Ok(result)
3027    }
3028
3029    /// Execute an execution plan using wave-based dependency-aware scheduling.
3030    ///
3031    /// Steps with no unmet dependencies are grouped into "waves". A wave with
3032    /// a single step executes sequentially (preserving the history chain). A
3033    /// wave with multiple independent steps executes them in parallel via
3034    /// `JoinSet`, then merges their results back into the shared history.
3035    async fn execute_plan(
3036        &self,
3037        history: &[Message],
3038        plan: &ExecutionPlan,
3039        event_tx: Option<mpsc::Sender<AgentEvent>>,
3040    ) -> Result<AgentResult> {
3041        let mut plan = plan.clone();
3042        let mut current_history = history.to_vec();
3043        let mut total_usage = TokenUsage::default();
3044        let mut tool_calls_count = 0;
3045        let total_steps = plan.steps.len();
3046
3047        // Add initial user message with the goal
3048        let steps_text = plan
3049            .steps
3050            .iter()
3051            .enumerate()
3052            .map(|(i, step)| format!("{}. {}", i + 1, step.content))
3053            .collect::<Vec<_>>()
3054            .join("\n");
3055        current_history.push(Message::user(&crate::prompts::render(
3056            crate::prompts::PLAN_EXECUTE_GOAL,
3057            &[("goal", &plan.goal), ("steps", &steps_text)],
3058        )));
3059
3060        loop {
3061            let ready: Vec<String> = plan
3062                .get_ready_steps()
3063                .iter()
3064                .map(|s| s.id.clone())
3065                .collect();
3066
3067            if ready.is_empty() {
3068                // All done or deadlock
3069                if plan.has_deadlock() {
3070                    tracing::warn!(
3071                        "Plan deadlock detected: {} pending steps with unresolvable dependencies",
3072                        plan.pending_count()
3073                    );
3074                }
3075                break;
3076            }
3077
3078            if ready.len() == 1 {
3079                // === Single step: sequential execution (preserves history chain) ===
3080                let step_id = &ready[0];
3081                let step = plan
3082                    .steps
3083                    .iter()
3084                    .find(|s| s.id == *step_id)
3085                    .ok_or_else(|| anyhow::anyhow!("step '{}' not found in plan", step_id))?
3086                    .clone();
3087                let step_number = plan
3088                    .steps
3089                    .iter()
3090                    .position(|s| s.id == *step_id)
3091                    .unwrap_or(0)
3092                    + 1;
3093
3094                // Send step start event
3095                if let Some(tx) = &event_tx {
3096                    tx.send(AgentEvent::StepStart {
3097                        step_id: step.id.clone(),
3098                        description: step.content.clone(),
3099                        step_number,
3100                        total_steps,
3101                    })
3102                    .await
3103                    .ok();
3104                }
3105
3106                plan.mark_status(&step.id, TaskStatus::InProgress);
3107
3108                let step_prompt = crate::prompts::render(
3109                    crate::prompts::PLAN_EXECUTE_STEP,
3110                    &[
3111                        ("step_num", &step_number.to_string()),
3112                        ("description", &step.content),
3113                    ],
3114                );
3115
3116                match self
3117                    .execute_loop(
3118                        &current_history,
3119                        &step_prompt,
3120                        None,
3121                        event_tx.clone(),
3122                        &tokio_util::sync::CancellationToken::new(),
3123                        false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
3124                    )
3125                    .await
3126                {
3127                    Ok(result) => {
3128                        current_history = result.messages.clone();
3129                        total_usage.prompt_tokens += result.usage.prompt_tokens;
3130                        total_usage.completion_tokens += result.usage.completion_tokens;
3131                        total_usage.total_tokens += result.usage.total_tokens;
3132                        tool_calls_count += result.tool_calls_count;
3133                        plan.mark_status(&step.id, TaskStatus::Completed);
3134
3135                        if let Some(tx) = &event_tx {
3136                            tx.send(AgentEvent::StepEnd {
3137                                step_id: step.id.clone(),
3138                                status: TaskStatus::Completed,
3139                                step_number,
3140                                total_steps,
3141                            })
3142                            .await
3143                            .ok();
3144                        }
3145                    }
3146                    Err(e) => {
3147                        tracing::error!("Plan step '{}' failed: {}", step.id, e);
3148                        plan.mark_status(&step.id, TaskStatus::Failed);
3149
3150                        if let Some(tx) = &event_tx {
3151                            tx.send(AgentEvent::StepEnd {
3152                                step_id: step.id.clone(),
3153                                status: TaskStatus::Failed,
3154                                step_number,
3155                                total_steps,
3156                            })
3157                            .await
3158                            .ok();
3159                        }
3160                    }
3161                }
3162            } else {
3163                // === Multiple steps: parallel execution via JoinSet ===
3164                // NOTE: Each parallel branch gets a clone of the base history.
3165                // Individual branch histories (tool calls, LLM turns) are NOT merged
3166                // back — only a summary message is appended. This is a deliberate
3167                // trade-off: merging divergent histories in a deterministic order is
3168                // complex and the summary approach keeps the context window manageable.
3169                let ready_steps: Vec<_> = ready
3170                    .iter()
3171                    .filter_map(|id| {
3172                        let step = plan.steps.iter().find(|s| s.id == *id)?.clone();
3173                        let step_number =
3174                            plan.steps.iter().position(|s| s.id == *id).unwrap_or(0) + 1;
3175                        Some((step, step_number))
3176                    })
3177                    .collect();
3178
3179                // Mark all as InProgress and emit StepStart events
3180                for (step, step_number) in &ready_steps {
3181                    plan.mark_status(&step.id, TaskStatus::InProgress);
3182                    if let Some(tx) = &event_tx {
3183                        tx.send(AgentEvent::StepStart {
3184                            step_id: step.id.clone(),
3185                            description: step.content.clone(),
3186                            step_number: *step_number,
3187                            total_steps,
3188                        })
3189                        .await
3190                        .ok();
3191                    }
3192                }
3193
3194                // Spawn all into JoinSet, each with a clone of the base history
3195                let mut join_set = tokio::task::JoinSet::new();
3196                for (step, step_number) in &ready_steps {
3197                    let base_history = current_history.clone();
3198                    let agent_clone = self.clone();
3199                    let tx = event_tx.clone();
3200                    let step_clone = step.clone();
3201                    let sn = *step_number;
3202
3203                    join_set.spawn(async move {
3204                        let prompt = crate::prompts::render(
3205                            crate::prompts::PLAN_EXECUTE_STEP,
3206                            &[
3207                                ("step_num", &sn.to_string()),
3208                                ("description", &step_clone.content),
3209                            ],
3210                        );
3211                        let result = agent_clone
3212                            .execute_loop(
3213                                &base_history,
3214                                &prompt,
3215                                None,
3216                                tx,
3217                                &tokio_util::sync::CancellationToken::new(),
3218                                false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
3219                            )
3220                            .await;
3221                        (step_clone.id, sn, result)
3222                    });
3223                }
3224
3225                // Collect results
3226                let mut parallel_summaries = Vec::new();
3227                while let Some(join_result) = join_set.join_next().await {
3228                    match join_result {
3229                        Ok((step_id, step_number, step_result)) => match step_result {
3230                            Ok(result) => {
3231                                total_usage.prompt_tokens += result.usage.prompt_tokens;
3232                                total_usage.completion_tokens += result.usage.completion_tokens;
3233                                total_usage.total_tokens += result.usage.total_tokens;
3234                                tool_calls_count += result.tool_calls_count;
3235                                plan.mark_status(&step_id, TaskStatus::Completed);
3236
3237                                // Collect the final assistant text for context merging
3238                                parallel_summaries.push(format!(
3239                                    "- Step {} ({}): {}",
3240                                    step_number, step_id, result.text
3241                                ));
3242
3243                                if let Some(tx) = &event_tx {
3244                                    tx.send(AgentEvent::StepEnd {
3245                                        step_id,
3246                                        status: TaskStatus::Completed,
3247                                        step_number,
3248                                        total_steps,
3249                                    })
3250                                    .await
3251                                    .ok();
3252                                }
3253                            }
3254                            Err(e) => {
3255                                tracing::error!("Plan step '{}' failed: {}", step_id, e);
3256                                plan.mark_status(&step_id, TaskStatus::Failed);
3257
3258                                if let Some(tx) = &event_tx {
3259                                    tx.send(AgentEvent::StepEnd {
3260                                        step_id,
3261                                        status: TaskStatus::Failed,
3262                                        step_number,
3263                                        total_steps,
3264                                    })
3265                                    .await
3266                                    .ok();
3267                                }
3268                            }
3269                        },
3270                        Err(e) => {
3271                            tracing::error!("JoinSet task panicked: {}", e);
3272                        }
3273                    }
3274                }
3275
3276                // Merge parallel results into history for subsequent steps
3277                if !parallel_summaries.is_empty() {
3278                    parallel_summaries.sort(); // Deterministic ordering
3279                    let results_text = parallel_summaries.join("\n");
3280                    current_history.push(Message::user(&crate::prompts::render(
3281                        crate::prompts::PLAN_PARALLEL_RESULTS,
3282                        &[("results", &results_text)],
3283                    )));
3284                }
3285            }
3286
3287            // Emit GoalProgress after each wave
3288            if self.config.goal_tracking {
3289                let completed = plan
3290                    .steps
3291                    .iter()
3292                    .filter(|s| s.status == TaskStatus::Completed)
3293                    .count();
3294                if let Some(tx) = &event_tx {
3295                    tx.send(AgentEvent::GoalProgress {
3296                        goal: plan.goal.clone(),
3297                        progress: plan.progress(),
3298                        completed_steps: completed,
3299                        total_steps,
3300                    })
3301                    .await
3302                    .ok();
3303                }
3304            }
3305        }
3306
3307        // Get final response
3308        let final_text = current_history
3309            .last()
3310            .map(|m| {
3311                m.content
3312                    .iter()
3313                    .filter_map(|block| {
3314                        if let crate::llm::ContentBlock::Text { text } = block {
3315                            Some(text.as_str())
3316                        } else {
3317                            None
3318                        }
3319                    })
3320                    .collect::<Vec<_>>()
3321                    .join("\n")
3322            })
3323            .unwrap_or_default();
3324
3325        Ok(AgentResult {
3326            text: final_text,
3327            messages: current_history,
3328            usage: total_usage,
3329            tool_calls_count,
3330        })
3331    }
3332
3333    /// Extract goal from prompt
3334    ///
3335    /// Delegates to [`LlmPlanner`] for structured JSON goal extraction,
3336    /// falling back to heuristic logic if the LLM call fails.
3337    pub async fn extract_goal(&self, prompt: &str) -> Result<AgentGoal> {
3338        use crate::planning::LlmPlanner;
3339
3340        match LlmPlanner::extract_goal(&self.llm_client, prompt).await {
3341            Ok(goal) => Ok(goal),
3342            Err(e) => {
3343                tracing::warn!("LLM goal extraction failed, using fallback: {}", e);
3344                Ok(LlmPlanner::fallback_goal(prompt))
3345            }
3346        }
3347    }
3348
3349    /// Check if goal is achieved
3350    ///
3351    /// Delegates to [`LlmPlanner`] for structured JSON achievement check,
3352    /// falling back to heuristic logic if the LLM call fails.
3353    pub async fn check_goal_achievement(
3354        &self,
3355        goal: &AgentGoal,
3356        current_state: &str,
3357    ) -> Result<bool> {
3358        use crate::planning::LlmPlanner;
3359
3360        match LlmPlanner::check_achievement(&self.llm_client, goal, current_state).await {
3361            Ok(result) => Ok(result.achieved),
3362            Err(e) => {
3363                tracing::warn!("LLM achievement check failed, using fallback: {}", e);
3364                let result = LlmPlanner::fallback_check_achievement(goal, current_state);
3365                Ok(result.achieved)
3366            }
3367        }
3368    }
3369}
3370
3371#[cfg(test)]
3372mod tests {
3373    use super::*;
3374    use crate::llm::{ContentBlock, StreamEvent};
3375    use crate::permissions::PermissionPolicy;
3376    use crate::tools::ToolExecutor;
3377    use std::path::PathBuf;
3378    use std::sync::atomic::{AtomicUsize, Ordering};
3379
3380    /// Create a default ToolContext for tests
3381    fn test_tool_context() -> ToolContext {
3382        ToolContext::new(PathBuf::from("/tmp"))
3383    }
3384
3385    #[test]
3386    fn test_agent_config_default() {
3387        let config = AgentConfig::default();
3388        assert!(config.prompt_slots.is_empty());
3389        assert!(config.tools.is_empty()); // Tools are provided externally
3390        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
3391        assert!(config.permission_checker.is_none());
3392        assert!(config.context_providers.is_empty());
3393        // Built-in skills are always present by default
3394        let registry = config
3395            .skill_registry
3396            .expect("skill_registry must be Some by default");
3397        assert!(registry.len() >= 7, "expected at least 7 built-in skills");
3398        assert!(registry.get("code-search").is_some());
3399        assert!(registry.get("find-bugs").is_some());
3400    }
3401
3402    // ========================================================================
3403    // Mock LLM Client for Testing
3404    // ========================================================================
3405
3406    /// Mock LLM client that returns predefined responses
3407    pub(crate) struct MockLlmClient {
3408        /// Responses to return (consumed in order)
3409        responses: std::sync::Mutex<Vec<LlmResponse>>,
3410        /// Number of calls made
3411        pub(crate) call_count: AtomicUsize,
3412    }
3413
3414    impl MockLlmClient {
3415        pub(crate) fn new(responses: Vec<LlmResponse>) -> Self {
3416            Self {
3417                responses: std::sync::Mutex::new(responses),
3418                call_count: AtomicUsize::new(0),
3419            }
3420        }
3421
3422        /// Create a response with text only (no tool calls)
3423        pub(crate) fn text_response(text: &str) -> LlmResponse {
3424            LlmResponse {
3425                message: Message {
3426                    role: "assistant".to_string(),
3427                    content: vec![ContentBlock::Text {
3428                        text: text.to_string(),
3429                    }],
3430                    reasoning_content: None,
3431                },
3432                usage: TokenUsage {
3433                    prompt_tokens: 10,
3434                    completion_tokens: 5,
3435                    total_tokens: 15,
3436                    cache_read_tokens: None,
3437                    cache_write_tokens: None,
3438                },
3439                stop_reason: Some("end_turn".to_string()),
3440                meta: None,
3441            }
3442        }
3443
3444        /// Create a response with a tool call
3445        pub(crate) fn tool_call_response(
3446            tool_id: &str,
3447            tool_name: &str,
3448            args: serde_json::Value,
3449        ) -> LlmResponse {
3450            LlmResponse {
3451                message: Message {
3452                    role: "assistant".to_string(),
3453                    content: vec![ContentBlock::ToolUse {
3454                        id: tool_id.to_string(),
3455                        name: tool_name.to_string(),
3456                        input: args,
3457                    }],
3458                    reasoning_content: None,
3459                },
3460                usage: TokenUsage {
3461                    prompt_tokens: 10,
3462                    completion_tokens: 5,
3463                    total_tokens: 15,
3464                    cache_read_tokens: None,
3465                    cache_write_tokens: None,
3466                },
3467                stop_reason: Some("tool_use".to_string()),
3468                meta: None,
3469            }
3470        }
3471    }
3472
3473    #[async_trait::async_trait]
3474    impl LlmClient for MockLlmClient {
3475        async fn complete(
3476            &self,
3477            _messages: &[Message],
3478            _system: Option<&str>,
3479            _tools: &[ToolDefinition],
3480        ) -> Result<LlmResponse> {
3481            self.call_count.fetch_add(1, Ordering::SeqCst);
3482            let mut responses = self.responses.lock().unwrap();
3483            if responses.is_empty() {
3484                anyhow::bail!("No more mock responses available");
3485            }
3486            Ok(responses.remove(0))
3487        }
3488
3489        async fn complete_streaming(
3490            &self,
3491            _messages: &[Message],
3492            _system: Option<&str>,
3493            _tools: &[ToolDefinition],
3494        ) -> Result<mpsc::Receiver<StreamEvent>> {
3495            self.call_count.fetch_add(1, Ordering::SeqCst);
3496            let mut responses = self.responses.lock().unwrap();
3497            if responses.is_empty() {
3498                anyhow::bail!("No more mock responses available");
3499            }
3500            let response = responses.remove(0);
3501
3502            let (tx, rx) = mpsc::channel(10);
3503            tokio::spawn(async move {
3504                // Send text deltas if any
3505                for block in &response.message.content {
3506                    if let ContentBlock::Text { text } = block {
3507                        tx.send(StreamEvent::TextDelta(text.clone())).await.ok();
3508                    }
3509                }
3510                tx.send(StreamEvent::Done(response)).await.ok();
3511            });
3512
3513            Ok(rx)
3514        }
3515    }
3516
3517    // ========================================================================
3518    // Agent Loop Tests
3519    // ========================================================================
3520
3521    #[tokio::test]
3522    async fn test_agent_simple_response() {
3523        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
3524            "Hello, I'm an AI assistant.",
3525        )]));
3526
3527        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3528        let config = AgentConfig::default();
3529
3530        let agent = AgentLoop::new(
3531            mock_client.clone(),
3532            tool_executor,
3533            test_tool_context(),
3534            config,
3535        );
3536        let result = agent.execute(&[], "Hello", None).await.unwrap();
3537
3538        assert_eq!(result.text, "Hello, I'm an AI assistant.");
3539        assert_eq!(result.tool_calls_count, 0);
3540        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
3541    }
3542
3543    #[tokio::test]
3544    async fn test_agent_with_tool_call() {
3545        let mock_client = Arc::new(MockLlmClient::new(vec![
3546            // First response: tool call
3547            MockLlmClient::tool_call_response(
3548                "tool-1",
3549                "bash",
3550                serde_json::json!({"command": "echo hello"}),
3551            ),
3552            // Second response: final text
3553            MockLlmClient::text_response("The command output was: hello"),
3554        ]));
3555
3556        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3557        let config = AgentConfig::default();
3558
3559        let agent = AgentLoop::new(
3560            mock_client.clone(),
3561            tool_executor,
3562            test_tool_context(),
3563            config,
3564        );
3565        let result = agent.execute(&[], "Run echo hello", None).await.unwrap();
3566
3567        assert_eq!(result.text, "The command output was: hello");
3568        assert_eq!(result.tool_calls_count, 1);
3569        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
3570    }
3571
3572    #[tokio::test]
3573    async fn test_agent_permission_deny() {
3574        let mock_client = Arc::new(MockLlmClient::new(vec![
3575            // First response: tool call that will be denied
3576            MockLlmClient::tool_call_response(
3577                "tool-1",
3578                "bash",
3579                serde_json::json!({"command": "rm -rf /tmp/test"}),
3580            ),
3581            // Second response: LLM responds to the denial
3582            MockLlmClient::text_response(
3583                "I cannot execute that command due to permission restrictions.",
3584            ),
3585        ]));
3586
3587        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3588
3589        // Create permission policy that denies rm commands
3590        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
3591
3592        let config = AgentConfig {
3593            permission_checker: Some(Arc::new(permission_policy)),
3594            ..Default::default()
3595        };
3596
3597        let (tx, mut rx) = mpsc::channel(100);
3598        let agent = AgentLoop::new(
3599            mock_client.clone(),
3600            tool_executor,
3601            test_tool_context(),
3602            config,
3603        );
3604        let result = agent.execute(&[], "Delete files", Some(tx)).await.unwrap();
3605
3606        // Check that we received a PermissionDenied event
3607        let mut found_permission_denied = false;
3608        while let Ok(event) = rx.try_recv() {
3609            if let AgentEvent::PermissionDenied { tool_name, .. } = event {
3610                assert_eq!(tool_name, "bash");
3611                found_permission_denied = true;
3612            }
3613        }
3614        assert!(
3615            found_permission_denied,
3616            "Should have received PermissionDenied event"
3617        );
3618
3619        assert_eq!(result.tool_calls_count, 1);
3620    }
3621
3622    #[tokio::test]
3623    async fn test_agent_permission_allow() {
3624        let mock_client = Arc::new(MockLlmClient::new(vec![
3625            // First response: tool call that will be allowed
3626            MockLlmClient::tool_call_response(
3627                "tool-1",
3628                "bash",
3629                serde_json::json!({"command": "echo hello"}),
3630            ),
3631            // Second response: final text
3632            MockLlmClient::text_response("Done!"),
3633        ]));
3634
3635        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3636
3637        // Create permission policy that allows echo commands
3638        let permission_policy = PermissionPolicy::new()
3639            .allow("bash(echo:*)")
3640            .deny("bash(rm:*)");
3641
3642        let config = AgentConfig {
3643            permission_checker: Some(Arc::new(permission_policy)),
3644            ..Default::default()
3645        };
3646
3647        let agent = AgentLoop::new(
3648            mock_client.clone(),
3649            tool_executor,
3650            test_tool_context(),
3651            config,
3652        );
3653        let result = agent.execute(&[], "Echo hello", None).await.unwrap();
3654
3655        assert_eq!(result.text, "Done!");
3656        assert_eq!(result.tool_calls_count, 1);
3657    }
3658
3659    #[tokio::test]
3660    async fn test_agent_streaming_events() {
3661        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
3662            "Hello!",
3663        )]));
3664
3665        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3666        let config = AgentConfig::default();
3667
3668        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3669        let (mut rx, handle, _cancel_token) = agent.execute_streaming(&[], "Hi").await.unwrap();
3670
3671        // Collect events
3672        let mut events = Vec::new();
3673        while let Some(event) = rx.recv().await {
3674            events.push(event);
3675        }
3676
3677        let result = handle.await.unwrap().unwrap();
3678        assert_eq!(result.text, "Hello!");
3679
3680        // Check we received Start and End events
3681        assert!(events.iter().any(|e| matches!(e, AgentEvent::Start { .. })));
3682        assert!(events.iter().any(|e| matches!(e, AgentEvent::End { .. })));
3683    }
3684
3685    #[tokio::test]
3686    async fn test_agent_max_tool_rounds() {
3687        // Create a mock that always returns tool calls (infinite loop)
3688        let responses: Vec<LlmResponse> = (0..100)
3689            .map(|i| {
3690                MockLlmClient::tool_call_response(
3691                    &format!("tool-{}", i),
3692                    "bash",
3693                    serde_json::json!({"command": "echo loop"}),
3694                )
3695            })
3696            .collect();
3697
3698        let mock_client = Arc::new(MockLlmClient::new(responses));
3699        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3700
3701        let config = AgentConfig {
3702            max_tool_rounds: 3,
3703            ..Default::default()
3704        };
3705
3706        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3707        let result = agent.execute(&[], "Loop forever", None).await;
3708
3709        // Should fail due to max tool rounds exceeded
3710        assert!(result.is_err());
3711        assert!(result.unwrap_err().to_string().contains("Max tool rounds"));
3712    }
3713
3714    #[tokio::test]
3715    async fn test_agent_no_permission_policy_defaults_to_ask() {
3716        // When no permission policy is set, tools default to Ask.
3717        // Without a confirmation manager, Ask = safe deny.
3718        let mock_client = Arc::new(MockLlmClient::new(vec![
3719            MockLlmClient::tool_call_response(
3720                "tool-1",
3721                "bash",
3722                serde_json::json!({"command": "rm -rf /tmp/test"}),
3723            ),
3724            MockLlmClient::text_response("Denied!"),
3725        ]));
3726
3727        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3728        let config = AgentConfig {
3729            permission_checker: None, // No policy → defaults to Ask
3730            // No confirmation_manager → safe deny
3731            ..Default::default()
3732        };
3733
3734        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3735        let result = agent.execute(&[], "Delete", None).await.unwrap();
3736
3737        // Should be denied (no policy + no CM = safe deny)
3738        assert_eq!(result.text, "Denied!");
3739        assert_eq!(result.tool_calls_count, 1);
3740    }
3741
3742    #[tokio::test]
3743    async fn test_agent_permission_ask_without_cm_denies() {
3744        // When permission is Ask and no confirmation manager configured,
3745        // tool execution should be denied (safe default).
3746        let mock_client = Arc::new(MockLlmClient::new(vec![
3747            MockLlmClient::tool_call_response(
3748                "tool-1",
3749                "bash",
3750                serde_json::json!({"command": "echo test"}),
3751            ),
3752            MockLlmClient::text_response("Denied!"),
3753        ]));
3754
3755        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3756
3757        // Create policy where bash falls through to Ask (default)
3758        let permission_policy = PermissionPolicy::new(); // Default decision is Ask
3759
3760        let config = AgentConfig {
3761            permission_checker: Some(Arc::new(permission_policy)),
3762            // No confirmation_manager — safe deny
3763            ..Default::default()
3764        };
3765
3766        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3767        let result = agent.execute(&[], "Echo", None).await.unwrap();
3768
3769        // Should deny (Ask without CM = safe deny)
3770        assert_eq!(result.text, "Denied!");
3771        // The tool result should contain the denial message
3772        assert!(result.tool_calls_count >= 1);
3773    }
3774
3775    // ========================================================================
3776    // HITL (Human-in-the-Loop) Tests
3777    // ========================================================================
3778
3779    #[tokio::test]
3780    async fn test_agent_hitl_approved() {
3781        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3782        use tokio::sync::broadcast;
3783
3784        let mock_client = Arc::new(MockLlmClient::new(vec![
3785            MockLlmClient::tool_call_response(
3786                "tool-1",
3787                "bash",
3788                serde_json::json!({"command": "echo hello"}),
3789            ),
3790            MockLlmClient::text_response("Command executed!"),
3791        ]));
3792
3793        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3794
3795        // Create HITL confirmation manager with policy enabled
3796        let (event_tx, _event_rx) = broadcast::channel(100);
3797        let hitl_policy = ConfirmationPolicy {
3798            enabled: true,
3799            ..Default::default()
3800        };
3801        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3802
3803        // Create permission policy that returns Ask for bash
3804        let permission_policy = PermissionPolicy::new(); // Default is Ask
3805
3806        let config = AgentConfig {
3807            permission_checker: Some(Arc::new(permission_policy)),
3808            confirmation_manager: Some(confirmation_manager.clone()),
3809            ..Default::default()
3810        };
3811
3812        // Spawn a task to approve the confirmation
3813        let cm_clone = confirmation_manager.clone();
3814        tokio::spawn(async move {
3815            // Wait a bit for the confirmation request to be created
3816            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
3817            // Approve it
3818            cm_clone.confirm("tool-1", true, None).await.ok();
3819        });
3820
3821        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3822        let result = agent.execute(&[], "Run echo", None).await.unwrap();
3823
3824        assert_eq!(result.text, "Command executed!");
3825        assert_eq!(result.tool_calls_count, 1);
3826    }
3827
3828    #[tokio::test]
3829    async fn test_agent_hitl_rejected() {
3830        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3831        use tokio::sync::broadcast;
3832
3833        let mock_client = Arc::new(MockLlmClient::new(vec![
3834            MockLlmClient::tool_call_response(
3835                "tool-1",
3836                "bash",
3837                serde_json::json!({"command": "rm -rf /"}),
3838            ),
3839            MockLlmClient::text_response("Understood, I won't do that."),
3840        ]));
3841
3842        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3843
3844        // Create HITL confirmation manager
3845        let (event_tx, _event_rx) = broadcast::channel(100);
3846        let hitl_policy = ConfirmationPolicy {
3847            enabled: true,
3848            ..Default::default()
3849        };
3850        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3851
3852        // Permission policy returns Ask
3853        let permission_policy = PermissionPolicy::new();
3854
3855        let config = AgentConfig {
3856            permission_checker: Some(Arc::new(permission_policy)),
3857            confirmation_manager: Some(confirmation_manager.clone()),
3858            ..Default::default()
3859        };
3860
3861        // Spawn a task to reject the confirmation
3862        let cm_clone = confirmation_manager.clone();
3863        tokio::spawn(async move {
3864            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
3865            cm_clone
3866                .confirm("tool-1", false, Some("Too dangerous".to_string()))
3867                .await
3868                .ok();
3869        });
3870
3871        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3872        let result = agent.execute(&[], "Delete everything", None).await.unwrap();
3873
3874        // LLM should respond to the rejection
3875        assert_eq!(result.text, "Understood, I won't do that.");
3876    }
3877
3878    #[tokio::test]
3879    async fn test_agent_hitl_timeout_reject() {
3880        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
3881        use tokio::sync::broadcast;
3882
3883        let mock_client = Arc::new(MockLlmClient::new(vec![
3884            MockLlmClient::tool_call_response(
3885                "tool-1",
3886                "bash",
3887                serde_json::json!({"command": "echo test"}),
3888            ),
3889            MockLlmClient::text_response("Timed out, I understand."),
3890        ]));
3891
3892        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3893
3894        // Create HITL with very short timeout and Reject action
3895        let (event_tx, _event_rx) = broadcast::channel(100);
3896        let hitl_policy = ConfirmationPolicy {
3897            enabled: true,
3898            default_timeout_ms: 50, // Very short timeout
3899            timeout_action: TimeoutAction::Reject,
3900            ..Default::default()
3901        };
3902        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3903
3904        let permission_policy = PermissionPolicy::new();
3905
3906        let config = AgentConfig {
3907            permission_checker: Some(Arc::new(permission_policy)),
3908            confirmation_manager: Some(confirmation_manager),
3909            ..Default::default()
3910        };
3911
3912        // Don't approve - let it timeout
3913        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3914        let result = agent.execute(&[], "Echo", None).await.unwrap();
3915
3916        // Should get timeout rejection response from LLM
3917        assert_eq!(result.text, "Timed out, I understand.");
3918    }
3919
3920    #[tokio::test]
3921    async fn test_agent_hitl_timeout_auto_approve() {
3922        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
3923        use tokio::sync::broadcast;
3924
3925        let mock_client = Arc::new(MockLlmClient::new(vec![
3926            MockLlmClient::tool_call_response(
3927                "tool-1",
3928                "bash",
3929                serde_json::json!({"command": "echo hello"}),
3930            ),
3931            MockLlmClient::text_response("Auto-approved and executed!"),
3932        ]));
3933
3934        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3935
3936        // Create HITL with very short timeout and AutoApprove action
3937        let (event_tx, _event_rx) = broadcast::channel(100);
3938        let hitl_policy = ConfirmationPolicy {
3939            enabled: true,
3940            default_timeout_ms: 50, // Very short timeout
3941            timeout_action: TimeoutAction::AutoApprove,
3942            ..Default::default()
3943        };
3944        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3945
3946        let permission_policy = PermissionPolicy::new();
3947
3948        let config = AgentConfig {
3949            permission_checker: Some(Arc::new(permission_policy)),
3950            confirmation_manager: Some(confirmation_manager),
3951            ..Default::default()
3952        };
3953
3954        // Don't approve - let it timeout and auto-approve
3955        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3956        let result = agent.execute(&[], "Echo", None).await.unwrap();
3957
3958        // Should auto-approve on timeout and execute
3959        assert_eq!(result.text, "Auto-approved and executed!");
3960        assert_eq!(result.tool_calls_count, 1);
3961    }
3962
3963    #[tokio::test]
3964    async fn test_agent_hitl_confirmation_events() {
3965        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3966        use tokio::sync::broadcast;
3967
3968        let mock_client = Arc::new(MockLlmClient::new(vec![
3969            MockLlmClient::tool_call_response(
3970                "tool-1",
3971                "bash",
3972                serde_json::json!({"command": "echo test"}),
3973            ),
3974            MockLlmClient::text_response("Done!"),
3975        ]));
3976
3977        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3978
3979        // Create HITL confirmation manager
3980        let (event_tx, mut event_rx) = broadcast::channel(100);
3981        let hitl_policy = ConfirmationPolicy {
3982            enabled: true,
3983            default_timeout_ms: 5000, // Long enough timeout
3984            ..Default::default()
3985        };
3986        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3987
3988        let permission_policy = PermissionPolicy::new();
3989
3990        let config = AgentConfig {
3991            permission_checker: Some(Arc::new(permission_policy)),
3992            confirmation_manager: Some(confirmation_manager.clone()),
3993            ..Default::default()
3994        };
3995
3996        // Spawn task to approve and collect events
3997        let cm_clone = confirmation_manager.clone();
3998        let event_handle = tokio::spawn(async move {
3999            let mut events = Vec::new();
4000            // Wait for ConfirmationRequired event
4001            while let Ok(event) = event_rx.recv().await {
4002                events.push(event.clone());
4003                if let AgentEvent::ConfirmationRequired { tool_id, .. } = event {
4004                    // Approve it
4005                    cm_clone.confirm(&tool_id, true, None).await.ok();
4006                    // Wait for ConfirmationReceived
4007                    if let Ok(recv_event) = event_rx.recv().await {
4008                        events.push(recv_event);
4009                    }
4010                    break;
4011                }
4012            }
4013            events
4014        });
4015
4016        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4017        let _result = agent.execute(&[], "Echo", None).await.unwrap();
4018
4019        // Check events
4020        let events = event_handle.await.unwrap();
4021        assert!(
4022            events
4023                .iter()
4024                .any(|e| matches!(e, AgentEvent::ConfirmationRequired { .. })),
4025            "Should have ConfirmationRequired event"
4026        );
4027        assert!(
4028            events
4029                .iter()
4030                .any(|e| matches!(e, AgentEvent::ConfirmationReceived { approved: true, .. })),
4031            "Should have ConfirmationReceived event with approved=true"
4032        );
4033    }
4034
4035    #[tokio::test]
4036    async fn test_agent_hitl_disabled_auto_executes() {
4037        // When HITL is disabled, tools should execute automatically even with Ask permission
4038        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4039        use tokio::sync::broadcast;
4040
4041        let mock_client = Arc::new(MockLlmClient::new(vec![
4042            MockLlmClient::tool_call_response(
4043                "tool-1",
4044                "bash",
4045                serde_json::json!({"command": "echo auto"}),
4046            ),
4047            MockLlmClient::text_response("Auto executed!"),
4048        ]));
4049
4050        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4051
4052        // Create HITL with enabled=false
4053        let (event_tx, _event_rx) = broadcast::channel(100);
4054        let hitl_policy = ConfirmationPolicy {
4055            enabled: false, // HITL disabled
4056            ..Default::default()
4057        };
4058        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4059
4060        let permission_policy = PermissionPolicy::new(); // Default is Ask
4061
4062        let config = AgentConfig {
4063            permission_checker: Some(Arc::new(permission_policy)),
4064            confirmation_manager: Some(confirmation_manager),
4065            ..Default::default()
4066        };
4067
4068        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4069        let result = agent.execute(&[], "Echo", None).await.unwrap();
4070
4071        // Should execute without waiting for confirmation
4072        assert_eq!(result.text, "Auto executed!");
4073        assert_eq!(result.tool_calls_count, 1);
4074    }
4075
4076    #[tokio::test]
4077    async fn test_agent_hitl_with_permission_deny_skips_hitl() {
4078        // When permission is Deny, HITL should not be triggered
4079        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4080        use tokio::sync::broadcast;
4081
4082        let mock_client = Arc::new(MockLlmClient::new(vec![
4083            MockLlmClient::tool_call_response(
4084                "tool-1",
4085                "bash",
4086                serde_json::json!({"command": "rm -rf /"}),
4087            ),
4088            MockLlmClient::text_response("Blocked by permission."),
4089        ]));
4090
4091        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4092
4093        // Create HITL enabled
4094        let (event_tx, mut event_rx) = broadcast::channel(100);
4095        let hitl_policy = ConfirmationPolicy {
4096            enabled: true,
4097            ..Default::default()
4098        };
4099        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4100
4101        // Permission policy denies rm commands
4102        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4103
4104        let config = AgentConfig {
4105            permission_checker: Some(Arc::new(permission_policy)),
4106            confirmation_manager: Some(confirmation_manager),
4107            ..Default::default()
4108        };
4109
4110        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4111        let result = agent.execute(&[], "Delete", None).await.unwrap();
4112
4113        // Should be denied without HITL
4114        assert_eq!(result.text, "Blocked by permission.");
4115
4116        // Should NOT have any ConfirmationRequired events
4117        let mut found_confirmation = false;
4118        while let Ok(event) = event_rx.try_recv() {
4119            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4120                found_confirmation = true;
4121            }
4122        }
4123        assert!(
4124            !found_confirmation,
4125            "HITL should not be triggered when permission is Deny"
4126        );
4127    }
4128
4129    #[tokio::test]
4130    async fn test_agent_hitl_with_permission_allow_skips_hitl() {
4131        // When permission is Allow, HITL confirmation is skipped entirely.
4132        // PermissionPolicy is the declarative rule engine; Allow = execute directly.
4133        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4134        use tokio::sync::broadcast;
4135
4136        let mock_client = Arc::new(MockLlmClient::new(vec![
4137            MockLlmClient::tool_call_response(
4138                "tool-1",
4139                "bash",
4140                serde_json::json!({"command": "echo hello"}),
4141            ),
4142            MockLlmClient::text_response("Allowed!"),
4143        ]));
4144
4145        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4146
4147        // Create HITL enabled
4148        let (event_tx, mut event_rx) = broadcast::channel(100);
4149        let hitl_policy = ConfirmationPolicy {
4150            enabled: true,
4151            ..Default::default()
4152        };
4153        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4154
4155        // Permission policy allows echo commands
4156        let permission_policy = PermissionPolicy::new().allow("bash(echo:*)");
4157
4158        let config = AgentConfig {
4159            permission_checker: Some(Arc::new(permission_policy)),
4160            confirmation_manager: Some(confirmation_manager.clone()),
4161            ..Default::default()
4162        };
4163
4164        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4165        let result = agent.execute(&[], "Echo", None).await.unwrap();
4166
4167        // Should execute directly without HITL (permission Allow skips confirmation)
4168        assert_eq!(result.text, "Allowed!");
4169
4170        // Should NOT have ConfirmationRequired event (Allow bypasses HITL)
4171        let mut found_confirmation = false;
4172        while let Ok(event) = event_rx.try_recv() {
4173            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4174                found_confirmation = true;
4175            }
4176        }
4177        assert!(
4178            !found_confirmation,
4179            "Permission Allow should skip HITL confirmation"
4180        );
4181    }
4182
4183    #[tokio::test]
4184    async fn test_agent_hitl_multiple_tool_calls() {
4185        // Test multiple tool calls in sequence with HITL
4186        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4187        use tokio::sync::broadcast;
4188
4189        let mock_client = Arc::new(MockLlmClient::new(vec![
4190            // First response: two tool calls
4191            LlmResponse {
4192                message: Message {
4193                    role: "assistant".to_string(),
4194                    content: vec![
4195                        ContentBlock::ToolUse {
4196                            id: "tool-1".to_string(),
4197                            name: "bash".to_string(),
4198                            input: serde_json::json!({"command": "echo first"}),
4199                        },
4200                        ContentBlock::ToolUse {
4201                            id: "tool-2".to_string(),
4202                            name: "bash".to_string(),
4203                            input: serde_json::json!({"command": "echo second"}),
4204                        },
4205                    ],
4206                    reasoning_content: None,
4207                },
4208                usage: TokenUsage {
4209                    prompt_tokens: 10,
4210                    completion_tokens: 5,
4211                    total_tokens: 15,
4212                    cache_read_tokens: None,
4213                    cache_write_tokens: None,
4214                },
4215                stop_reason: Some("tool_use".to_string()),
4216                meta: None,
4217            },
4218            MockLlmClient::text_response("Both executed!"),
4219        ]));
4220
4221        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4222
4223        // Create HITL
4224        let (event_tx, _event_rx) = broadcast::channel(100);
4225        let hitl_policy = ConfirmationPolicy {
4226            enabled: true,
4227            default_timeout_ms: 5000,
4228            ..Default::default()
4229        };
4230        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4231
4232        let permission_policy = PermissionPolicy::new(); // Default Ask
4233
4234        let config = AgentConfig {
4235            permission_checker: Some(Arc::new(permission_policy)),
4236            confirmation_manager: Some(confirmation_manager.clone()),
4237            ..Default::default()
4238        };
4239
4240        // Spawn task to approve both tools
4241        let cm_clone = confirmation_manager.clone();
4242        tokio::spawn(async move {
4243            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4244            cm_clone.confirm("tool-1", true, None).await.ok();
4245            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4246            cm_clone.confirm("tool-2", true, None).await.ok();
4247        });
4248
4249        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4250        let result = agent.execute(&[], "Run both", None).await.unwrap();
4251
4252        assert_eq!(result.text, "Both executed!");
4253        assert_eq!(result.tool_calls_count, 2);
4254    }
4255
4256    #[tokio::test]
4257    async fn test_agent_hitl_partial_approval() {
4258        // Test: first tool approved, second rejected
4259        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4260        use tokio::sync::broadcast;
4261
4262        let mock_client = Arc::new(MockLlmClient::new(vec![
4263            // First response: two tool calls
4264            LlmResponse {
4265                message: Message {
4266                    role: "assistant".to_string(),
4267                    content: vec![
4268                        ContentBlock::ToolUse {
4269                            id: "tool-1".to_string(),
4270                            name: "bash".to_string(),
4271                            input: serde_json::json!({"command": "echo safe"}),
4272                        },
4273                        ContentBlock::ToolUse {
4274                            id: "tool-2".to_string(),
4275                            name: "bash".to_string(),
4276                            input: serde_json::json!({"command": "rm -rf /"}),
4277                        },
4278                    ],
4279                    reasoning_content: None,
4280                },
4281                usage: TokenUsage {
4282                    prompt_tokens: 10,
4283                    completion_tokens: 5,
4284                    total_tokens: 15,
4285                    cache_read_tokens: None,
4286                    cache_write_tokens: None,
4287                },
4288                stop_reason: Some("tool_use".to_string()),
4289                meta: None,
4290            },
4291            MockLlmClient::text_response("First worked, second rejected."),
4292        ]));
4293
4294        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4295
4296        let (event_tx, _event_rx) = broadcast::channel(100);
4297        let hitl_policy = ConfirmationPolicy {
4298            enabled: true,
4299            default_timeout_ms: 5000,
4300            ..Default::default()
4301        };
4302        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4303
4304        let permission_policy = PermissionPolicy::new();
4305
4306        let config = AgentConfig {
4307            permission_checker: Some(Arc::new(permission_policy)),
4308            confirmation_manager: Some(confirmation_manager.clone()),
4309            ..Default::default()
4310        };
4311
4312        // Approve first, reject second
4313        let cm_clone = confirmation_manager.clone();
4314        tokio::spawn(async move {
4315            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4316            cm_clone.confirm("tool-1", true, None).await.ok();
4317            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4318            cm_clone
4319                .confirm("tool-2", false, Some("Dangerous".to_string()))
4320                .await
4321                .ok();
4322        });
4323
4324        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4325        let result = agent.execute(&[], "Run both", None).await.unwrap();
4326
4327        assert_eq!(result.text, "First worked, second rejected.");
4328        assert_eq!(result.tool_calls_count, 2);
4329    }
4330
4331    #[tokio::test]
4332    async fn test_agent_hitl_yolo_mode_auto_approves() {
4333        // YOLO mode: specific lanes auto-approve without confirmation
4334        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, SessionLane};
4335        use tokio::sync::broadcast;
4336
4337        let mock_client = Arc::new(MockLlmClient::new(vec![
4338            MockLlmClient::tool_call_response(
4339                "tool-1",
4340                "read", // Query lane tool
4341                serde_json::json!({"path": "/tmp/test.txt"}),
4342            ),
4343            MockLlmClient::text_response("File read!"),
4344        ]));
4345
4346        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4347
4348        // YOLO mode for Query lane (read, glob, ls, grep)
4349        let (event_tx, mut event_rx) = broadcast::channel(100);
4350        let mut yolo_lanes = std::collections::HashSet::new();
4351        yolo_lanes.insert(SessionLane::Query);
4352        let hitl_policy = ConfirmationPolicy {
4353            enabled: true,
4354            yolo_lanes, // Auto-approve query operations
4355            ..Default::default()
4356        };
4357        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4358
4359        let permission_policy = PermissionPolicy::new();
4360
4361        let config = AgentConfig {
4362            permission_checker: Some(Arc::new(permission_policy)),
4363            confirmation_manager: Some(confirmation_manager),
4364            ..Default::default()
4365        };
4366
4367        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4368        let result = agent.execute(&[], "Read file", None).await.unwrap();
4369
4370        // Should auto-execute without confirmation (YOLO mode)
4371        assert_eq!(result.text, "File read!");
4372
4373        // Should NOT have ConfirmationRequired for yolo lane
4374        let mut found_confirmation = false;
4375        while let Ok(event) = event_rx.try_recv() {
4376            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4377                found_confirmation = true;
4378            }
4379        }
4380        assert!(
4381            !found_confirmation,
4382            "YOLO mode should not trigger confirmation"
4383        );
4384    }
4385
4386    #[tokio::test]
4387    async fn test_agent_config_with_all_options() {
4388        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4389        use tokio::sync::broadcast;
4390
4391        let (event_tx, _) = broadcast::channel(100);
4392        let hitl_policy = ConfirmationPolicy::default();
4393        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4394
4395        let permission_policy = PermissionPolicy::new().allow("bash(*)");
4396
4397        let config = AgentConfig {
4398            prompt_slots: SystemPromptSlots {
4399                extra: Some("Test system prompt".to_string()),
4400                ..Default::default()
4401            },
4402            tools: vec![],
4403            max_tool_rounds: 10,
4404            permission_checker: Some(Arc::new(permission_policy)),
4405            confirmation_manager: Some(confirmation_manager),
4406            context_providers: vec![],
4407            planning_mode: PlanningMode::default(),
4408            goal_tracking: false,
4409            hook_engine: None,
4410            skill_registry: None,
4411            ..AgentConfig::default()
4412        };
4413
4414        assert!(config.prompt_slots.build().contains("Test system prompt"));
4415        assert_eq!(config.max_tool_rounds, 10);
4416        assert!(config.permission_checker.is_some());
4417        assert!(config.confirmation_manager.is_some());
4418        assert!(config.context_providers.is_empty());
4419
4420        // Test Debug trait
4421        let debug_str = format!("{:?}", config);
4422        assert!(debug_str.contains("AgentConfig"));
4423        assert!(debug_str.contains("permission_checker: true"));
4424        assert!(debug_str.contains("confirmation_manager: true"));
4425        assert!(debug_str.contains("context_providers: 0"));
4426    }
4427
4428    // ========================================================================
4429    // Context Provider Tests
4430    // ========================================================================
4431
4432    use crate::context::{ContextItem, ContextType};
4433
4434    /// Mock context provider for testing
4435    struct MockContextProvider {
4436        name: String,
4437        items: Vec<ContextItem>,
4438        on_turn_calls: std::sync::Arc<tokio::sync::RwLock<Vec<(String, String, String)>>>,
4439    }
4440
4441    impl MockContextProvider {
4442        fn new(name: &str) -> Self {
4443            Self {
4444                name: name.to_string(),
4445                items: Vec::new(),
4446                on_turn_calls: std::sync::Arc::new(tokio::sync::RwLock::new(Vec::new())),
4447            }
4448        }
4449
4450        fn with_items(mut self, items: Vec<ContextItem>) -> Self {
4451            self.items = items;
4452            self
4453        }
4454    }
4455
4456    #[async_trait::async_trait]
4457    impl ContextProvider for MockContextProvider {
4458        fn name(&self) -> &str {
4459            &self.name
4460        }
4461
4462        async fn query(&self, _query: &ContextQuery) -> anyhow::Result<ContextResult> {
4463            let mut result = ContextResult::new(&self.name);
4464            for item in &self.items {
4465                result.add_item(item.clone());
4466            }
4467            Ok(result)
4468        }
4469
4470        async fn on_turn_complete(
4471            &self,
4472            session_id: &str,
4473            prompt: &str,
4474            response: &str,
4475        ) -> anyhow::Result<()> {
4476            let mut calls = self.on_turn_calls.write().await;
4477            calls.push((
4478                session_id.to_string(),
4479                prompt.to_string(),
4480                response.to_string(),
4481            ));
4482            Ok(())
4483        }
4484    }
4485
4486    #[tokio::test]
4487    async fn test_agent_with_context_provider() {
4488        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4489            "Response using context",
4490        )]));
4491
4492        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4493
4494        let provider =
4495            MockContextProvider::new("test-provider").with_items(vec![ContextItem::new(
4496                "ctx-1",
4497                ContextType::Resource,
4498                "Relevant context here",
4499            )
4500            .with_source("test://docs/example")]);
4501
4502        let config = AgentConfig {
4503            prompt_slots: SystemPromptSlots {
4504                extra: Some("You are helpful.".to_string()),
4505                ..Default::default()
4506            },
4507            context_providers: vec![Arc::new(provider)],
4508            ..Default::default()
4509        };
4510
4511        let agent = AgentLoop::new(
4512            mock_client.clone(),
4513            tool_executor,
4514            test_tool_context(),
4515            config,
4516        );
4517        let result = agent.execute(&[], "What is X?", None).await.unwrap();
4518
4519        assert_eq!(result.text, "Response using context");
4520        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
4521    }
4522
4523    #[tokio::test]
4524    async fn test_agent_context_provider_events() {
4525        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4526            "Answer",
4527        )]));
4528
4529        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4530
4531        let provider =
4532            MockContextProvider::new("event-provider").with_items(vec![ContextItem::new(
4533                "item-1",
4534                ContextType::Memory,
4535                "Memory content",
4536            )
4537            .with_token_count(50)]);
4538
4539        let config = AgentConfig {
4540            context_providers: vec![Arc::new(provider)],
4541            ..Default::default()
4542        };
4543
4544        let (tx, mut rx) = mpsc::channel(100);
4545        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4546        let _result = agent.execute(&[], "Test prompt", Some(tx)).await.unwrap();
4547
4548        // Collect events
4549        let mut events = Vec::new();
4550        while let Ok(event) = rx.try_recv() {
4551            events.push(event);
4552        }
4553
4554        // Should have ContextResolving and ContextResolved events
4555        assert!(
4556            events
4557                .iter()
4558                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
4559            "Should have ContextResolving event"
4560        );
4561        assert!(
4562            events
4563                .iter()
4564                .any(|e| matches!(e, AgentEvent::ContextResolved { .. })),
4565            "Should have ContextResolved event"
4566        );
4567
4568        // Check context resolved values
4569        for event in &events {
4570            if let AgentEvent::ContextResolved {
4571                total_items,
4572                total_tokens,
4573            } = event
4574            {
4575                assert_eq!(*total_items, 1);
4576                assert_eq!(*total_tokens, 50);
4577            }
4578        }
4579    }
4580
4581    #[tokio::test]
4582    async fn test_agent_multiple_context_providers() {
4583        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4584            "Combined response",
4585        )]));
4586
4587        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4588
4589        let provider1 = MockContextProvider::new("provider-1").with_items(vec![ContextItem::new(
4590            "p1-1",
4591            ContextType::Resource,
4592            "Resource from P1",
4593        )
4594        .with_token_count(100)]);
4595
4596        let provider2 = MockContextProvider::new("provider-2").with_items(vec![
4597            ContextItem::new("p2-1", ContextType::Memory, "Memory from P2").with_token_count(50),
4598            ContextItem::new("p2-2", ContextType::Skill, "Skill from P2").with_token_count(75),
4599        ]);
4600
4601        let config = AgentConfig {
4602            prompt_slots: SystemPromptSlots {
4603                extra: Some("Base system prompt.".to_string()),
4604                ..Default::default()
4605            },
4606            context_providers: vec![Arc::new(provider1), Arc::new(provider2)],
4607            ..Default::default()
4608        };
4609
4610        let (tx, mut rx) = mpsc::channel(100);
4611        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4612        let result = agent.execute(&[], "Query", Some(tx)).await.unwrap();
4613
4614        assert_eq!(result.text, "Combined response");
4615
4616        // Check context resolved event has combined totals
4617        while let Ok(event) = rx.try_recv() {
4618            if let AgentEvent::ContextResolved {
4619                total_items,
4620                total_tokens,
4621            } = event
4622            {
4623                assert_eq!(total_items, 3); // 1 + 2
4624                assert_eq!(total_tokens, 225); // 100 + 50 + 75
4625            }
4626        }
4627    }
4628
4629    #[tokio::test]
4630    async fn test_agent_no_context_providers() {
4631        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4632            "No context",
4633        )]));
4634
4635        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4636
4637        // No context providers
4638        let config = AgentConfig::default();
4639
4640        let (tx, mut rx) = mpsc::channel(100);
4641        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4642        let result = agent.execute(&[], "Simple prompt", Some(tx)).await.unwrap();
4643
4644        assert_eq!(result.text, "No context");
4645
4646        // Should NOT have context events when no providers
4647        let mut events = Vec::new();
4648        while let Ok(event) = rx.try_recv() {
4649            events.push(event);
4650        }
4651
4652        assert!(
4653            !events
4654                .iter()
4655                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
4656            "Should NOT have ContextResolving event"
4657        );
4658    }
4659
4660    #[tokio::test]
4661    async fn test_agent_context_on_turn_complete() {
4662        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4663            "Final response",
4664        )]));
4665
4666        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4667
4668        let provider = Arc::new(MockContextProvider::new("memory-provider"));
4669        let on_turn_calls = provider.on_turn_calls.clone();
4670
4671        let config = AgentConfig {
4672            context_providers: vec![provider],
4673            ..Default::default()
4674        };
4675
4676        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4677
4678        // Execute with session ID
4679        let result = agent
4680            .execute_with_session(&[], "User prompt", Some("sess-123"), None, None)
4681            .await
4682            .unwrap();
4683
4684        assert_eq!(result.text, "Final response");
4685
4686        // Check on_turn_complete was called
4687        let calls = on_turn_calls.read().await;
4688        assert_eq!(calls.len(), 1);
4689        assert_eq!(calls[0].0, "sess-123");
4690        assert_eq!(calls[0].1, "User prompt");
4691        assert_eq!(calls[0].2, "Final response");
4692    }
4693
4694    #[tokio::test]
4695    async fn test_agent_context_on_turn_complete_no_session() {
4696        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4697            "Response",
4698        )]));
4699
4700        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4701
4702        let provider = Arc::new(MockContextProvider::new("memory-provider"));
4703        let on_turn_calls = provider.on_turn_calls.clone();
4704
4705        let config = AgentConfig {
4706            context_providers: vec![provider],
4707            ..Default::default()
4708        };
4709
4710        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4711
4712        // Execute without session ID (uses execute() which passes None)
4713        let _result = agent.execute(&[], "Prompt", None).await.unwrap();
4714
4715        // on_turn_complete should NOT be called when session_id is None
4716        let calls = on_turn_calls.read().await;
4717        assert!(calls.is_empty());
4718    }
4719
4720    #[tokio::test]
4721    async fn test_agent_build_augmented_system_prompt() {
4722        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response("OK")]));
4723
4724        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4725
4726        let provider = MockContextProvider::new("test").with_items(vec![ContextItem::new(
4727            "doc-1",
4728            ContextType::Resource,
4729            "Auth uses JWT tokens.",
4730        )
4731        .with_source("viking://docs/auth")]);
4732
4733        let config = AgentConfig {
4734            prompt_slots: SystemPromptSlots {
4735                extra: Some("You are helpful.".to_string()),
4736                ..Default::default()
4737            },
4738            context_providers: vec![Arc::new(provider)],
4739            ..Default::default()
4740        };
4741
4742        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4743
4744        // Test building augmented prompt
4745        let context_results = agent.resolve_context("test", None).await;
4746        let augmented = agent.build_augmented_system_prompt(&context_results);
4747
4748        let augmented_str = augmented.unwrap();
4749        assert!(augmented_str.contains("You are helpful."));
4750        assert!(augmented_str.contains("<context source=\"viking://docs/auth\" type=\"Resource\">"));
4751        assert!(augmented_str.contains("Auth uses JWT tokens."));
4752    }
4753
4754    // ========================================================================
4755    // Agentic Loop Integration Tests
4756    // ========================================================================
4757
4758    /// Helper: collect all events from a channel
4759    async fn collect_events(mut rx: mpsc::Receiver<AgentEvent>) -> Vec<AgentEvent> {
4760        let mut events = Vec::new();
4761        while let Ok(event) = rx.try_recv() {
4762            events.push(event);
4763        }
4764        // Drain remaining
4765        while let Some(event) = rx.recv().await {
4766            events.push(event);
4767        }
4768        events
4769    }
4770
4771    #[tokio::test]
4772    async fn test_agent_multi_turn_tool_chain() {
4773        // LLM calls tool A → sees result → calls tool B → sees result → final answer
4774        let mock_client = Arc::new(MockLlmClient::new(vec![
4775            // Turn 1: call ls
4776            MockLlmClient::tool_call_response(
4777                "t1",
4778                "bash",
4779                serde_json::json!({"command": "echo step1"}),
4780            ),
4781            // Turn 2: call another tool based on first result
4782            MockLlmClient::tool_call_response(
4783                "t2",
4784                "bash",
4785                serde_json::json!({"command": "echo step2"}),
4786            ),
4787            // Turn 3: final answer
4788            MockLlmClient::text_response("Completed both steps: step1 then step2"),
4789        ]));
4790
4791        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4792        let config = AgentConfig::default();
4793
4794        let agent = AgentLoop::new(
4795            mock_client.clone(),
4796            tool_executor,
4797            test_tool_context(),
4798            config,
4799        );
4800        let result = agent.execute(&[], "Run two steps", None).await.unwrap();
4801
4802        assert_eq!(result.text, "Completed both steps: step1 then step2");
4803        assert_eq!(result.tool_calls_count, 2);
4804        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 3);
4805
4806        // Verify message history: user → assistant(tool_use) → user(tool_result) → assistant(tool_use) → user(tool_result) → assistant(text)
4807        assert_eq!(result.messages[0].role, "user");
4808        assert_eq!(result.messages[1].role, "assistant"); // tool call 1
4809        assert_eq!(result.messages[2].role, "user"); // tool result 1 (Anthropic convention)
4810        assert_eq!(result.messages[3].role, "assistant"); // tool call 2
4811        assert_eq!(result.messages[4].role, "user"); // tool result 2
4812        assert_eq!(result.messages[5].role, "assistant"); // final text
4813        assert_eq!(result.messages.len(), 6);
4814    }
4815
4816    #[tokio::test]
4817    async fn test_agent_conversation_history_preserved() {
4818        // Pass existing history, verify it's preserved in output
4819        let existing_history = vec![
4820            Message::user("What is Rust?"),
4821            Message {
4822                role: "assistant".to_string(),
4823                content: vec![ContentBlock::Text {
4824                    text: "Rust is a systems programming language.".to_string(),
4825                }],
4826                reasoning_content: None,
4827            },
4828        ];
4829
4830        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4831            "Rust was created by Graydon Hoare at Mozilla.",
4832        )]));
4833
4834        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4835        let agent = AgentLoop::new(
4836            mock_client.clone(),
4837            tool_executor,
4838            test_tool_context(),
4839            AgentConfig::default(),
4840        );
4841
4842        let result = agent
4843            .execute(&existing_history, "Who created it?", None)
4844            .await
4845            .unwrap();
4846
4847        // History should contain: old user + old assistant + new user + new assistant
4848        assert_eq!(result.messages.len(), 4);
4849        assert_eq!(result.messages[0].text(), "What is Rust?");
4850        assert_eq!(
4851            result.messages[1].text(),
4852            "Rust is a systems programming language."
4853        );
4854        assert_eq!(result.messages[2].text(), "Who created it?");
4855        assert_eq!(
4856            result.messages[3].text(),
4857            "Rust was created by Graydon Hoare at Mozilla."
4858        );
4859    }
4860
4861    #[tokio::test]
4862    async fn test_agent_event_stream_completeness() {
4863        // Verify full event sequence for a single tool call loop
4864        let mock_client = Arc::new(MockLlmClient::new(vec![
4865            MockLlmClient::tool_call_response(
4866                "t1",
4867                "bash",
4868                serde_json::json!({"command": "echo hi"}),
4869            ),
4870            MockLlmClient::text_response("Done"),
4871        ]));
4872
4873        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4874        let agent = AgentLoop::new(
4875            mock_client,
4876            tool_executor,
4877            test_tool_context(),
4878            AgentConfig::default(),
4879        );
4880
4881        let (tx, rx) = mpsc::channel(100);
4882        let result = agent.execute(&[], "Say hi", Some(tx)).await.unwrap();
4883        assert_eq!(result.text, "Done");
4884
4885        let events = collect_events(rx).await;
4886
4887        // Verify event sequence
4888        let event_types: Vec<&str> = events
4889            .iter()
4890            .map(|e| match e {
4891                AgentEvent::Start { .. } => "Start",
4892                AgentEvent::TurnStart { .. } => "TurnStart",
4893                AgentEvent::TurnEnd { .. } => "TurnEnd",
4894                AgentEvent::ToolEnd { .. } => "ToolEnd",
4895                AgentEvent::End { .. } => "End",
4896                _ => "Other",
4897            })
4898            .collect();
4899
4900        // Must start with Start, end with End
4901        assert_eq!(event_types.first(), Some(&"Start"));
4902        assert_eq!(event_types.last(), Some(&"End"));
4903
4904        // Must have 2 TurnStarts (tool call turn + final answer turn)
4905        let turn_starts = event_types.iter().filter(|&&t| t == "TurnStart").count();
4906        assert_eq!(turn_starts, 2);
4907
4908        // Must have 1 ToolEnd
4909        let tool_ends = event_types.iter().filter(|&&t| t == "ToolEnd").count();
4910        assert_eq!(tool_ends, 1);
4911    }
4912
4913    #[tokio::test]
4914    async fn test_agent_multiple_tools_single_turn() {
4915        // LLM returns 2 tool calls in one response
4916        let mock_client = Arc::new(MockLlmClient::new(vec![
4917            LlmResponse {
4918                message: Message {
4919                    role: "assistant".to_string(),
4920                    content: vec![
4921                        ContentBlock::ToolUse {
4922                            id: "t1".to_string(),
4923                            name: "bash".to_string(),
4924                            input: serde_json::json!({"command": "echo first"}),
4925                        },
4926                        ContentBlock::ToolUse {
4927                            id: "t2".to_string(),
4928                            name: "bash".to_string(),
4929                            input: serde_json::json!({"command": "echo second"}),
4930                        },
4931                    ],
4932                    reasoning_content: None,
4933                },
4934                usage: TokenUsage {
4935                    prompt_tokens: 10,
4936                    completion_tokens: 5,
4937                    total_tokens: 15,
4938                    cache_read_tokens: None,
4939                    cache_write_tokens: None,
4940                },
4941                stop_reason: Some("tool_use".to_string()),
4942                meta: None,
4943            },
4944            MockLlmClient::text_response("Both commands ran"),
4945        ]));
4946
4947        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4948        let agent = AgentLoop::new(
4949            mock_client.clone(),
4950            tool_executor,
4951            test_tool_context(),
4952            AgentConfig::default(),
4953        );
4954
4955        let result = agent.execute(&[], "Run both", None).await.unwrap();
4956
4957        assert_eq!(result.text, "Both commands ran");
4958        assert_eq!(result.tool_calls_count, 2);
4959        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2); // Only 2 LLM calls
4960
4961        // Messages: user → assistant(2 tools) → user(tool_result) → user(tool_result) → assistant(text)
4962        assert_eq!(result.messages[0].role, "user");
4963        assert_eq!(result.messages[1].role, "assistant");
4964        assert_eq!(result.messages[2].role, "user"); // tool result 1
4965        assert_eq!(result.messages[3].role, "user"); // tool result 2
4966        assert_eq!(result.messages[4].role, "assistant");
4967    }
4968
4969    #[tokio::test]
4970    async fn test_agent_token_usage_accumulation() {
4971        // Verify usage sums across multiple turns
4972        let mock_client = Arc::new(MockLlmClient::new(vec![
4973            MockLlmClient::tool_call_response(
4974                "t1",
4975                "bash",
4976                serde_json::json!({"command": "echo x"}),
4977            ),
4978            MockLlmClient::text_response("Done"),
4979        ]));
4980
4981        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4982        let agent = AgentLoop::new(
4983            mock_client,
4984            tool_executor,
4985            test_tool_context(),
4986            AgentConfig::default(),
4987        );
4988
4989        let result = agent.execute(&[], "test", None).await.unwrap();
4990
4991        // Each mock response has prompt=10, completion=5, total=15
4992        // 2 LLM calls → 20 prompt, 10 completion, 30 total
4993        assert_eq!(result.usage.prompt_tokens, 20);
4994        assert_eq!(result.usage.completion_tokens, 10);
4995        assert_eq!(result.usage.total_tokens, 30);
4996    }
4997
4998    #[tokio::test]
4999    async fn test_agent_system_prompt_passed() {
5000        // Verify system prompt is used (MockLlmClient captures calls)
5001        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5002            "I am a coding assistant.",
5003        )]));
5004
5005        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5006        let config = AgentConfig {
5007            prompt_slots: SystemPromptSlots {
5008                extra: Some("You are a coding assistant.".to_string()),
5009                ..Default::default()
5010            },
5011            ..Default::default()
5012        };
5013
5014        let agent = AgentLoop::new(
5015            mock_client.clone(),
5016            tool_executor,
5017            test_tool_context(),
5018            config,
5019        );
5020        let result = agent.execute(&[], "What are you?", None).await.unwrap();
5021
5022        assert_eq!(result.text, "I am a coding assistant.");
5023        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
5024    }
5025
5026    #[tokio::test]
5027    async fn test_agent_max_rounds_with_persistent_tool_calls() {
5028        // LLM keeps calling tools forever — should hit max_tool_rounds
5029        let mut responses = Vec::new();
5030        for i in 0..15 {
5031            responses.push(MockLlmClient::tool_call_response(
5032                &format!("t{}", i),
5033                "bash",
5034                serde_json::json!({"command": format!("echo round{}", i)}),
5035            ));
5036        }
5037
5038        let mock_client = Arc::new(MockLlmClient::new(responses));
5039        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5040        let config = AgentConfig {
5041            max_tool_rounds: 5,
5042            ..Default::default()
5043        };
5044
5045        let agent = AgentLoop::new(
5046            mock_client.clone(),
5047            tool_executor,
5048            test_tool_context(),
5049            config,
5050        );
5051        let result = agent.execute(&[], "Loop forever", None).await;
5052
5053        assert!(result.is_err());
5054        let err = result.unwrap_err().to_string();
5055        assert!(err.contains("Max tool rounds (5) exceeded"));
5056    }
5057
5058    #[tokio::test]
5059    async fn test_agent_end_event_contains_final_text() {
5060        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5061            "Final answer here",
5062        )]));
5063
5064        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5065        let agent = AgentLoop::new(
5066            mock_client,
5067            tool_executor,
5068            test_tool_context(),
5069            AgentConfig::default(),
5070        );
5071
5072        let (tx, rx) = mpsc::channel(100);
5073        agent.execute(&[], "test", Some(tx)).await.unwrap();
5074
5075        let events = collect_events(rx).await;
5076        let end_event = events.iter().find(|e| matches!(e, AgentEvent::End { .. }));
5077        assert!(end_event.is_some());
5078
5079        if let AgentEvent::End { text, usage, .. } = end_event.unwrap() {
5080            assert_eq!(text, "Final answer here");
5081            assert_eq!(usage.total_tokens, 15);
5082        }
5083    }
5084}
5085
5086#[cfg(test)]
5087mod extra_agent_tests {
5088    use super::*;
5089    use crate::agent::tests::MockLlmClient;
5090    use crate::queue::SessionQueueConfig;
5091    use crate::tools::ToolExecutor;
5092    use std::path::PathBuf;
5093    use std::sync::atomic::{AtomicUsize, Ordering};
5094
5095    fn test_tool_context() -> ToolContext {
5096        ToolContext::new(PathBuf::from("/tmp"))
5097    }
5098
5099    // ========================================================================
5100    // AgentConfig
5101    // ========================================================================
5102
5103    #[test]
5104    fn test_agent_config_debug() {
5105        let config = AgentConfig {
5106            prompt_slots: SystemPromptSlots {
5107                extra: Some("You are helpful".to_string()),
5108                ..Default::default()
5109            },
5110            tools: vec![],
5111            max_tool_rounds: 10,
5112            permission_checker: None,
5113            confirmation_manager: None,
5114            context_providers: vec![],
5115            planning_mode: PlanningMode::Enabled,
5116            goal_tracking: false,
5117            hook_engine: None,
5118            skill_registry: None,
5119            ..AgentConfig::default()
5120        };
5121        let debug = format!("{:?}", config);
5122        assert!(debug.contains("AgentConfig"));
5123        assert!(debug.contains("planning_mode"));
5124    }
5125
5126    #[test]
5127    fn test_agent_config_default_values() {
5128        let config = AgentConfig::default();
5129        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
5130        assert_eq!(config.planning_mode, PlanningMode::Auto);
5131        assert!(!config.goal_tracking);
5132        assert!(config.context_providers.is_empty());
5133    }
5134
5135    // ========================================================================
5136    // AgentEvent serialization
5137    // ========================================================================
5138
5139    #[test]
5140    fn test_agent_event_serialize_start() {
5141        let event = AgentEvent::Start {
5142            prompt: "Hello".to_string(),
5143        };
5144        let json = serde_json::to_string(&event).unwrap();
5145        assert!(json.contains("agent_start"));
5146        assert!(json.contains("Hello"));
5147    }
5148
5149    #[test]
5150    fn test_agent_event_serialize_text_delta() {
5151        let event = AgentEvent::TextDelta {
5152            text: "chunk".to_string(),
5153        };
5154        let json = serde_json::to_string(&event).unwrap();
5155        assert!(json.contains("text_delta"));
5156    }
5157
5158    #[test]
5159    fn test_agent_event_serialize_tool_start() {
5160        let event = AgentEvent::ToolStart {
5161            id: "t1".to_string(),
5162            name: "bash".to_string(),
5163        };
5164        let json = serde_json::to_string(&event).unwrap();
5165        assert!(json.contains("tool_start"));
5166        assert!(json.contains("bash"));
5167    }
5168
5169    #[test]
5170    fn test_agent_event_serialize_tool_end() {
5171        let event = AgentEvent::ToolEnd {
5172            id: "t1".to_string(),
5173            name: "bash".to_string(),
5174            output: "hello".to_string(),
5175            exit_code: 0,
5176            metadata: None,
5177        };
5178        let json = serde_json::to_string(&event).unwrap();
5179        assert!(json.contains("tool_end"));
5180    }
5181
5182    #[test]
5183    fn test_agent_event_tool_end_has_metadata_field() {
5184        let event = AgentEvent::ToolEnd {
5185            id: "t1".to_string(),
5186            name: "write".to_string(),
5187            output: "Wrote 5 bytes".to_string(),
5188            exit_code: 0,
5189            metadata: Some(
5190                serde_json::json!({ "before": "old", "after": "new", "file_path": "f.txt" }),
5191            ),
5192        };
5193        let json = serde_json::to_string(&event).unwrap();
5194        assert!(json.contains("\"before\""));
5195    }
5196
5197    #[test]
5198    fn test_agent_event_serialize_error() {
5199        let event = AgentEvent::Error {
5200            message: "oops".to_string(),
5201        };
5202        let json = serde_json::to_string(&event).unwrap();
5203        assert!(json.contains("error"));
5204        assert!(json.contains("oops"));
5205    }
5206
5207    #[test]
5208    fn test_agent_event_serialize_confirmation_required() {
5209        let event = AgentEvent::ConfirmationRequired {
5210            tool_id: "t1".to_string(),
5211            tool_name: "bash".to_string(),
5212            args: serde_json::json!({"cmd": "rm"}),
5213            timeout_ms: 30000,
5214        };
5215        let json = serde_json::to_string(&event).unwrap();
5216        assert!(json.contains("confirmation_required"));
5217    }
5218
5219    #[test]
5220    fn test_agent_event_serialize_confirmation_received() {
5221        let event = AgentEvent::ConfirmationReceived {
5222            tool_id: "t1".to_string(),
5223            approved: true,
5224            reason: Some("safe".to_string()),
5225        };
5226        let json = serde_json::to_string(&event).unwrap();
5227        assert!(json.contains("confirmation_received"));
5228    }
5229
5230    #[test]
5231    fn test_agent_event_serialize_confirmation_timeout() {
5232        let event = AgentEvent::ConfirmationTimeout {
5233            tool_id: "t1".to_string(),
5234            action_taken: "rejected".to_string(),
5235        };
5236        let json = serde_json::to_string(&event).unwrap();
5237        assert!(json.contains("confirmation_timeout"));
5238    }
5239
5240    #[test]
5241    fn test_agent_event_serialize_external_task_pending() {
5242        let event = AgentEvent::ExternalTaskPending {
5243            task_id: "task-1".to_string(),
5244            session_id: "sess-1".to_string(),
5245            lane: crate::hitl::SessionLane::Execute,
5246            command_type: "bash".to_string(),
5247            payload: serde_json::json!({}),
5248            timeout_ms: 60000,
5249        };
5250        let json = serde_json::to_string(&event).unwrap();
5251        assert!(json.contains("external_task_pending"));
5252    }
5253
5254    #[test]
5255    fn test_agent_event_serialize_external_task_completed() {
5256        let event = AgentEvent::ExternalTaskCompleted {
5257            task_id: "task-1".to_string(),
5258            session_id: "sess-1".to_string(),
5259            success: false,
5260        };
5261        let json = serde_json::to_string(&event).unwrap();
5262        assert!(json.contains("external_task_completed"));
5263    }
5264
5265    #[test]
5266    fn test_agent_event_serialize_permission_denied() {
5267        let event = AgentEvent::PermissionDenied {
5268            tool_id: "t1".to_string(),
5269            tool_name: "bash".to_string(),
5270            args: serde_json::json!({}),
5271            reason: "denied".to_string(),
5272        };
5273        let json = serde_json::to_string(&event).unwrap();
5274        assert!(json.contains("permission_denied"));
5275    }
5276
5277    #[test]
5278    fn test_agent_event_serialize_context_compacted() {
5279        let event = AgentEvent::ContextCompacted {
5280            session_id: "sess-1".to_string(),
5281            before_messages: 100,
5282            after_messages: 20,
5283            percent_before: 0.85,
5284        };
5285        let json = serde_json::to_string(&event).unwrap();
5286        assert!(json.contains("context_compacted"));
5287    }
5288
5289    #[test]
5290    fn test_agent_event_serialize_turn_start() {
5291        let event = AgentEvent::TurnStart { turn: 3 };
5292        let json = serde_json::to_string(&event).unwrap();
5293        assert!(json.contains("turn_start"));
5294    }
5295
5296    #[test]
5297    fn test_agent_event_serialize_turn_end() {
5298        let event = AgentEvent::TurnEnd {
5299            turn: 3,
5300            usage: TokenUsage::default(),
5301        };
5302        let json = serde_json::to_string(&event).unwrap();
5303        assert!(json.contains("turn_end"));
5304    }
5305
5306    #[test]
5307    fn test_agent_event_serialize_end() {
5308        let event = AgentEvent::End {
5309            text: "Done".to_string(),
5310            usage: TokenUsage {
5311                prompt_tokens: 100,
5312                completion_tokens: 50,
5313                total_tokens: 150,
5314                cache_read_tokens: None,
5315                cache_write_tokens: None,
5316            },
5317            meta: None,
5318        };
5319        let json = serde_json::to_string(&event).unwrap();
5320        assert!(json.contains("agent_end"));
5321    }
5322
5323    // ========================================================================
5324    // AgentResult
5325    // ========================================================================
5326
5327    #[test]
5328    fn test_agent_result_fields() {
5329        let result = AgentResult {
5330            text: "output".to_string(),
5331            messages: vec![Message::user("hello")],
5332            usage: TokenUsage::default(),
5333            tool_calls_count: 3,
5334        };
5335        assert_eq!(result.text, "output");
5336        assert_eq!(result.messages.len(), 1);
5337        assert_eq!(result.tool_calls_count, 3);
5338    }
5339
5340    // ========================================================================
5341    // Missing AgentEvent serialization tests
5342    // ========================================================================
5343
5344    #[test]
5345    fn test_agent_event_serialize_context_resolving() {
5346        let event = AgentEvent::ContextResolving {
5347            providers: vec!["provider1".to_string(), "provider2".to_string()],
5348        };
5349        let json = serde_json::to_string(&event).unwrap();
5350        assert!(json.contains("context_resolving"));
5351        assert!(json.contains("provider1"));
5352    }
5353
5354    #[test]
5355    fn test_agent_event_serialize_context_resolved() {
5356        let event = AgentEvent::ContextResolved {
5357            total_items: 5,
5358            total_tokens: 1000,
5359        };
5360        let json = serde_json::to_string(&event).unwrap();
5361        assert!(json.contains("context_resolved"));
5362        assert!(json.contains("1000"));
5363    }
5364
5365    #[test]
5366    fn test_agent_event_serialize_command_dead_lettered() {
5367        let event = AgentEvent::CommandDeadLettered {
5368            command_id: "cmd-1".to_string(),
5369            command_type: "bash".to_string(),
5370            lane: "execute".to_string(),
5371            error: "timeout".to_string(),
5372            attempts: 3,
5373        };
5374        let json = serde_json::to_string(&event).unwrap();
5375        assert!(json.contains("command_dead_lettered"));
5376        assert!(json.contains("cmd-1"));
5377    }
5378
5379    #[test]
5380    fn test_agent_event_serialize_command_retry() {
5381        let event = AgentEvent::CommandRetry {
5382            command_id: "cmd-2".to_string(),
5383            command_type: "read".to_string(),
5384            lane: "query".to_string(),
5385            attempt: 2,
5386            delay_ms: 1000,
5387        };
5388        let json = serde_json::to_string(&event).unwrap();
5389        assert!(json.contains("command_retry"));
5390        assert!(json.contains("cmd-2"));
5391    }
5392
5393    #[test]
5394    fn test_agent_event_serialize_queue_alert() {
5395        let event = AgentEvent::QueueAlert {
5396            level: "warning".to_string(),
5397            alert_type: "depth".to_string(),
5398            message: "Queue depth exceeded".to_string(),
5399        };
5400        let json = serde_json::to_string(&event).unwrap();
5401        assert!(json.contains("queue_alert"));
5402        assert!(json.contains("warning"));
5403    }
5404
5405    #[test]
5406    fn test_agent_event_serialize_task_updated() {
5407        let event = AgentEvent::TaskUpdated {
5408            session_id: "sess-1".to_string(),
5409            tasks: vec![],
5410        };
5411        let json = serde_json::to_string(&event).unwrap();
5412        assert!(json.contains("task_updated"));
5413        assert!(json.contains("sess-1"));
5414    }
5415
5416    #[test]
5417    fn test_agent_event_serialize_memory_stored() {
5418        let event = AgentEvent::MemoryStored {
5419            memory_id: "mem-1".to_string(),
5420            memory_type: "conversation".to_string(),
5421            importance: 0.8,
5422            tags: vec!["important".to_string()],
5423        };
5424        let json = serde_json::to_string(&event).unwrap();
5425        assert!(json.contains("memory_stored"));
5426        assert!(json.contains("mem-1"));
5427    }
5428
5429    #[test]
5430    fn test_agent_event_serialize_memory_recalled() {
5431        let event = AgentEvent::MemoryRecalled {
5432            memory_id: "mem-2".to_string(),
5433            content: "Previous conversation".to_string(),
5434            relevance: 0.9,
5435        };
5436        let json = serde_json::to_string(&event).unwrap();
5437        assert!(json.contains("memory_recalled"));
5438        assert!(json.contains("mem-2"));
5439    }
5440
5441    #[test]
5442    fn test_agent_event_serialize_memories_searched() {
5443        let event = AgentEvent::MemoriesSearched {
5444            query: Some("search term".to_string()),
5445            tags: vec!["tag1".to_string()],
5446            result_count: 5,
5447        };
5448        let json = serde_json::to_string(&event).unwrap();
5449        assert!(json.contains("memories_searched"));
5450        assert!(json.contains("search term"));
5451    }
5452
5453    #[test]
5454    fn test_agent_event_serialize_memory_cleared() {
5455        let event = AgentEvent::MemoryCleared {
5456            tier: "short_term".to_string(),
5457            count: 10,
5458        };
5459        let json = serde_json::to_string(&event).unwrap();
5460        assert!(json.contains("memory_cleared"));
5461        assert!(json.contains("short_term"));
5462    }
5463
5464    #[test]
5465    fn test_agent_event_serialize_subagent_start() {
5466        let event = AgentEvent::SubagentStart {
5467            task_id: "task-1".to_string(),
5468            session_id: "child-sess".to_string(),
5469            parent_session_id: "parent-sess".to_string(),
5470            agent: "explore".to_string(),
5471            description: "Explore codebase".to_string(),
5472        };
5473        let json = serde_json::to_string(&event).unwrap();
5474        assert!(json.contains("subagent_start"));
5475        assert!(json.contains("explore"));
5476    }
5477
5478    #[test]
5479    fn test_agent_event_serialize_subagent_progress() {
5480        let event = AgentEvent::SubagentProgress {
5481            task_id: "task-1".to_string(),
5482            session_id: "child-sess".to_string(),
5483            status: "processing".to_string(),
5484            metadata: serde_json::json!({"progress": 50}),
5485        };
5486        let json = serde_json::to_string(&event).unwrap();
5487        assert!(json.contains("subagent_progress"));
5488        assert!(json.contains("processing"));
5489    }
5490
5491    #[test]
5492    fn test_agent_event_serialize_subagent_end() {
5493        let event = AgentEvent::SubagentEnd {
5494            task_id: "task-1".to_string(),
5495            session_id: "child-sess".to_string(),
5496            agent: "explore".to_string(),
5497            output: "Found 10 files".to_string(),
5498            success: true,
5499        };
5500        let json = serde_json::to_string(&event).unwrap();
5501        assert!(json.contains("subagent_end"));
5502        assert!(json.contains("Found 10 files"));
5503    }
5504
5505    #[test]
5506    fn test_agent_event_serialize_planning_start() {
5507        let event = AgentEvent::PlanningStart {
5508            prompt: "Build a web app".to_string(),
5509        };
5510        let json = serde_json::to_string(&event).unwrap();
5511        assert!(json.contains("planning_start"));
5512        assert!(json.contains("Build a web app"));
5513    }
5514
5515    #[test]
5516    fn test_agent_event_serialize_planning_end() {
5517        use crate::planning::{Complexity, ExecutionPlan};
5518        let plan = ExecutionPlan::new("Test goal".to_string(), Complexity::Simple);
5519        let event = AgentEvent::PlanningEnd {
5520            plan,
5521            estimated_steps: 3,
5522        };
5523        let json = serde_json::to_string(&event).unwrap();
5524        assert!(json.contains("planning_end"));
5525        assert!(json.contains("estimated_steps"));
5526    }
5527
5528    #[test]
5529    fn test_agent_event_serialize_step_start() {
5530        let event = AgentEvent::StepStart {
5531            step_id: "step-1".to_string(),
5532            description: "Initialize project".to_string(),
5533            step_number: 1,
5534            total_steps: 5,
5535        };
5536        let json = serde_json::to_string(&event).unwrap();
5537        assert!(json.contains("step_start"));
5538        assert!(json.contains("Initialize project"));
5539    }
5540
5541    #[test]
5542    fn test_agent_event_serialize_step_end() {
5543        let event = AgentEvent::StepEnd {
5544            step_id: "step-1".to_string(),
5545            status: TaskStatus::Completed,
5546            step_number: 1,
5547            total_steps: 5,
5548        };
5549        let json = serde_json::to_string(&event).unwrap();
5550        assert!(json.contains("step_end"));
5551        assert!(json.contains("step-1"));
5552    }
5553
5554    #[test]
5555    fn test_agent_event_serialize_goal_extracted() {
5556        use crate::planning::AgentGoal;
5557        let goal = AgentGoal::new("Complete the task".to_string());
5558        let event = AgentEvent::GoalExtracted { goal };
5559        let json = serde_json::to_string(&event).unwrap();
5560        assert!(json.contains("goal_extracted"));
5561    }
5562
5563    #[test]
5564    fn test_agent_event_serialize_goal_progress() {
5565        let event = AgentEvent::GoalProgress {
5566            goal: "Build app".to_string(),
5567            progress: 0.5,
5568            completed_steps: 2,
5569            total_steps: 4,
5570        };
5571        let json = serde_json::to_string(&event).unwrap();
5572        assert!(json.contains("goal_progress"));
5573        assert!(json.contains("0.5"));
5574    }
5575
5576    #[test]
5577    fn test_agent_event_serialize_goal_achieved() {
5578        let event = AgentEvent::GoalAchieved {
5579            goal: "Build app".to_string(),
5580            total_steps: 4,
5581            duration_ms: 5000,
5582        };
5583        let json = serde_json::to_string(&event).unwrap();
5584        assert!(json.contains("goal_achieved"));
5585        assert!(json.contains("5000"));
5586    }
5587
5588    #[tokio::test]
5589    async fn test_extract_goal_with_json_response() {
5590        // LlmPlanner expects JSON with "description" and "success_criteria" fields
5591        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5592            r#"{"description": "Build web app", "success_criteria": ["App runs on port 3000", "Has login page"]}"#,
5593        )]));
5594        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5595        let agent = AgentLoop::new(
5596            mock_client,
5597            tool_executor,
5598            test_tool_context(),
5599            AgentConfig::default(),
5600        );
5601
5602        let goal = agent.extract_goal("Build a web app").await.unwrap();
5603        assert_eq!(goal.description, "Build web app");
5604        assert_eq!(goal.success_criteria.len(), 2);
5605        assert_eq!(goal.success_criteria[0], "App runs on port 3000");
5606    }
5607
5608    #[tokio::test]
5609    async fn test_extract_goal_fallback_on_non_json() {
5610        // Non-JSON response triggers fallback: returns the original prompt as goal
5611        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5612            "Some non-JSON response",
5613        )]));
5614        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5615        let agent = AgentLoop::new(
5616            mock_client,
5617            tool_executor,
5618            test_tool_context(),
5619            AgentConfig::default(),
5620        );
5621
5622        let goal = agent.extract_goal("Do something").await.unwrap();
5623        // Fallback uses the original prompt as description
5624        assert_eq!(goal.description, "Do something");
5625        // Fallback adds 2 generic criteria
5626        assert_eq!(goal.success_criteria.len(), 2);
5627    }
5628
5629    #[tokio::test]
5630    async fn test_check_goal_achievement_json_yes() {
5631        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5632            r#"{"achieved": true, "progress": 1.0, "remaining_criteria": []}"#,
5633        )]));
5634        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5635        let agent = AgentLoop::new(
5636            mock_client,
5637            tool_executor,
5638            test_tool_context(),
5639            AgentConfig::default(),
5640        );
5641
5642        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
5643        let achieved = agent
5644            .check_goal_achievement(&goal, "All done")
5645            .await
5646            .unwrap();
5647        assert!(achieved);
5648    }
5649
5650    #[tokio::test]
5651    async fn test_check_goal_achievement_fallback_not_done() {
5652        // Non-JSON response triggers heuristic fallback
5653        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5654            "invalid json",
5655        )]));
5656        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5657        let agent = AgentLoop::new(
5658            mock_client,
5659            tool_executor,
5660            test_tool_context(),
5661            AgentConfig::default(),
5662        );
5663
5664        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
5665        // "still working" doesn't contain "complete"/"done"/"finished"
5666        let achieved = agent
5667            .check_goal_achievement(&goal, "still working")
5668            .await
5669            .unwrap();
5670        assert!(!achieved);
5671    }
5672
5673    // ========================================================================
5674    // build_augmented_system_prompt Tests
5675    // ========================================================================
5676
5677    #[test]
5678    fn test_build_augmented_system_prompt_empty_context() {
5679        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5680        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5681        let config = AgentConfig {
5682            prompt_slots: SystemPromptSlots {
5683                extra: Some("Base prompt".to_string()),
5684                ..Default::default()
5685            },
5686            ..Default::default()
5687        };
5688        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5689
5690        let result = agent.build_augmented_system_prompt(&[]);
5691        assert!(result.unwrap().contains("Base prompt"));
5692    }
5693
5694    #[test]
5695    fn test_build_augmented_system_prompt_no_custom_slots() {
5696        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5697        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5698        let agent = AgentLoop::new(
5699            mock_client,
5700            tool_executor,
5701            test_tool_context(),
5702            AgentConfig::default(),
5703        );
5704
5705        let result = agent.build_augmented_system_prompt(&[]);
5706        // Default slots still produce the default agentic prompt
5707        assert!(result.is_some());
5708        assert!(result.unwrap().contains("Core Behaviour"));
5709    }
5710
5711    #[test]
5712    fn test_build_augmented_system_prompt_with_context_no_base() {
5713        use crate::context::{ContextItem, ContextResult, ContextType};
5714
5715        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5716        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5717        let agent = AgentLoop::new(
5718            mock_client,
5719            tool_executor,
5720            test_tool_context(),
5721            AgentConfig::default(),
5722        );
5723
5724        let context = vec![ContextResult {
5725            provider: "test".to_string(),
5726            items: vec![ContextItem::new("id1", ContextType::Resource, "Content")],
5727            total_tokens: 10,
5728            truncated: false,
5729        }];
5730
5731        let result = agent.build_augmented_system_prompt(&context);
5732        assert!(result.is_some());
5733        let text = result.unwrap();
5734        assert!(text.contains("<context"));
5735        assert!(text.contains("Content"));
5736    }
5737
5738    // ========================================================================
5739    // AgentResult Clone and Debug
5740    // ========================================================================
5741
5742    #[test]
5743    fn test_agent_result_clone() {
5744        let result = AgentResult {
5745            text: "output".to_string(),
5746            messages: vec![Message::user("hello")],
5747            usage: TokenUsage::default(),
5748            tool_calls_count: 3,
5749        };
5750        let cloned = result.clone();
5751        assert_eq!(cloned.text, result.text);
5752        assert_eq!(cloned.tool_calls_count, result.tool_calls_count);
5753    }
5754
5755    #[test]
5756    fn test_agent_result_debug() {
5757        let result = AgentResult {
5758            text: "output".to_string(),
5759            messages: vec![Message::user("hello")],
5760            usage: TokenUsage::default(),
5761            tool_calls_count: 3,
5762        };
5763        let debug = format!("{:?}", result);
5764        assert!(debug.contains("AgentResult"));
5765        assert!(debug.contains("output"));
5766    }
5767
5768    // ========================================================================
5769    // handle_post_execution_metadata Tests
5770    // ========================================================================
5771
5772    // ========================================================================
5773    // ToolCommand adapter tests
5774    // ========================================================================
5775
5776    #[tokio::test]
5777    async fn test_tool_command_command_type() {
5778        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5779        let cmd = ToolCommand {
5780            tool_executor: executor,
5781            tool_name: "read".to_string(),
5782            tool_args: serde_json::json!({"file": "test.rs"}),
5783            skill_registry: None,
5784            tool_context: test_tool_context(),
5785        };
5786        assert_eq!(cmd.command_type(), "read");
5787    }
5788
5789    #[tokio::test]
5790    async fn test_tool_command_payload() {
5791        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5792        let args = serde_json::json!({"file": "test.rs", "offset": 10});
5793        let cmd = ToolCommand {
5794            tool_executor: executor,
5795            tool_name: "read".to_string(),
5796            tool_args: args.clone(),
5797            skill_registry: None,
5798            tool_context: test_tool_context(),
5799        };
5800        assert_eq!(cmd.payload(), args);
5801    }
5802
5803    // ========================================================================
5804    // AgentLoop with queue builder tests
5805    // ========================================================================
5806
5807    #[tokio::test(flavor = "multi_thread")]
5808    async fn test_agent_loop_with_queue() {
5809        use tokio::sync::broadcast;
5810
5811        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5812            "Hello",
5813        )]));
5814        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5815        let config = AgentConfig::default();
5816
5817        let (event_tx, _) = broadcast::channel(100);
5818        let queue = SessionLaneQueue::new("test-session", SessionQueueConfig::default(), event_tx)
5819            .await
5820            .unwrap();
5821
5822        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config)
5823            .with_queue(Arc::new(queue));
5824
5825        assert!(agent.command_queue.is_some());
5826    }
5827
5828    #[tokio::test]
5829    async fn test_agent_loop_without_queue() {
5830        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5831            "Hello",
5832        )]));
5833        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5834        let config = AgentConfig::default();
5835
5836        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5837
5838        assert!(agent.command_queue.is_none());
5839    }
5840
5841    // ========================================================================
5842    // Parallel Plan Execution Tests
5843    // ========================================================================
5844
5845    #[tokio::test]
5846    async fn test_execute_plan_parallel_independent() {
5847        use crate::planning::{Complexity, ExecutionPlan, Task};
5848
5849        // 3 independent steps (no dependencies) — should all execute.
5850        // MockLlmClient needs one response per execute_loop call per step.
5851        let mock_client = Arc::new(MockLlmClient::new(vec![
5852            MockLlmClient::text_response("Step 1 done"),
5853            MockLlmClient::text_response("Step 2 done"),
5854            MockLlmClient::text_response("Step 3 done"),
5855        ]));
5856
5857        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5858        let config = AgentConfig::default();
5859        let agent = AgentLoop::new(
5860            mock_client.clone(),
5861            tool_executor,
5862            test_tool_context(),
5863            config,
5864        );
5865
5866        let mut plan = ExecutionPlan::new("Test parallel", Complexity::Simple);
5867        plan.add_step(Task::new("s1", "First step"));
5868        plan.add_step(Task::new("s2", "Second step"));
5869        plan.add_step(Task::new("s3", "Third step"));
5870
5871        let (tx, mut rx) = mpsc::channel(100);
5872        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
5873
5874        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
5875        assert_eq!(result.usage.total_tokens, 45);
5876
5877        // Verify we received StepStart and StepEnd events for all 3 steps
5878        let mut step_starts = Vec::new();
5879        let mut step_ends = Vec::new();
5880        rx.close();
5881        while let Some(event) = rx.recv().await {
5882            match event {
5883                AgentEvent::StepStart { step_id, .. } => step_starts.push(step_id),
5884                AgentEvent::StepEnd {
5885                    step_id, status, ..
5886                } => {
5887                    assert_eq!(status, TaskStatus::Completed);
5888                    step_ends.push(step_id);
5889                }
5890                _ => {}
5891            }
5892        }
5893        assert_eq!(step_starts.len(), 3);
5894        assert_eq!(step_ends.len(), 3);
5895    }
5896
5897    #[tokio::test]
5898    async fn test_execute_plan_respects_dependencies() {
5899        use crate::planning::{Complexity, ExecutionPlan, Task};
5900
5901        // s1 and s2 are independent (wave 1), s3 depends on both (wave 2).
5902        // This requires 3 responses total.
5903        let mock_client = Arc::new(MockLlmClient::new(vec![
5904            MockLlmClient::text_response("Step 1 done"),
5905            MockLlmClient::text_response("Step 2 done"),
5906            MockLlmClient::text_response("Step 3 done"),
5907        ]));
5908
5909        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5910        let config = AgentConfig::default();
5911        let agent = AgentLoop::new(
5912            mock_client.clone(),
5913            tool_executor,
5914            test_tool_context(),
5915            config,
5916        );
5917
5918        let mut plan = ExecutionPlan::new("Test deps", Complexity::Medium);
5919        plan.add_step(Task::new("s1", "Independent A"));
5920        plan.add_step(Task::new("s2", "Independent B"));
5921        plan.add_step(
5922            Task::new("s3", "Depends on A+B")
5923                .with_dependencies(vec!["s1".to_string(), "s2".to_string()]),
5924        );
5925
5926        let (tx, mut rx) = mpsc::channel(100);
5927        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
5928
5929        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
5930        assert_eq!(result.usage.total_tokens, 45);
5931
5932        // Verify ordering: s3's StepStart must come after s1 and s2's StepEnd
5933        let mut events = Vec::new();
5934        rx.close();
5935        while let Some(event) = rx.recv().await {
5936            match &event {
5937                AgentEvent::StepStart { step_id, .. } => {
5938                    events.push(format!("start:{}", step_id));
5939                }
5940                AgentEvent::StepEnd { step_id, .. } => {
5941                    events.push(format!("end:{}", step_id));
5942                }
5943                _ => {}
5944            }
5945        }
5946
5947        // s3 start must occur after both s1 end and s2 end
5948        let s1_end = events.iter().position(|e| e == "end:s1").unwrap();
5949        let s2_end = events.iter().position(|e| e == "end:s2").unwrap();
5950        let s3_start = events.iter().position(|e| e == "start:s3").unwrap();
5951        assert!(
5952            s3_start > s1_end,
5953            "s3 started before s1 ended: {:?}",
5954            events
5955        );
5956        assert!(
5957            s3_start > s2_end,
5958            "s3 started before s2 ended: {:?}",
5959            events
5960        );
5961
5962        // Final result should reflect step 3 (last sequential step)
5963        assert!(result.text.contains("Step 3 done") || !result.text.is_empty());
5964    }
5965
5966    #[tokio::test]
5967    async fn test_execute_plan_handles_step_failure() {
5968        use crate::planning::{Complexity, ExecutionPlan, Task};
5969
5970        // s1 succeeds, s2 depends on s1 (succeeds), s3 depends on nothing (succeeds),
5971        // s4 depends on a step that will fail (s_fail).
5972        // We simulate failure by providing no responses for s_fail's execute_loop.
5973        //
5974        // Simpler approach: s1 succeeds, s2 depends on s1 (will fail because no
5975        // mock response left), s3 is independent.
5976        // Layout: s1 (independent), s3 (independent) → wave 1 parallel
5977        //         s2 depends on s1 → wave 2
5978        //         s4 depends on s2 → wave 3 (should deadlock since s2 fails)
5979        let mock_client = Arc::new(MockLlmClient::new(vec![
5980            // Wave 1: s1 and s3 execute in parallel
5981            MockLlmClient::text_response("s1 done"),
5982            MockLlmClient::text_response("s3 done"),
5983            // Wave 2: s2 executes — but we give it no response, causing failure
5984            // Actually the MockLlmClient will fail with "No more mock responses"
5985        ]));
5986
5987        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5988        let config = AgentConfig::default();
5989        let agent = AgentLoop::new(
5990            mock_client.clone(),
5991            tool_executor,
5992            test_tool_context(),
5993            config,
5994        );
5995
5996        let mut plan = ExecutionPlan::new("Test failure", Complexity::Medium);
5997        plan.add_step(Task::new("s1", "Independent step"));
5998        plan.add_step(Task::new("s2", "Depends on s1").with_dependencies(vec!["s1".to_string()]));
5999        plan.add_step(Task::new("s3", "Another independent"));
6000        plan.add_step(Task::new("s4", "Depends on s2").with_dependencies(vec!["s2".to_string()]));
6001
6002        let (tx, mut rx) = mpsc::channel(100);
6003        let _result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
6004
6005        // s1 and s3 should succeed (wave 1), s2 should fail (wave 2),
6006        // s4 should never execute (deadlock — dep s2 failed, not completed)
6007        let mut completed_steps = Vec::new();
6008        let mut failed_steps = Vec::new();
6009        rx.close();
6010        while let Some(event) = rx.recv().await {
6011            if let AgentEvent::StepEnd {
6012                step_id, status, ..
6013            } = event
6014            {
6015                match status {
6016                    TaskStatus::Completed => completed_steps.push(step_id),
6017                    TaskStatus::Failed => failed_steps.push(step_id),
6018                    _ => {}
6019                }
6020            }
6021        }
6022
6023        assert!(
6024            completed_steps.contains(&"s1".to_string()),
6025            "s1 should complete"
6026        );
6027        assert!(
6028            completed_steps.contains(&"s3".to_string()),
6029            "s3 should complete"
6030        );
6031        assert!(failed_steps.contains(&"s2".to_string()), "s2 should fail");
6032        // s4 should NOT appear in either list — it was never started
6033        assert!(
6034            !completed_steps.contains(&"s4".to_string()),
6035            "s4 should not complete"
6036        );
6037        assert!(
6038            !failed_steps.contains(&"s4".to_string()),
6039            "s4 should not fail (never started)"
6040        );
6041    }
6042
6043    // ========================================================================
6044    // Phase 4: Error Recovery & Resilience Tests
6045    // ========================================================================
6046
6047    #[test]
6048    fn test_agent_config_resilience_defaults() {
6049        let config = AgentConfig::default();
6050        assert_eq!(config.max_parse_retries, 2);
6051        assert_eq!(config.tool_timeout_ms, None);
6052        assert_eq!(config.circuit_breaker_threshold, 3);
6053    }
6054
6055    /// 4.1 — Parse error recovery: bails after max_parse_retries exceeded
6056    #[tokio::test]
6057    async fn test_parse_error_recovery_bails_after_threshold() {
6058        // 3 parse errors with max_parse_retries=2: count reaches 3 > 2 → bail
6059        let mock_client = Arc::new(MockLlmClient::new(vec![
6060            MockLlmClient::tool_call_response(
6061                "c1",
6062                "bash",
6063                serde_json::json!({"__parse_error": "unexpected token at position 5"}),
6064            ),
6065            MockLlmClient::tool_call_response(
6066                "c2",
6067                "bash",
6068                serde_json::json!({"__parse_error": "missing closing brace"}),
6069            ),
6070            MockLlmClient::tool_call_response(
6071                "c3",
6072                "bash",
6073                serde_json::json!({"__parse_error": "still broken"}),
6074            ),
6075            MockLlmClient::text_response("Done"), // never reached
6076        ]));
6077
6078        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6079        let config = AgentConfig {
6080            max_parse_retries: 2,
6081            ..AgentConfig::default()
6082        };
6083        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6084        let result = agent.execute(&[], "Do something", None).await;
6085        assert!(result.is_err(), "should bail after parse error threshold");
6086        let err = result.unwrap_err().to_string();
6087        assert!(
6088            err.contains("malformed tool arguments"),
6089            "error should mention malformed tool arguments, got: {}",
6090            err
6091        );
6092    }
6093
6094    /// 4.1 — Parse error recovery: counter resets after a valid tool execution
6095    #[tokio::test]
6096    async fn test_parse_error_counter_resets_on_success() {
6097        // 2 parse errors (= max_parse_retries, not yet exceeded)
6098        // Then a valid tool call (resets counter)
6099        // Then final text — should NOT bail
6100        let mock_client = Arc::new(MockLlmClient::new(vec![
6101            MockLlmClient::tool_call_response(
6102                "c1",
6103                "bash",
6104                serde_json::json!({"__parse_error": "bad args"}),
6105            ),
6106            MockLlmClient::tool_call_response(
6107                "c2",
6108                "bash",
6109                serde_json::json!({"__parse_error": "bad args again"}),
6110            ),
6111            // Valid call — resets parse_error_count to 0
6112            MockLlmClient::tool_call_response(
6113                "c3",
6114                "bash",
6115                serde_json::json!({"command": "echo ok"}),
6116            ),
6117            MockLlmClient::text_response("All done"),
6118        ]));
6119
6120        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6121        let config = AgentConfig {
6122            max_parse_retries: 2,
6123            ..AgentConfig::default()
6124        };
6125        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6126        let result = agent.execute(&[], "Do something", None).await;
6127        assert!(
6128            result.is_ok(),
6129            "should not bail — counter reset after successful tool, got: {:?}",
6130            result.err()
6131        );
6132        assert_eq!(result.unwrap().text, "All done");
6133    }
6134
6135    /// 4.2 — Tool timeout: slow tool produces a timeout error result; session continues
6136    #[tokio::test]
6137    async fn test_tool_timeout_produces_error_result() {
6138        let mock_client = Arc::new(MockLlmClient::new(vec![
6139            MockLlmClient::tool_call_response(
6140                "t1",
6141                "bash",
6142                serde_json::json!({"command": "sleep 10"}),
6143            ),
6144            MockLlmClient::text_response("The command timed out."),
6145        ]));
6146
6147        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6148        let config = AgentConfig {
6149            // 50ms — sleep 10 will never finish
6150            tool_timeout_ms: Some(50),
6151            ..AgentConfig::default()
6152        };
6153        let agent = AgentLoop::new(
6154            mock_client.clone(),
6155            tool_executor,
6156            test_tool_context(),
6157            config,
6158        );
6159        let result = agent.execute(&[], "Run sleep", None).await;
6160        assert!(
6161            result.is_ok(),
6162            "session should continue after tool timeout: {:?}",
6163            result.err()
6164        );
6165        assert_eq!(result.unwrap().text, "The command timed out.");
6166        // LLM called twice: initial request + response after timeout error
6167        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
6168    }
6169
6170    /// 4.2 — Tool timeout: tool that finishes before the deadline succeeds normally
6171    #[tokio::test]
6172    async fn test_tool_within_timeout_succeeds() {
6173        let mock_client = Arc::new(MockLlmClient::new(vec![
6174            MockLlmClient::tool_call_response(
6175                "t1",
6176                "bash",
6177                serde_json::json!({"command": "echo fast"}),
6178            ),
6179            MockLlmClient::text_response("Command succeeded."),
6180        ]));
6181
6182        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6183        let config = AgentConfig {
6184            tool_timeout_ms: Some(5_000), // 5 s — echo completes in <100ms
6185            ..AgentConfig::default()
6186        };
6187        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6188        let result = agent.execute(&[], "Run something fast", None).await;
6189        assert!(
6190            result.is_ok(),
6191            "fast tool should succeed: {:?}",
6192            result.err()
6193        );
6194        assert_eq!(result.unwrap().text, "Command succeeded.");
6195    }
6196
6197    /// 4.3 — Circuit breaker: retries non-streaming LLM failures up to threshold
6198    #[tokio::test]
6199    async fn test_circuit_breaker_retries_non_streaming() {
6200        // Empty response list → every call bails with "No more mock responses"
6201        // threshold=2 → tries twice, then bails with circuit-breaker message
6202        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6203
6204        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6205        let config = AgentConfig {
6206            circuit_breaker_threshold: 2,
6207            ..AgentConfig::default()
6208        };
6209        let agent = AgentLoop::new(
6210            mock_client.clone(),
6211            tool_executor,
6212            test_tool_context(),
6213            config,
6214        );
6215        let result = agent.execute(&[], "Hello", None).await;
6216        assert!(result.is_err(), "should fail when LLM always errors");
6217        let err = result.unwrap_err().to_string();
6218        assert!(
6219            err.contains("circuit breaker"),
6220            "error should mention circuit breaker, got: {}",
6221            err
6222        );
6223        assert_eq!(
6224            mock_client.call_count.load(Ordering::SeqCst),
6225            2,
6226            "should make exactly threshold=2 LLM calls"
6227        );
6228    }
6229
6230    /// 4.3 — Circuit breaker: threshold=1 bails on the very first failure
6231    #[tokio::test]
6232    async fn test_circuit_breaker_threshold_one_no_retry() {
6233        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6234
6235        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6236        let config = AgentConfig {
6237            circuit_breaker_threshold: 1,
6238            ..AgentConfig::default()
6239        };
6240        let agent = AgentLoop::new(
6241            mock_client.clone(),
6242            tool_executor,
6243            test_tool_context(),
6244            config,
6245        );
6246        let result = agent.execute(&[], "Hello", None).await;
6247        assert!(result.is_err());
6248        assert_eq!(
6249            mock_client.call_count.load(Ordering::SeqCst),
6250            1,
6251            "with threshold=1 exactly one attempt should be made"
6252        );
6253    }
6254
6255    /// 4.3 — Circuit breaker: succeeds when LLM recovers before hitting threshold
6256    #[tokio::test]
6257    async fn test_circuit_breaker_succeeds_if_llm_recovers() {
6258        // First call fails, second call succeeds; threshold=3 — recovery within threshold
6259        struct FailOnceThenSucceed {
6260            inner: MockLlmClient,
6261            failed_once: std::sync::atomic::AtomicBool,
6262            call_count: AtomicUsize,
6263        }
6264
6265        #[async_trait::async_trait]
6266        impl LlmClient for FailOnceThenSucceed {
6267            async fn complete(
6268                &self,
6269                messages: &[Message],
6270                system: Option<&str>,
6271                tools: &[ToolDefinition],
6272            ) -> Result<LlmResponse> {
6273                self.call_count.fetch_add(1, Ordering::SeqCst);
6274                let already_failed = self
6275                    .failed_once
6276                    .swap(true, std::sync::atomic::Ordering::SeqCst);
6277                if !already_failed {
6278                    anyhow::bail!("transient network error");
6279                }
6280                self.inner.complete(messages, system, tools).await
6281            }
6282
6283            async fn complete_streaming(
6284                &self,
6285                messages: &[Message],
6286                system: Option<&str>,
6287                tools: &[ToolDefinition],
6288            ) -> Result<tokio::sync::mpsc::Receiver<crate::llm::StreamEvent>> {
6289                self.inner.complete_streaming(messages, system, tools).await
6290            }
6291        }
6292
6293        let mock = Arc::new(FailOnceThenSucceed {
6294            inner: MockLlmClient::new(vec![MockLlmClient::text_response("Recovered!")]),
6295            failed_once: std::sync::atomic::AtomicBool::new(false),
6296            call_count: AtomicUsize::new(0),
6297        });
6298
6299        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6300        let config = AgentConfig {
6301            circuit_breaker_threshold: 3,
6302            ..AgentConfig::default()
6303        };
6304        let agent = AgentLoop::new(mock.clone(), tool_executor, test_tool_context(), config);
6305        let result = agent.execute(&[], "Hello", None).await;
6306        assert!(
6307            result.is_ok(),
6308            "should succeed when LLM recovers within threshold: {:?}",
6309            result.err()
6310        );
6311        assert_eq!(result.unwrap().text, "Recovered!");
6312        assert_eq!(
6313            mock.call_count.load(Ordering::SeqCst),
6314            2,
6315            "should have made exactly 2 calls (1 fail + 1 success)"
6316        );
6317    }
6318
6319    // ── Continuation detection tests ─────────────────────────────────────────
6320
6321    #[test]
6322    fn test_looks_incomplete_empty() {
6323        assert!(AgentLoop::looks_incomplete(""));
6324        assert!(AgentLoop::looks_incomplete("   "));
6325    }
6326
6327    #[test]
6328    fn test_looks_incomplete_trailing_colon() {
6329        assert!(AgentLoop::looks_incomplete("Let me check the file:"));
6330        assert!(AgentLoop::looks_incomplete("Next steps:"));
6331    }
6332
6333    #[test]
6334    fn test_looks_incomplete_ellipsis() {
6335        assert!(AgentLoop::looks_incomplete("Working on it..."));
6336        assert!(AgentLoop::looks_incomplete("Processing…"));
6337    }
6338
6339    #[test]
6340    fn test_looks_incomplete_intent_phrases() {
6341        assert!(AgentLoop::looks_incomplete(
6342            "I'll start by reading the file."
6343        ));
6344        assert!(AgentLoop::looks_incomplete(
6345            "Let me check the configuration."
6346        ));
6347        assert!(AgentLoop::looks_incomplete("I will now run the tests."));
6348        assert!(AgentLoop::looks_incomplete(
6349            "I need to update the Cargo.toml."
6350        ));
6351    }
6352
6353    #[test]
6354    fn test_looks_complete_final_answer() {
6355        // Clear final answers should NOT trigger continuation
6356        assert!(!AgentLoop::looks_incomplete(
6357            "The tests pass. All changes have been applied successfully."
6358        ));
6359        assert!(!AgentLoop::looks_incomplete(
6360            "Done. I've updated the three files and verified the build succeeds."
6361        ));
6362        assert!(!AgentLoop::looks_incomplete("42"));
6363        assert!(!AgentLoop::looks_incomplete("Yes."));
6364    }
6365
6366    #[test]
6367    fn test_looks_incomplete_multiline_complete() {
6368        let text = "Here is the summary:\n\n- Fixed the bug in agent.rs\n- All tests pass\n- Build succeeds";
6369        assert!(!AgentLoop::looks_incomplete(text));
6370    }
6371}