Skip to main content

a3s_code_core/
agent.rs

1//! Agent Loop Implementation
2//!
3//! The agent loop handles the core conversation cycle:
4//! 1. User sends a prompt
5//! 2. LLM generates a response (possibly with tool calls)
6//! 3. If tool calls present, execute them and send results back
7//! 4. Repeat until LLM returns without tool calls
8//!
//! This implements agentic behavior: the LLM decides which tools to call
//! and keeps going until the task is accomplished.
11
12use crate::context::{ContextProvider, ContextQuery, ContextResult};
13use crate::hitl::ConfirmationProvider;
14use crate::hooks::{
15    ErrorType, GenerateEndEvent, GenerateStartEvent, HookEvent, HookExecutor, HookResult,
16    OnErrorEvent, PostResponseEvent, PostToolUseEvent, PrePromptEvent, PreToolUseEvent,
17    TokenUsageInfo, ToolCallInfo, ToolResultData,
18};
19use crate::llm::{LlmClient, LlmResponse, Message, TokenUsage, ToolDefinition};
20use crate::permissions::{PermissionChecker, PermissionDecision};
21use crate::planning::{AgentGoal, ExecutionPlan, TaskStatus};
22use crate::prompts::{AgentStyle, DetectionConfidence, PlanningMode, SystemPromptSlots};
23use crate::queue::SessionCommand;
24use crate::session_lane_queue::SessionLaneQueue;
25use crate::tool_search::ToolIndex;
26use crate::tools::{ToolContext, ToolExecutor, ToolStreamEvent};
27use anyhow::{Context, Result};
28use async_trait::async_trait;
29use futures::future::join_all;
30use serde::{Deserialize, Serialize};
31use serde_json::Value;
32use std::sync::Arc;
33use std::time::Duration;
34use tokio::sync::{mpsc, RwLock};
35
/// Maximum number of tool execution rounds before stopping.
///
/// Used as the default value of `AgentConfig::max_tool_rounds`.
const MAX_TOOL_ROUNDS: usize = 50;
38
/// Agent configuration
///
/// All fields are public. Start from `AgentConfig::default()` and override
/// only the settings you need; the per-field docs below state each default
/// where one is defined.
#[derive(Clone)]
pub struct AgentConfig {
    /// Slot-based system prompt customization.
    ///
    /// Users can customize specific parts (role, guidelines, response style, extra)
    /// without overriding the core agentic capabilities. The default agentic core
    /// (tool usage, autonomous behavior, completion criteria) is always preserved.
    pub prompt_slots: SystemPromptSlots,
    /// Tool definitions carried by this configuration.
    ///
    /// Empty by default; the default configuration notes that tools are
    /// provided by the `ToolExecutor` instead.
    pub tools: Vec<ToolDefinition>,
    /// Maximum number of tool execution rounds before stopping (default: 50).
    pub max_tool_rounds: usize,
    /// Optional security provider for input taint tracking and output sanitization
    pub security_provider: Option<Arc<dyn crate::security::SecurityProvider>>,
    /// Optional permission checker for tool execution control
    pub permission_checker: Option<Arc<dyn PermissionChecker>>,
    /// Optional confirmation manager for HITL (Human-in-the-Loop)
    pub confirmation_manager: Option<Arc<dyn ConfirmationProvider>>,
    /// Context providers for augmenting prompts with external context
    pub context_providers: Vec<Arc<dyn ContextProvider>>,
    /// Planning mode — Auto (detect from message), Enabled, or Disabled.
    pub planning_mode: PlanningMode,
    /// Enable goal tracking
    pub goal_tracking: bool,
    /// Optional hook engine for firing lifecycle events (PreToolUse, PostToolUse, etc.)
    pub hook_engine: Option<Arc<dyn HookExecutor>>,
    /// Optional skill registry for tool permission enforcement
    pub skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    /// Max consecutive malformed-tool-args errors before aborting (default: 2).
    ///
    /// When the LLM returns tool arguments with `__parse_error`, the error is
    /// fed back as a tool result. After this many consecutive parse errors the
    /// loop bails instead of retrying indefinitely.
    pub max_parse_retries: u32,
    /// Per-tool execution timeout in milliseconds (`None` = no timeout).
    ///
    /// When set, each tool execution is wrapped in `tokio::time::timeout`.
    /// A timeout produces an error result sent back to the LLM rather than
    /// crashing the session.
    pub tool_timeout_ms: Option<u64>,
    /// Circuit-breaker threshold: max consecutive LLM API failures before
    /// aborting (default: 3).
    ///
    /// In non-streaming mode, transient LLM failures are retried up to this
    /// many times (with short exponential backoff) before the loop bails.
    /// In streaming mode, any failure is fatal (events cannot be replayed).
    pub circuit_breaker_threshold: u32,
    /// Max consecutive identical tool signatures before aborting (default: 3).
    ///
    /// A tool signature is the exact combination of tool name + compact JSON
    /// arguments. This prevents the agent from getting stuck repeating the same
    /// tool call in a loop, for example repeatedly fetching the same URL.
    pub duplicate_tool_call_threshold: u32,
    /// Enable auto-compaction when context usage exceeds threshold.
    ///
    /// Disabled (`false`) by default.
    pub auto_compact: bool,
    /// Context usage percentage threshold to trigger auto-compaction (0.0 - 1.0).
    /// Default: 0.80 (80%).
    pub auto_compact_threshold: f32,
    /// Maximum context window size in tokens (used for auto-compact calculation).
    /// Default: 200_000.
    pub max_context_tokens: usize,
    /// LLM client reference for auto-compaction (needs to call LLM for summarization).
    pub llm_client: Option<Arc<dyn LlmClient>>,
    /// Optional agent memory for auto-remember after tool execution and recall before prompts.
    pub memory: Option<Arc<crate::memory::AgentMemory>>,
    /// Inject a continuation message when the LLM stops calling tools before the
    /// task is complete. Enabled by default. Set to `false` to disable.
    ///
    /// When enabled, if the LLM produces a response with no tool calls but the
    /// response text looks like an intermediate step (not a final answer), the
    /// loop injects [`crate::prompts::CONTINUATION`] as a user message and
    /// continues for up to `max_continuation_turns` additional turns.
    pub continuation_enabled: bool,
    /// Maximum number of continuation injections per execution (default: 3).
    ///
    /// Prevents infinite loops when the LLM repeatedly stops without completing.
    pub max_continuation_turns: u32,
    /// Optional tool search index for filtering tools per-turn.
    ///
    /// When set, only tools matching the user prompt are sent to the LLM,
    /// reducing context usage when many MCP tools are registered.
    pub tool_index: Option<ToolIndex>,
}
121
122impl std::fmt::Debug for AgentConfig {
123    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124        f.debug_struct("AgentConfig")
125            .field("prompt_slots", &self.prompt_slots)
126            .field("tools", &self.tools)
127            .field("max_tool_rounds", &self.max_tool_rounds)
128            .field("security_provider", &self.security_provider.is_some())
129            .field("permission_checker", &self.permission_checker.is_some())
130            .field("confirmation_manager", &self.confirmation_manager.is_some())
131            .field("context_providers", &self.context_providers.len())
132            .field("planning_mode", &self.planning_mode)
133            .field("goal_tracking", &self.goal_tracking)
134            .field("hook_engine", &self.hook_engine.is_some())
135            .field(
136                "skill_registry",
137                &self.skill_registry.as_ref().map(|r| r.len()),
138            )
139            .field("max_parse_retries", &self.max_parse_retries)
140            .field("tool_timeout_ms", &self.tool_timeout_ms)
141            .field("circuit_breaker_threshold", &self.circuit_breaker_threshold)
142            .field(
143                "duplicate_tool_call_threshold",
144                &self.duplicate_tool_call_threshold,
145            )
146            .field("auto_compact", &self.auto_compact)
147            .field("auto_compact_threshold", &self.auto_compact_threshold)
148            .field("max_context_tokens", &self.max_context_tokens)
149            .field("continuation_enabled", &self.continuation_enabled)
150            .field("max_continuation_turns", &self.max_continuation_turns)
151            .field("memory", &self.memory.is_some())
152            .field("tool_index", &self.tool_index.as_ref().map(|i| i.len()))
153            .finish()
154    }
155}
156
157impl Default for AgentConfig {
158    fn default() -> Self {
159        Self {
160            prompt_slots: SystemPromptSlots::default(),
161            tools: Vec::new(), // Tools are provided by ToolExecutor
162            max_tool_rounds: MAX_TOOL_ROUNDS,
163            security_provider: None,
164            permission_checker: None,
165            confirmation_manager: None,
166            context_providers: Vec::new(),
167            planning_mode: PlanningMode::default(),
168            goal_tracking: false,
169            hook_engine: None,
170            skill_registry: Some(Arc::new(crate::skills::SkillRegistry::with_builtins())),
171            max_parse_retries: 2,
172            tool_timeout_ms: None,
173            circuit_breaker_threshold: 3,
174            duplicate_tool_call_threshold: 3,
175            auto_compact: false,
176            auto_compact_threshold: 0.80,
177            max_context_tokens: 200_000,
178            llm_client: None,
179            memory: None,
180            continuation_enabled: true,
181            max_continuation_turns: 3,
182            tool_index: None,
183        }
184    }
185}
186
/// Events emitted during agent execution
///
/// Subscribe via [`Session::subscribe_events()`](crate::session::Session::subscribe_events).
/// New variants may be added in minor releases — always include a wildcard arm
/// (`_ => {}`) when matching.
///
/// # Serialized form
///
/// The enum is internally tagged (`#[serde(tag = "type")]`): each event
/// serializes to an object whose `type` field holds the name given in the
/// variant's `#[serde(rename = "...")]` attribute (for example
/// `"agent_start"` for [`AgentEvent::Start`]), alongside the variant's own
/// fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum AgentEvent {
    /// Agent started processing
    #[serde(rename = "agent_start")]
    Start { prompt: String },

    /// LLM turn started
    #[serde(rename = "turn_start")]
    TurnStart { turn: usize },

    /// Text delta from streaming
    #[serde(rename = "text_delta")]
    TextDelta { text: String },

    /// Tool execution started
    #[serde(rename = "tool_start")]
    ToolStart { id: String, name: String },

    /// Tool input delta from streaming (partial JSON arguments)
    #[serde(rename = "tool_input_delta")]
    ToolInputDelta { delta: String },

    /// Tool execution completed
    ///
    /// `metadata` is left out of the serialized event when it is `None`.
    #[serde(rename = "tool_end")]
    ToolEnd {
        id: String,
        name: String,
        output: String,
        exit_code: i32,
        #[serde(skip_serializing_if = "Option::is_none")]
        metadata: Option<serde_json::Value>,
    },

    /// Intermediate tool output (streaming delta)
    #[serde(rename = "tool_output_delta")]
    ToolOutputDelta {
        id: String,
        name: String,
        delta: String,
    },

    /// LLM turn completed
    #[serde(rename = "turn_end")]
    TurnEnd { turn: usize, usage: TokenUsage },

    /// Agent completed
    ///
    /// `meta` is left out of the serialized event when it is `None`.
    #[serde(rename = "agent_end")]
    End {
        text: String,
        usage: TokenUsage,
        #[serde(skip_serializing_if = "Option::is_none")]
        meta: Option<crate::llm::LlmResponseMeta>,
    },

    /// Error occurred
    #[serde(rename = "error")]
    Error { message: String },

    /// Tool execution requires confirmation (HITL)
    #[serde(rename = "confirmation_required")]
    ConfirmationRequired {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        timeout_ms: u64,
    },

    /// Confirmation received from user (HITL)
    #[serde(rename = "confirmation_received")]
    ConfirmationReceived {
        tool_id: String,
        approved: bool,
        reason: Option<String>,
    },

    /// Confirmation timed out (HITL)
    #[serde(rename = "confirmation_timeout")]
    ConfirmationTimeout {
        tool_id: String,
        action_taken: String, // "rejected" or "auto_approved"
    },

    /// External task pending (needs SDK processing)
    #[serde(rename = "external_task_pending")]
    ExternalTaskPending {
        task_id: String,
        session_id: String,
        lane: crate::hitl::SessionLane,
        command_type: String,
        payload: serde_json::Value,
        timeout_ms: u64,
    },

    /// External task completed
    #[serde(rename = "external_task_completed")]
    ExternalTaskCompleted {
        task_id: String,
        session_id: String,
        success: bool,
    },

    /// Tool execution denied by permission policy
    #[serde(rename = "permission_denied")]
    PermissionDenied {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        reason: String,
    },

    /// Context resolution started
    #[serde(rename = "context_resolving")]
    ContextResolving { providers: Vec<String> },

    /// Context resolution completed
    #[serde(rename = "context_resolved")]
    ContextResolved {
        total_items: usize,
        total_tokens: usize,
    },

    // ========================================================================
    // a3s-lane integration events
    // ========================================================================
    /// Command moved to dead letter queue after exhausting retries
    #[serde(rename = "command_dead_lettered")]
    CommandDeadLettered {
        command_id: String,
        command_type: String,
        lane: String,
        error: String,
        attempts: u32,
    },

    /// Command retry attempt
    #[serde(rename = "command_retry")]
    CommandRetry {
        command_id: String,
        command_type: String,
        lane: String,
        attempt: u32,
        delay_ms: u64,
    },

    /// Queue alert (depth warning, latency alert, etc.)
    #[serde(rename = "queue_alert")]
    QueueAlert {
        level: String,
        alert_type: String,
        message: String,
    },

    // ========================================================================
    // Task tracking events
    // ========================================================================
    /// Task list updated
    #[serde(rename = "task_updated")]
    TaskUpdated {
        session_id: String,
        tasks: Vec<crate::planning::Task>,
    },

    // ========================================================================
    // Memory System events (Phase 3)
    // ========================================================================
    /// Memory stored
    #[serde(rename = "memory_stored")]
    MemoryStored {
        memory_id: String,
        memory_type: String,
        importance: f32,
        tags: Vec<String>,
    },

    /// Memory recalled
    #[serde(rename = "memory_recalled")]
    MemoryRecalled {
        memory_id: String,
        content: String,
        relevance: f32,
    },

    /// Memories searched
    #[serde(rename = "memories_searched")]
    MemoriesSearched {
        query: Option<String>,
        tags: Vec<String>,
        result_count: usize,
    },

    /// Memory cleared
    #[serde(rename = "memory_cleared")]
    MemoryCleared {
        tier: String, // "long_term", "short_term", "working"
        count: u64,
    },

    // ========================================================================
    // Subagent events
    // ========================================================================
    /// Subagent task started
    #[serde(rename = "subagent_start")]
    SubagentStart {
        /// Unique task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Parent session ID
        parent_session_id: String,
        /// Agent type (e.g., "explore", "general")
        agent: String,
        /// Short description of the task
        description: String,
    },

    /// Subagent task progress update
    #[serde(rename = "subagent_progress")]
    SubagentProgress {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Progress status message
        status: String,
        /// Additional metadata
        metadata: serde_json::Value,
    },

    /// Subagent task completed
    #[serde(rename = "subagent_end")]
    SubagentEnd {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Agent type
        agent: String,
        /// Task output/result
        output: String,
        /// Whether the task succeeded
        success: bool,
    },

    // ========================================================================
    // Planning and Goal Tracking Events (Phase 1)
    // ========================================================================
    /// Planning phase started
    #[serde(rename = "planning_start")]
    PlanningStart { prompt: String },

    /// Planning phase completed
    #[serde(rename = "planning_end")]
    PlanningEnd {
        plan: ExecutionPlan,
        estimated_steps: usize,
    },

    /// Step execution started
    #[serde(rename = "step_start")]
    StepStart {
        step_id: String,
        description: String,
        step_number: usize,
        total_steps: usize,
    },

    /// Step execution completed
    #[serde(rename = "step_end")]
    StepEnd {
        step_id: String,
        status: TaskStatus,
        step_number: usize,
        total_steps: usize,
    },

    /// Goal extracted from prompt
    #[serde(rename = "goal_extracted")]
    GoalExtracted { goal: AgentGoal },

    /// Goal progress update
    #[serde(rename = "goal_progress")]
    GoalProgress {
        goal: String,
        progress: f32,
        completed_steps: usize,
        total_steps: usize,
    },

    /// Goal achieved
    #[serde(rename = "goal_achieved")]
    GoalAchieved {
        goal: String,
        total_steps: usize,
        duration_ms: i64,
    },

    // ========================================================================
    // Context Compaction events
    // ========================================================================
    /// Context automatically compacted due to high usage
    #[serde(rename = "context_compacted")]
    ContextCompacted {
        session_id: String,
        before_messages: usize,
        after_messages: usize,
        percent_before: f32,
    },

    // ========================================================================
    // Persistence events
    // ========================================================================
    /// Session persistence failed — SDK clients should handle this
    #[serde(rename = "persistence_failed")]
    PersistenceFailed {
        session_id: String,
        operation: String,
        error: String,
    },

    // ========================================================================
    // Side question (btw)
    // ========================================================================
    /// Ephemeral side question answered.
    ///
    /// Emitted by [`crate::agent_api::AgentSession::btw()`] in streaming mode.
    /// The answer is never added to conversation history.
    #[serde(rename = "btw_answer")]
    BtwAnswer {
        question: String,
        answer: String,
        usage: TokenUsage,
    },
}
527
/// Result of agent execution
///
/// NOTE(review): the per-field notes below are inferred from field names and
/// types only; the code that builds this value is outside this excerpt, so
/// confirm them against the constructor before relying on them.
#[derive(Debug, Clone)]
pub struct AgentResult {
    /// Response text (presumably the final assistant text of the run).
    pub text: String,
    /// Conversation messages (presumably the full history after the run).
    pub messages: Vec<Message>,
    /// Token usage (presumably accumulated across all turns of the run).
    pub usage: TokenUsage,
    /// Number of tool calls (presumably the total executed during the run).
    pub tool_calls_count: usize,
}
536
537// ============================================================================
538// ToolCommand — bridges ToolExecutor to SessionCommand for queue submission
539// ============================================================================
540
/// Adapter that implements `SessionCommand` for tool execution via the queue.
///
/// Wraps a `ToolExecutor` call so it can be submitted to `SessionLaneQueue`.
pub struct ToolCommand {
    /// Executor that actually runs the tool when the command is executed.
    tool_executor: Arc<ToolExecutor>,
    /// Name of the tool to run; also reported as the command type.
    tool_name: String,
    /// JSON arguments passed to the tool; also reported as the command payload.
    tool_args: Value,
    /// Execution context handed to the executor together with the arguments.
    tool_context: ToolContext,
    /// Optional skill registry consulted before execution to decide whether
    /// the tool is allowed by the registered instruction skills.
    skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
}
551
552impl ToolCommand {
553    /// Create a new ToolCommand
554    pub fn new(
555        tool_executor: Arc<ToolExecutor>,
556        tool_name: String,
557        tool_args: Value,
558        tool_context: ToolContext,
559        skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
560    ) -> Self {
561        Self {
562            tool_executor,
563            tool_name,
564            tool_args,
565            tool_context,
566            skill_registry,
567        }
568    }
569}
570
571#[async_trait]
572impl SessionCommand for ToolCommand {
573    async fn execute(&self) -> Result<Value> {
574        // Check skill-based tool permissions
575        if let Some(registry) = &self.skill_registry {
576            let instruction_skills = registry.by_kind(crate::skills::SkillKind::Instruction);
577
578            // If there are instruction skills with tool restrictions, check permissions
579            let has_restrictions = instruction_skills.iter().any(|s| s.allowed_tools.is_some());
580
581            if has_restrictions {
582                let mut allowed = false;
583
584                for skill in &instruction_skills {
585                    if skill.is_tool_allowed(&self.tool_name) {
586                        allowed = true;
587                        break;
588                    }
589                }
590
591                if !allowed {
592                    return Err(anyhow::anyhow!(
593                        "Tool '{}' is not allowed by any active skill. Active skills restrict tools to their allowed-tools lists.",
594                        self.tool_name
595                    ));
596                }
597            }
598        }
599
600        // Execute the tool
601        let result = self
602            .tool_executor
603            .execute_with_context(&self.tool_name, &self.tool_args, &self.tool_context)
604            .await?;
605        Ok(serde_json::json!({
606            "output": result.output,
607            "exit_code": result.exit_code,
608            "metadata": result.metadata,
609        }))
610    }
611
612    fn command_type(&self) -> &str {
613        &self.tool_name
614    }
615
616    fn payload(&self) -> Value {
617        self.tool_args.clone()
618    }
619}
620
621// ============================================================================
622// AgentLoop
623// ============================================================================
624
/// Agent loop executor
///
/// Created with `AgentLoop::new` and then optionally extended through the
/// `with_*` builder methods, each of which fills one of the optional fields
/// below. The type is `Clone`; fields held in `Arc` are shared between clones.
#[derive(Clone)]
pub struct AgentLoop {
    /// LLM client (presumably used to generate each turn's response — the
    /// call site is outside this excerpt).
    llm_client: Arc<dyn LlmClient>,
    /// Executor used to run tools.
    tool_executor: Arc<ToolExecutor>,
    /// Tool execution context stored at construction time.
    tool_context: ToolContext,
    /// Loop configuration (limits, timeouts, optional providers).
    config: AgentConfig,
    /// Optional per-session tool metrics collector
    tool_metrics: Option<Arc<RwLock<crate::telemetry::ToolMetrics>>>,
    /// Optional lane queue for priority-based tool execution
    command_queue: Option<Arc<SessionLaneQueue>>,
    /// Optional progress tracker for real-time tool/token usage tracking
    progress_tracker: Option<Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>>,
    /// Optional task manager for centralized task lifecycle tracking
    task_manager: Option<Arc<crate::task::TaskManager>>,
}
641
642impl AgentLoop {
643    pub fn new(
644        llm_client: Arc<dyn LlmClient>,
645        tool_executor: Arc<ToolExecutor>,
646        tool_context: ToolContext,
647        config: AgentConfig,
648    ) -> Self {
649        Self {
650            llm_client,
651            tool_executor,
652            tool_context,
653            config,
654            tool_metrics: None,
655            command_queue: None,
656            progress_tracker: None,
657            task_manager: None,
658        }
659    }
660
661    /// Set the progress tracker for real-time tool/token usage tracking.
662    pub fn with_progress_tracker(
663        mut self,
664        tracker: Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>,
665    ) -> Self {
666        self.progress_tracker = Some(tracker);
667        self
668    }
669
670    /// Set the task manager for centralized task lifecycle tracking.
671    pub fn with_task_manager(mut self, manager: Arc<crate::task::TaskManager>) -> Self {
672        self.task_manager = Some(manager);
673        self
674    }
675
676    /// Set the tool metrics collector for this agent loop
677    pub fn with_tool_metrics(
678        mut self,
679        metrics: Arc<RwLock<crate::telemetry::ToolMetrics>>,
680    ) -> Self {
681        self.tool_metrics = Some(metrics);
682        self
683    }
684
685    /// Set the lane queue for priority-based tool execution.
686    ///
687    /// When set, tools are routed through the lane queue which supports
688    /// External task handling for multi-machine parallel processing.
689    pub fn with_queue(mut self, queue: Arc<SessionLaneQueue>) -> Self {
690        self.command_queue = Some(queue);
691        self
692    }
693
694    /// Track a tool call result in the progress tracker.
695    fn track_tool_result(&self, tool_name: &str, args: &serde_json::Value, exit_code: i32) {
696        if let Some(ref tracker) = self.progress_tracker {
697            let args_summary = Self::compact_json_args(args);
698            let success = exit_code == 0;
699            if let Ok(mut guard) = tracker.try_write() {
700                guard.track_tool_call(tool_name, args_summary, success);
701            }
702        }
703    }
704
705    /// Compact JSON arguments to a short summary string for tracking.
706    fn compact_json_args(args: &serde_json::Value) -> String {
707        let raw = match args {
708            serde_json::Value::Null => String::new(),
709            serde_json::Value::String(s) => s.clone(),
710            _ => serde_json::to_string(args).unwrap_or_default(),
711        };
712        let compact = raw.split_whitespace().collect::<Vec<_>>().join(" ");
713        if compact.len() > 180 {
714            format!("{}...", &compact[..180])
715        } else {
716            compact
717        }
718    }
719
720    /// Execute a tool, applying the configured timeout if set.
721    ///
722    /// On timeout, returns an error describing which tool timed out and after
723    /// how many milliseconds. The caller converts this to a tool-result error
724    /// message that is fed back to the LLM.
725    async fn execute_tool_timed(
726        &self,
727        name: &str,
728        args: &serde_json::Value,
729        ctx: &ToolContext,
730    ) -> anyhow::Result<crate::tools::ToolResult> {
731        let fut = self.tool_executor.execute_with_context(name, args, ctx);
732        if let Some(timeout_ms) = self.config.tool_timeout_ms {
733            match tokio::time::timeout(Duration::from_millis(timeout_ms), fut).await {
734                Ok(result) => result,
735                Err(_) => Err(anyhow::anyhow!(
736                    "Tool '{}' timed out after {}ms",
737                    name,
738                    timeout_ms
739                )),
740            }
741        } else {
742            fut.await
743        }
744    }
745
746    /// Convert a tool execution result into the (output, exit_code, is_error, metadata, images) tuple.
747    fn tool_result_to_tuple(
748        result: anyhow::Result<crate::tools::ToolResult>,
749    ) -> (
750        String,
751        i32,
752        bool,
753        Option<serde_json::Value>,
754        Vec<crate::llm::Attachment>,
755    ) {
756        match result {
757            Ok(r) => (
758                r.output,
759                r.exit_code,
760                r.exit_code != 0,
761                r.metadata,
762                r.images,
763            ),
764            Err(e) => {
765                let msg = e.to_string();
766                // Classify the error so the LLM knows whether retrying makes sense.
767                let hint = if Self::is_transient_error(&msg) {
768                    " [transient — you may retry this tool call]"
769                } else {
770                    " [permanent — do not retry without changing the arguments]"
771                };
772                (
773                    format!("Tool execution error: {}{}", msg, hint),
774                    1,
775                    true,
776                    None,
777                    Vec::new(),
778                )
779            }
780        }
781    }
782
783    /// Inspect the workspace for well-known project marker files and return a short
784    /// `## Project Context` section that the agent can use without any manual configuration.
785    /// Returns an empty string when the workspace type cannot be determined.
786    fn detect_project_hint(workspace: &std::path::Path) -> String {
787        struct Marker {
788            file: &'static str,
789            lang: &'static str,
790            tip: &'static str,
791        }
792
793        let markers = [
794            Marker {
795                file: "Cargo.toml",
796                lang: "Rust",
797                tip: "Use `cargo build`, `cargo test`, `cargo clippy`, and `cargo fmt`. \
798                  Prefer `anyhow` / `thiserror` for error handling. \
799                  Follow the Microsoft Rust Guidelines (no panics in library code, \
800                  async-first with Tokio).",
801            },
802            Marker {
803                file: "package.json",
804                lang: "Node.js / TypeScript",
805                tip: "Check `package.json` for the package manager (npm/yarn/pnpm/bun) \
806                  and available scripts. Prefer TypeScript with strict mode. \
807                  Use ESM imports unless the project is CommonJS.",
808            },
809            Marker {
810                file: "pyproject.toml",
811                lang: "Python",
812                tip: "Use the package manager declared in `pyproject.toml` \
813                  (uv, poetry, hatch, etc.). Prefer type hints and async/await for I/O.",
814            },
815            Marker {
816                file: "setup.py",
817                lang: "Python",
818                tip: "Legacy Python project. Prefer type hints and async/await for I/O.",
819            },
820            Marker {
821                file: "requirements.txt",
822                lang: "Python",
823                tip: "Python project with pip-style dependencies. \
824                  Prefer type hints and async/await for I/O.",
825            },
826            Marker {
827                file: "go.mod",
828                lang: "Go",
829                tip: "Use `go build ./...` and `go test ./...`. \
830                  Follow standard Go project layout. Use `gofmt` for formatting.",
831            },
832            Marker {
833                file: "pom.xml",
834                lang: "Java / Maven",
835                tip: "Use `mvn compile`, `mvn test`, `mvn package`. \
836                  Follow standard Maven project structure.",
837            },
838            Marker {
839                file: "build.gradle",
840                lang: "Java / Gradle",
841                tip: "Use `./gradlew build` and `./gradlew test`. \
842                  Follow standard Gradle project structure.",
843            },
844            Marker {
845                file: "build.gradle.kts",
846                lang: "Kotlin / Gradle",
847                tip: "Use `./gradlew build` and `./gradlew test`. \
848                  Prefer Kotlin coroutines for async work.",
849            },
850            Marker {
851                file: "CMakeLists.txt",
852                lang: "C / C++",
853                tip: "Use `cmake -B build && cmake --build build`. \
854                  Check for `compile_commands.json` for IDE tooling.",
855            },
856            Marker {
857                file: "Makefile",
858                lang: "C / C++ (or generic)",
859                tip: "Use `make` or `make <target>`. \
860                  Check available targets with `make help` or by reading the Makefile.",
861            },
862        ];
863
864        // Check for C# / .NET — no single fixed filename, so glob for *.csproj / *.sln
865        let is_dotnet = workspace.join("*.csproj").exists() || {
866            // Fast check: look for any .csproj or .sln in the workspace root
867            std::fs::read_dir(workspace)
868                .map(|entries| {
869                    entries.flatten().any(|e| {
870                        let name = e.file_name();
871                        let s = name.to_string_lossy();
872                        s.ends_with(".csproj") || s.ends_with(".sln")
873                    })
874                })
875                .unwrap_or(false)
876        };
877
878        if is_dotnet {
879            return "## Project Context\n\nThis is a **C# / .NET** project. \
880             Use `dotnet build`, `dotnet test`, and `dotnet run`. \
881             Follow C# coding conventions and async/await patterns."
882                .to_string();
883        }
884
885        for marker in &markers {
886            if workspace.join(marker.file).exists() {
887                return format!(
888                    "## Project Context\n\nThis is a **{}** project. {}",
889                    marker.lang, marker.tip
890                );
891            }
892        }
893
894        String::new()
895    }
896
897    /// Returns `true` for errors that are likely transient (network, timeout, I/O contention).
898    /// Used to annotate tool error messages so the LLM knows whether retrying is safe.
899    fn is_transient_error(msg: &str) -> bool {
900        let lower = msg.to_lowercase();
901        lower.contains("timeout")
902        || lower.contains("timed out")
903        || lower.contains("connection refused")
904        || lower.contains("connection reset")
905        || lower.contains("broken pipe")
906        || lower.contains("temporarily unavailable")
907        || lower.contains("resource temporarily unavailable")
908        || lower.contains("os error 11")  // EAGAIN
909        || lower.contains("os error 35")  // EAGAIN on macOS
910        || lower.contains("rate limit")
911        || lower.contains("too many requests")
912        || lower.contains("service unavailable")
913        || lower.contains("network unreachable")
914    }
915
916    /// Returns `true` when a tool writes a file and is safe to run concurrently with other
917    /// independent writes (no ordering dependencies, no side-channel output).
918    fn is_parallel_safe_write(name: &str, _args: &serde_json::Value) -> bool {
919        matches!(
920            name,
921            "write_file" | "edit_file" | "create_file" | "append_to_file" | "replace_in_file"
922        )
923    }
924
925    /// Extract the target file path from write-tool arguments so we can check for conflicts.
926    fn extract_write_path(args: &serde_json::Value) -> Option<String> {
927        // write_file / create_file / append_to_file / replace_in_file use "path"
928        // edit_file uses "path" as well
929        args.get("path")
930            .and_then(|v| v.as_str())
931            .map(|s| s.to_string())
932    }
933
934    /// Execute a tool through the lane queue (if configured) or directly.
935    /// Wraps execution in task lifecycle if task_manager is configured.
936    async fn execute_tool_queued_or_direct(
937        &self,
938        name: &str,
939        args: &serde_json::Value,
940        ctx: &ToolContext,
941    ) -> anyhow::Result<crate::tools::ToolResult> {
942        // Create task for this tool execution if task_manager is available
943        let task_id = if let Some(ref tm) = self.task_manager {
944            let task = crate::task::Task::tool(name, args.clone());
945            let id = task.id;
946            tm.spawn(task);
947            // Start the task immediately
948            let _ = tm.start(id);
949            Some(id)
950        } else {
951            None
952        };
953
954        let result = self
955            .execute_tool_queued_or_direct_inner(name, args, ctx)
956            .await;
957
958        // Complete or fail the task based on result
959        if let Some(ref tm) = self.task_manager {
960            if let Some(tid) = task_id {
961                match &result {
962                    Ok(r) => {
963                        let output = serde_json::json!({
964                            "output": r.output.clone(),
965                            "exit_code": r.exit_code,
966                        });
967                        let _ = tm.complete(tid, Some(output));
968                    }
969                    Err(e) => {
970                        let _ = tm.fail(tid, e.to_string());
971                    }
972                }
973            }
974        }
975
976        result
977    }
978
979    /// Inner execution without task lifecycle wrapping.
980    async fn execute_tool_queued_or_direct_inner(
981        &self,
982        name: &str,
983        args: &serde_json::Value,
984        ctx: &ToolContext,
985    ) -> anyhow::Result<crate::tools::ToolResult> {
986        if let Some(ref queue) = self.command_queue {
987            let command = ToolCommand::new(
988                Arc::clone(&self.tool_executor),
989                name.to_string(),
990                args.clone(),
991                ctx.clone(),
992                self.config.skill_registry.clone(),
993            );
994            let rx = queue.submit_by_tool(name, Box::new(command)).await;
995            match rx.await {
996                Ok(Ok(value)) => {
997                    let output = value["output"]
998                        .as_str()
999                        .ok_or_else(|| {
1000                            anyhow::anyhow!(
1001                                "Queue result missing 'output' field for tool '{}'",
1002                                name
1003                            )
1004                        })?
1005                        .to_string();
1006                    let exit_code = value["exit_code"].as_i64().unwrap_or(0) as i32;
1007                    return Ok(crate::tools::ToolResult {
1008                        name: name.to_string(),
1009                        output,
1010                        exit_code,
1011                        metadata: None,
1012                        images: Vec::new(),
1013                    });
1014                }
1015                Ok(Err(e)) => {
1016                    tracing::warn!(
1017                        "Queue execution failed for tool '{}', falling back to direct: {}",
1018                        name,
1019                        e
1020                    );
1021                }
1022                Err(_) => {
1023                    tracing::warn!(
1024                        "Queue channel closed for tool '{}', falling back to direct",
1025                        name
1026                    );
1027                }
1028            }
1029        }
1030        self.execute_tool_timed(name, args, ctx).await
1031    }
1032
1033    /// Call the LLM, handling streaming vs non-streaming internally.
1034    ///
1035    /// Streaming events (`TextDelta`, `ToolStart`) are forwarded to `event_tx`
1036    /// as they arrive. Non-streaming mode simply awaits the complete response.
1037    ///
1038    /// When a `ToolIndex` is configured, tools are filtered per-turn based on
1039    /// the last user message, reducing context usage with large tool sets.
1040    ///
1041    /// Returns `Err` on any LLM API failure. The circuit breaker in
1042    /// `execute_loop` wraps this call with retry logic for non-streaming mode.
1043    async fn call_llm(
1044        &self,
1045        messages: &[Message],
1046        system: Option<&str>,
1047        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1048        cancel_token: &tokio_util::sync::CancellationToken,
1049    ) -> anyhow::Result<LlmResponse> {
1050        // Filter tools through ToolIndex if configured
1051        let tools = if let Some(ref index) = self.config.tool_index {
1052            let query = messages
1053                .iter()
1054                .rev()
1055                .find(|m| m.role == "user")
1056                .and_then(|m| {
1057                    m.content.iter().find_map(|b| match b {
1058                        crate::llm::ContentBlock::Text { text } => Some(text.as_str()),
1059                        _ => None,
1060                    })
1061                })
1062                .unwrap_or("");
1063            let matches = index.search(query, index.len());
1064            let matched_names: std::collections::HashSet<&str> =
1065                matches.iter().map(|m| m.name.as_str()).collect();
1066            self.config
1067                .tools
1068                .iter()
1069                .filter(|t| matched_names.contains(t.name.as_str()))
1070                .cloned()
1071                .collect::<Vec<_>>()
1072        } else {
1073            self.config.tools.clone()
1074        };
1075
1076        if event_tx.is_some() {
1077            let mut stream_rx = match self
1078                .llm_client
1079                .complete_streaming(messages, system, &tools)
1080                .await
1081            {
1082                Ok(rx) => rx,
1083                Err(stream_error) => {
1084                    tracing::warn!(
1085                        error = %stream_error,
1086                        "LLM streaming setup failed; falling back to non-streaming completion"
1087                    );
1088                    return self
1089                        .llm_client
1090                        .complete(messages, system, &tools)
1091                        .await
1092                        .with_context(|| {
1093                            format!(
1094                                "LLM streaming call failed ({stream_error}); non-streaming fallback also failed"
1095                            )
1096                        });
1097                }
1098            };
1099
1100            let mut final_response: Option<LlmResponse> = None;
1101            loop {
1102                tokio::select! {
1103                    _ = cancel_token.cancelled() => {
1104                        tracing::info!("🛑 LLM streaming cancelled by CancellationToken");
1105                        anyhow::bail!("Operation cancelled by user");
1106                    }
1107                    event = stream_rx.recv() => {
1108                        match event {
1109                            Some(crate::llm::StreamEvent::TextDelta(text)) => {
1110                                if let Some(tx) = event_tx {
1111                                    tx.send(AgentEvent::TextDelta { text }).await.ok();
1112                                }
1113                            }
1114                            Some(crate::llm::StreamEvent::ToolUseStart { id, name }) => {
1115                                if let Some(tx) = event_tx {
1116                                    tx.send(AgentEvent::ToolStart { id, name }).await.ok();
1117                                }
1118                            }
1119                            Some(crate::llm::StreamEvent::ToolUseInputDelta(delta)) => {
1120                                if let Some(tx) = event_tx {
1121                                    tx.send(AgentEvent::ToolInputDelta { delta }).await.ok();
1122                                }
1123                            }
1124                            Some(crate::llm::StreamEvent::Done(resp)) => {
1125                                final_response = Some(resp);
1126                                break;
1127                            }
1128                            None => break,
1129                        }
1130                    }
1131                }
1132            }
1133            final_response.context("Stream ended without final response")
1134        } else {
1135            self.llm_client
1136                .complete(messages, system, &tools)
1137                .await
1138                .context("LLM call failed")
1139        }
1140    }
1141
1142    /// Create a tool context with streaming support.
1143    ///
1144    /// When `event_tx` is Some, spawns a forwarder task that converts
1145    /// `ToolStreamEvent::OutputDelta` into `AgentEvent::ToolOutputDelta`
1146    /// and sends them to the agent event channel.
1147    ///
1148    /// Returns the augmented `ToolContext`. The forwarder task runs until
1149    /// the tool-side sender is dropped (i.e., tool execution finishes).
1150    fn streaming_tool_context(
1151        &self,
1152        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1153        tool_id: &str,
1154        tool_name: &str,
1155    ) -> ToolContext {
1156        let mut ctx = self.tool_context.clone();
1157        if let Some(agent_tx) = event_tx {
1158            let (tool_tx, mut tool_rx) = mpsc::channel::<ToolStreamEvent>(64);
1159            ctx.event_tx = Some(tool_tx);
1160
1161            let agent_tx = agent_tx.clone();
1162            let tool_id = tool_id.to_string();
1163            let tool_name = tool_name.to_string();
1164            tokio::spawn(async move {
1165                while let Some(event) = tool_rx.recv().await {
1166                    match event {
1167                        ToolStreamEvent::OutputDelta(delta) => {
1168                            agent_tx
1169                                .send(AgentEvent::ToolOutputDelta {
1170                                    id: tool_id.clone(),
1171                                    name: tool_name.clone(),
1172                                    delta,
1173                                })
1174                                .await
1175                                .ok();
1176                        }
1177                    }
1178                }
1179            });
1180        }
1181        ctx
1182    }
1183
1184    /// Resolve context from all providers for a given prompt
1185    ///
1186    /// Returns aggregated context results from all configured providers.
1187    async fn resolve_context(&self, prompt: &str, session_id: Option<&str>) -> Vec<ContextResult> {
1188        if self.config.context_providers.is_empty() {
1189            return Vec::new();
1190        }
1191
1192        let query = ContextQuery::new(prompt).with_session_id(session_id.unwrap_or(""));
1193
1194        let futures = self
1195            .config
1196            .context_providers
1197            .iter()
1198            .map(|p| p.query(&query));
1199        let outcomes = join_all(futures).await;
1200
1201        outcomes
1202            .into_iter()
1203            .enumerate()
1204            .filter_map(|(i, r)| match r {
1205                Ok(result) if !result.is_empty() => Some(result),
1206                Ok(_) => None,
1207                Err(e) => {
1208                    tracing::warn!(
1209                        "Context provider '{}' failed: {}",
1210                        self.config.context_providers[i].name(),
1211                        e
1212                    );
1213                    None
1214                }
1215            })
1216            .collect()
1217    }
1218
1219    /// Detect whether the LLM's no-tool-call response looks like an intermediate
1220    /// step rather than a genuine final answer.
1221    ///
1222    /// Returns `true` when continuation should be injected. Heuristics:
1223    /// - Response ends with a colon or ellipsis (mid-thought)
1224    /// - Response contains phrases that signal incomplete work
1225    /// - Response is very short (< 80 chars) and doesn't look like a summary
1226    fn looks_incomplete(text: &str) -> bool {
1227        let t = text.trim();
1228        if t.is_empty() {
1229            return true;
1230        }
1231        // Very short responses that aren't clearly a final answer
1232        if t.len() < 80 && !t.contains('\n') {
1233            // Short single-line — could be a genuine one-liner answer, keep going
1234            // only if it ends with punctuation that signals continuation
1235            let ends_continuation =
1236                t.ends_with(':') || t.ends_with("...") || t.ends_with('…') || t.ends_with(',');
1237            if ends_continuation {
1238                return true;
1239            }
1240        }
1241        // Phrases that signal the LLM is describing what it will do rather than doing it
1242        let incomplete_phrases = [
1243            "i'll ",
1244            "i will ",
1245            "let me ",
1246            "i need to ",
1247            "i should ",
1248            "next, i",
1249            "first, i",
1250            "now i",
1251            "i'll start",
1252            "i'll begin",
1253            "i'll now",
1254            "let's start",
1255            "let's begin",
1256            "to do this",
1257            "i'm going to",
1258        ];
1259        let lower = t.to_lowercase();
1260        for phrase in &incomplete_phrases {
1261            if lower.contains(phrase) {
1262                return true;
1263            }
1264        }
1265        false
1266    }
1267
1268    /// Get the assembled system prompt from slots.
1269    fn system_prompt(&self) -> String {
1270        self.config.prompt_slots.build()
1271    }
1272
1273    /// Build augmented system prompt with context
1274    fn build_augmented_system_prompt(&self, context_results: &[ContextResult]) -> Option<String> {
1275        let base = self.system_prompt();
1276
1277        // Use live tool executor definitions so tools added via add_mcp_server() are included
1278        let live_tools = self.tool_executor.definitions();
1279        let mcp_tools: Vec<&ToolDefinition> = live_tools
1280            .iter()
1281            .filter(|t| t.name.starts_with("mcp__"))
1282            .collect();
1283
1284        let mcp_section = if mcp_tools.is_empty() {
1285            String::new()
1286        } else {
1287            let mut lines = vec![
1288                "## MCP Tools".to_string(),
1289                String::new(),
1290                "The following MCP (Model Context Protocol) tools are available. Use them when the task requires external capabilities beyond the built-in tools:".to_string(),
1291                String::new(),
1292            ];
1293            for tool in &mcp_tools {
1294                let display = format!("- `{}` — {}", tool.name, tool.description);
1295                lines.push(display);
1296            }
1297            lines.join("\n")
1298        };
1299
1300        let parts: Vec<&str> = [base.as_str(), mcp_section.as_str()]
1301            .iter()
1302            .filter(|s| !s.is_empty())
1303            .copied()
1304            .collect();
1305
1306        // Auto-detect project type from workspace and inject language-specific guidelines,
1307        // but only when the user hasn't already set a custom `guidelines` slot.
1308        let project_hint = if self.config.prompt_slots.guidelines.is_none() {
1309            Self::detect_project_hint(&self.tool_context.workspace)
1310        } else {
1311            String::new()
1312        };
1313
1314        if context_results.is_empty() {
1315            if project_hint.is_empty() {
1316                return Some(parts.join("\n\n"));
1317            }
1318            return Some(format!("{}\n\n{}", parts.join("\n\n"), project_hint));
1319        }
1320
1321        // Build context XML block
1322        let context_xml: String = context_results
1323            .iter()
1324            .map(|r| r.to_xml())
1325            .collect::<Vec<_>>()
1326            .join("\n\n");
1327
1328        if project_hint.is_empty() {
1329            Some(format!("{}\n\n{}", parts.join("\n\n"), context_xml))
1330        } else {
1331            Some(format!(
1332                "{}\n\n{}\n\n{}",
1333                parts.join("\n\n"),
1334                project_hint,
1335                context_xml
1336            ))
1337        }
1338    }
1339
1340    /// Notify providers of turn completion for memory extraction
1341    async fn notify_turn_complete(&self, session_id: &str, prompt: &str, response: &str) {
1342        let futures = self
1343            .config
1344            .context_providers
1345            .iter()
1346            .map(|p| p.on_turn_complete(session_id, prompt, response));
1347        let outcomes = join_all(futures).await;
1348
1349        for (i, result) in outcomes.into_iter().enumerate() {
1350            if let Err(e) = result {
1351                tracing::warn!(
1352                    "Context provider '{}' on_turn_complete failed: {}",
1353                    self.config.context_providers[i].name(),
1354                    e
1355                );
1356            }
1357        }
1358    }
1359
1360    /// Fire PreToolUse hook event before tool execution.
1361    /// Returns the HookResult which may block the tool call.
1362    async fn fire_pre_tool_use(
1363        &self,
1364        session_id: &str,
1365        tool_name: &str,
1366        args: &serde_json::Value,
1367        recent_tools: Vec<String>,
1368    ) -> Option<HookResult> {
1369        if let Some(he) = &self.config.hook_engine {
1370            let event = HookEvent::PreToolUse(PreToolUseEvent {
1371                session_id: session_id.to_string(),
1372                tool: tool_name.to_string(),
1373                args: args.clone(),
1374                working_directory: self.tool_context.workspace.to_string_lossy().to_string(),
1375                recent_tools,
1376            });
1377            let result = he.fire(&event).await;
1378            if result.is_block() {
1379                return Some(result);
1380            }
1381        }
1382        None
1383    }
1384
1385    /// Fire PostToolUse hook event after tool execution (fire-and-forget).
1386    async fn fire_post_tool_use(
1387        &self,
1388        session_id: &str,
1389        tool_name: &str,
1390        args: &serde_json::Value,
1391        output: &str,
1392        success: bool,
1393        duration_ms: u64,
1394    ) {
1395        if let Some(he) = &self.config.hook_engine {
1396            let event = HookEvent::PostToolUse(PostToolUseEvent {
1397                session_id: session_id.to_string(),
1398                tool: tool_name.to_string(),
1399                args: args.clone(),
1400                result: ToolResultData {
1401                    success,
1402                    output: output.to_string(),
1403                    exit_code: if success { Some(0) } else { Some(1) },
1404                    duration_ms,
1405                },
1406            });
1407            let he = Arc::clone(he);
1408            tokio::spawn(async move {
1409                let _ = he.fire(&event).await;
1410            });
1411        }
1412    }
1413
1414    /// Fire GenerateStart hook event before an LLM call.
1415    async fn fire_generate_start(
1416        &self,
1417        session_id: &str,
1418        prompt: &str,
1419        system_prompt: &Option<String>,
1420    ) {
1421        if let Some(he) = &self.config.hook_engine {
1422            let event = HookEvent::GenerateStart(GenerateStartEvent {
1423                session_id: session_id.to_string(),
1424                prompt: prompt.to_string(),
1425                system_prompt: system_prompt.clone(),
1426                model_provider: String::new(),
1427                model_name: String::new(),
1428                available_tools: self.config.tools.iter().map(|t| t.name.clone()).collect(),
1429            });
1430            let _ = he.fire(&event).await;
1431        }
1432    }
1433
1434    /// Fire GenerateEnd hook event after an LLM call.
1435    async fn fire_generate_end(
1436        &self,
1437        session_id: &str,
1438        prompt: &str,
1439        response: &LlmResponse,
1440        duration_ms: u64,
1441    ) {
1442        if let Some(he) = &self.config.hook_engine {
1443            let tool_calls: Vec<ToolCallInfo> = response
1444                .tool_calls()
1445                .iter()
1446                .map(|tc| ToolCallInfo {
1447                    name: tc.name.clone(),
1448                    args: tc.args.clone(),
1449                })
1450                .collect();
1451
1452            let event = HookEvent::GenerateEnd(GenerateEndEvent {
1453                session_id: session_id.to_string(),
1454                prompt: prompt.to_string(),
1455                response_text: response.text().to_string(),
1456                tool_calls,
1457                usage: TokenUsageInfo {
1458                    prompt_tokens: response.usage.prompt_tokens as i32,
1459                    completion_tokens: response.usage.completion_tokens as i32,
1460                    total_tokens: response.usage.total_tokens as i32,
1461                },
1462                duration_ms,
1463            });
1464            let _ = he.fire(&event).await;
1465        }
1466    }
1467
1468    /// Fire PrePrompt hook event before prompt augmentation.
1469    /// Returns optional modified prompt text from the hook.
1470    async fn fire_pre_prompt(
1471        &self,
1472        session_id: &str,
1473        prompt: &str,
1474        system_prompt: &Option<String>,
1475        message_count: usize,
1476    ) -> Option<String> {
1477        if let Some(he) = &self.config.hook_engine {
1478            let event = HookEvent::PrePrompt(PrePromptEvent {
1479                session_id: session_id.to_string(),
1480                prompt: prompt.to_string(),
1481                system_prompt: system_prompt.clone(),
1482                message_count,
1483            });
1484            let result = he.fire(&event).await;
1485            if let HookResult::Continue(Some(modified)) = result {
1486                // Extract modified prompt from hook response
1487                if let Some(new_prompt) = modified.get("prompt").and_then(|v| v.as_str()) {
1488                    return Some(new_prompt.to_string());
1489                }
1490            }
1491        }
1492        None
1493    }
1494
1495    /// Fire PostResponse hook event after the agent loop completes.
1496    async fn fire_post_response(
1497        &self,
1498        session_id: &str,
1499        response_text: &str,
1500        tool_calls_count: usize,
1501        usage: &TokenUsage,
1502        duration_ms: u64,
1503    ) {
1504        if let Some(he) = &self.config.hook_engine {
1505            let event = HookEvent::PostResponse(PostResponseEvent {
1506                session_id: session_id.to_string(),
1507                response_text: response_text.to_string(),
1508                tool_calls_count,
1509                usage: TokenUsageInfo {
1510                    prompt_tokens: usage.prompt_tokens as i32,
1511                    completion_tokens: usage.completion_tokens as i32,
1512                    total_tokens: usage.total_tokens as i32,
1513                },
1514                duration_ms,
1515            });
1516            let he = Arc::clone(he);
1517            tokio::spawn(async move {
1518                let _ = he.fire(&event).await;
1519            });
1520        }
1521    }
1522
1523    /// Fire OnError hook event when an error occurs.
1524    async fn fire_on_error(
1525        &self,
1526        session_id: &str,
1527        error_type: ErrorType,
1528        error_message: &str,
1529        context: serde_json::Value,
1530    ) {
1531        if let Some(he) = &self.config.hook_engine {
1532            let event = HookEvent::OnError(OnErrorEvent {
1533                session_id: session_id.to_string(),
1534                error_type,
1535                error_message: error_message.to_string(),
1536                context,
1537            });
1538            let he = Arc::clone(he);
1539            tokio::spawn(async move {
1540                let _ = he.fire(&event).await;
1541            });
1542        }
1543    }
1544
1545    /// Execute the agent loop for a prompt
1546    ///
1547    /// Takes the conversation history and a new user prompt.
1548    /// Returns the agent result and updated message history.
1549    /// When event_tx is provided, uses streaming LLM API for real-time text output.
1550    pub async fn execute(
1551        &self,
1552        history: &[Message],
1553        prompt: &str,
1554        event_tx: Option<mpsc::Sender<AgentEvent>>,
1555    ) -> Result<AgentResult> {
1556        self.execute_with_session(history, prompt, None, event_tx, None)
1557            .await
1558    }
1559
1560    /// Execute the agent loop with pre-built messages (user message already included).
1561    ///
1562    /// Used by `send_with_attachments` / `stream_with_attachments` where the
1563    /// user message contains multi-modal content and is already appended to
1564    /// the messages vec.
1565    pub async fn execute_from_messages(
1566        &self,
1567        messages: Vec<Message>,
1568        session_id: Option<&str>,
1569        event_tx: Option<mpsc::Sender<AgentEvent>>,
1570        cancel_token: Option<&tokio_util::sync::CancellationToken>,
1571    ) -> Result<AgentResult> {
1572        let default_token = tokio_util::sync::CancellationToken::new();
1573        let token = cancel_token.unwrap_or(&default_token);
1574        tracing::info!(
1575            a3s.session.id = session_id.unwrap_or("none"),
1576            a3s.agent.max_turns = self.config.max_tool_rounds,
1577            "a3s.agent.execute_from_messages started"
1578        );
1579
1580        // Extract the last user message text for hooks, memory recall, and events.
1581        // Pass empty prompt so execute_loop skips adding a duplicate user message,
1582        // but provide effective_prompt for hook/memory/event purposes.
1583        let effective_prompt = messages
1584            .iter()
1585            .rev()
1586            .find(|m| m.role == "user")
1587            .map(|m| m.text())
1588            .unwrap_or_default();
1589
1590        let result = self
1591            .execute_loop_inner(
1592                &messages,
1593                "",
1594                &effective_prompt,
1595                session_id,
1596                event_tx,
1597                token,
1598                true, // emit_end: this is a standalone execution
1599            )
1600            .await;
1601
1602        match &result {
1603            Ok(r) => tracing::info!(
1604                a3s.agent.tool_calls_count = r.tool_calls_count,
1605                a3s.llm.total_tokens = r.usage.total_tokens,
1606                "a3s.agent.execute_from_messages completed"
1607            ),
1608            Err(e) => tracing::warn!(
1609                error = %e,
1610                "a3s.agent.execute_from_messages failed"
1611            ),
1612        }
1613
1614        result
1615    }
1616
1617    /// Execute the agent loop for a prompt with session context
1618    ///
1619    /// Takes the conversation history, user prompt, and optional session ID.
1620    /// When session_id is provided, context providers can use it for session-specific context.
1621    pub async fn execute_with_session(
1622        &self,
1623        history: &[Message],
1624        prompt: &str,
1625        session_id: Option<&str>,
1626        event_tx: Option<mpsc::Sender<AgentEvent>>,
1627        cancel_token: Option<&tokio_util::sync::CancellationToken>,
1628    ) -> Result<AgentResult> {
1629        let default_token = tokio_util::sync::CancellationToken::new();
1630        let token = cancel_token.unwrap_or(&default_token);
1631        tracing::info!(
1632            a3s.session.id = session_id.unwrap_or("none"),
1633            a3s.agent.max_turns = self.config.max_tool_rounds,
1634            "a3s.agent.execute started"
1635        );
1636
1637        // Determine whether to use planning mode
1638        let use_planning = if self.config.planning_mode == PlanningMode::Auto {
1639            // Auto mode: use keyword confidence first, fallback to LLM classification
1640            let (style, confidence) = AgentStyle::detect_with_confidence(prompt);
1641            if confidence == DetectionConfidence::Low {
1642                // Low confidence — use LLM classification
1643                match AgentStyle::detect_with_llm(self.llm_client.as_ref(), prompt).await {
1644                    Ok(classified_style) => {
1645                        tracing::debug!(
1646                            intent.classification = ?classified_style,
1647                            intent.source = "llm",
1648                            "Intent classified via LLM"
1649                        );
1650                        classified_style.requires_planning()
1651                    }
1652                    Err(e) => {
1653                        tracing::warn!(error = %e, "LLM intent classification failed, using keyword detection");
1654                        style.requires_planning()
1655                    }
1656                }
1657            } else {
1658                // High/Medium confidence — use keyword detection directly
1659                style.requires_planning()
1660            }
1661        } else {
1662            // Explicit mode: Enabled or Disabled
1663            self.config.planning_mode.should_plan(prompt)
1664        };
1665
1666        // Create agent task if task_manager is available
1667        let task_id = if let Some(ref tm) = self.task_manager {
1668            let workspace = self.tool_context.workspace.display().to_string();
1669            let task = crate::task::Task::agent("agent", &workspace, prompt);
1670            let id = task.id;
1671            tm.spawn(task);
1672            let _ = tm.start(id);
1673            Some(id)
1674        } else {
1675            None
1676        };
1677
1678        let result = if use_planning {
1679            self.execute_with_planning(history, prompt, event_tx).await
1680        } else {
1681            self.execute_loop(history, prompt, session_id, event_tx, token, true)
1682                .await
1683        };
1684
1685        // Complete or fail agent task based on result
1686        if let Some(ref tm) = self.task_manager {
1687            if let Some(tid) = task_id {
1688                match &result {
1689                    Ok(r) => {
1690                        let output = serde_json::json!({
1691                            "text": r.text,
1692                            "tool_calls_count": r.tool_calls_count,
1693                            "usage": r.usage,
1694                        });
1695                        let _ = tm.complete(tid, Some(output));
1696                    }
1697                    Err(e) => {
1698                        let _ = tm.fail(tid, e.to_string());
1699                    }
1700                }
1701            }
1702        }
1703
1704        match &result {
1705            Ok(r) => {
1706                tracing::info!(
1707                    a3s.agent.tool_calls_count = r.tool_calls_count,
1708                    a3s.llm.total_tokens = r.usage.total_tokens,
1709                    "a3s.agent.execute completed"
1710                );
1711                // Fire PostResponse hook
1712                self.fire_post_response(
1713                    session_id.unwrap_or(""),
1714                    &r.text,
1715                    r.tool_calls_count,
1716                    &r.usage,
1717                    0, // duration tracked externally
1718                )
1719                .await;
1720            }
1721            Err(e) => {
1722                tracing::warn!(
1723                    error = %e,
1724                    "a3s.agent.execute failed"
1725                );
1726                // Fire OnError hook
1727                self.fire_on_error(
1728                    session_id.unwrap_or(""),
1729                    ErrorType::Other,
1730                    &e.to_string(),
1731                    serde_json::json!({"phase": "execute"}),
1732                )
1733                .await;
1734            }
1735        }
1736
1737        result
1738    }
1739
1740    /// Core execution loop (without planning routing).
1741    ///
1742    /// This is the inner loop that runs LLM calls and tool executions.
1743    /// Called directly by `execute_with_session` (after planning check)
1744    /// and by `execute_plan` (for individual steps, bypassing planning).
1745    async fn execute_loop(
1746        &self,
1747        history: &[Message],
1748        prompt: &str,
1749        session_id: Option<&str>,
1750        event_tx: Option<mpsc::Sender<AgentEvent>>,
1751        cancel_token: &tokio_util::sync::CancellationToken,
1752        emit_end: bool,
1753    ) -> Result<AgentResult> {
1754        // When called via execute_loop, the prompt is used for both
1755        // message-adding and hook/memory/event purposes.
1756        self.execute_loop_inner(
1757            history,
1758            prompt,
1759            prompt,
1760            session_id,
1761            event_tx,
1762            cancel_token,
1763            emit_end,
1764        )
1765        .await
1766    }
1767
1768    /// Inner execution loop.
1769    ///
1770    /// `msg_prompt` controls whether a user message is appended (empty = skip).
1771    /// `effective_prompt` is used for hooks, memory recall, taint tracking, and events.
1772    /// `emit_end` controls whether to send `AgentEvent::End` when the loop completes
1773    /// (should be false when called from `execute_plan` to avoid duplicate End events).
1774    #[allow(clippy::too_many_arguments)]
1775    async fn execute_loop_inner(
1776        &self,
1777        history: &[Message],
1778        msg_prompt: &str,
1779        effective_prompt: &str,
1780        session_id: Option<&str>,
1781        event_tx: Option<mpsc::Sender<AgentEvent>>,
1782        cancel_token: &tokio_util::sync::CancellationToken,
1783        emit_end: bool,
1784    ) -> Result<AgentResult> {
1785        let mut messages = history.to_vec();
1786        let mut total_usage = TokenUsage::default();
1787        let mut tool_calls_count = 0;
1788        let mut turn = 0;
1789        // Consecutive malformed-tool-args errors (4.1 parse error recovery)
1790        let mut parse_error_count: u32 = 0;
1791        // Continuation injection counter
1792        let mut continuation_count: u32 = 0;
1793        let mut recent_tool_signatures: Vec<String> = Vec::new();
1794
1795        // Send start event
1796        if let Some(tx) = &event_tx {
1797            tx.send(AgentEvent::Start {
1798                prompt: effective_prompt.to_string(),
1799            })
1800            .await
1801            .ok();
1802        }
1803
1804        // Forward queue events (CommandDeadLettered, CommandRetry, QueueAlert) to event stream
1805        let _queue_forward_handle =
1806            if let (Some(ref queue), Some(ref tx)) = (&self.command_queue, &event_tx) {
1807                let mut rx = queue.subscribe();
1808                let tx = tx.clone();
1809                Some(tokio::spawn(async move {
1810                    while let Ok(event) = rx.recv().await {
1811                        if tx.send(event).await.is_err() {
1812                            break;
1813                        }
1814                    }
1815                }))
1816            } else {
1817                None
1818            };
1819
1820        // Fire PrePrompt hook (may modify the prompt)
1821        let built_system_prompt = Some(self.system_prompt());
1822        let hooked_prompt = if let Some(modified) = self
1823            .fire_pre_prompt(
1824                session_id.unwrap_or(""),
1825                effective_prompt,
1826                &built_system_prompt,
1827                messages.len(),
1828            )
1829            .await
1830        {
1831            modified
1832        } else {
1833            effective_prompt.to_string()
1834        };
1835        let effective_prompt = hooked_prompt.as_str();
1836
1837        // Taint-track the incoming prompt for sensitive data detection
1838        if let Some(ref sp) = self.config.security_provider {
1839            sp.taint_input(effective_prompt);
1840        }
1841
1842        // Recall relevant memories and inject into system prompt
1843        let system_with_memory = if let Some(ref memory) = self.config.memory {
1844            match memory.recall_similar(effective_prompt, 5).await {
1845                Ok(items) if !items.is_empty() => {
1846                    if let Some(tx) = &event_tx {
1847                        for item in &items {
1848                            tx.send(AgentEvent::MemoryRecalled {
1849                                memory_id: item.id.clone(),
1850                                content: item.content.clone(),
1851                                relevance: item.relevance_score(),
1852                            })
1853                            .await
1854                            .ok();
1855                        }
1856                        tx.send(AgentEvent::MemoriesSearched {
1857                            query: Some(effective_prompt.to_string()),
1858                            tags: Vec::new(),
1859                            result_count: items.len(),
1860                        })
1861                        .await
1862                        .ok();
1863                    }
1864                    let memory_context = items
1865                        .iter()
1866                        .map(|i| format!("- {}", i.content))
1867                        .collect::<Vec<_>>()
1868                        .join(
1869                            "
1870",
1871                        );
1872                    let base = self.system_prompt();
1873                    Some(format!(
1874                        "{}
1875
1876## Relevant past experience
1877{}",
1878                        base, memory_context
1879                    ))
1880                }
1881                _ => Some(self.system_prompt()),
1882            }
1883        } else {
1884            Some(self.system_prompt())
1885        };
1886
1887        // Resolve context from providers on first turn (before adding user message)
1888        let augmented_system = if !self.config.context_providers.is_empty() {
1889            // Send context resolving event
1890            if let Some(tx) = &event_tx {
1891                let provider_names: Vec<String> = self
1892                    .config
1893                    .context_providers
1894                    .iter()
1895                    .map(|p| p.name().to_string())
1896                    .collect();
1897                tx.send(AgentEvent::ContextResolving {
1898                    providers: provider_names,
1899                })
1900                .await
1901                .ok();
1902            }
1903
1904            tracing::info!(
1905                a3s.context.providers = self.config.context_providers.len() as i64,
1906                "Context resolution started"
1907            );
1908            let context_results = self.resolve_context(effective_prompt, session_id).await;
1909
1910            // Send context resolved event
1911            if let Some(tx) = &event_tx {
1912                let total_items: usize = context_results.iter().map(|r| r.items.len()).sum();
1913                let total_tokens: usize = context_results.iter().map(|r| r.total_tokens).sum();
1914
1915                tracing::info!(
1916                    context_items = total_items,
1917                    context_tokens = total_tokens,
1918                    "Context resolution completed"
1919                );
1920
1921                tx.send(AgentEvent::ContextResolved {
1922                    total_items,
1923                    total_tokens,
1924                })
1925                .await
1926                .ok();
1927            }
1928
1929            self.build_augmented_system_prompt(&context_results)
1930        } else {
1931            Some(self.system_prompt())
1932        };
1933
1934        // Merge memory context into system prompt
1935        let base_prompt = self.system_prompt();
1936        let augmented_system = match (augmented_system, system_with_memory) {
1937            (Some(ctx), Some(mem)) if ctx != mem => Some(ctx.replacen(&base_prompt, &mem, 1)),
1938            (Some(ctx), _) => Some(ctx),
1939            (None, mem) => mem,
1940        };
1941
1942        // Add user message
1943        if !msg_prompt.is_empty() {
1944            messages.push(Message::user(msg_prompt));
1945        }
1946
1947        loop {
1948            turn += 1;
1949
1950            if turn > self.config.max_tool_rounds {
1951                let error = format!("Max tool rounds ({}) exceeded", self.config.max_tool_rounds);
1952                if let Some(tx) = &event_tx {
1953                    tx.send(AgentEvent::Error {
1954                        message: error.clone(),
1955                    })
1956                    .await
1957                    .ok();
1958                }
1959                anyhow::bail!(error);
1960            }
1961
1962            // Send turn start event
1963            if let Some(tx) = &event_tx {
1964                tx.send(AgentEvent::TurnStart { turn }).await.ok();
1965            }
1966
1967            tracing::info!(
1968                turn = turn,
1969                max_turns = self.config.max_tool_rounds,
1970                "Agent turn started"
1971            );
1972
1973            // Call LLM - use streaming if we have an event channel
1974            tracing::info!(
1975                a3s.llm.streaming = event_tx.is_some(),
1976                "LLM completion started"
1977            );
1978
1979            // Fire GenerateStart hook
1980            self.fire_generate_start(
1981                session_id.unwrap_or(""),
1982                effective_prompt,
1983                &augmented_system,
1984            )
1985            .await;
1986
1987            let llm_start = std::time::Instant::now();
1988            // Circuit breaker (4.3): retry non-streaming LLM calls on transient failures.
1989            // Each failure increments `consecutive_llm_errors`; on success it resets to 0.
1990            // Streaming mode bails immediately on failure (events can't be replayed).
1991            let response = {
1992                let threshold = self.config.circuit_breaker_threshold.max(1);
1993                let mut attempt = 0u32;
1994                loop {
1995                    attempt += 1;
1996                    let result = self
1997                        .call_llm(
1998                            &messages,
1999                            augmented_system.as_deref(),
2000                            &event_tx,
2001                            cancel_token,
2002                        )
2003                        .await;
2004                    match result {
2005                        Ok(r) => {
2006                            break r;
2007                        }
2008                        // Never retry if cancelled
2009                        Err(e) if cancel_token.is_cancelled() => {
2010                            anyhow::bail!(e);
2011                        }
2012                        // Retry when: non-streaming under threshold, OR first streaming attempt
2013                        Err(e) if attempt < threshold && (event_tx.is_none() || attempt == 1) => {
2014                            tracing::warn!(
2015                                turn = turn,
2016                                attempt = attempt,
2017                                threshold = threshold,
2018                                error = %e,
2019                                "LLM call failed, will retry"
2020                            );
2021                            tokio::time::sleep(Duration::from_millis(100 * attempt as u64)).await;
2022                        }
2023                        // Threshold exceeded or streaming mid-stream: bail
2024                        Err(e) => {
2025                            let msg = if attempt > 1 {
2026                                format!(
2027                                    "LLM circuit breaker triggered: failed after {} attempt(s): {}",
2028                                    attempt, e
2029                                )
2030                            } else {
2031                                format!("LLM call failed: {}", e)
2032                            };
2033                            tracing::error!(turn = turn, attempt = attempt, "{}", msg);
2034                            // Fire OnError hook for LLM failure
2035                            self.fire_on_error(
2036                                session_id.unwrap_or(""),
2037                                ErrorType::LlmFailure,
2038                                &msg,
2039                                serde_json::json!({"turn": turn, "attempt": attempt}),
2040                            )
2041                            .await;
2042                            if let Some(tx) = &event_tx {
2043                                tx.send(AgentEvent::Error {
2044                                    message: msg.clone(),
2045                                })
2046                                .await
2047                                .ok();
2048                            }
2049                            anyhow::bail!(msg);
2050                        }
2051                    }
2052                }
2053            };
2054
2055            // Update usage
2056            total_usage.prompt_tokens += response.usage.prompt_tokens;
2057            total_usage.completion_tokens += response.usage.completion_tokens;
2058            total_usage.total_tokens += response.usage.total_tokens;
2059
2060            // Track token usage in progress tracker
2061            if let Some(ref tracker) = self.progress_tracker {
2062                let token_usage = crate::task::TaskTokenUsage {
2063                    input_tokens: response.usage.prompt_tokens as u64,
2064                    output_tokens: response.usage.completion_tokens as u64,
2065                    cache_read_tokens: response.usage.cache_read_tokens.unwrap_or(0) as u64,
2066                    cache_write_tokens: response.usage.cache_write_tokens.unwrap_or(0) as u64,
2067                };
2068                if let Ok(mut guard) = tracker.try_write() {
2069                    guard.track_tokens(token_usage);
2070                }
2071            }
2072
2073            // Record LLM completion telemetry
2074            let llm_duration = llm_start.elapsed();
2075            tracing::info!(
2076                turn = turn,
2077                streaming = event_tx.is_some(),
2078                prompt_tokens = response.usage.prompt_tokens,
2079                completion_tokens = response.usage.completion_tokens,
2080                total_tokens = response.usage.total_tokens,
2081                stop_reason = response.stop_reason.as_deref().unwrap_or("unknown"),
2082                duration_ms = llm_duration.as_millis() as u64,
2083                "LLM completion finished"
2084            );
2085
2086            // Fire GenerateEnd hook
2087            self.fire_generate_end(
2088                session_id.unwrap_or(""),
2089                effective_prompt,
2090                &response,
2091                llm_duration.as_millis() as u64,
2092            )
2093            .await;
2094
2095            // Record LLM usage on the llm span
2096            crate::telemetry::record_llm_usage(
2097                response.usage.prompt_tokens,
2098                response.usage.completion_tokens,
2099                response.usage.total_tokens,
2100                response.stop_reason.as_deref(),
2101            );
2102            // Log turn token usage
2103            tracing::info!(
2104                turn = turn,
2105                a3s.llm.total_tokens = response.usage.total_tokens,
2106                "Turn token usage"
2107            );
2108
2109            // Add assistant message to history
2110            messages.push(response.message.clone());
2111
2112            // Check for tool calls
2113            let tool_calls = response.tool_calls();
2114
2115            // Send turn end event
2116            if let Some(tx) = &event_tx {
2117                tx.send(AgentEvent::TurnEnd {
2118                    turn,
2119                    usage: response.usage.clone(),
2120                })
2121                .await
2122                .ok();
2123            }
2124
2125            // Auto-compact: check if context usage exceeds threshold
2126            if self.config.auto_compact {
2127                let used = response.usage.prompt_tokens;
2128                let max = self.config.max_context_tokens;
2129                let threshold = self.config.auto_compact_threshold;
2130
2131                if crate::session::compaction::should_auto_compact(used, max, threshold) {
2132                    let before_len = messages.len();
2133                    let percent_before = used as f32 / max as f32;
2134
2135                    tracing::info!(
2136                        used_tokens = used,
2137                        max_tokens = max,
2138                        percent = percent_before,
2139                        threshold = threshold,
2140                        "Auto-compact triggered"
2141                    );
2142
2143                    // Step 1: Prune large tool outputs first (cheap, no LLM call)
2144                    if let Some(pruned) = crate::session::compaction::prune_tool_outputs(&messages)
2145                    {
2146                        messages = pruned;
2147                        tracing::info!("Tool output pruning applied");
2148                    }
2149
2150                    // Step 2: Full summarization using the agent's LLM client
2151                    if let Ok(Some(compacted)) = crate::session::compaction::compact_messages(
2152                        session_id.unwrap_or(""),
2153                        &messages,
2154                        &self.llm_client,
2155                    )
2156                    .await
2157                    {
2158                        messages = compacted;
2159                    }
2160
2161                    // Emit compaction event
2162                    if let Some(tx) = &event_tx {
2163                        tx.send(AgentEvent::ContextCompacted {
2164                            session_id: session_id.unwrap_or("").to_string(),
2165                            before_messages: before_len,
2166                            after_messages: messages.len(),
2167                            percent_before,
2168                        })
2169                        .await
2170                        .ok();
2171                    }
2172                }
2173            }
2174
2175            if tool_calls.is_empty() {
2176                // No tool calls — check if we should inject a continuation message
2177                // before treating this as a final answer.
2178                let final_text = response.text();
2179
2180                if self.config.continuation_enabled
2181                    && continuation_count < self.config.max_continuation_turns
2182                    && turn < self.config.max_tool_rounds  // never inject past the turn limit
2183                    && Self::looks_incomplete(&final_text)
2184                {
2185                    continuation_count += 1;
2186                    tracing::info!(
2187                        turn = turn,
2188                        continuation = continuation_count,
2189                        max_continuation = self.config.max_continuation_turns,
2190                        "Injecting continuation message — response looks incomplete"
2191                    );
2192                    // Inject continuation as a user message and keep looping
2193                    messages.push(Message::user(crate::prompts::CONTINUATION));
2194                    continue;
2195                }
2196
2197                // Sanitize output to redact any sensitive data before returning
2198                let final_text = if let Some(ref sp) = self.config.security_provider {
2199                    sp.sanitize_output(&final_text)
2200                } else {
2201                    final_text
2202                };
2203
2204                // Record final totals
2205                tracing::info!(
2206                    tool_calls_count = tool_calls_count,
2207                    total_prompt_tokens = total_usage.prompt_tokens,
2208                    total_completion_tokens = total_usage.completion_tokens,
2209                    total_tokens = total_usage.total_tokens,
2210                    turns = turn,
2211                    "Agent execution completed"
2212                );
2213
2214                if emit_end {
2215                    if let Some(tx) = &event_tx {
2216                        tx.send(AgentEvent::End {
2217                            text: final_text.clone(),
2218                            usage: total_usage.clone(),
2219                            meta: response.meta.clone(),
2220                        })
2221                        .await
2222                        .ok();
2223                    }
2224                }
2225
2226                // Notify context providers of turn completion for memory extraction
2227                if let Some(sid) = session_id {
2228                    self.notify_turn_complete(sid, effective_prompt, &final_text)
2229                        .await;
2230                }
2231
2232                return Ok(AgentResult {
2233                    text: final_text,
2234                    messages,
2235                    usage: total_usage,
2236                    tool_calls_count,
2237                });
2238            }
2239
2240            // Execute tools sequentially
2241            // Fast path: when all tool calls are independent file writes and no hooks/HITL
2242            // are configured, execute them concurrently to avoid serial I/O bottleneck.
2243            let tool_calls = if self.config.hook_engine.is_none()
2244                && self.config.confirmation_manager.is_none()
2245                && tool_calls.len() > 1
2246                && tool_calls
2247                    .iter()
2248                    .all(|tc| Self::is_parallel_safe_write(&tc.name, &tc.args))
2249                && {
2250                    // All target paths must be distinct (no write-write conflicts)
2251                    let paths: Vec<_> = tool_calls
2252                        .iter()
2253                        .filter_map(|tc| Self::extract_write_path(&tc.args))
2254                        .collect();
2255                    paths.len() == tool_calls.len()
2256                        && paths.iter().collect::<std::collections::HashSet<_>>().len()
2257                            == paths.len()
2258                } {
2259                tracing::info!(
2260                    count = tool_calls.len(),
2261                    "Parallel write batch: executing {} independent file writes concurrently",
2262                    tool_calls.len()
2263                );
2264
2265                let futures: Vec<_> = tool_calls
2266                    .iter()
2267                    .map(|tc| {
2268                        let ctx = self.tool_context.clone();
2269                        let executor = Arc::clone(&self.tool_executor);
2270                        let name = tc.name.clone();
2271                        let args = tc.args.clone();
2272                        async move { executor.execute_with_context(&name, &args, &ctx).await }
2273                    })
2274                    .collect();
2275
2276                let results = join_all(futures).await;
2277
2278                // Post-process results in original order (sequential, preserves message ordering)
2279                for (tc, result) in tool_calls.iter().zip(results) {
2280                    tool_calls_count += 1;
2281                    let (output, exit_code, is_error, metadata, images) =
2282                        Self::tool_result_to_tuple(result);
2283
2284                    // Track tool call in progress tracker
2285                    self.track_tool_result(&tc.name, &tc.args, exit_code);
2286
2287                    let output = if let Some(ref sp) = self.config.security_provider {
2288                        sp.sanitize_output(&output)
2289                    } else {
2290                        output
2291                    };
2292
2293                    if let Some(tx) = &event_tx {
2294                        tx.send(AgentEvent::ToolEnd {
2295                            id: tc.id.clone(),
2296                            name: tc.name.clone(),
2297                            output: output.clone(),
2298                            exit_code,
2299                            metadata,
2300                        })
2301                        .await
2302                        .ok();
2303                    }
2304
2305                    if images.is_empty() {
2306                        messages.push(Message::tool_result(&tc.id, &output, is_error));
2307                    } else {
2308                        messages.push(Message::tool_result_with_images(
2309                            &tc.id, &output, &images, is_error,
2310                        ));
2311                    }
2312                }
2313
2314                // Skip the sequential loop below
2315                continue;
2316            } else {
2317                tool_calls
2318            };
2319
2320            for tool_call in tool_calls {
2321                tool_calls_count += 1;
2322
2323                let tool_start = std::time::Instant::now();
2324
2325                tracing::info!(
2326                    tool_name = tool_call.name.as_str(),
2327                    tool_id = tool_call.id.as_str(),
2328                    "Tool execution started"
2329                );
2330
2331                // Send tool start event (only if not already sent during streaming)
2332                // In streaming mode, ToolStart is sent when we receive ToolUseStart from LLM
2333                // But we still need to send ToolEnd after execution
2334
2335                // Check for malformed tool arguments from LLM (4.1 parse error recovery)
2336                if let Some(parse_error) =
2337                    tool_call.args.get("__parse_error").and_then(|v| v.as_str())
2338                {
2339                    parse_error_count += 1;
2340                    let error_msg = format!("Error: {}", parse_error);
2341                    tracing::warn!(
2342                        tool = tool_call.name.as_str(),
2343                        parse_error_count = parse_error_count,
2344                        max_parse_retries = self.config.max_parse_retries,
2345                        "Malformed tool arguments from LLM"
2346                    );
2347
2348                    if let Some(tx) = &event_tx {
2349                        tx.send(AgentEvent::ToolEnd {
2350                            id: tool_call.id.clone(),
2351                            name: tool_call.name.clone(),
2352                            output: error_msg.clone(),
2353                            exit_code: 1,
2354                            metadata: None,
2355                        })
2356                        .await
2357                        .ok();
2358                    }
2359
2360                    messages.push(Message::tool_result(&tool_call.id, &error_msg, true));
2361
2362                    if parse_error_count > self.config.max_parse_retries {
2363                        let msg = format!(
2364                            "LLM produced malformed tool arguments {} time(s) in a row \
2365                             (max_parse_retries={}); giving up",
2366                            parse_error_count, self.config.max_parse_retries
2367                        );
2368                        tracing::error!("{}", msg);
2369                        if let Some(tx) = &event_tx {
2370                            tx.send(AgentEvent::Error {
2371                                message: msg.clone(),
2372                            })
2373                            .await
2374                            .ok();
2375                        }
2376                        anyhow::bail!(msg);
2377                    }
2378                    continue;
2379                }
2380
2381                // Tool args are valid — reset parse error counter
2382                parse_error_count = 0;
2383
2384                // Check skill-based tool permissions
2385                if let Some(ref registry) = self.config.skill_registry {
2386                    let instruction_skills =
2387                        registry.by_kind(crate::skills::SkillKind::Instruction);
2388                    let has_restrictions =
2389                        instruction_skills.iter().any(|s| s.allowed_tools.is_some());
2390                    if has_restrictions {
2391                        let allowed = instruction_skills
2392                            .iter()
2393                            .any(|s| s.is_tool_allowed(&tool_call.name));
2394                        if !allowed {
2395                            let msg = format!(
2396                                "Tool '{}' is not allowed by any active skill.",
2397                                tool_call.name
2398                            );
2399                            tracing::info!(
2400                                tool_name = tool_call.name.as_str(),
2401                                "Tool blocked by skill registry"
2402                            );
2403                            if let Some(tx) = &event_tx {
2404                                tx.send(AgentEvent::PermissionDenied {
2405                                    tool_id: tool_call.id.clone(),
2406                                    tool_name: tool_call.name.clone(),
2407                                    args: tool_call.args.clone(),
2408                                    reason: msg.clone(),
2409                                })
2410                                .await
2411                                .ok();
2412                            }
2413                            messages.push(Message::tool_result(&tool_call.id, &msg, true));
2414                            continue;
2415                        }
2416                    }
2417                }
2418
2419                // Fire PreToolUse hook (may block the tool call)
2420                if let Some(HookResult::Block(reason)) = self
2421                    .fire_pre_tool_use(
2422                        session_id.unwrap_or(""),
2423                        &tool_call.name,
2424                        &tool_call.args,
2425                        recent_tool_signatures.clone(),
2426                    )
2427                    .await
2428                {
2429                    let msg = format!("Tool '{}' blocked by hook: {}", tool_call.name, reason);
2430                    tracing::info!(
2431                        tool_name = tool_call.name.as_str(),
2432                        "Tool blocked by PreToolUse hook"
2433                    );
2434
2435                    if let Some(tx) = &event_tx {
2436                        tx.send(AgentEvent::PermissionDenied {
2437                            tool_id: tool_call.id.clone(),
2438                            tool_name: tool_call.name.clone(),
2439                            args: tool_call.args.clone(),
2440                            reason: reason.clone(),
2441                        })
2442                        .await
2443                        .ok();
2444                    }
2445
2446                    messages.push(Message::tool_result(&tool_call.id, &msg, true));
2447                    continue;
2448                }
2449
2450                // Check permission before executing tool
2451                let permission_decision = if let Some(checker) = &self.config.permission_checker {
2452                    checker.check(&tool_call.name, &tool_call.args)
2453                } else {
2454                    // No policy configured — default to Ask so HITL can still intervene
2455                    PermissionDecision::Ask
2456                };
2457
2458                let (output, exit_code, is_error, metadata, images) = match permission_decision {
2459                    PermissionDecision::Deny => {
2460                        tracing::info!(
2461                            tool_name = tool_call.name.as_str(),
2462                            permission = "deny",
2463                            "Tool permission denied"
2464                        );
2465                        // Tool execution denied by permission policy
2466                        let denial_msg = format!(
2467                            "Permission denied: Tool '{}' is blocked by permission policy.",
2468                            tool_call.name
2469                        );
2470
2471                        // Send permission denied event
2472                        if let Some(tx) = &event_tx {
2473                            tx.send(AgentEvent::PermissionDenied {
2474                                tool_id: tool_call.id.clone(),
2475                                tool_name: tool_call.name.clone(),
2476                                args: tool_call.args.clone(),
2477                                reason: "Blocked by deny rule in permission policy".to_string(),
2478                            })
2479                            .await
2480                            .ok();
2481                        }
2482
2483                        (denial_msg, 1, true, None, Vec::new())
2484                    }
2485                    PermissionDecision::Allow => {
2486                        tracing::info!(
2487                            tool_name = tool_call.name.as_str(),
2488                            permission = "allow",
2489                            "Tool permission: allow"
2490                        );
2491                        // Permission explicitly allows — execute directly, no HITL
2492                        let stream_ctx =
2493                            self.streaming_tool_context(&event_tx, &tool_call.id, &tool_call.name);
2494                        let result = self
2495                            .execute_tool_queued_or_direct(
2496                                &tool_call.name,
2497                                &tool_call.args,
2498                                &stream_ctx,
2499                            )
2500                            .await;
2501
2502                        let tuple = Self::tool_result_to_tuple(result);
2503                        // Track tool call in progress tracker
2504                        let (_, exit_code, _, _, _) = tuple;
2505                        self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
2506                        tuple
2507                    }
2508                    PermissionDecision::Ask => {
2509                        tracing::info!(
2510                            tool_name = tool_call.name.as_str(),
2511                            permission = "ask",
2512                            "Tool permission: ask"
2513                        );
2514                        // Permission says Ask — delegate to HITL confirmation manager
2515                        if let Some(cm) = &self.config.confirmation_manager {
2516                            // Check YOLO lanes: if the tool's lane is in YOLO mode, skip confirmation
2517                            if !cm.requires_confirmation(&tool_call.name).await {
2518                                let stream_ctx = self.streaming_tool_context(
2519                                    &event_tx,
2520                                    &tool_call.id,
2521                                    &tool_call.name,
2522                                );
2523                                let result = self
2524                                    .execute_tool_queued_or_direct(
2525                                        &tool_call.name,
2526                                        &tool_call.args,
2527                                        &stream_ctx,
2528                                    )
2529                                    .await;
2530
2531                                let (output, exit_code, is_error, metadata, images) =
2532                                    Self::tool_result_to_tuple(result);
2533
2534                                // Track tool call in progress tracker
2535                                self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
2536
2537                                // Add tool result to messages
2538                                if images.is_empty() {
2539                                    messages.push(Message::tool_result(
2540                                        &tool_call.id,
2541                                        &output,
2542                                        is_error,
2543                                    ));
2544                                } else {
2545                                    messages.push(Message::tool_result_with_images(
2546                                        &tool_call.id,
2547                                        &output,
2548                                        &images,
2549                                        is_error,
2550                                    ));
2551                                }
2552
2553                                // Record tool result on the tool span for early exit
2554                                let tool_duration = tool_start.elapsed();
2555                                crate::telemetry::record_tool_result(exit_code, tool_duration);
2556
2557                                // Send ToolEnd event
2558                                if let Some(tx) = &event_tx {
2559                                    tx.send(AgentEvent::ToolEnd {
2560                                        id: tool_call.id.clone(),
2561                                        name: tool_call.name.clone(),
2562                                        output: output.clone(),
2563                                        exit_code,
2564                                        metadata,
2565                                    })
2566                                    .await
2567                                    .ok();
2568                                }
2569
2570                                // Fire PostToolUse hook (fire-and-forget)
2571                                self.fire_post_tool_use(
2572                                    session_id.unwrap_or(""),
2573                                    &tool_call.name,
2574                                    &tool_call.args,
2575                                    &output,
2576                                    exit_code == 0,
2577                                    tool_duration.as_millis() as u64,
2578                                )
2579                                .await;
2580
2581                                continue; // Skip the rest, move to next tool call
2582                            }
2583
2584                            // Get timeout from policy
2585                            let policy = cm.policy().await;
2586                            let timeout_ms = policy.default_timeout_ms;
2587                            let timeout_action = policy.timeout_action;
2588
2589                            // Request confirmation (this emits ConfirmationRequired event)
2590                            let rx = cm
2591                                .request_confirmation(
2592                                    &tool_call.id,
2593                                    &tool_call.name,
2594                                    &tool_call.args,
2595                                )
2596                                .await;
2597
2598                            // Forward ConfirmationRequired to the streaming event channel
2599                            // so external consumers (e.g. SafeClaw engine) can relay it
2600                            // to the browser UI.
2601                            if let Some(tx) = &event_tx {
2602                                tx.send(AgentEvent::ConfirmationRequired {
2603                                    tool_id: tool_call.id.clone(),
2604                                    tool_name: tool_call.name.clone(),
2605                                    args: tool_call.args.clone(),
2606                                    timeout_ms,
2607                                })
2608                                .await
2609                                .ok();
2610                            }
2611
2612                            // Wait for confirmation with timeout
2613                            let confirmation_result =
2614                                tokio::time::timeout(Duration::from_millis(timeout_ms), rx).await;
2615
2616                            match confirmation_result {
2617                                Ok(Ok(response)) => {
2618                                    // Forward ConfirmationReceived
2619                                    if let Some(tx) = &event_tx {
2620                                        tx.send(AgentEvent::ConfirmationReceived {
2621                                            tool_id: tool_call.id.clone(),
2622                                            approved: response.approved,
2623                                            reason: response.reason.clone(),
2624                                        })
2625                                        .await
2626                                        .ok();
2627                                    }
2628                                    if response.approved {
2629                                        let stream_ctx = self.streaming_tool_context(
2630                                            &event_tx,
2631                                            &tool_call.id,
2632                                            &tool_call.name,
2633                                        );
2634                                        let result = self
2635                                            .execute_tool_queued_or_direct(
2636                                                &tool_call.name,
2637                                                &tool_call.args,
2638                                                &stream_ctx,
2639                                            )
2640                                            .await;
2641
2642                                        let tuple = Self::tool_result_to_tuple(result);
2643                                        // Track tool call in progress tracker
2644                                        let (_, exit_code, _, _, _) = tuple;
2645                                        self.track_tool_result(
2646                                            &tool_call.name,
2647                                            &tool_call.args,
2648                                            exit_code,
2649                                        );
2650                                        tuple
2651                                    } else {
2652                                        let rejection_msg = format!(
2653                                            "Tool '{}' execution was REJECTED by the user. Reason: {}. \
2654                                             DO NOT retry this tool call unless the user explicitly asks you to.",
2655                                            tool_call.name,
2656                                            response.reason.unwrap_or_else(|| "No reason provided".to_string())
2657                                        );
2658                                        (rejection_msg, 1, true, None, Vec::new())
2659                                    }
2660                                }
2661                                Ok(Err(_)) => {
2662                                    // Forward ConfirmationTimeout (channel closed = effectively timed out)
2663                                    if let Some(tx) = &event_tx {
2664                                        tx.send(AgentEvent::ConfirmationTimeout {
2665                                            tool_id: tool_call.id.clone(),
2666                                            action_taken: "rejected".to_string(),
2667                                        })
2668                                        .await
2669                                        .ok();
2670                                    }
2671                                    let msg = format!(
2672                                        "Tool '{}' confirmation failed: confirmation channel closed",
2673                                        tool_call.name
2674                                    );
2675                                    (msg, 1, true, None, Vec::new())
2676                                }
2677                                Err(_) => {
2678                                    cm.check_timeouts().await;
2679
2680                                    // Forward ConfirmationTimeout
2681                                    if let Some(tx) = &event_tx {
2682                                        tx.send(AgentEvent::ConfirmationTimeout {
2683                                            tool_id: tool_call.id.clone(),
2684                                            action_taken: match timeout_action {
2685                                                crate::hitl::TimeoutAction::Reject => {
2686                                                    "rejected".to_string()
2687                                                }
2688                                                crate::hitl::TimeoutAction::AutoApprove => {
2689                                                    "auto_approved".to_string()
2690                                                }
2691                                            },
2692                                        })
2693                                        .await
2694                                        .ok();
2695                                    }
2696
2697                                    match timeout_action {
2698                                        crate::hitl::TimeoutAction::Reject => {
2699                                            let msg = format!(
2700                                                "Tool '{}' execution was REJECTED: user confirmation timed out after {}ms. \
2701                                                 DO NOT retry this tool call — the user did not approve it. \
2702                                                 Inform the user that the operation requires their approval and ask them to try again.",
2703                                                tool_call.name, timeout_ms
2704                                            );
2705                                            (msg, 1, true, None, Vec::new())
2706                                        }
2707                                        crate::hitl::TimeoutAction::AutoApprove => {
2708                                            let stream_ctx = self.streaming_tool_context(
2709                                                &event_tx,
2710                                                &tool_call.id,
2711                                                &tool_call.name,
2712                                            );
2713                                            let result = self
2714                                                .execute_tool_queued_or_direct(
2715                                                    &tool_call.name,
2716                                                    &tool_call.args,
2717                                                    &stream_ctx,
2718                                                )
2719                                                .await;
2720
2721                                            let tuple = Self::tool_result_to_tuple(result);
2722                                            // Track tool call in progress tracker
2723                                            let (_, exit_code, _, _, _) = tuple;
2724                                            self.track_tool_result(
2725                                                &tool_call.name,
2726                                                &tool_call.args,
2727                                                exit_code,
2728                                            );
2729                                            tuple
2730                                        }
2731                                    }
2732                                }
2733                            }
2734                        } else {
2735                            // Ask without confirmation manager — safe deny
2736                            let msg = format!(
2737                                "Tool '{}' requires confirmation but no HITL confirmation manager is configured. \
2738                                 Configure a confirmation policy to enable tool execution.",
2739                                tool_call.name
2740                            );
2741                            tracing::warn!(
2742                                tool_name = tool_call.name.as_str(),
2743                                "Tool requires confirmation but no HITL manager configured"
2744                            );
2745                            (msg, 1, true, None, Vec::new())
2746                        }
2747                    }
2748                };
2749
2750                let tool_duration = tool_start.elapsed();
2751                crate::telemetry::record_tool_result(exit_code, tool_duration);
2752
2753                // Sanitize tool output for sensitive data before it enters the message history
2754                let output = if let Some(ref sp) = self.config.security_provider {
2755                    sp.sanitize_output(&output)
2756                } else {
2757                    output
2758                };
2759
2760                recent_tool_signatures.push(format!(
2761                    "{}:{} => {}",
2762                    tool_call.name,
2763                    serde_json::to_string(&tool_call.args).unwrap_or_default(),
2764                    if is_error { "error" } else { "ok" }
2765                ));
2766                if recent_tool_signatures.len() > 8 {
2767                    let overflow = recent_tool_signatures.len() - 8;
2768                    recent_tool_signatures.drain(0..overflow);
2769                }
2770
2771                // Fire PostToolUse hook (fire-and-forget)
2772                self.fire_post_tool_use(
2773                    session_id.unwrap_or(""),
2774                    &tool_call.name,
2775                    &tool_call.args,
2776                    &output,
2777                    exit_code == 0,
2778                    tool_duration.as_millis() as u64,
2779                )
2780                .await;
2781
2782                // Auto-remember tool result in long-term memory
2783                if let Some(ref memory) = self.config.memory {
2784                    let tools_used = [tool_call.name.clone()];
2785                    let remember_result = if exit_code == 0 {
2786                        memory
2787                            .remember_success(effective_prompt, &tools_used, &output)
2788                            .await
2789                    } else {
2790                        memory
2791                            .remember_failure(effective_prompt, &output, &tools_used)
2792                            .await
2793                    };
2794                    match remember_result {
2795                        Ok(()) => {
2796                            if let Some(tx) = &event_tx {
2797                                let item_type = if exit_code == 0 { "success" } else { "failure" };
2798                                tx.send(AgentEvent::MemoryStored {
2799                                    memory_id: uuid::Uuid::new_v4().to_string(),
2800                                    memory_type: item_type.to_string(),
2801                                    importance: if exit_code == 0 { 0.8 } else { 0.9 },
2802                                    tags: vec![item_type.to_string(), tool_call.name.clone()],
2803                                })
2804                                .await
2805                                .ok();
2806                            }
2807                        }
2808                        Err(e) => {
2809                            tracing::warn!("Failed to store memory after tool execution: {}", e);
2810                        }
2811                    }
2812                }
2813
2814                // Send tool end event
2815                if let Some(tx) = &event_tx {
2816                    tx.send(AgentEvent::ToolEnd {
2817                        id: tool_call.id.clone(),
2818                        name: tool_call.name.clone(),
2819                        output: output.clone(),
2820                        exit_code,
2821                        metadata,
2822                    })
2823                    .await
2824                    .ok();
2825                }
2826
2827                // Add tool result to messages
2828                if images.is_empty() {
2829                    messages.push(Message::tool_result(&tool_call.id, &output, is_error));
2830                } else {
2831                    messages.push(Message::tool_result_with_images(
2832                        &tool_call.id,
2833                        &output,
2834                        &images,
2835                        is_error,
2836                    ));
2837                }
2838            }
2839        }
2840    }
2841
2842    /// Execute with streaming events
2843    pub async fn execute_streaming(
2844        &self,
2845        history: &[Message],
2846        prompt: &str,
2847    ) -> Result<(
2848        mpsc::Receiver<AgentEvent>,
2849        tokio::task::JoinHandle<Result<AgentResult>>,
2850        tokio_util::sync::CancellationToken,
2851    )> {
2852        let (tx, rx) = mpsc::channel(100);
2853        let cancel_token = tokio_util::sync::CancellationToken::new();
2854
2855        let llm_client = self.llm_client.clone();
2856        let tool_executor = self.tool_executor.clone();
2857        let tool_context = self.tool_context.clone();
2858        let config = self.config.clone();
2859        let tool_metrics = self.tool_metrics.clone();
2860        let command_queue = self.command_queue.clone();
2861        let history = history.to_vec();
2862        let prompt = prompt.to_string();
2863        let token_clone = cancel_token.clone();
2864
2865        let handle = tokio::spawn(async move {
2866            let mut agent = AgentLoop::new(llm_client, tool_executor, tool_context, config);
2867            if let Some(metrics) = tool_metrics {
2868                agent = agent.with_tool_metrics(metrics);
2869            }
2870            if let Some(queue) = command_queue {
2871                agent = agent.with_queue(queue);
2872            }
2873            agent
2874                .execute_with_session(&history, &prompt, None, Some(tx), Some(&token_clone))
2875                .await
2876        });
2877
2878        Ok((rx, handle, cancel_token))
2879    }
2880
2881    /// Create an execution plan for a prompt
2882    ///
2883    /// Delegates to [`LlmPlanner`] for structured JSON plan generation,
2884    /// falling back to heuristic planning if the LLM call fails.
2885    pub async fn plan(&self, prompt: &str, _context: Option<&str>) -> Result<ExecutionPlan> {
2886        use crate::planning::LlmPlanner;
2887
2888        match LlmPlanner::create_plan(&self.llm_client, prompt).await {
2889            Ok(plan) => Ok(plan),
2890            Err(e) => {
2891                tracing::warn!("LLM plan creation failed, using fallback: {}", e);
2892                Ok(LlmPlanner::fallback_plan(prompt))
2893            }
2894        }
2895    }
2896
2897    /// Execute with planning phase
2898    pub async fn execute_with_planning(
2899        &self,
2900        history: &[Message],
2901        prompt: &str,
2902        event_tx: Option<mpsc::Sender<AgentEvent>>,
2903    ) -> Result<AgentResult> {
2904        // Send planning start event
2905        if let Some(tx) = &event_tx {
2906            tx.send(AgentEvent::PlanningStart {
2907                prompt: prompt.to_string(),
2908            })
2909            .await
2910            .ok();
2911        }
2912
2913        // Extract goal when goal_tracking is enabled
2914        let goal = if self.config.goal_tracking {
2915            let g = self.extract_goal(prompt).await?;
2916            if let Some(tx) = &event_tx {
2917                tx.send(AgentEvent::GoalExtracted { goal: g.clone() })
2918                    .await
2919                    .ok();
2920            }
2921            Some(g)
2922        } else {
2923            None
2924        };
2925
2926        // Create execution plan
2927        let plan = self.plan(prompt, None).await?;
2928
2929        // Send planning end event
2930        if let Some(tx) = &event_tx {
2931            tx.send(AgentEvent::PlanningEnd {
2932                estimated_steps: plan.steps.len(),
2933                plan: plan.clone(),
2934            })
2935            .await
2936            .ok();
2937        }
2938
2939        let plan_start = std::time::Instant::now();
2940
2941        // Execute the plan step by step
2942        let result = self.execute_plan(history, &plan, event_tx.clone()).await?;
2943
2944        // Emit the final End event (execute_loop_inner does not emit End in planning mode)
2945        if let Some(tx) = &event_tx {
2946            tx.send(AgentEvent::End {
2947                text: result.text.clone(),
2948                usage: result.usage.clone(),
2949                meta: None,
2950            })
2951            .await
2952            .ok();
2953        }
2954
2955        // Check goal achievement when goal_tracking is enabled
2956        if self.config.goal_tracking {
2957            if let Some(ref g) = goal {
2958                let achieved = self.check_goal_achievement(g, &result.text).await?;
2959                if achieved {
2960                    if let Some(tx) = &event_tx {
2961                        tx.send(AgentEvent::GoalAchieved {
2962                            goal: g.description.clone(),
2963                            total_steps: result.messages.len(),
2964                            duration_ms: plan_start.elapsed().as_millis() as i64,
2965                        })
2966                        .await
2967                        .ok();
2968                    }
2969                }
2970            }
2971        }
2972
2973        Ok(result)
2974    }
2975
2976    /// Execute an execution plan using wave-based dependency-aware scheduling.
2977    ///
2978    /// Steps with no unmet dependencies are grouped into "waves". A wave with
2979    /// a single step executes sequentially (preserving the history chain). A
2980    /// wave with multiple independent steps executes them in parallel via
2981    /// `JoinSet`, then merges their results back into the shared history.
2982    async fn execute_plan(
2983        &self,
2984        history: &[Message],
2985        plan: &ExecutionPlan,
2986        event_tx: Option<mpsc::Sender<AgentEvent>>,
2987    ) -> Result<AgentResult> {
2988        let mut plan = plan.clone();
2989        let mut current_history = history.to_vec();
2990        let mut total_usage = TokenUsage::default();
2991        let mut tool_calls_count = 0;
2992        let total_steps = plan.steps.len();
2993
2994        // Add initial user message with the goal
2995        let steps_text = plan
2996            .steps
2997            .iter()
2998            .enumerate()
2999            .map(|(i, step)| format!("{}. {}", i + 1, step.content))
3000            .collect::<Vec<_>>()
3001            .join("\n");
3002        current_history.push(Message::user(&crate::prompts::render(
3003            crate::prompts::PLAN_EXECUTE_GOAL,
3004            &[("goal", &plan.goal), ("steps", &steps_text)],
3005        )));
3006
3007        loop {
3008            let ready: Vec<String> = plan
3009                .get_ready_steps()
3010                .iter()
3011                .map(|s| s.id.clone())
3012                .collect();
3013
3014            if ready.is_empty() {
3015                // All done or deadlock
3016                if plan.has_deadlock() {
3017                    tracing::warn!(
3018                        "Plan deadlock detected: {} pending steps with unresolvable dependencies",
3019                        plan.pending_count()
3020                    );
3021                }
3022                break;
3023            }
3024
3025            if ready.len() == 1 {
3026                // === Single step: sequential execution (preserves history chain) ===
3027                let step_id = &ready[0];
3028                let step = plan
3029                    .steps
3030                    .iter()
3031                    .find(|s| s.id == *step_id)
3032                    .ok_or_else(|| anyhow::anyhow!("step '{}' not found in plan", step_id))?
3033                    .clone();
3034                let step_number = plan
3035                    .steps
3036                    .iter()
3037                    .position(|s| s.id == *step_id)
3038                    .unwrap_or(0)
3039                    + 1;
3040
3041                // Send step start event
3042                if let Some(tx) = &event_tx {
3043                    tx.send(AgentEvent::StepStart {
3044                        step_id: step.id.clone(),
3045                        description: step.content.clone(),
3046                        step_number,
3047                        total_steps,
3048                    })
3049                    .await
3050                    .ok();
3051                }
3052
3053                plan.mark_status(&step.id, TaskStatus::InProgress);
3054
3055                let step_prompt = crate::prompts::render(
3056                    crate::prompts::PLAN_EXECUTE_STEP,
3057                    &[
3058                        ("step_num", &step_number.to_string()),
3059                        ("description", &step.content),
3060                    ],
3061                );
3062
3063                match self
3064                    .execute_loop(
3065                        &current_history,
3066                        &step_prompt,
3067                        None,
3068                        event_tx.clone(),
3069                        &tokio_util::sync::CancellationToken::new(),
3070                        false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
3071                    )
3072                    .await
3073                {
3074                    Ok(result) => {
3075                        current_history = result.messages.clone();
3076                        total_usage.prompt_tokens += result.usage.prompt_tokens;
3077                        total_usage.completion_tokens += result.usage.completion_tokens;
3078                        total_usage.total_tokens += result.usage.total_tokens;
3079                        tool_calls_count += result.tool_calls_count;
3080                        plan.mark_status(&step.id, TaskStatus::Completed);
3081
3082                        if let Some(tx) = &event_tx {
3083                            tx.send(AgentEvent::StepEnd {
3084                                step_id: step.id.clone(),
3085                                status: TaskStatus::Completed,
3086                                step_number,
3087                                total_steps,
3088                            })
3089                            .await
3090                            .ok();
3091                        }
3092                    }
3093                    Err(e) => {
3094                        tracing::error!("Plan step '{}' failed: {}", step.id, e);
3095                        plan.mark_status(&step.id, TaskStatus::Failed);
3096
3097                        if let Some(tx) = &event_tx {
3098                            tx.send(AgentEvent::StepEnd {
3099                                step_id: step.id.clone(),
3100                                status: TaskStatus::Failed,
3101                                step_number,
3102                                total_steps,
3103                            })
3104                            .await
3105                            .ok();
3106                        }
3107                    }
3108                }
3109            } else {
3110                // === Multiple steps: parallel execution via JoinSet ===
3111                // NOTE: Each parallel branch gets a clone of the base history.
3112                // Individual branch histories (tool calls, LLM turns) are NOT merged
3113                // back — only a summary message is appended. This is a deliberate
3114                // trade-off: merging divergent histories in a deterministic order is
3115                // complex and the summary approach keeps the context window manageable.
3116                let ready_steps: Vec<_> = ready
3117                    .iter()
3118                    .filter_map(|id| {
3119                        let step = plan.steps.iter().find(|s| s.id == *id)?.clone();
3120                        let step_number =
3121                            plan.steps.iter().position(|s| s.id == *id).unwrap_or(0) + 1;
3122                        Some((step, step_number))
3123                    })
3124                    .collect();
3125
3126                // Mark all as InProgress and emit StepStart events
3127                for (step, step_number) in &ready_steps {
3128                    plan.mark_status(&step.id, TaskStatus::InProgress);
3129                    if let Some(tx) = &event_tx {
3130                        tx.send(AgentEvent::StepStart {
3131                            step_id: step.id.clone(),
3132                            description: step.content.clone(),
3133                            step_number: *step_number,
3134                            total_steps,
3135                        })
3136                        .await
3137                        .ok();
3138                    }
3139                }
3140
3141                // Spawn all into JoinSet, each with a clone of the base history
3142                let mut join_set = tokio::task::JoinSet::new();
3143                for (step, step_number) in &ready_steps {
3144                    let base_history = current_history.clone();
3145                    let agent_clone = self.clone();
3146                    let tx = event_tx.clone();
3147                    let step_clone = step.clone();
3148                    let sn = *step_number;
3149
3150                    join_set.spawn(async move {
3151                        let prompt = crate::prompts::render(
3152                            crate::prompts::PLAN_EXECUTE_STEP,
3153                            &[
3154                                ("step_num", &sn.to_string()),
3155                                ("description", &step_clone.content),
3156                            ],
3157                        );
3158                        let result = agent_clone
3159                            .execute_loop(
3160                                &base_history,
3161                                &prompt,
3162                                None,
3163                                tx,
3164                                &tokio_util::sync::CancellationToken::new(),
3165                                false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
3166                            )
3167                            .await;
3168                        (step_clone.id, sn, result)
3169                    });
3170                }
3171
3172                // Collect results
3173                let mut parallel_summaries = Vec::new();
3174                while let Some(join_result) = join_set.join_next().await {
3175                    match join_result {
3176                        Ok((step_id, step_number, step_result)) => match step_result {
3177                            Ok(result) => {
3178                                total_usage.prompt_tokens += result.usage.prompt_tokens;
3179                                total_usage.completion_tokens += result.usage.completion_tokens;
3180                                total_usage.total_tokens += result.usage.total_tokens;
3181                                tool_calls_count += result.tool_calls_count;
3182                                plan.mark_status(&step_id, TaskStatus::Completed);
3183
3184                                // Collect the final assistant text for context merging
3185                                parallel_summaries.push(format!(
3186                                    "- Step {} ({}): {}",
3187                                    step_number, step_id, result.text
3188                                ));
3189
3190                                if let Some(tx) = &event_tx {
3191                                    tx.send(AgentEvent::StepEnd {
3192                                        step_id,
3193                                        status: TaskStatus::Completed,
3194                                        step_number,
3195                                        total_steps,
3196                                    })
3197                                    .await
3198                                    .ok();
3199                                }
3200                            }
3201                            Err(e) => {
3202                                tracing::error!("Plan step '{}' failed: {}", step_id, e);
3203                                plan.mark_status(&step_id, TaskStatus::Failed);
3204
3205                                if let Some(tx) = &event_tx {
3206                                    tx.send(AgentEvent::StepEnd {
3207                                        step_id,
3208                                        status: TaskStatus::Failed,
3209                                        step_number,
3210                                        total_steps,
3211                                    })
3212                                    .await
3213                                    .ok();
3214                                }
3215                            }
3216                        },
3217                        Err(e) => {
3218                            tracing::error!("JoinSet task panicked: {}", e);
3219                        }
3220                    }
3221                }
3222
3223                // Merge parallel results into history for subsequent steps
3224                if !parallel_summaries.is_empty() {
3225                    parallel_summaries.sort(); // Deterministic ordering
3226                    let results_text = parallel_summaries.join("\n");
3227                    current_history.push(Message::user(&crate::prompts::render(
3228                        crate::prompts::PLAN_PARALLEL_RESULTS,
3229                        &[("results", &results_text)],
3230                    )));
3231                }
3232            }
3233
3234            // Emit GoalProgress after each wave
3235            if self.config.goal_tracking {
3236                let completed = plan
3237                    .steps
3238                    .iter()
3239                    .filter(|s| s.status == TaskStatus::Completed)
3240                    .count();
3241                if let Some(tx) = &event_tx {
3242                    tx.send(AgentEvent::GoalProgress {
3243                        goal: plan.goal.clone(),
3244                        progress: plan.progress(),
3245                        completed_steps: completed,
3246                        total_steps,
3247                    })
3248                    .await
3249                    .ok();
3250                }
3251            }
3252        }
3253
3254        // Get final response
3255        let final_text = current_history
3256            .last()
3257            .map(|m| {
3258                m.content
3259                    .iter()
3260                    .filter_map(|block| {
3261                        if let crate::llm::ContentBlock::Text { text } = block {
3262                            Some(text.as_str())
3263                        } else {
3264                            None
3265                        }
3266                    })
3267                    .collect::<Vec<_>>()
3268                    .join("\n")
3269            })
3270            .unwrap_or_default();
3271
3272        Ok(AgentResult {
3273            text: final_text,
3274            messages: current_history,
3275            usage: total_usage,
3276            tool_calls_count,
3277        })
3278    }
3279
3280    /// Extract goal from prompt
3281    ///
3282    /// Delegates to [`LlmPlanner`] for structured JSON goal extraction,
3283    /// falling back to heuristic logic if the LLM call fails.
3284    pub async fn extract_goal(&self, prompt: &str) -> Result<AgentGoal> {
3285        use crate::planning::LlmPlanner;
3286
3287        match LlmPlanner::extract_goal(&self.llm_client, prompt).await {
3288            Ok(goal) => Ok(goal),
3289            Err(e) => {
3290                tracing::warn!("LLM goal extraction failed, using fallback: {}", e);
3291                Ok(LlmPlanner::fallback_goal(prompt))
3292            }
3293        }
3294    }
3295
3296    /// Check if goal is achieved
3297    ///
3298    /// Delegates to [`LlmPlanner`] for structured JSON achievement check,
3299    /// falling back to heuristic logic if the LLM call fails.
3300    pub async fn check_goal_achievement(
3301        &self,
3302        goal: &AgentGoal,
3303        current_state: &str,
3304    ) -> Result<bool> {
3305        use crate::planning::LlmPlanner;
3306
3307        match LlmPlanner::check_achievement(&self.llm_client, goal, current_state).await {
3308            Ok(result) => Ok(result.achieved),
3309            Err(e) => {
3310                tracing::warn!("LLM achievement check failed, using fallback: {}", e);
3311                let result = LlmPlanner::fallback_check_achievement(goal, current_state);
3312                Ok(result.achieved)
3313            }
3314        }
3315    }
3316}
3317
3318#[cfg(test)]
3319mod tests {
3320    use super::*;
3321    use crate::llm::{ContentBlock, StreamEvent};
3322    use crate::permissions::PermissionPolicy;
3323    use crate::tools::ToolExecutor;
3324    use std::path::PathBuf;
3325    use std::sync::atomic::{AtomicUsize, Ordering};
3326
3327    /// Create a default ToolContext for tests
3328    fn test_tool_context() -> ToolContext {
3329        ToolContext::new(PathBuf::from("/tmp"))
3330    }
3331
3332    #[test]
3333    fn test_agent_config_default() {
3334        let config = AgentConfig::default();
3335        assert!(config.prompt_slots.is_empty());
3336        assert!(config.tools.is_empty()); // Tools are provided externally
3337        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
3338        assert!(config.permission_checker.is_none());
3339        assert!(config.context_providers.is_empty());
3340        // Built-in skills are always present by default
3341        let registry = config
3342            .skill_registry
3343            .expect("skill_registry must be Some by default");
3344        assert!(registry.len() >= 7, "expected at least 7 built-in skills");
3345        assert!(registry.get("code-search").is_some());
3346        assert!(registry.get("find-bugs").is_some());
3347    }
3348
3349    // ========================================================================
3350    // Mock LLM Client for Testing
3351    // ========================================================================
3352
    /// Mock LLM client that returns predefined responses
    ///
    /// Each `complete` / `complete_streaming` call takes the first remaining
    /// response from the list; once the list is empty, further calls fail.
    pub(crate) struct MockLlmClient {
        /// Responses to return (consumed in order)
        ///
        /// Wrapped in a `Mutex` because the `LlmClient` methods take `&self`
        /// but must remove the response they hand out.
        responses: std::sync::Mutex<Vec<LlmResponse>>,
        /// Number of calls made
        ///
        /// Incremented on every call, including calls that fail because no
        /// responses are left.
        pub(crate) call_count: AtomicUsize,
    }
3360
3361    impl MockLlmClient {
3362        pub(crate) fn new(responses: Vec<LlmResponse>) -> Self {
3363            Self {
3364                responses: std::sync::Mutex::new(responses),
3365                call_count: AtomicUsize::new(0),
3366            }
3367        }
3368
3369        /// Create a response with text only (no tool calls)
3370        pub(crate) fn text_response(text: &str) -> LlmResponse {
3371            LlmResponse {
3372                message: Message {
3373                    role: "assistant".to_string(),
3374                    content: vec![ContentBlock::Text {
3375                        text: text.to_string(),
3376                    }],
3377                    reasoning_content: None,
3378                },
3379                usage: TokenUsage {
3380                    prompt_tokens: 10,
3381                    completion_tokens: 5,
3382                    total_tokens: 15,
3383                    cache_read_tokens: None,
3384                    cache_write_tokens: None,
3385                },
3386                stop_reason: Some("end_turn".to_string()),
3387                meta: None,
3388            }
3389        }
3390
3391        /// Create a response with a tool call
3392        pub(crate) fn tool_call_response(
3393            tool_id: &str,
3394            tool_name: &str,
3395            args: serde_json::Value,
3396        ) -> LlmResponse {
3397            LlmResponse {
3398                message: Message {
3399                    role: "assistant".to_string(),
3400                    content: vec![ContentBlock::ToolUse {
3401                        id: tool_id.to_string(),
3402                        name: tool_name.to_string(),
3403                        input: args,
3404                    }],
3405                    reasoning_content: None,
3406                },
3407                usage: TokenUsage {
3408                    prompt_tokens: 10,
3409                    completion_tokens: 5,
3410                    total_tokens: 15,
3411                    cache_read_tokens: None,
3412                    cache_write_tokens: None,
3413                },
3414                stop_reason: Some("tool_use".to_string()),
3415                meta: None,
3416            }
3417        }
3418    }
3419
3420    #[async_trait::async_trait]
3421    impl LlmClient for MockLlmClient {
3422        async fn complete(
3423            &self,
3424            _messages: &[Message],
3425            _system: Option<&str>,
3426            _tools: &[ToolDefinition],
3427        ) -> Result<LlmResponse> {
3428            self.call_count.fetch_add(1, Ordering::SeqCst);
3429            let mut responses = self.responses.lock().unwrap();
3430            if responses.is_empty() {
3431                anyhow::bail!("No more mock responses available");
3432            }
3433            Ok(responses.remove(0))
3434        }
3435
3436        async fn complete_streaming(
3437            &self,
3438            _messages: &[Message],
3439            _system: Option<&str>,
3440            _tools: &[ToolDefinition],
3441        ) -> Result<mpsc::Receiver<StreamEvent>> {
3442            self.call_count.fetch_add(1, Ordering::SeqCst);
3443            let mut responses = self.responses.lock().unwrap();
3444            if responses.is_empty() {
3445                anyhow::bail!("No more mock responses available");
3446            }
3447            let response = responses.remove(0);
3448
3449            let (tx, rx) = mpsc::channel(10);
3450            tokio::spawn(async move {
3451                // Send text deltas if any
3452                for block in &response.message.content {
3453                    if let ContentBlock::Text { text } = block {
3454                        tx.send(StreamEvent::TextDelta(text.clone())).await.ok();
3455                    }
3456                }
3457                tx.send(StreamEvent::Done(response)).await.ok();
3458            });
3459
3460            Ok(rx)
3461        }
3462    }
3463
3464    // ========================================================================
3465    // Agent Loop Tests
3466    // ========================================================================
3467
3468    #[tokio::test]
3469    async fn test_agent_simple_response() {
3470        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
3471            "Hello, I'm an AI assistant.",
3472        )]));
3473
3474        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3475        let config = AgentConfig::default();
3476
3477        let agent = AgentLoop::new(
3478            mock_client.clone(),
3479            tool_executor,
3480            test_tool_context(),
3481            config,
3482        );
3483        let result = agent.execute(&[], "Hello", None).await.unwrap();
3484
3485        assert_eq!(result.text, "Hello, I'm an AI assistant.");
3486        assert_eq!(result.tool_calls_count, 0);
3487        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
3488    }
3489
3490    #[tokio::test]
3491    async fn test_agent_with_tool_call() {
3492        let mock_client = Arc::new(MockLlmClient::new(vec![
3493            // First response: tool call
3494            MockLlmClient::tool_call_response(
3495                "tool-1",
3496                "bash",
3497                serde_json::json!({"command": "echo hello"}),
3498            ),
3499            // Second response: final text
3500            MockLlmClient::text_response("The command output was: hello"),
3501        ]));
3502
3503        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3504        let config = AgentConfig::default();
3505
3506        let agent = AgentLoop::new(
3507            mock_client.clone(),
3508            tool_executor,
3509            test_tool_context(),
3510            config,
3511        );
3512        let result = agent.execute(&[], "Run echo hello", None).await.unwrap();
3513
3514        assert_eq!(result.text, "The command output was: hello");
3515        assert_eq!(result.tool_calls_count, 1);
3516        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
3517    }
3518
3519    #[tokio::test]
3520    async fn test_agent_permission_deny() {
3521        let mock_client = Arc::new(MockLlmClient::new(vec![
3522            // First response: tool call that will be denied
3523            MockLlmClient::tool_call_response(
3524                "tool-1",
3525                "bash",
3526                serde_json::json!({"command": "rm -rf /tmp/test"}),
3527            ),
3528            // Second response: LLM responds to the denial
3529            MockLlmClient::text_response(
3530                "I cannot execute that command due to permission restrictions.",
3531            ),
3532        ]));
3533
3534        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3535
3536        // Create permission policy that denies rm commands
3537        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
3538
3539        let config = AgentConfig {
3540            permission_checker: Some(Arc::new(permission_policy)),
3541            ..Default::default()
3542        };
3543
3544        let (tx, mut rx) = mpsc::channel(100);
3545        let agent = AgentLoop::new(
3546            mock_client.clone(),
3547            tool_executor,
3548            test_tool_context(),
3549            config,
3550        );
3551        let result = agent.execute(&[], "Delete files", Some(tx)).await.unwrap();
3552
3553        // Check that we received a PermissionDenied event
3554        let mut found_permission_denied = false;
3555        while let Ok(event) = rx.try_recv() {
3556            if let AgentEvent::PermissionDenied { tool_name, .. } = event {
3557                assert_eq!(tool_name, "bash");
3558                found_permission_denied = true;
3559            }
3560        }
3561        assert!(
3562            found_permission_denied,
3563            "Should have received PermissionDenied event"
3564        );
3565
3566        assert_eq!(result.tool_calls_count, 1);
3567    }
3568
3569    #[tokio::test]
3570    async fn test_agent_permission_allow() {
3571        let mock_client = Arc::new(MockLlmClient::new(vec![
3572            // First response: tool call that will be allowed
3573            MockLlmClient::tool_call_response(
3574                "tool-1",
3575                "bash",
3576                serde_json::json!({"command": "echo hello"}),
3577            ),
3578            // Second response: final text
3579            MockLlmClient::text_response("Done!"),
3580        ]));
3581
3582        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3583
3584        // Create permission policy that allows echo commands
3585        let permission_policy = PermissionPolicy::new()
3586            .allow("bash(echo:*)")
3587            .deny("bash(rm:*)");
3588
3589        let config = AgentConfig {
3590            permission_checker: Some(Arc::new(permission_policy)),
3591            ..Default::default()
3592        };
3593
3594        let agent = AgentLoop::new(
3595            mock_client.clone(),
3596            tool_executor,
3597            test_tool_context(),
3598            config,
3599        );
3600        let result = agent.execute(&[], "Echo hello", None).await.unwrap();
3601
3602        assert_eq!(result.text, "Done!");
3603        assert_eq!(result.tool_calls_count, 1);
3604    }
3605
3606    #[tokio::test]
3607    async fn test_agent_streaming_events() {
3608        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
3609            "Hello!",
3610        )]));
3611
3612        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3613        let config = AgentConfig::default();
3614
3615        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3616        let (mut rx, handle, _cancel_token) = agent.execute_streaming(&[], "Hi").await.unwrap();
3617
3618        // Collect events
3619        let mut events = Vec::new();
3620        while let Some(event) = rx.recv().await {
3621            events.push(event);
3622        }
3623
3624        let result = handle.await.unwrap().unwrap();
3625        assert_eq!(result.text, "Hello!");
3626
3627        // Check we received Start and End events
3628        assert!(events.iter().any(|e| matches!(e, AgentEvent::Start { .. })));
3629        assert!(events.iter().any(|e| matches!(e, AgentEvent::End { .. })));
3630    }
3631
3632    #[tokio::test]
3633    async fn test_agent_max_tool_rounds() {
3634        // Create a mock that always returns tool calls (infinite loop)
3635        let responses: Vec<LlmResponse> = (0..100)
3636            .map(|i| {
3637                MockLlmClient::tool_call_response(
3638                    &format!("tool-{}", i),
3639                    "bash",
3640                    serde_json::json!({"command": "echo loop"}),
3641                )
3642            })
3643            .collect();
3644
3645        let mock_client = Arc::new(MockLlmClient::new(responses));
3646        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3647
3648        let config = AgentConfig {
3649            max_tool_rounds: 3,
3650            ..Default::default()
3651        };
3652
3653        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3654        let result = agent.execute(&[], "Loop forever", None).await;
3655
3656        // Should fail due to max tool rounds exceeded
3657        assert!(result.is_err());
3658        assert!(result.unwrap_err().to_string().contains("Max tool rounds"));
3659    }
3660
3661    #[tokio::test]
3662    async fn test_agent_no_permission_policy_defaults_to_ask() {
3663        // When no permission policy is set, tools default to Ask.
3664        // Without a confirmation manager, Ask = safe deny.
3665        let mock_client = Arc::new(MockLlmClient::new(vec![
3666            MockLlmClient::tool_call_response(
3667                "tool-1",
3668                "bash",
3669                serde_json::json!({"command": "rm -rf /tmp/test"}),
3670            ),
3671            MockLlmClient::text_response("Denied!"),
3672        ]));
3673
3674        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3675        let config = AgentConfig {
3676            permission_checker: None, // No policy → defaults to Ask
3677            // No confirmation_manager → safe deny
3678            ..Default::default()
3679        };
3680
3681        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3682        let result = agent.execute(&[], "Delete", None).await.unwrap();
3683
3684        // Should be denied (no policy + no CM = safe deny)
3685        assert_eq!(result.text, "Denied!");
3686        assert_eq!(result.tool_calls_count, 1);
3687    }
3688
3689    #[tokio::test]
3690    async fn test_agent_permission_ask_without_cm_denies() {
3691        // When permission is Ask and no confirmation manager configured,
3692        // tool execution should be denied (safe default).
3693        let mock_client = Arc::new(MockLlmClient::new(vec![
3694            MockLlmClient::tool_call_response(
3695                "tool-1",
3696                "bash",
3697                serde_json::json!({"command": "echo test"}),
3698            ),
3699            MockLlmClient::text_response("Denied!"),
3700        ]));
3701
3702        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3703
3704        // Create policy where bash falls through to Ask (default)
3705        let permission_policy = PermissionPolicy::new(); // Default decision is Ask
3706
3707        let config = AgentConfig {
3708            permission_checker: Some(Arc::new(permission_policy)),
3709            // No confirmation_manager — safe deny
3710            ..Default::default()
3711        };
3712
3713        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3714        let result = agent.execute(&[], "Echo", None).await.unwrap();
3715
3716        // Should deny (Ask without CM = safe deny)
3717        assert_eq!(result.text, "Denied!");
3718        // The tool result should contain the denial message
3719        assert!(result.tool_calls_count >= 1);
3720    }
3721
3722    // ========================================================================
3723    // HITL (Human-in-the-Loop) Tests
3724    // ========================================================================
3725
3726    #[tokio::test]
3727    async fn test_agent_hitl_approved() {
3728        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3729        use tokio::sync::broadcast;
3730
3731        let mock_client = Arc::new(MockLlmClient::new(vec![
3732            MockLlmClient::tool_call_response(
3733                "tool-1",
3734                "bash",
3735                serde_json::json!({"command": "echo hello"}),
3736            ),
3737            MockLlmClient::text_response("Command executed!"),
3738        ]));
3739
3740        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3741
3742        // Create HITL confirmation manager with policy enabled
3743        let (event_tx, _event_rx) = broadcast::channel(100);
3744        let hitl_policy = ConfirmationPolicy {
3745            enabled: true,
3746            ..Default::default()
3747        };
3748        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3749
3750        // Create permission policy that returns Ask for bash
3751        let permission_policy = PermissionPolicy::new(); // Default is Ask
3752
3753        let config = AgentConfig {
3754            permission_checker: Some(Arc::new(permission_policy)),
3755            confirmation_manager: Some(confirmation_manager.clone()),
3756            ..Default::default()
3757        };
3758
3759        // Spawn a task to approve the confirmation
3760        let cm_clone = confirmation_manager.clone();
3761        tokio::spawn(async move {
3762            // Wait a bit for the confirmation request to be created
3763            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
3764            // Approve it
3765            cm_clone.confirm("tool-1", true, None).await.ok();
3766        });
3767
3768        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3769        let result = agent.execute(&[], "Run echo", None).await.unwrap();
3770
3771        assert_eq!(result.text, "Command executed!");
3772        assert_eq!(result.tool_calls_count, 1);
3773    }
3774
3775    #[tokio::test]
3776    async fn test_agent_hitl_rejected() {
3777        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3778        use tokio::sync::broadcast;
3779
3780        let mock_client = Arc::new(MockLlmClient::new(vec![
3781            MockLlmClient::tool_call_response(
3782                "tool-1",
3783                "bash",
3784                serde_json::json!({"command": "rm -rf /"}),
3785            ),
3786            MockLlmClient::text_response("Understood, I won't do that."),
3787        ]));
3788
3789        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3790
3791        // Create HITL confirmation manager
3792        let (event_tx, _event_rx) = broadcast::channel(100);
3793        let hitl_policy = ConfirmationPolicy {
3794            enabled: true,
3795            ..Default::default()
3796        };
3797        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3798
3799        // Permission policy returns Ask
3800        let permission_policy = PermissionPolicy::new();
3801
3802        let config = AgentConfig {
3803            permission_checker: Some(Arc::new(permission_policy)),
3804            confirmation_manager: Some(confirmation_manager.clone()),
3805            ..Default::default()
3806        };
3807
3808        // Spawn a task to reject the confirmation
3809        let cm_clone = confirmation_manager.clone();
3810        tokio::spawn(async move {
3811            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
3812            cm_clone
3813                .confirm("tool-1", false, Some("Too dangerous".to_string()))
3814                .await
3815                .ok();
3816        });
3817
3818        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3819        let result = agent.execute(&[], "Delete everything", None).await.unwrap();
3820
3821        // LLM should respond to the rejection
3822        assert_eq!(result.text, "Understood, I won't do that.");
3823    }
3824
3825    #[tokio::test]
3826    async fn test_agent_hitl_timeout_reject() {
3827        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
3828        use tokio::sync::broadcast;
3829
3830        let mock_client = Arc::new(MockLlmClient::new(vec![
3831            MockLlmClient::tool_call_response(
3832                "tool-1",
3833                "bash",
3834                serde_json::json!({"command": "echo test"}),
3835            ),
3836            MockLlmClient::text_response("Timed out, I understand."),
3837        ]));
3838
3839        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3840
3841        // Create HITL with very short timeout and Reject action
3842        let (event_tx, _event_rx) = broadcast::channel(100);
3843        let hitl_policy = ConfirmationPolicy {
3844            enabled: true,
3845            default_timeout_ms: 50, // Very short timeout
3846            timeout_action: TimeoutAction::Reject,
3847            ..Default::default()
3848        };
3849        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3850
3851        let permission_policy = PermissionPolicy::new();
3852
3853        let config = AgentConfig {
3854            permission_checker: Some(Arc::new(permission_policy)),
3855            confirmation_manager: Some(confirmation_manager),
3856            ..Default::default()
3857        };
3858
3859        // Don't approve - let it timeout
3860        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3861        let result = agent.execute(&[], "Echo", None).await.unwrap();
3862
3863        // Should get timeout rejection response from LLM
3864        assert_eq!(result.text, "Timed out, I understand.");
3865    }
3866
3867    #[tokio::test]
3868    async fn test_agent_hitl_timeout_auto_approve() {
3869        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
3870        use tokio::sync::broadcast;
3871
3872        let mock_client = Arc::new(MockLlmClient::new(vec![
3873            MockLlmClient::tool_call_response(
3874                "tool-1",
3875                "bash",
3876                serde_json::json!({"command": "echo hello"}),
3877            ),
3878            MockLlmClient::text_response("Auto-approved and executed!"),
3879        ]));
3880
3881        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3882
3883        // Create HITL with very short timeout and AutoApprove action
3884        let (event_tx, _event_rx) = broadcast::channel(100);
3885        let hitl_policy = ConfirmationPolicy {
3886            enabled: true,
3887            default_timeout_ms: 50, // Very short timeout
3888            timeout_action: TimeoutAction::AutoApprove,
3889            ..Default::default()
3890        };
3891        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3892
3893        let permission_policy = PermissionPolicy::new();
3894
3895        let config = AgentConfig {
3896            permission_checker: Some(Arc::new(permission_policy)),
3897            confirmation_manager: Some(confirmation_manager),
3898            ..Default::default()
3899        };
3900
3901        // Don't approve - let it timeout and auto-approve
3902        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3903        let result = agent.execute(&[], "Echo", None).await.unwrap();
3904
3905        // Should auto-approve on timeout and execute
3906        assert_eq!(result.text, "Auto-approved and executed!");
3907        assert_eq!(result.tool_calls_count, 1);
3908    }
3909
3910    #[tokio::test]
3911    async fn test_agent_hitl_confirmation_events() {
3912        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3913        use tokio::sync::broadcast;
3914
3915        let mock_client = Arc::new(MockLlmClient::new(vec![
3916            MockLlmClient::tool_call_response(
3917                "tool-1",
3918                "bash",
3919                serde_json::json!({"command": "echo test"}),
3920            ),
3921            MockLlmClient::text_response("Done!"),
3922        ]));
3923
3924        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3925
3926        // Create HITL confirmation manager
3927        let (event_tx, mut event_rx) = broadcast::channel(100);
3928        let hitl_policy = ConfirmationPolicy {
3929            enabled: true,
3930            default_timeout_ms: 5000, // Long enough timeout
3931            ..Default::default()
3932        };
3933        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3934
3935        let permission_policy = PermissionPolicy::new();
3936
3937        let config = AgentConfig {
3938            permission_checker: Some(Arc::new(permission_policy)),
3939            confirmation_manager: Some(confirmation_manager.clone()),
3940            ..Default::default()
3941        };
3942
3943        // Spawn task to approve and collect events
3944        let cm_clone = confirmation_manager.clone();
3945        let event_handle = tokio::spawn(async move {
3946            let mut events = Vec::new();
3947            // Wait for ConfirmationRequired event
3948            while let Ok(event) = event_rx.recv().await {
3949                events.push(event.clone());
3950                if let AgentEvent::ConfirmationRequired { tool_id, .. } = event {
3951                    // Approve it
3952                    cm_clone.confirm(&tool_id, true, None).await.ok();
3953                    // Wait for ConfirmationReceived
3954                    if let Ok(recv_event) = event_rx.recv().await {
3955                        events.push(recv_event);
3956                    }
3957                    break;
3958                }
3959            }
3960            events
3961        });
3962
3963        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3964        let _result = agent.execute(&[], "Echo", None).await.unwrap();
3965
3966        // Check events
3967        let events = event_handle.await.unwrap();
3968        assert!(
3969            events
3970                .iter()
3971                .any(|e| matches!(e, AgentEvent::ConfirmationRequired { .. })),
3972            "Should have ConfirmationRequired event"
3973        );
3974        assert!(
3975            events
3976                .iter()
3977                .any(|e| matches!(e, AgentEvent::ConfirmationReceived { approved: true, .. })),
3978            "Should have ConfirmationReceived event with approved=true"
3979        );
3980    }
3981
3982    #[tokio::test]
3983    async fn test_agent_hitl_disabled_auto_executes() {
3984        // When HITL is disabled, tools should execute automatically even with Ask permission
3985        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3986        use tokio::sync::broadcast;
3987
3988        let mock_client = Arc::new(MockLlmClient::new(vec![
3989            MockLlmClient::tool_call_response(
3990                "tool-1",
3991                "bash",
3992                serde_json::json!({"command": "echo auto"}),
3993            ),
3994            MockLlmClient::text_response("Auto executed!"),
3995        ]));
3996
3997        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3998
3999        // Create HITL with enabled=false
4000        let (event_tx, _event_rx) = broadcast::channel(100);
4001        let hitl_policy = ConfirmationPolicy {
4002            enabled: false, // HITL disabled
4003            ..Default::default()
4004        };
4005        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4006
4007        let permission_policy = PermissionPolicy::new(); // Default is Ask
4008
4009        let config = AgentConfig {
4010            permission_checker: Some(Arc::new(permission_policy)),
4011            confirmation_manager: Some(confirmation_manager),
4012            ..Default::default()
4013        };
4014
4015        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4016        let result = agent.execute(&[], "Echo", None).await.unwrap();
4017
4018        // Should execute without waiting for confirmation
4019        assert_eq!(result.text, "Auto executed!");
4020        assert_eq!(result.tool_calls_count, 1);
4021    }
4022
4023    #[tokio::test]
4024    async fn test_agent_hitl_with_permission_deny_skips_hitl() {
4025        // When permission is Deny, HITL should not be triggered
4026        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4027        use tokio::sync::broadcast;
4028
4029        let mock_client = Arc::new(MockLlmClient::new(vec![
4030            MockLlmClient::tool_call_response(
4031                "tool-1",
4032                "bash",
4033                serde_json::json!({"command": "rm -rf /"}),
4034            ),
4035            MockLlmClient::text_response("Blocked by permission."),
4036        ]));
4037
4038        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4039
4040        // Create HITL enabled
4041        let (event_tx, mut event_rx) = broadcast::channel(100);
4042        let hitl_policy = ConfirmationPolicy {
4043            enabled: true,
4044            ..Default::default()
4045        };
4046        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4047
4048        // Permission policy denies rm commands
4049        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4050
4051        let config = AgentConfig {
4052            permission_checker: Some(Arc::new(permission_policy)),
4053            confirmation_manager: Some(confirmation_manager),
4054            ..Default::default()
4055        };
4056
4057        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4058        let result = agent.execute(&[], "Delete", None).await.unwrap();
4059
4060        // Should be denied without HITL
4061        assert_eq!(result.text, "Blocked by permission.");
4062
4063        // Should NOT have any ConfirmationRequired events
4064        let mut found_confirmation = false;
4065        while let Ok(event) = event_rx.try_recv() {
4066            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4067                found_confirmation = true;
4068            }
4069        }
4070        assert!(
4071            !found_confirmation,
4072            "HITL should not be triggered when permission is Deny"
4073        );
4074    }
4075
4076    #[tokio::test]
4077    async fn test_agent_hitl_with_permission_allow_skips_hitl() {
4078        // When permission is Allow, HITL confirmation is skipped entirely.
4079        // PermissionPolicy is the declarative rule engine; Allow = execute directly.
4080        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4081        use tokio::sync::broadcast;
4082
4083        let mock_client = Arc::new(MockLlmClient::new(vec![
4084            MockLlmClient::tool_call_response(
4085                "tool-1",
4086                "bash",
4087                serde_json::json!({"command": "echo hello"}),
4088            ),
4089            MockLlmClient::text_response("Allowed!"),
4090        ]));
4091
4092        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4093
4094        // Create HITL enabled
4095        let (event_tx, mut event_rx) = broadcast::channel(100);
4096        let hitl_policy = ConfirmationPolicy {
4097            enabled: true,
4098            ..Default::default()
4099        };
4100        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4101
4102        // Permission policy allows echo commands
4103        let permission_policy = PermissionPolicy::new().allow("bash(echo:*)");
4104
4105        let config = AgentConfig {
4106            permission_checker: Some(Arc::new(permission_policy)),
4107            confirmation_manager: Some(confirmation_manager.clone()),
4108            ..Default::default()
4109        };
4110
4111        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4112        let result = agent.execute(&[], "Echo", None).await.unwrap();
4113
4114        // Should execute directly without HITL (permission Allow skips confirmation)
4115        assert_eq!(result.text, "Allowed!");
4116
4117        // Should NOT have ConfirmationRequired event (Allow bypasses HITL)
4118        let mut found_confirmation = false;
4119        while let Ok(event) = event_rx.try_recv() {
4120            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4121                found_confirmation = true;
4122            }
4123        }
4124        assert!(
4125            !found_confirmation,
4126            "Permission Allow should skip HITL confirmation"
4127        );
4128    }
4129
4130    #[tokio::test]
4131    async fn test_agent_hitl_multiple_tool_calls() {
4132        // Test multiple tool calls in sequence with HITL
4133        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4134        use tokio::sync::broadcast;
4135
4136        let mock_client = Arc::new(MockLlmClient::new(vec![
4137            // First response: two tool calls
4138            LlmResponse {
4139                message: Message {
4140                    role: "assistant".to_string(),
4141                    content: vec![
4142                        ContentBlock::ToolUse {
4143                            id: "tool-1".to_string(),
4144                            name: "bash".to_string(),
4145                            input: serde_json::json!({"command": "echo first"}),
4146                        },
4147                        ContentBlock::ToolUse {
4148                            id: "tool-2".to_string(),
4149                            name: "bash".to_string(),
4150                            input: serde_json::json!({"command": "echo second"}),
4151                        },
4152                    ],
4153                    reasoning_content: None,
4154                },
4155                usage: TokenUsage {
4156                    prompt_tokens: 10,
4157                    completion_tokens: 5,
4158                    total_tokens: 15,
4159                    cache_read_tokens: None,
4160                    cache_write_tokens: None,
4161                },
4162                stop_reason: Some("tool_use".to_string()),
4163                meta: None,
4164            },
4165            MockLlmClient::text_response("Both executed!"),
4166        ]));
4167
4168        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4169
4170        // Create HITL
4171        let (event_tx, _event_rx) = broadcast::channel(100);
4172        let hitl_policy = ConfirmationPolicy {
4173            enabled: true,
4174            default_timeout_ms: 5000,
4175            ..Default::default()
4176        };
4177        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4178
4179        let permission_policy = PermissionPolicy::new(); // Default Ask
4180
4181        let config = AgentConfig {
4182            permission_checker: Some(Arc::new(permission_policy)),
4183            confirmation_manager: Some(confirmation_manager.clone()),
4184            ..Default::default()
4185        };
4186
4187        // Spawn task to approve both tools
4188        let cm_clone = confirmation_manager.clone();
4189        tokio::spawn(async move {
4190            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4191            cm_clone.confirm("tool-1", true, None).await.ok();
4192            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4193            cm_clone.confirm("tool-2", true, None).await.ok();
4194        });
4195
4196        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4197        let result = agent.execute(&[], "Run both", None).await.unwrap();
4198
4199        assert_eq!(result.text, "Both executed!");
4200        assert_eq!(result.tool_calls_count, 2);
4201    }
4202
4203    #[tokio::test]
4204    async fn test_agent_hitl_partial_approval() {
4205        // Test: first tool approved, second rejected
4206        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4207        use tokio::sync::broadcast;
4208
4209        let mock_client = Arc::new(MockLlmClient::new(vec![
4210            // First response: two tool calls
4211            LlmResponse {
4212                message: Message {
4213                    role: "assistant".to_string(),
4214                    content: vec![
4215                        ContentBlock::ToolUse {
4216                            id: "tool-1".to_string(),
4217                            name: "bash".to_string(),
4218                            input: serde_json::json!({"command": "echo safe"}),
4219                        },
4220                        ContentBlock::ToolUse {
4221                            id: "tool-2".to_string(),
4222                            name: "bash".to_string(),
4223                            input: serde_json::json!({"command": "rm -rf /"}),
4224                        },
4225                    ],
4226                    reasoning_content: None,
4227                },
4228                usage: TokenUsage {
4229                    prompt_tokens: 10,
4230                    completion_tokens: 5,
4231                    total_tokens: 15,
4232                    cache_read_tokens: None,
4233                    cache_write_tokens: None,
4234                },
4235                stop_reason: Some("tool_use".to_string()),
4236                meta: None,
4237            },
4238            MockLlmClient::text_response("First worked, second rejected."),
4239        ]));
4240
4241        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4242
4243        let (event_tx, _event_rx) = broadcast::channel(100);
4244        let hitl_policy = ConfirmationPolicy {
4245            enabled: true,
4246            default_timeout_ms: 5000,
4247            ..Default::default()
4248        };
4249        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4250
4251        let permission_policy = PermissionPolicy::new();
4252
4253        let config = AgentConfig {
4254            permission_checker: Some(Arc::new(permission_policy)),
4255            confirmation_manager: Some(confirmation_manager.clone()),
4256            ..Default::default()
4257        };
4258
4259        // Approve first, reject second
4260        let cm_clone = confirmation_manager.clone();
4261        tokio::spawn(async move {
4262            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4263            cm_clone.confirm("tool-1", true, None).await.ok();
4264            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4265            cm_clone
4266                .confirm("tool-2", false, Some("Dangerous".to_string()))
4267                .await
4268                .ok();
4269        });
4270
4271        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4272        let result = agent.execute(&[], "Run both", None).await.unwrap();
4273
4274        assert_eq!(result.text, "First worked, second rejected.");
4275        assert_eq!(result.tool_calls_count, 2);
4276    }
4277
4278    #[tokio::test]
4279    async fn test_agent_hitl_yolo_mode_auto_approves() {
4280        // YOLO mode: specific lanes auto-approve without confirmation
4281        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, SessionLane};
4282        use tokio::sync::broadcast;
4283
4284        let mock_client = Arc::new(MockLlmClient::new(vec![
4285            MockLlmClient::tool_call_response(
4286                "tool-1",
4287                "read", // Query lane tool
4288                serde_json::json!({"path": "/tmp/test.txt"}),
4289            ),
4290            MockLlmClient::text_response("File read!"),
4291        ]));
4292
4293        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4294
4295        // YOLO mode for Query lane (read, glob, ls, grep)
4296        let (event_tx, mut event_rx) = broadcast::channel(100);
4297        let mut yolo_lanes = std::collections::HashSet::new();
4298        yolo_lanes.insert(SessionLane::Query);
4299        let hitl_policy = ConfirmationPolicy {
4300            enabled: true,
4301            yolo_lanes, // Auto-approve query operations
4302            ..Default::default()
4303        };
4304        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4305
4306        let permission_policy = PermissionPolicy::new();
4307
4308        let config = AgentConfig {
4309            permission_checker: Some(Arc::new(permission_policy)),
4310            confirmation_manager: Some(confirmation_manager),
4311            ..Default::default()
4312        };
4313
4314        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4315        let result = agent.execute(&[], "Read file", None).await.unwrap();
4316
4317        // Should auto-execute without confirmation (YOLO mode)
4318        assert_eq!(result.text, "File read!");
4319
4320        // Should NOT have ConfirmationRequired for yolo lane
4321        let mut found_confirmation = false;
4322        while let Ok(event) = event_rx.try_recv() {
4323            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4324                found_confirmation = true;
4325            }
4326        }
4327        assert!(
4328            !found_confirmation,
4329            "YOLO mode should not trigger confirmation"
4330        );
4331    }
4332
4333    #[tokio::test]
4334    async fn test_agent_config_with_all_options() {
4335        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4336        use tokio::sync::broadcast;
4337
4338        let (event_tx, _) = broadcast::channel(100);
4339        let hitl_policy = ConfirmationPolicy::default();
4340        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4341
4342        let permission_policy = PermissionPolicy::new().allow("bash(*)");
4343
4344        let config = AgentConfig {
4345            prompt_slots: SystemPromptSlots {
4346                extra: Some("Test system prompt".to_string()),
4347                ..Default::default()
4348            },
4349            tools: vec![],
4350            max_tool_rounds: 10,
4351            permission_checker: Some(Arc::new(permission_policy)),
4352            confirmation_manager: Some(confirmation_manager),
4353            context_providers: vec![],
4354            planning_mode: PlanningMode::default(),
4355            goal_tracking: false,
4356            hook_engine: None,
4357            skill_registry: None,
4358            ..AgentConfig::default()
4359        };
4360
4361        assert!(config.prompt_slots.build().contains("Test system prompt"));
4362        assert_eq!(config.max_tool_rounds, 10);
4363        assert!(config.permission_checker.is_some());
4364        assert!(config.confirmation_manager.is_some());
4365        assert!(config.context_providers.is_empty());
4366
4367        // Test Debug trait
4368        let debug_str = format!("{:?}", config);
4369        assert!(debug_str.contains("AgentConfig"));
4370        assert!(debug_str.contains("permission_checker: true"));
4371        assert!(debug_str.contains("confirmation_manager: true"));
4372        assert!(debug_str.contains("context_providers: 0"));
4373    }
4374
4375    // ========================================================================
4376    // Context Provider Tests
4377    // ========================================================================
4378
4379    use crate::context::{ContextItem, ContextType};
4380
4381    /// Mock context provider for testing
4382    struct MockContextProvider {
4383        name: String,
4384        items: Vec<ContextItem>,
4385        on_turn_calls: std::sync::Arc<tokio::sync::RwLock<Vec<(String, String, String)>>>,
4386    }
4387
4388    impl MockContextProvider {
4389        fn new(name: &str) -> Self {
4390            Self {
4391                name: name.to_string(),
4392                items: Vec::new(),
4393                on_turn_calls: std::sync::Arc::new(tokio::sync::RwLock::new(Vec::new())),
4394            }
4395        }
4396
4397        fn with_items(mut self, items: Vec<ContextItem>) -> Self {
4398            self.items = items;
4399            self
4400        }
4401    }
4402
4403    #[async_trait::async_trait]
4404    impl ContextProvider for MockContextProvider {
4405        fn name(&self) -> &str {
4406            &self.name
4407        }
4408
4409        async fn query(&self, _query: &ContextQuery) -> anyhow::Result<ContextResult> {
4410            let mut result = ContextResult::new(&self.name);
4411            for item in &self.items {
4412                result.add_item(item.clone());
4413            }
4414            Ok(result)
4415        }
4416
4417        async fn on_turn_complete(
4418            &self,
4419            session_id: &str,
4420            prompt: &str,
4421            response: &str,
4422        ) -> anyhow::Result<()> {
4423            let mut calls = self.on_turn_calls.write().await;
4424            calls.push((
4425                session_id.to_string(),
4426                prompt.to_string(),
4427                response.to_string(),
4428            ));
4429            Ok(())
4430        }
4431    }
4432
4433    #[tokio::test]
4434    async fn test_agent_with_context_provider() {
4435        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4436            "Response using context",
4437        )]));
4438
4439        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4440
4441        let provider =
4442            MockContextProvider::new("test-provider").with_items(vec![ContextItem::new(
4443                "ctx-1",
4444                ContextType::Resource,
4445                "Relevant context here",
4446            )
4447            .with_source("test://docs/example")]);
4448
4449        let config = AgentConfig {
4450            prompt_slots: SystemPromptSlots {
4451                extra: Some("You are helpful.".to_string()),
4452                ..Default::default()
4453            },
4454            context_providers: vec![Arc::new(provider)],
4455            ..Default::default()
4456        };
4457
4458        let agent = AgentLoop::new(
4459            mock_client.clone(),
4460            tool_executor,
4461            test_tool_context(),
4462            config,
4463        );
4464        let result = agent.execute(&[], "What is X?", None).await.unwrap();
4465
4466        assert_eq!(result.text, "Response using context");
4467        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
4468    }
4469
4470    #[tokio::test]
4471    async fn test_agent_context_provider_events() {
4472        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4473            "Answer",
4474        )]));
4475
4476        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4477
4478        let provider =
4479            MockContextProvider::new("event-provider").with_items(vec![ContextItem::new(
4480                "item-1",
4481                ContextType::Memory,
4482                "Memory content",
4483            )
4484            .with_token_count(50)]);
4485
4486        let config = AgentConfig {
4487            context_providers: vec![Arc::new(provider)],
4488            ..Default::default()
4489        };
4490
4491        let (tx, mut rx) = mpsc::channel(100);
4492        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4493        let _result = agent.execute(&[], "Test prompt", Some(tx)).await.unwrap();
4494
4495        // Collect events
4496        let mut events = Vec::new();
4497        while let Ok(event) = rx.try_recv() {
4498            events.push(event);
4499        }
4500
4501        // Should have ContextResolving and ContextResolved events
4502        assert!(
4503            events
4504                .iter()
4505                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
4506            "Should have ContextResolving event"
4507        );
4508        assert!(
4509            events
4510                .iter()
4511                .any(|e| matches!(e, AgentEvent::ContextResolved { .. })),
4512            "Should have ContextResolved event"
4513        );
4514
4515        // Check context resolved values
4516        for event in &events {
4517            if let AgentEvent::ContextResolved {
4518                total_items,
4519                total_tokens,
4520            } = event
4521            {
4522                assert_eq!(*total_items, 1);
4523                assert_eq!(*total_tokens, 50);
4524            }
4525        }
4526    }
4527
4528    #[tokio::test]
4529    async fn test_agent_multiple_context_providers() {
4530        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4531            "Combined response",
4532        )]));
4533
4534        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4535
4536        let provider1 = MockContextProvider::new("provider-1").with_items(vec![ContextItem::new(
4537            "p1-1",
4538            ContextType::Resource,
4539            "Resource from P1",
4540        )
4541        .with_token_count(100)]);
4542
4543        let provider2 = MockContextProvider::new("provider-2").with_items(vec![
4544            ContextItem::new("p2-1", ContextType::Memory, "Memory from P2").with_token_count(50),
4545            ContextItem::new("p2-2", ContextType::Skill, "Skill from P2").with_token_count(75),
4546        ]);
4547
4548        let config = AgentConfig {
4549            prompt_slots: SystemPromptSlots {
4550                extra: Some("Base system prompt.".to_string()),
4551                ..Default::default()
4552            },
4553            context_providers: vec![Arc::new(provider1), Arc::new(provider2)],
4554            ..Default::default()
4555        };
4556
4557        let (tx, mut rx) = mpsc::channel(100);
4558        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4559        let result = agent.execute(&[], "Query", Some(tx)).await.unwrap();
4560
4561        assert_eq!(result.text, "Combined response");
4562
4563        // Check context resolved event has combined totals
4564        while let Ok(event) = rx.try_recv() {
4565            if let AgentEvent::ContextResolved {
4566                total_items,
4567                total_tokens,
4568            } = event
4569            {
4570                assert_eq!(total_items, 3); // 1 + 2
4571                assert_eq!(total_tokens, 225); // 100 + 50 + 75
4572            }
4573        }
4574    }
4575
4576    #[tokio::test]
4577    async fn test_agent_no_context_providers() {
4578        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4579            "No context",
4580        )]));
4581
4582        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4583
4584        // No context providers
4585        let config = AgentConfig::default();
4586
4587        let (tx, mut rx) = mpsc::channel(100);
4588        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4589        let result = agent.execute(&[], "Simple prompt", Some(tx)).await.unwrap();
4590
4591        assert_eq!(result.text, "No context");
4592
4593        // Should NOT have context events when no providers
4594        let mut events = Vec::new();
4595        while let Ok(event) = rx.try_recv() {
4596            events.push(event);
4597        }
4598
4599        assert!(
4600            !events
4601                .iter()
4602                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
4603            "Should NOT have ContextResolving event"
4604        );
4605    }
4606
4607    #[tokio::test]
4608    async fn test_agent_context_on_turn_complete() {
4609        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4610            "Final response",
4611        )]));
4612
4613        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4614
4615        let provider = Arc::new(MockContextProvider::new("memory-provider"));
4616        let on_turn_calls = provider.on_turn_calls.clone();
4617
4618        let config = AgentConfig {
4619            context_providers: vec![provider],
4620            ..Default::default()
4621        };
4622
4623        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4624
4625        // Execute with session ID
4626        let result = agent
4627            .execute_with_session(&[], "User prompt", Some("sess-123"), None, None)
4628            .await
4629            .unwrap();
4630
4631        assert_eq!(result.text, "Final response");
4632
4633        // Check on_turn_complete was called
4634        let calls = on_turn_calls.read().await;
4635        assert_eq!(calls.len(), 1);
4636        assert_eq!(calls[0].0, "sess-123");
4637        assert_eq!(calls[0].1, "User prompt");
4638        assert_eq!(calls[0].2, "Final response");
4639    }
4640
4641    #[tokio::test]
4642    async fn test_agent_context_on_turn_complete_no_session() {
4643        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4644            "Response",
4645        )]));
4646
4647        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4648
4649        let provider = Arc::new(MockContextProvider::new("memory-provider"));
4650        let on_turn_calls = provider.on_turn_calls.clone();
4651
4652        let config = AgentConfig {
4653            context_providers: vec![provider],
4654            ..Default::default()
4655        };
4656
4657        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4658
4659        // Execute without session ID (uses execute() which passes None)
4660        let _result = agent.execute(&[], "Prompt", None).await.unwrap();
4661
4662        // on_turn_complete should NOT be called when session_id is None
4663        let calls = on_turn_calls.read().await;
4664        assert!(calls.is_empty());
4665    }
4666
4667    #[tokio::test]
4668    async fn test_agent_build_augmented_system_prompt() {
4669        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response("OK")]));
4670
4671        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4672
4673        let provider = MockContextProvider::new("test").with_items(vec![ContextItem::new(
4674            "doc-1",
4675            ContextType::Resource,
4676            "Auth uses JWT tokens.",
4677        )
4678        .with_source("viking://docs/auth")]);
4679
4680        let config = AgentConfig {
4681            prompt_slots: SystemPromptSlots {
4682                extra: Some("You are helpful.".to_string()),
4683                ..Default::default()
4684            },
4685            context_providers: vec![Arc::new(provider)],
4686            ..Default::default()
4687        };
4688
4689        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4690
4691        // Test building augmented prompt
4692        let context_results = agent.resolve_context("test", None).await;
4693        let augmented = agent.build_augmented_system_prompt(&context_results);
4694
4695        let augmented_str = augmented.unwrap();
4696        assert!(augmented_str.contains("You are helpful."));
4697        assert!(augmented_str.contains("<context source=\"viking://docs/auth\" type=\"Resource\">"));
4698        assert!(augmented_str.contains("Auth uses JWT tokens."));
4699    }
4700
4701    // ========================================================================
4702    // Agentic Loop Integration Tests
4703    // ========================================================================
4704
4705    /// Helper: collect all events from a channel
4706    async fn collect_events(mut rx: mpsc::Receiver<AgentEvent>) -> Vec<AgentEvent> {
4707        let mut events = Vec::new();
4708        while let Ok(event) = rx.try_recv() {
4709            events.push(event);
4710        }
4711        // Drain remaining
4712        while let Some(event) = rx.recv().await {
4713            events.push(event);
4714        }
4715        events
4716    }
4717
4718    #[tokio::test]
4719    async fn test_agent_multi_turn_tool_chain() {
4720        // LLM calls tool A → sees result → calls tool B → sees result → final answer
4721        let mock_client = Arc::new(MockLlmClient::new(vec![
4722            // Turn 1: call ls
4723            MockLlmClient::tool_call_response(
4724                "t1",
4725                "bash",
4726                serde_json::json!({"command": "echo step1"}),
4727            ),
4728            // Turn 2: call another tool based on first result
4729            MockLlmClient::tool_call_response(
4730                "t2",
4731                "bash",
4732                serde_json::json!({"command": "echo step2"}),
4733            ),
4734            // Turn 3: final answer
4735            MockLlmClient::text_response("Completed both steps: step1 then step2"),
4736        ]));
4737
4738        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4739        let config = AgentConfig::default();
4740
4741        let agent = AgentLoop::new(
4742            mock_client.clone(),
4743            tool_executor,
4744            test_tool_context(),
4745            config,
4746        );
4747        let result = agent.execute(&[], "Run two steps", None).await.unwrap();
4748
4749        assert_eq!(result.text, "Completed both steps: step1 then step2");
4750        assert_eq!(result.tool_calls_count, 2);
4751        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 3);
4752
4753        // Verify message history: user → assistant(tool_use) → user(tool_result) → assistant(tool_use) → user(tool_result) → assistant(text)
4754        assert_eq!(result.messages[0].role, "user");
4755        assert_eq!(result.messages[1].role, "assistant"); // tool call 1
4756        assert_eq!(result.messages[2].role, "user"); // tool result 1 (Anthropic convention)
4757        assert_eq!(result.messages[3].role, "assistant"); // tool call 2
4758        assert_eq!(result.messages[4].role, "user"); // tool result 2
4759        assert_eq!(result.messages[5].role, "assistant"); // final text
4760        assert_eq!(result.messages.len(), 6);
4761    }
4762
4763    #[tokio::test]
4764    async fn test_agent_conversation_history_preserved() {
4765        // Pass existing history, verify it's preserved in output
4766        let existing_history = vec![
4767            Message::user("What is Rust?"),
4768            Message {
4769                role: "assistant".to_string(),
4770                content: vec![ContentBlock::Text {
4771                    text: "Rust is a systems programming language.".to_string(),
4772                }],
4773                reasoning_content: None,
4774            },
4775        ];
4776
4777        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4778            "Rust was created by Graydon Hoare at Mozilla.",
4779        )]));
4780
4781        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4782        let agent = AgentLoop::new(
4783            mock_client.clone(),
4784            tool_executor,
4785            test_tool_context(),
4786            AgentConfig::default(),
4787        );
4788
4789        let result = agent
4790            .execute(&existing_history, "Who created it?", None)
4791            .await
4792            .unwrap();
4793
4794        // History should contain: old user + old assistant + new user + new assistant
4795        assert_eq!(result.messages.len(), 4);
4796        assert_eq!(result.messages[0].text(), "What is Rust?");
4797        assert_eq!(
4798            result.messages[1].text(),
4799            "Rust is a systems programming language."
4800        );
4801        assert_eq!(result.messages[2].text(), "Who created it?");
4802        assert_eq!(
4803            result.messages[3].text(),
4804            "Rust was created by Graydon Hoare at Mozilla."
4805        );
4806    }
4807
4808    #[tokio::test]
4809    async fn test_agent_event_stream_completeness() {
4810        // Verify full event sequence for a single tool call loop
4811        let mock_client = Arc::new(MockLlmClient::new(vec![
4812            MockLlmClient::tool_call_response(
4813                "t1",
4814                "bash",
4815                serde_json::json!({"command": "echo hi"}),
4816            ),
4817            MockLlmClient::text_response("Done"),
4818        ]));
4819
4820        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4821        let agent = AgentLoop::new(
4822            mock_client,
4823            tool_executor,
4824            test_tool_context(),
4825            AgentConfig::default(),
4826        );
4827
4828        let (tx, rx) = mpsc::channel(100);
4829        let result = agent.execute(&[], "Say hi", Some(tx)).await.unwrap();
4830        assert_eq!(result.text, "Done");
4831
4832        let events = collect_events(rx).await;
4833
4834        // Verify event sequence
4835        let event_types: Vec<&str> = events
4836            .iter()
4837            .map(|e| match e {
4838                AgentEvent::Start { .. } => "Start",
4839                AgentEvent::TurnStart { .. } => "TurnStart",
4840                AgentEvent::TurnEnd { .. } => "TurnEnd",
4841                AgentEvent::ToolEnd { .. } => "ToolEnd",
4842                AgentEvent::End { .. } => "End",
4843                _ => "Other",
4844            })
4845            .collect();
4846
4847        // Must start with Start, end with End
4848        assert_eq!(event_types.first(), Some(&"Start"));
4849        assert_eq!(event_types.last(), Some(&"End"));
4850
4851        // Must have 2 TurnStarts (tool call turn + final answer turn)
4852        let turn_starts = event_types.iter().filter(|&&t| t == "TurnStart").count();
4853        assert_eq!(turn_starts, 2);
4854
4855        // Must have 1 ToolEnd
4856        let tool_ends = event_types.iter().filter(|&&t| t == "ToolEnd").count();
4857        assert_eq!(tool_ends, 1);
4858    }
4859
4860    #[tokio::test]
4861    async fn test_agent_multiple_tools_single_turn() {
4862        // LLM returns 2 tool calls in one response
4863        let mock_client = Arc::new(MockLlmClient::new(vec![
4864            LlmResponse {
4865                message: Message {
4866                    role: "assistant".to_string(),
4867                    content: vec![
4868                        ContentBlock::ToolUse {
4869                            id: "t1".to_string(),
4870                            name: "bash".to_string(),
4871                            input: serde_json::json!({"command": "echo first"}),
4872                        },
4873                        ContentBlock::ToolUse {
4874                            id: "t2".to_string(),
4875                            name: "bash".to_string(),
4876                            input: serde_json::json!({"command": "echo second"}),
4877                        },
4878                    ],
4879                    reasoning_content: None,
4880                },
4881                usage: TokenUsage {
4882                    prompt_tokens: 10,
4883                    completion_tokens: 5,
4884                    total_tokens: 15,
4885                    cache_read_tokens: None,
4886                    cache_write_tokens: None,
4887                },
4888                stop_reason: Some("tool_use".to_string()),
4889                meta: None,
4890            },
4891            MockLlmClient::text_response("Both commands ran"),
4892        ]));
4893
4894        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4895        let agent = AgentLoop::new(
4896            mock_client.clone(),
4897            tool_executor,
4898            test_tool_context(),
4899            AgentConfig::default(),
4900        );
4901
4902        let result = agent.execute(&[], "Run both", None).await.unwrap();
4903
4904        assert_eq!(result.text, "Both commands ran");
4905        assert_eq!(result.tool_calls_count, 2);
4906        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2); // Only 2 LLM calls
4907
4908        // Messages: user → assistant(2 tools) → user(tool_result) → user(tool_result) → assistant(text)
4909        assert_eq!(result.messages[0].role, "user");
4910        assert_eq!(result.messages[1].role, "assistant");
4911        assert_eq!(result.messages[2].role, "user"); // tool result 1
4912        assert_eq!(result.messages[3].role, "user"); // tool result 2
4913        assert_eq!(result.messages[4].role, "assistant");
4914    }
4915
4916    #[tokio::test]
4917    async fn test_agent_token_usage_accumulation() {
4918        // Verify usage sums across multiple turns
4919        let mock_client = Arc::new(MockLlmClient::new(vec![
4920            MockLlmClient::tool_call_response(
4921                "t1",
4922                "bash",
4923                serde_json::json!({"command": "echo x"}),
4924            ),
4925            MockLlmClient::text_response("Done"),
4926        ]));
4927
4928        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4929        let agent = AgentLoop::new(
4930            mock_client,
4931            tool_executor,
4932            test_tool_context(),
4933            AgentConfig::default(),
4934        );
4935
4936        let result = agent.execute(&[], "test", None).await.unwrap();
4937
4938        // Each mock response has prompt=10, completion=5, total=15
4939        // 2 LLM calls → 20 prompt, 10 completion, 30 total
4940        assert_eq!(result.usage.prompt_tokens, 20);
4941        assert_eq!(result.usage.completion_tokens, 10);
4942        assert_eq!(result.usage.total_tokens, 30);
4943    }
4944
4945    #[tokio::test]
4946    async fn test_agent_system_prompt_passed() {
4947        // Verify system prompt is used (MockLlmClient captures calls)
4948        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4949            "I am a coding assistant.",
4950        )]));
4951
4952        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4953        let config = AgentConfig {
4954            prompt_slots: SystemPromptSlots {
4955                extra: Some("You are a coding assistant.".to_string()),
4956                ..Default::default()
4957            },
4958            ..Default::default()
4959        };
4960
4961        let agent = AgentLoop::new(
4962            mock_client.clone(),
4963            tool_executor,
4964            test_tool_context(),
4965            config,
4966        );
4967        let result = agent.execute(&[], "What are you?", None).await.unwrap();
4968
4969        assert_eq!(result.text, "I am a coding assistant.");
4970        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
4971    }
4972
4973    #[tokio::test]
4974    async fn test_agent_max_rounds_with_persistent_tool_calls() {
4975        // LLM keeps calling tools forever — should hit max_tool_rounds
4976        let mut responses = Vec::new();
4977        for i in 0..15 {
4978            responses.push(MockLlmClient::tool_call_response(
4979                &format!("t{}", i),
4980                "bash",
4981                serde_json::json!({"command": format!("echo round{}", i)}),
4982            ));
4983        }
4984
4985        let mock_client = Arc::new(MockLlmClient::new(responses));
4986        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4987        let config = AgentConfig {
4988            max_tool_rounds: 5,
4989            ..Default::default()
4990        };
4991
4992        let agent = AgentLoop::new(
4993            mock_client.clone(),
4994            tool_executor,
4995            test_tool_context(),
4996            config,
4997        );
4998        let result = agent.execute(&[], "Loop forever", None).await;
4999
5000        assert!(result.is_err());
5001        let err = result.unwrap_err().to_string();
5002        assert!(err.contains("Max tool rounds (5) exceeded"));
5003    }
5004
5005    #[tokio::test]
5006    async fn test_agent_end_event_contains_final_text() {
5007        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5008            "Final answer here",
5009        )]));
5010
5011        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5012        let agent = AgentLoop::new(
5013            mock_client,
5014            tool_executor,
5015            test_tool_context(),
5016            AgentConfig::default(),
5017        );
5018
5019        let (tx, rx) = mpsc::channel(100);
5020        agent.execute(&[], "test", Some(tx)).await.unwrap();
5021
5022        let events = collect_events(rx).await;
5023        let end_event = events.iter().find(|e| matches!(e, AgentEvent::End { .. }));
5024        assert!(end_event.is_some());
5025
5026        if let AgentEvent::End { text, usage, .. } = end_event.unwrap() {
5027            assert_eq!(text, "Final answer here");
5028            assert_eq!(usage.total_tokens, 15);
5029        }
5030    }
5031}
5032
5033#[cfg(test)]
5034mod extra_agent_tests {
5035    use super::*;
5036    use crate::agent::tests::MockLlmClient;
5037    use crate::queue::SessionQueueConfig;
5038    use crate::tools::ToolExecutor;
5039    use std::path::PathBuf;
5040    use std::sync::atomic::{AtomicUsize, Ordering};
5041
5042    fn test_tool_context() -> ToolContext {
5043        ToolContext::new(PathBuf::from("/tmp"))
5044    }
5045
5046    // ========================================================================
5047    // AgentConfig
5048    // ========================================================================
5049
5050    #[test]
5051    fn test_agent_config_debug() {
5052        let config = AgentConfig {
5053            prompt_slots: SystemPromptSlots {
5054                extra: Some("You are helpful".to_string()),
5055                ..Default::default()
5056            },
5057            tools: vec![],
5058            max_tool_rounds: 10,
5059            permission_checker: None,
5060            confirmation_manager: None,
5061            context_providers: vec![],
5062            planning_mode: PlanningMode::Enabled,
5063            goal_tracking: false,
5064            hook_engine: None,
5065            skill_registry: None,
5066            ..AgentConfig::default()
5067        };
5068        let debug = format!("{:?}", config);
5069        assert!(debug.contains("AgentConfig"));
5070        assert!(debug.contains("planning_mode"));
5071    }
5072
5073    #[test]
5074    fn test_agent_config_default_values() {
5075        let config = AgentConfig::default();
5076        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
5077        assert_eq!(config.planning_mode, PlanningMode::Auto);
5078        assert!(!config.goal_tracking);
5079        assert!(config.context_providers.is_empty());
5080    }
5081
5082    // ========================================================================
5083    // AgentEvent serialization
5084    // ========================================================================
5085
5086    #[test]
5087    fn test_agent_event_serialize_start() {
5088        let event = AgentEvent::Start {
5089            prompt: "Hello".to_string(),
5090        };
5091        let json = serde_json::to_string(&event).unwrap();
5092        assert!(json.contains("agent_start"));
5093        assert!(json.contains("Hello"));
5094    }
5095
5096    #[test]
5097    fn test_agent_event_serialize_text_delta() {
5098        let event = AgentEvent::TextDelta {
5099            text: "chunk".to_string(),
5100        };
5101        let json = serde_json::to_string(&event).unwrap();
5102        assert!(json.contains("text_delta"));
5103    }
5104
5105    #[test]
5106    fn test_agent_event_serialize_tool_start() {
5107        let event = AgentEvent::ToolStart {
5108            id: "t1".to_string(),
5109            name: "bash".to_string(),
5110        };
5111        let json = serde_json::to_string(&event).unwrap();
5112        assert!(json.contains("tool_start"));
5113        assert!(json.contains("bash"));
5114    }
5115
5116    #[test]
5117    fn test_agent_event_serialize_tool_end() {
5118        let event = AgentEvent::ToolEnd {
5119            id: "t1".to_string(),
5120            name: "bash".to_string(),
5121            output: "hello".to_string(),
5122            exit_code: 0,
5123            metadata: None,
5124        };
5125        let json = serde_json::to_string(&event).unwrap();
5126        assert!(json.contains("tool_end"));
5127    }
5128
5129    #[test]
5130    fn test_agent_event_tool_end_has_metadata_field() {
5131        let event = AgentEvent::ToolEnd {
5132            id: "t1".to_string(),
5133            name: "write".to_string(),
5134            output: "Wrote 5 bytes".to_string(),
5135            exit_code: 0,
5136            metadata: Some(
5137                serde_json::json!({ "before": "old", "after": "new", "file_path": "f.txt" }),
5138            ),
5139        };
5140        let json = serde_json::to_string(&event).unwrap();
5141        assert!(json.contains("\"before\""));
5142    }
5143
5144    #[test]
5145    fn test_agent_event_serialize_error() {
5146        let event = AgentEvent::Error {
5147            message: "oops".to_string(),
5148        };
5149        let json = serde_json::to_string(&event).unwrap();
5150        assert!(json.contains("error"));
5151        assert!(json.contains("oops"));
5152    }
5153
5154    #[test]
5155    fn test_agent_event_serialize_confirmation_required() {
5156        let event = AgentEvent::ConfirmationRequired {
5157            tool_id: "t1".to_string(),
5158            tool_name: "bash".to_string(),
5159            args: serde_json::json!({"cmd": "rm"}),
5160            timeout_ms: 30000,
5161        };
5162        let json = serde_json::to_string(&event).unwrap();
5163        assert!(json.contains("confirmation_required"));
5164    }
5165
5166    #[test]
5167    fn test_agent_event_serialize_confirmation_received() {
5168        let event = AgentEvent::ConfirmationReceived {
5169            tool_id: "t1".to_string(),
5170            approved: true,
5171            reason: Some("safe".to_string()),
5172        };
5173        let json = serde_json::to_string(&event).unwrap();
5174        assert!(json.contains("confirmation_received"));
5175    }
5176
5177    #[test]
5178    fn test_agent_event_serialize_confirmation_timeout() {
5179        let event = AgentEvent::ConfirmationTimeout {
5180            tool_id: "t1".to_string(),
5181            action_taken: "rejected".to_string(),
5182        };
5183        let json = serde_json::to_string(&event).unwrap();
5184        assert!(json.contains("confirmation_timeout"));
5185    }
5186
5187    #[test]
5188    fn test_agent_event_serialize_external_task_pending() {
5189        let event = AgentEvent::ExternalTaskPending {
5190            task_id: "task-1".to_string(),
5191            session_id: "sess-1".to_string(),
5192            lane: crate::hitl::SessionLane::Execute,
5193            command_type: "bash".to_string(),
5194            payload: serde_json::json!({}),
5195            timeout_ms: 60000,
5196        };
5197        let json = serde_json::to_string(&event).unwrap();
5198        assert!(json.contains("external_task_pending"));
5199    }
5200
5201    #[test]
5202    fn test_agent_event_serialize_external_task_completed() {
5203        let event = AgentEvent::ExternalTaskCompleted {
5204            task_id: "task-1".to_string(),
5205            session_id: "sess-1".to_string(),
5206            success: false,
5207        };
5208        let json = serde_json::to_string(&event).unwrap();
5209        assert!(json.contains("external_task_completed"));
5210    }
5211
5212    #[test]
5213    fn test_agent_event_serialize_permission_denied() {
5214        let event = AgentEvent::PermissionDenied {
5215            tool_id: "t1".to_string(),
5216            tool_name: "bash".to_string(),
5217            args: serde_json::json!({}),
5218            reason: "denied".to_string(),
5219        };
5220        let json = serde_json::to_string(&event).unwrap();
5221        assert!(json.contains("permission_denied"));
5222    }
5223
5224    #[test]
5225    fn test_agent_event_serialize_context_compacted() {
5226        let event = AgentEvent::ContextCompacted {
5227            session_id: "sess-1".to_string(),
5228            before_messages: 100,
5229            after_messages: 20,
5230            percent_before: 0.85,
5231        };
5232        let json = serde_json::to_string(&event).unwrap();
5233        assert!(json.contains("context_compacted"));
5234    }
5235
5236    #[test]
5237    fn test_agent_event_serialize_turn_start() {
5238        let event = AgentEvent::TurnStart { turn: 3 };
5239        let json = serde_json::to_string(&event).unwrap();
5240        assert!(json.contains("turn_start"));
5241    }
5242
5243    #[test]
5244    fn test_agent_event_serialize_turn_end() {
5245        let event = AgentEvent::TurnEnd {
5246            turn: 3,
5247            usage: TokenUsage::default(),
5248        };
5249        let json = serde_json::to_string(&event).unwrap();
5250        assert!(json.contains("turn_end"));
5251    }
5252
5253    #[test]
5254    fn test_agent_event_serialize_end() {
5255        let event = AgentEvent::End {
5256            text: "Done".to_string(),
5257            usage: TokenUsage {
5258                prompt_tokens: 100,
5259                completion_tokens: 50,
5260                total_tokens: 150,
5261                cache_read_tokens: None,
5262                cache_write_tokens: None,
5263            },
5264            meta: None,
5265        };
5266        let json = serde_json::to_string(&event).unwrap();
5267        assert!(json.contains("agent_end"));
5268    }
5269
5270    // ========================================================================
5271    // AgentResult
5272    // ========================================================================
5273
5274    #[test]
5275    fn test_agent_result_fields() {
5276        let result = AgentResult {
5277            text: "output".to_string(),
5278            messages: vec![Message::user("hello")],
5279            usage: TokenUsage::default(),
5280            tool_calls_count: 3,
5281        };
5282        assert_eq!(result.text, "output");
5283        assert_eq!(result.messages.len(), 1);
5284        assert_eq!(result.tool_calls_count, 3);
5285    }
5286
5287    // ========================================================================
5288    // Missing AgentEvent serialization tests
5289    // ========================================================================
5290
5291    #[test]
5292    fn test_agent_event_serialize_context_resolving() {
5293        let event = AgentEvent::ContextResolving {
5294            providers: vec!["provider1".to_string(), "provider2".to_string()],
5295        };
5296        let json = serde_json::to_string(&event).unwrap();
5297        assert!(json.contains("context_resolving"));
5298        assert!(json.contains("provider1"));
5299    }
5300
5301    #[test]
5302    fn test_agent_event_serialize_context_resolved() {
5303        let event = AgentEvent::ContextResolved {
5304            total_items: 5,
5305            total_tokens: 1000,
5306        };
5307        let json = serde_json::to_string(&event).unwrap();
5308        assert!(json.contains("context_resolved"));
5309        assert!(json.contains("1000"));
5310    }
5311
5312    #[test]
5313    fn test_agent_event_serialize_command_dead_lettered() {
5314        let event = AgentEvent::CommandDeadLettered {
5315            command_id: "cmd-1".to_string(),
5316            command_type: "bash".to_string(),
5317            lane: "execute".to_string(),
5318            error: "timeout".to_string(),
5319            attempts: 3,
5320        };
5321        let json = serde_json::to_string(&event).unwrap();
5322        assert!(json.contains("command_dead_lettered"));
5323        assert!(json.contains("cmd-1"));
5324    }
5325
5326    #[test]
5327    fn test_agent_event_serialize_command_retry() {
5328        let event = AgentEvent::CommandRetry {
5329            command_id: "cmd-2".to_string(),
5330            command_type: "read".to_string(),
5331            lane: "query".to_string(),
5332            attempt: 2,
5333            delay_ms: 1000,
5334        };
5335        let json = serde_json::to_string(&event).unwrap();
5336        assert!(json.contains("command_retry"));
5337        assert!(json.contains("cmd-2"));
5338    }
5339
5340    #[test]
5341    fn test_agent_event_serialize_queue_alert() {
5342        let event = AgentEvent::QueueAlert {
5343            level: "warning".to_string(),
5344            alert_type: "depth".to_string(),
5345            message: "Queue depth exceeded".to_string(),
5346        };
5347        let json = serde_json::to_string(&event).unwrap();
5348        assert!(json.contains("queue_alert"));
5349        assert!(json.contains("warning"));
5350    }
5351
5352    #[test]
5353    fn test_agent_event_serialize_task_updated() {
5354        let event = AgentEvent::TaskUpdated {
5355            session_id: "sess-1".to_string(),
5356            tasks: vec![],
5357        };
5358        let json = serde_json::to_string(&event).unwrap();
5359        assert!(json.contains("task_updated"));
5360        assert!(json.contains("sess-1"));
5361    }
5362
5363    #[test]
5364    fn test_agent_event_serialize_memory_stored() {
5365        let event = AgentEvent::MemoryStored {
5366            memory_id: "mem-1".to_string(),
5367            memory_type: "conversation".to_string(),
5368            importance: 0.8,
5369            tags: vec!["important".to_string()],
5370        };
5371        let json = serde_json::to_string(&event).unwrap();
5372        assert!(json.contains("memory_stored"));
5373        assert!(json.contains("mem-1"));
5374    }
5375
5376    #[test]
5377    fn test_agent_event_serialize_memory_recalled() {
5378        let event = AgentEvent::MemoryRecalled {
5379            memory_id: "mem-2".to_string(),
5380            content: "Previous conversation".to_string(),
5381            relevance: 0.9,
5382        };
5383        let json = serde_json::to_string(&event).unwrap();
5384        assert!(json.contains("memory_recalled"));
5385        assert!(json.contains("mem-2"));
5386    }
5387
5388    #[test]
5389    fn test_agent_event_serialize_memories_searched() {
5390        let event = AgentEvent::MemoriesSearched {
5391            query: Some("search term".to_string()),
5392            tags: vec!["tag1".to_string()],
5393            result_count: 5,
5394        };
5395        let json = serde_json::to_string(&event).unwrap();
5396        assert!(json.contains("memories_searched"));
5397        assert!(json.contains("search term"));
5398    }
5399
5400    #[test]
5401    fn test_agent_event_serialize_memory_cleared() {
5402        let event = AgentEvent::MemoryCleared {
5403            tier: "short_term".to_string(),
5404            count: 10,
5405        };
5406        let json = serde_json::to_string(&event).unwrap();
5407        assert!(json.contains("memory_cleared"));
5408        assert!(json.contains("short_term"));
5409    }
5410
5411    #[test]
5412    fn test_agent_event_serialize_subagent_start() {
5413        let event = AgentEvent::SubagentStart {
5414            task_id: "task-1".to_string(),
5415            session_id: "child-sess".to_string(),
5416            parent_session_id: "parent-sess".to_string(),
5417            agent: "explore".to_string(),
5418            description: "Explore codebase".to_string(),
5419        };
5420        let json = serde_json::to_string(&event).unwrap();
5421        assert!(json.contains("subagent_start"));
5422        assert!(json.contains("explore"));
5423    }
5424
5425    #[test]
5426    fn test_agent_event_serialize_subagent_progress() {
5427        let event = AgentEvent::SubagentProgress {
5428            task_id: "task-1".to_string(),
5429            session_id: "child-sess".to_string(),
5430            status: "processing".to_string(),
5431            metadata: serde_json::json!({"progress": 50}),
5432        };
5433        let json = serde_json::to_string(&event).unwrap();
5434        assert!(json.contains("subagent_progress"));
5435        assert!(json.contains("processing"));
5436    }
5437
5438    #[test]
5439    fn test_agent_event_serialize_subagent_end() {
5440        let event = AgentEvent::SubagentEnd {
5441            task_id: "task-1".to_string(),
5442            session_id: "child-sess".to_string(),
5443            agent: "explore".to_string(),
5444            output: "Found 10 files".to_string(),
5445            success: true,
5446        };
5447        let json = serde_json::to_string(&event).unwrap();
5448        assert!(json.contains("subagent_end"));
5449        assert!(json.contains("Found 10 files"));
5450    }
5451
5452    #[test]
5453    fn test_agent_event_serialize_planning_start() {
5454        let event = AgentEvent::PlanningStart {
5455            prompt: "Build a web app".to_string(),
5456        };
5457        let json = serde_json::to_string(&event).unwrap();
5458        assert!(json.contains("planning_start"));
5459        assert!(json.contains("Build a web app"));
5460    }
5461
5462    #[test]
5463    fn test_agent_event_serialize_planning_end() {
5464        use crate::planning::{Complexity, ExecutionPlan};
5465        let plan = ExecutionPlan::new("Test goal".to_string(), Complexity::Simple);
5466        let event = AgentEvent::PlanningEnd {
5467            plan,
5468            estimated_steps: 3,
5469        };
5470        let json = serde_json::to_string(&event).unwrap();
5471        assert!(json.contains("planning_end"));
5472        assert!(json.contains("estimated_steps"));
5473    }
5474
5475    #[test]
5476    fn test_agent_event_serialize_step_start() {
5477        let event = AgentEvent::StepStart {
5478            step_id: "step-1".to_string(),
5479            description: "Initialize project".to_string(),
5480            step_number: 1,
5481            total_steps: 5,
5482        };
5483        let json = serde_json::to_string(&event).unwrap();
5484        assert!(json.contains("step_start"));
5485        assert!(json.contains("Initialize project"));
5486    }
5487
5488    #[test]
5489    fn test_agent_event_serialize_step_end() {
5490        let event = AgentEvent::StepEnd {
5491            step_id: "step-1".to_string(),
5492            status: TaskStatus::Completed,
5493            step_number: 1,
5494            total_steps: 5,
5495        };
5496        let json = serde_json::to_string(&event).unwrap();
5497        assert!(json.contains("step_end"));
5498        assert!(json.contains("step-1"));
5499    }
5500
5501    #[test]
5502    fn test_agent_event_serialize_goal_extracted() {
5503        use crate::planning::AgentGoal;
5504        let goal = AgentGoal::new("Complete the task".to_string());
5505        let event = AgentEvent::GoalExtracted { goal };
5506        let json = serde_json::to_string(&event).unwrap();
5507        assert!(json.contains("goal_extracted"));
5508    }
5509
5510    #[test]
5511    fn test_agent_event_serialize_goal_progress() {
5512        let event = AgentEvent::GoalProgress {
5513            goal: "Build app".to_string(),
5514            progress: 0.5,
5515            completed_steps: 2,
5516            total_steps: 4,
5517        };
5518        let json = serde_json::to_string(&event).unwrap();
5519        assert!(json.contains("goal_progress"));
5520        assert!(json.contains("0.5"));
5521    }
5522
5523    #[test]
5524    fn test_agent_event_serialize_goal_achieved() {
5525        let event = AgentEvent::GoalAchieved {
5526            goal: "Build app".to_string(),
5527            total_steps: 4,
5528            duration_ms: 5000,
5529        };
5530        let json = serde_json::to_string(&event).unwrap();
5531        assert!(json.contains("goal_achieved"));
5532        assert!(json.contains("5000"));
5533    }
5534
5535    #[tokio::test]
5536    async fn test_extract_goal_with_json_response() {
5537        // LlmPlanner expects JSON with "description" and "success_criteria" fields
5538        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5539            r#"{"description": "Build web app", "success_criteria": ["App runs on port 3000", "Has login page"]}"#,
5540        )]));
5541        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5542        let agent = AgentLoop::new(
5543            mock_client,
5544            tool_executor,
5545            test_tool_context(),
5546            AgentConfig::default(),
5547        );
5548
5549        let goal = agent.extract_goal("Build a web app").await.unwrap();
5550        assert_eq!(goal.description, "Build web app");
5551        assert_eq!(goal.success_criteria.len(), 2);
5552        assert_eq!(goal.success_criteria[0], "App runs on port 3000");
5553    }
5554
5555    #[tokio::test]
5556    async fn test_extract_goal_fallback_on_non_json() {
5557        // Non-JSON response triggers fallback: returns the original prompt as goal
5558        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5559            "Some non-JSON response",
5560        )]));
5561        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5562        let agent = AgentLoop::new(
5563            mock_client,
5564            tool_executor,
5565            test_tool_context(),
5566            AgentConfig::default(),
5567        );
5568
5569        let goal = agent.extract_goal("Do something").await.unwrap();
5570        // Fallback uses the original prompt as description
5571        assert_eq!(goal.description, "Do something");
5572        // Fallback adds 2 generic criteria
5573        assert_eq!(goal.success_criteria.len(), 2);
5574    }
5575
5576    #[tokio::test]
5577    async fn test_check_goal_achievement_json_yes() {
5578        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5579            r#"{"achieved": true, "progress": 1.0, "remaining_criteria": []}"#,
5580        )]));
5581        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5582        let agent = AgentLoop::new(
5583            mock_client,
5584            tool_executor,
5585            test_tool_context(),
5586            AgentConfig::default(),
5587        );
5588
5589        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
5590        let achieved = agent
5591            .check_goal_achievement(&goal, "All done")
5592            .await
5593            .unwrap();
5594        assert!(achieved);
5595    }
5596
5597    #[tokio::test]
5598    async fn test_check_goal_achievement_fallback_not_done() {
5599        // Non-JSON response triggers heuristic fallback
5600        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5601            "invalid json",
5602        )]));
5603        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5604        let agent = AgentLoop::new(
5605            mock_client,
5606            tool_executor,
5607            test_tool_context(),
5608            AgentConfig::default(),
5609        );
5610
5611        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
5612        // "still working" doesn't contain "complete"/"done"/"finished"
5613        let achieved = agent
5614            .check_goal_achievement(&goal, "still working")
5615            .await
5616            .unwrap();
5617        assert!(!achieved);
5618    }
5619
5620    // ========================================================================
5621    // build_augmented_system_prompt Tests
5622    // ========================================================================
5623
5624    #[test]
5625    fn test_build_augmented_system_prompt_empty_context() {
5626        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5627        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5628        let config = AgentConfig {
5629            prompt_slots: SystemPromptSlots {
5630                extra: Some("Base prompt".to_string()),
5631                ..Default::default()
5632            },
5633            ..Default::default()
5634        };
5635        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5636
5637        let result = agent.build_augmented_system_prompt(&[]);
5638        assert!(result.unwrap().contains("Base prompt"));
5639    }
5640
5641    #[test]
5642    fn test_build_augmented_system_prompt_no_custom_slots() {
5643        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5644        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5645        let agent = AgentLoop::new(
5646            mock_client,
5647            tool_executor,
5648            test_tool_context(),
5649            AgentConfig::default(),
5650        );
5651
5652        let result = agent.build_augmented_system_prompt(&[]);
5653        // Default slots still produce the default agentic prompt
5654        assert!(result.is_some());
5655        assert!(result.unwrap().contains("Core Behaviour"));
5656    }
5657
5658    #[test]
5659    fn test_build_augmented_system_prompt_with_context_no_base() {
5660        use crate::context::{ContextItem, ContextResult, ContextType};
5661
5662        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5663        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5664        let agent = AgentLoop::new(
5665            mock_client,
5666            tool_executor,
5667            test_tool_context(),
5668            AgentConfig::default(),
5669        );
5670
5671        let context = vec![ContextResult {
5672            provider: "test".to_string(),
5673            items: vec![ContextItem::new("id1", ContextType::Resource, "Content")],
5674            total_tokens: 10,
5675            truncated: false,
5676        }];
5677
5678        let result = agent.build_augmented_system_prompt(&context);
5679        assert!(result.is_some());
5680        let text = result.unwrap();
5681        assert!(text.contains("<context"));
5682        assert!(text.contains("Content"));
5683    }
5684
5685    // ========================================================================
5686    // AgentResult Clone and Debug
5687    // ========================================================================
5688
5689    #[test]
5690    fn test_agent_result_clone() {
5691        let result = AgentResult {
5692            text: "output".to_string(),
5693            messages: vec![Message::user("hello")],
5694            usage: TokenUsage::default(),
5695            tool_calls_count: 3,
5696        };
5697        let cloned = result.clone();
5698        assert_eq!(cloned.text, result.text);
5699        assert_eq!(cloned.tool_calls_count, result.tool_calls_count);
5700    }
5701
5702    #[test]
5703    fn test_agent_result_debug() {
5704        let result = AgentResult {
5705            text: "output".to_string(),
5706            messages: vec![Message::user("hello")],
5707            usage: TokenUsage::default(),
5708            tool_calls_count: 3,
5709        };
5710        let debug = format!("{:?}", result);
5711        assert!(debug.contains("AgentResult"));
5712        assert!(debug.contains("output"));
5713    }
5714
    // ========================================================================
    // handle_post_execution_metadata Tests (no tests are defined in this section)
    // ========================================================================
5718
5719    // ========================================================================
5720    // ToolCommand adapter tests
5721    // ========================================================================
5722
5723    #[tokio::test]
5724    async fn test_tool_command_command_type() {
5725        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5726        let cmd = ToolCommand {
5727            tool_executor: executor,
5728            tool_name: "read".to_string(),
5729            tool_args: serde_json::json!({"file": "test.rs"}),
5730            skill_registry: None,
5731            tool_context: test_tool_context(),
5732        };
5733        assert_eq!(cmd.command_type(), "read");
5734    }
5735
5736    #[tokio::test]
5737    async fn test_tool_command_payload() {
5738        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5739        let args = serde_json::json!({"file": "test.rs", "offset": 10});
5740        let cmd = ToolCommand {
5741            tool_executor: executor,
5742            tool_name: "read".to_string(),
5743            tool_args: args.clone(),
5744            skill_registry: None,
5745            tool_context: test_tool_context(),
5746        };
5747        assert_eq!(cmd.payload(), args);
5748    }
5749
5750    // ========================================================================
5751    // AgentLoop with queue builder tests
5752    // ========================================================================
5753
5754    #[tokio::test(flavor = "multi_thread")]
5755    async fn test_agent_loop_with_queue() {
5756        use tokio::sync::broadcast;
5757
5758        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5759            "Hello",
5760        )]));
5761        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5762        let config = AgentConfig::default();
5763
5764        let (event_tx, _) = broadcast::channel(100);
5765        let queue = SessionLaneQueue::new("test-session", SessionQueueConfig::default(), event_tx)
5766            .await
5767            .unwrap();
5768
5769        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config)
5770            .with_queue(Arc::new(queue));
5771
5772        assert!(agent.command_queue.is_some());
5773    }
5774
5775    #[tokio::test]
5776    async fn test_agent_loop_without_queue() {
5777        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5778            "Hello",
5779        )]));
5780        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5781        let config = AgentConfig::default();
5782
5783        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5784
5785        assert!(agent.command_queue.is_none());
5786    }
5787
5788    // ========================================================================
5789    // Parallel Plan Execution Tests
5790    // ========================================================================
5791
5792    #[tokio::test]
5793    async fn test_execute_plan_parallel_independent() {
5794        use crate::planning::{Complexity, ExecutionPlan, Task};
5795
5796        // 3 independent steps (no dependencies) — should all execute.
5797        // MockLlmClient needs one response per execute_loop call per step.
5798        let mock_client = Arc::new(MockLlmClient::new(vec![
5799            MockLlmClient::text_response("Step 1 done"),
5800            MockLlmClient::text_response("Step 2 done"),
5801            MockLlmClient::text_response("Step 3 done"),
5802        ]));
5803
5804        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5805        let config = AgentConfig::default();
5806        let agent = AgentLoop::new(
5807            mock_client.clone(),
5808            tool_executor,
5809            test_tool_context(),
5810            config,
5811        );
5812
5813        let mut plan = ExecutionPlan::new("Test parallel", Complexity::Simple);
5814        plan.add_step(Task::new("s1", "First step"));
5815        plan.add_step(Task::new("s2", "Second step"));
5816        plan.add_step(Task::new("s3", "Third step"));
5817
5818        let (tx, mut rx) = mpsc::channel(100);
5819        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
5820
5821        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
5822        assert_eq!(result.usage.total_tokens, 45);
5823
5824        // Verify we received StepStart and StepEnd events for all 3 steps
5825        let mut step_starts = Vec::new();
5826        let mut step_ends = Vec::new();
5827        rx.close();
5828        while let Some(event) = rx.recv().await {
5829            match event {
5830                AgentEvent::StepStart { step_id, .. } => step_starts.push(step_id),
5831                AgentEvent::StepEnd {
5832                    step_id, status, ..
5833                } => {
5834                    assert_eq!(status, TaskStatus::Completed);
5835                    step_ends.push(step_id);
5836                }
5837                _ => {}
5838            }
5839        }
5840        assert_eq!(step_starts.len(), 3);
5841        assert_eq!(step_ends.len(), 3);
5842    }
5843
5844    #[tokio::test]
5845    async fn test_execute_plan_respects_dependencies() {
5846        use crate::planning::{Complexity, ExecutionPlan, Task};
5847
5848        // s1 and s2 are independent (wave 1), s3 depends on both (wave 2).
5849        // This requires 3 responses total.
5850        let mock_client = Arc::new(MockLlmClient::new(vec![
5851            MockLlmClient::text_response("Step 1 done"),
5852            MockLlmClient::text_response("Step 2 done"),
5853            MockLlmClient::text_response("Step 3 done"),
5854        ]));
5855
5856        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5857        let config = AgentConfig::default();
5858        let agent = AgentLoop::new(
5859            mock_client.clone(),
5860            tool_executor,
5861            test_tool_context(),
5862            config,
5863        );
5864
5865        let mut plan = ExecutionPlan::new("Test deps", Complexity::Medium);
5866        plan.add_step(Task::new("s1", "Independent A"));
5867        plan.add_step(Task::new("s2", "Independent B"));
5868        plan.add_step(
5869            Task::new("s3", "Depends on A+B")
5870                .with_dependencies(vec!["s1".to_string(), "s2".to_string()]),
5871        );
5872
5873        let (tx, mut rx) = mpsc::channel(100);
5874        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
5875
5876        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
5877        assert_eq!(result.usage.total_tokens, 45);
5878
5879        // Verify ordering: s3's StepStart must come after s1 and s2's StepEnd
5880        let mut events = Vec::new();
5881        rx.close();
5882        while let Some(event) = rx.recv().await {
5883            match &event {
5884                AgentEvent::StepStart { step_id, .. } => {
5885                    events.push(format!("start:{}", step_id));
5886                }
5887                AgentEvent::StepEnd { step_id, .. } => {
5888                    events.push(format!("end:{}", step_id));
5889                }
5890                _ => {}
5891            }
5892        }
5893
5894        // s3 start must occur after both s1 end and s2 end
5895        let s1_end = events.iter().position(|e| e == "end:s1").unwrap();
5896        let s2_end = events.iter().position(|e| e == "end:s2").unwrap();
5897        let s3_start = events.iter().position(|e| e == "start:s3").unwrap();
5898        assert!(
5899            s3_start > s1_end,
5900            "s3 started before s1 ended: {:?}",
5901            events
5902        );
5903        assert!(
5904            s3_start > s2_end,
5905            "s3 started before s2 ended: {:?}",
5906            events
5907        );
5908
5909        // Final result should reflect step 3 (last sequential step)
5910        assert!(result.text.contains("Step 3 done") || !result.text.is_empty());
5911    }
5912
5913    #[tokio::test]
5914    async fn test_execute_plan_handles_step_failure() {
5915        use crate::planning::{Complexity, ExecutionPlan, Task};
5916
5917        // s1 succeeds, s2 depends on s1 (succeeds), s3 depends on nothing (succeeds),
5918        // s4 depends on a step that will fail (s_fail).
5919        // We simulate failure by providing no responses for s_fail's execute_loop.
5920        //
5921        // Simpler approach: s1 succeeds, s2 depends on s1 (will fail because no
5922        // mock response left), s3 is independent.
5923        // Layout: s1 (independent), s3 (independent) → wave 1 parallel
5924        //         s2 depends on s1 → wave 2
5925        //         s4 depends on s2 → wave 3 (should deadlock since s2 fails)
5926        let mock_client = Arc::new(MockLlmClient::new(vec![
5927            // Wave 1: s1 and s3 execute in parallel
5928            MockLlmClient::text_response("s1 done"),
5929            MockLlmClient::text_response("s3 done"),
5930            // Wave 2: s2 executes — but we give it no response, causing failure
5931            // Actually the MockLlmClient will fail with "No more mock responses"
5932        ]));
5933
5934        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5935        let config = AgentConfig::default();
5936        let agent = AgentLoop::new(
5937            mock_client.clone(),
5938            tool_executor,
5939            test_tool_context(),
5940            config,
5941        );
5942
5943        let mut plan = ExecutionPlan::new("Test failure", Complexity::Medium);
5944        plan.add_step(Task::new("s1", "Independent step"));
5945        plan.add_step(Task::new("s2", "Depends on s1").with_dependencies(vec!["s1".to_string()]));
5946        plan.add_step(Task::new("s3", "Another independent"));
5947        plan.add_step(Task::new("s4", "Depends on s2").with_dependencies(vec!["s2".to_string()]));
5948
5949        let (tx, mut rx) = mpsc::channel(100);
5950        let _result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
5951
5952        // s1 and s3 should succeed (wave 1), s2 should fail (wave 2),
5953        // s4 should never execute (deadlock — dep s2 failed, not completed)
5954        let mut completed_steps = Vec::new();
5955        let mut failed_steps = Vec::new();
5956        rx.close();
5957        while let Some(event) = rx.recv().await {
5958            if let AgentEvent::StepEnd {
5959                step_id, status, ..
5960            } = event
5961            {
5962                match status {
5963                    TaskStatus::Completed => completed_steps.push(step_id),
5964                    TaskStatus::Failed => failed_steps.push(step_id),
5965                    _ => {}
5966                }
5967            }
5968        }
5969
5970        assert!(
5971            completed_steps.contains(&"s1".to_string()),
5972            "s1 should complete"
5973        );
5974        assert!(
5975            completed_steps.contains(&"s3".to_string()),
5976            "s3 should complete"
5977        );
5978        assert!(failed_steps.contains(&"s2".to_string()), "s2 should fail");
5979        // s4 should NOT appear in either list — it was never started
5980        assert!(
5981            !completed_steps.contains(&"s4".to_string()),
5982            "s4 should not complete"
5983        );
5984        assert!(
5985            !failed_steps.contains(&"s4".to_string()),
5986            "s4 should not fail (never started)"
5987        );
5988    }
5989
5990    // ========================================================================
5991    // Phase 4: Error Recovery & Resilience Tests
5992    // ========================================================================
5993
5994    #[test]
5995    fn test_agent_config_resilience_defaults() {
5996        let config = AgentConfig::default();
5997        assert_eq!(config.max_parse_retries, 2);
5998        assert_eq!(config.tool_timeout_ms, None);
5999        assert_eq!(config.circuit_breaker_threshold, 3);
6000    }
6001
6002    /// 4.1 — Parse error recovery: bails after max_parse_retries exceeded
6003    #[tokio::test]
6004    async fn test_parse_error_recovery_bails_after_threshold() {
6005        // 3 parse errors with max_parse_retries=2: count reaches 3 > 2 → bail
6006        let mock_client = Arc::new(MockLlmClient::new(vec![
6007            MockLlmClient::tool_call_response(
6008                "c1",
6009                "bash",
6010                serde_json::json!({"__parse_error": "unexpected token at position 5"}),
6011            ),
6012            MockLlmClient::tool_call_response(
6013                "c2",
6014                "bash",
6015                serde_json::json!({"__parse_error": "missing closing brace"}),
6016            ),
6017            MockLlmClient::tool_call_response(
6018                "c3",
6019                "bash",
6020                serde_json::json!({"__parse_error": "still broken"}),
6021            ),
6022            MockLlmClient::text_response("Done"), // never reached
6023        ]));
6024
6025        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6026        let config = AgentConfig {
6027            max_parse_retries: 2,
6028            ..AgentConfig::default()
6029        };
6030        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6031        let result = agent.execute(&[], "Do something", None).await;
6032        assert!(result.is_err(), "should bail after parse error threshold");
6033        let err = result.unwrap_err().to_string();
6034        assert!(
6035            err.contains("malformed tool arguments"),
6036            "error should mention malformed tool arguments, got: {}",
6037            err
6038        );
6039    }
6040
6041    /// 4.1 — Parse error recovery: counter resets after a valid tool execution
6042    #[tokio::test]
6043    async fn test_parse_error_counter_resets_on_success() {
6044        // 2 parse errors (= max_parse_retries, not yet exceeded)
6045        // Then a valid tool call (resets counter)
6046        // Then final text — should NOT bail
6047        let mock_client = Arc::new(MockLlmClient::new(vec![
6048            MockLlmClient::tool_call_response(
6049                "c1",
6050                "bash",
6051                serde_json::json!({"__parse_error": "bad args"}),
6052            ),
6053            MockLlmClient::tool_call_response(
6054                "c2",
6055                "bash",
6056                serde_json::json!({"__parse_error": "bad args again"}),
6057            ),
6058            // Valid call — resets parse_error_count to 0
6059            MockLlmClient::tool_call_response(
6060                "c3",
6061                "bash",
6062                serde_json::json!({"command": "echo ok"}),
6063            ),
6064            MockLlmClient::text_response("All done"),
6065        ]));
6066
6067        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6068        let config = AgentConfig {
6069            max_parse_retries: 2,
6070            ..AgentConfig::default()
6071        };
6072        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6073        let result = agent.execute(&[], "Do something", None).await;
6074        assert!(
6075            result.is_ok(),
6076            "should not bail — counter reset after successful tool, got: {:?}",
6077            result.err()
6078        );
6079        assert_eq!(result.unwrap().text, "All done");
6080    }
6081
6082    /// 4.2 — Tool timeout: slow tool produces a timeout error result; session continues
6083    #[tokio::test]
6084    async fn test_tool_timeout_produces_error_result() {
6085        let mock_client = Arc::new(MockLlmClient::new(vec![
6086            MockLlmClient::tool_call_response(
6087                "t1",
6088                "bash",
6089                serde_json::json!({"command": "sleep 10"}),
6090            ),
6091            MockLlmClient::text_response("The command timed out."),
6092        ]));
6093
6094        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6095        let config = AgentConfig {
6096            // 50ms — sleep 10 will never finish
6097            tool_timeout_ms: Some(50),
6098            ..AgentConfig::default()
6099        };
6100        let agent = AgentLoop::new(
6101            mock_client.clone(),
6102            tool_executor,
6103            test_tool_context(),
6104            config,
6105        );
6106        let result = agent.execute(&[], "Run sleep", None).await;
6107        assert!(
6108            result.is_ok(),
6109            "session should continue after tool timeout: {:?}",
6110            result.err()
6111        );
6112        assert_eq!(result.unwrap().text, "The command timed out.");
6113        // LLM called twice: initial request + response after timeout error
6114        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
6115    }
6116
6117    /// 4.2 — Tool timeout: tool that finishes before the deadline succeeds normally
6118    #[tokio::test]
6119    async fn test_tool_within_timeout_succeeds() {
6120        let mock_client = Arc::new(MockLlmClient::new(vec![
6121            MockLlmClient::tool_call_response(
6122                "t1",
6123                "bash",
6124                serde_json::json!({"command": "echo fast"}),
6125            ),
6126            MockLlmClient::text_response("Command succeeded."),
6127        ]));
6128
6129        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6130        let config = AgentConfig {
6131            tool_timeout_ms: Some(5_000), // 5 s — echo completes in <100ms
6132            ..AgentConfig::default()
6133        };
6134        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6135        let result = agent.execute(&[], "Run something fast", None).await;
6136        assert!(
6137            result.is_ok(),
6138            "fast tool should succeed: {:?}",
6139            result.err()
6140        );
6141        assert_eq!(result.unwrap().text, "Command succeeded.");
6142    }
6143
6144    /// 4.3 — Circuit breaker: retries non-streaming LLM failures up to threshold
6145    #[tokio::test]
6146    async fn test_circuit_breaker_retries_non_streaming() {
6147        // Empty response list → every call bails with "No more mock responses"
6148        // threshold=2 → tries twice, then bails with circuit-breaker message
6149        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6150
6151        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6152        let config = AgentConfig {
6153            circuit_breaker_threshold: 2,
6154            ..AgentConfig::default()
6155        };
6156        let agent = AgentLoop::new(
6157            mock_client.clone(),
6158            tool_executor,
6159            test_tool_context(),
6160            config,
6161        );
6162        let result = agent.execute(&[], "Hello", None).await;
6163        assert!(result.is_err(), "should fail when LLM always errors");
6164        let err = result.unwrap_err().to_string();
6165        assert!(
6166            err.contains("circuit breaker"),
6167            "error should mention circuit breaker, got: {}",
6168            err
6169        );
6170        assert_eq!(
6171            mock_client.call_count.load(Ordering::SeqCst),
6172            2,
6173            "should make exactly threshold=2 LLM calls"
6174        );
6175    }
6176
6177    /// 4.3 — Circuit breaker: threshold=1 bails on the very first failure
6178    #[tokio::test]
6179    async fn test_circuit_breaker_threshold_one_no_retry() {
6180        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6181
6182        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6183        let config = AgentConfig {
6184            circuit_breaker_threshold: 1,
6185            ..AgentConfig::default()
6186        };
6187        let agent = AgentLoop::new(
6188            mock_client.clone(),
6189            tool_executor,
6190            test_tool_context(),
6191            config,
6192        );
6193        let result = agent.execute(&[], "Hello", None).await;
6194        assert!(result.is_err());
6195        assert_eq!(
6196            mock_client.call_count.load(Ordering::SeqCst),
6197            1,
6198            "with threshold=1 exactly one attempt should be made"
6199        );
6200    }
6201
6202    /// 4.3 — Circuit breaker: succeeds when LLM recovers before hitting threshold
6203    #[tokio::test]
6204    async fn test_circuit_breaker_succeeds_if_llm_recovers() {
6205        // First call fails, second call succeeds; threshold=3 — recovery within threshold
6206        struct FailOnceThenSucceed {
6207            inner: MockLlmClient,
6208            failed_once: std::sync::atomic::AtomicBool,
6209            call_count: AtomicUsize,
6210        }
6211
6212        #[async_trait::async_trait]
6213        impl LlmClient for FailOnceThenSucceed {
6214            async fn complete(
6215                &self,
6216                messages: &[Message],
6217                system: Option<&str>,
6218                tools: &[ToolDefinition],
6219            ) -> Result<LlmResponse> {
6220                self.call_count.fetch_add(1, Ordering::SeqCst);
6221                let already_failed = self
6222                    .failed_once
6223                    .swap(true, std::sync::atomic::Ordering::SeqCst);
6224                if !already_failed {
6225                    anyhow::bail!("transient network error");
6226                }
6227                self.inner.complete(messages, system, tools).await
6228            }
6229
6230            async fn complete_streaming(
6231                &self,
6232                messages: &[Message],
6233                system: Option<&str>,
6234                tools: &[ToolDefinition],
6235            ) -> Result<tokio::sync::mpsc::Receiver<crate::llm::StreamEvent>> {
6236                self.inner.complete_streaming(messages, system, tools).await
6237            }
6238        }
6239
6240        let mock = Arc::new(FailOnceThenSucceed {
6241            inner: MockLlmClient::new(vec![MockLlmClient::text_response("Recovered!")]),
6242            failed_once: std::sync::atomic::AtomicBool::new(false),
6243            call_count: AtomicUsize::new(0),
6244        });
6245
6246        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6247        let config = AgentConfig {
6248            circuit_breaker_threshold: 3,
6249            ..AgentConfig::default()
6250        };
6251        let agent = AgentLoop::new(mock.clone(), tool_executor, test_tool_context(), config);
6252        let result = agent.execute(&[], "Hello", None).await;
6253        assert!(
6254            result.is_ok(),
6255            "should succeed when LLM recovers within threshold: {:?}",
6256            result.err()
6257        );
6258        assert_eq!(result.unwrap().text, "Recovered!");
6259        assert_eq!(
6260            mock.call_count.load(Ordering::SeqCst),
6261            2,
6262            "should have made exactly 2 calls (1 fail + 1 success)"
6263        );
6264    }
6265
6266    // ── Continuation detection tests ─────────────────────────────────────────
6267
6268    #[test]
6269    fn test_looks_incomplete_empty() {
6270        assert!(AgentLoop::looks_incomplete(""));
6271        assert!(AgentLoop::looks_incomplete("   "));
6272    }
6273
6274    #[test]
6275    fn test_looks_incomplete_trailing_colon() {
6276        assert!(AgentLoop::looks_incomplete("Let me check the file:"));
6277        assert!(AgentLoop::looks_incomplete("Next steps:"));
6278    }
6279
6280    #[test]
6281    fn test_looks_incomplete_ellipsis() {
6282        assert!(AgentLoop::looks_incomplete("Working on it..."));
6283        assert!(AgentLoop::looks_incomplete("Processing…"));
6284    }
6285
6286    #[test]
6287    fn test_looks_incomplete_intent_phrases() {
6288        assert!(AgentLoop::looks_incomplete(
6289            "I'll start by reading the file."
6290        ));
6291        assert!(AgentLoop::looks_incomplete(
6292            "Let me check the configuration."
6293        ));
6294        assert!(AgentLoop::looks_incomplete("I will now run the tests."));
6295        assert!(AgentLoop::looks_incomplete(
6296            "I need to update the Cargo.toml."
6297        ));
6298    }
6299
6300    #[test]
6301    fn test_looks_complete_final_answer() {
6302        // Clear final answers should NOT trigger continuation
6303        assert!(!AgentLoop::looks_incomplete(
6304            "The tests pass. All changes have been applied successfully."
6305        ));
6306        assert!(!AgentLoop::looks_incomplete(
6307            "Done. I've updated the three files and verified the build succeeds."
6308        ));
6309        assert!(!AgentLoop::looks_incomplete("42"));
6310        assert!(!AgentLoop::looks_incomplete("Yes."));
6311    }
6312
6313    #[test]
6314    fn test_looks_incomplete_multiline_complete() {
6315        let text = "Here is the summary:\n\n- Fixed the bug in agent.rs\n- All tests pass\n- Build succeeds";
6316        assert!(!AgentLoop::looks_incomplete(text));
6317    }
6318}