Skip to main content

a3s_code_core/
agent.rs

1//! Agent Loop Implementation
2//!
3//! The agent loop handles the core conversation cycle:
4//! 1. User sends a prompt
5//! 2. LLM generates a response (possibly with tool calls)
6//! 3. If tool calls present, execute them and send results back
7//! 4. Repeat until LLM returns without tool calls
8//!
//! This gives the LLM agentic behavior: it can call tools on its own,
//! round after round, to accomplish a task.
11
12use crate::context::{ContextProvider, ContextQuery, ContextResult};
13use crate::hitl::ConfirmationProvider;
14use crate::hooks::{
15    ErrorType, GenerateEndEvent, GenerateStartEvent, HookEvent, HookExecutor, HookResult,
16    OnErrorEvent, PostResponseEvent, PostToolUseEvent, PrePromptEvent, PreToolUseEvent,
17    TokenUsageInfo, ToolCallInfo, ToolResultData,
18};
19use crate::llm::{LlmClient, LlmResponse, Message, TokenUsage, ToolDefinition};
20use crate::permissions::{PermissionChecker, PermissionDecision};
21use crate::planning::{AgentGoal, ExecutionPlan, TaskStatus};
22use crate::prompts::{AgentStyle, DetectionConfidence, PlanningMode, SystemPromptSlots};
23use crate::queue::SessionCommand;
24use crate::session_lane_queue::SessionLaneQueue;
25use crate::tool_search::ToolIndex;
26use crate::tools::{ToolContext, ToolExecutor, ToolStreamEvent};
27use anyhow::{Context, Result};
28use async_trait::async_trait;
29use futures::future::join_all;
30use serde::{Deserialize, Serialize};
31use serde_json::Value;
32use std::sync::Arc;
33use std::time::Duration;
34use tokio::sync::{mpsc, RwLock};
35
/// Maximum number of tool execution rounds before stopping.
///
/// Used as the initial value of `AgentConfig::max_tool_rounds` by
/// `AgentConfig::default()`.
const MAX_TOOL_ROUNDS: usize = 50;
38
/// Agent configuration
///
/// Bundles the prompt customization, tool list, safety limits and optional
/// collaborators (security, permissions, HITL confirmation, hooks, skills,
/// memory, ...) for an agent loop. `AgentConfig::default()` supplies the
/// defaults quoted on the individual fields.
#[derive(Clone)]
pub struct AgentConfig {
    /// Slot-based system prompt customization.
    ///
    /// Users can customize specific parts (role, guidelines, response style, extra)
    /// without overriding the core agentic capabilities. The default agentic core
    /// (tool usage, autonomous behavior, completion criteria) is always preserved.
    pub prompt_slots: SystemPromptSlots,
    /// Tool definitions configured for this agent.
    ///
    /// Empty by default; the default constructor notes that tools are
    /// provided by the `ToolExecutor`.
    pub tools: Vec<ToolDefinition>,
    /// Maximum number of tool execution rounds (default: `MAX_TOOL_ROUNDS`, i.e. 50).
    pub max_tool_rounds: usize,
    /// Optional security provider for input taint tracking and output sanitization
    pub security_provider: Option<Arc<dyn crate::security::SecurityProvider>>,
    /// Optional permission checker for tool execution control
    pub permission_checker: Option<Arc<dyn PermissionChecker>>,
    /// Optional confirmation manager for HITL (Human-in-the-Loop)
    pub confirmation_manager: Option<Arc<dyn ConfirmationProvider>>,
    /// Context providers for augmenting prompts with external context
    pub context_providers: Vec<Arc<dyn ContextProvider>>,
    /// Planning mode — Auto (detect from message), Enabled, or Disabled.
    ///
    /// Defaults to `PlanningMode::default()`.
    pub planning_mode: PlanningMode,
    /// Enable goal tracking (default: `false`).
    pub goal_tracking: bool,
    /// Optional hook engine for firing lifecycle events (PreToolUse, PostToolUse, etc.)
    pub hook_engine: Option<Arc<dyn HookExecutor>>,
    /// Optional skill registry for tool permission enforcement
    ///
    /// The default configuration installs `SkillRegistry::with_builtins()`.
    pub skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    /// Max consecutive malformed-tool-args errors before aborting (default: 2).
    ///
    /// When the LLM returns tool arguments with `__parse_error`, the error is
    /// fed back as a tool result. After this many consecutive parse errors the
    /// loop bails instead of retrying indefinitely.
    pub max_parse_retries: u32,
    /// Per-tool execution timeout in milliseconds (`None` = no timeout).
    ///
    /// When set, each tool execution is wrapped in `tokio::time::timeout`.
    /// A timeout produces an error result sent back to the LLM rather than
    /// crashing the session. Default: `None`.
    pub tool_timeout_ms: Option<u64>,
    /// Circuit-breaker threshold: max consecutive LLM API failures before
    /// aborting (default: 3).
    ///
    /// In non-streaming mode, transient LLM failures are retried up to this
    /// many times (with short exponential backoff) before the loop bails.
    /// In streaming mode, any failure is fatal (events cannot be replayed).
    pub circuit_breaker_threshold: u32,
    /// Max consecutive identical tool signatures before aborting (default: 3).
    ///
    /// A tool signature is the exact combination of tool name + compact JSON
    /// arguments. This prevents the agent from getting stuck repeating the same
    /// tool call in a loop, for example repeatedly fetching the same URL.
    pub duplicate_tool_call_threshold: u32,
    /// Enable auto-compaction when context usage exceeds threshold
    /// (default: `false`).
    pub auto_compact: bool,
    /// Context usage percentage threshold to trigger auto-compaction (0.0 - 1.0).
    /// Default: 0.80 (80%).
    pub auto_compact_threshold: f32,
    /// Maximum context window size in tokens (used for auto-compact calculation).
    /// Default: 200_000.
    pub max_context_tokens: usize,
    /// LLM client reference for auto-compaction (needs to call LLM for summarization).
    /// Default: `None`.
    pub llm_client: Option<Arc<dyn LlmClient>>,
    /// Optional agent memory for auto-remember after tool execution and recall before prompts.
    pub memory: Option<Arc<crate::memory::AgentMemory>>,
    /// Inject a continuation message when the LLM stops calling tools before the
    /// task is complete. Enabled by default. Set to `false` to disable.
    ///
    /// When enabled, if the LLM produces a response with no tool calls but the
    /// response text looks like an intermediate step (not a final answer), the
    /// loop injects [`crate::prompts::CONTINUATION`] as a user message and
    /// continues for up to `max_continuation_turns` additional turns.
    pub continuation_enabled: bool,
    /// Maximum number of continuation injections per execution (default: 3).
    ///
    /// Prevents infinite loops when the LLM repeatedly stops without completing.
    pub max_continuation_turns: u32,
    /// Optional tool search index for filtering tools per-turn.
    ///
    /// When set, only tools matching the user prompt are sent to the LLM,
    /// reducing context usage when many MCP tools are registered.
    pub tool_index: Option<ToolIndex>,
}
121
122impl std::fmt::Debug for AgentConfig {
123    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124        f.debug_struct("AgentConfig")
125            .field("prompt_slots", &self.prompt_slots)
126            .field("tools", &self.tools)
127            .field("max_tool_rounds", &self.max_tool_rounds)
128            .field("security_provider", &self.security_provider.is_some())
129            .field("permission_checker", &self.permission_checker.is_some())
130            .field("confirmation_manager", &self.confirmation_manager.is_some())
131            .field("context_providers", &self.context_providers.len())
132            .field("planning_mode", &self.planning_mode)
133            .field("goal_tracking", &self.goal_tracking)
134            .field("hook_engine", &self.hook_engine.is_some())
135            .field(
136                "skill_registry",
137                &self.skill_registry.as_ref().map(|r| r.len()),
138            )
139            .field("max_parse_retries", &self.max_parse_retries)
140            .field("tool_timeout_ms", &self.tool_timeout_ms)
141            .field("circuit_breaker_threshold", &self.circuit_breaker_threshold)
142            .field(
143                "duplicate_tool_call_threshold",
144                &self.duplicate_tool_call_threshold,
145            )
146            .field("auto_compact", &self.auto_compact)
147            .field("auto_compact_threshold", &self.auto_compact_threshold)
148            .field("max_context_tokens", &self.max_context_tokens)
149            .field("continuation_enabled", &self.continuation_enabled)
150            .field("max_continuation_turns", &self.max_continuation_turns)
151            .field("memory", &self.memory.is_some())
152            .field("tool_index", &self.tool_index.as_ref().map(|i| i.len()))
153            .finish()
154    }
155}
156
157impl Default for AgentConfig {
158    fn default() -> Self {
159        Self {
160            prompt_slots: SystemPromptSlots::default(),
161            tools: Vec::new(), // Tools are provided by ToolExecutor
162            max_tool_rounds: MAX_TOOL_ROUNDS,
163            security_provider: None,
164            permission_checker: None,
165            confirmation_manager: None,
166            context_providers: Vec::new(),
167            planning_mode: PlanningMode::default(),
168            goal_tracking: false,
169            hook_engine: None,
170            skill_registry: Some(Arc::new(crate::skills::SkillRegistry::with_builtins())),
171            max_parse_retries: 2,
172            tool_timeout_ms: None,
173            circuit_breaker_threshold: 3,
174            duplicate_tool_call_threshold: 3,
175            auto_compact: false,
176            auto_compact_threshold: 0.80,
177            max_context_tokens: 200_000,
178            llm_client: None,
179            memory: None,
180            continuation_enabled: true,
181            max_continuation_turns: 3,
182            tool_index: None,
183        }
184    }
185}
186
/// Events emitted during agent execution
///
/// Subscribe via [`Session::subscribe_events()`](crate::session::Session::subscribe_events).
/// New variants may be added in minor releases — always include a wildcard arm
/// (`_ => {}`) when matching.
///
/// # Serialized form
///
/// The enum is internally tagged (`#[serde(tag = "type")]`): each serialized
/// event carries a `type` entry holding the variant's renamed identifier
/// (for example `"agent_start"` for [`AgentEvent::Start`]), alongside the
/// variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum AgentEvent {
    /// Agent started processing
    #[serde(rename = "agent_start")]
    Start { prompt: String },

    /// LLM turn started
    #[serde(rename = "turn_start")]
    TurnStart { turn: usize },

    /// Text delta from streaming
    #[serde(rename = "text_delta")]
    TextDelta { text: String },

    /// Tool execution started
    #[serde(rename = "tool_start")]
    ToolStart { id: String, name: String },

    /// Tool input delta from streaming (partial JSON arguments)
    #[serde(rename = "tool_input_delta")]
    ToolInputDelta { delta: String },

    /// Tool execution completed
    #[serde(rename = "tool_end")]
    ToolEnd {
        id: String,
        name: String,
        output: String,
        exit_code: i32,
        /// Optional tool metadata; omitted from the serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        metadata: Option<serde_json::Value>,
    },

    /// Intermediate tool output (streaming delta)
    #[serde(rename = "tool_output_delta")]
    ToolOutputDelta {
        id: String,
        name: String,
        delta: String,
    },

    /// LLM turn completed
    #[serde(rename = "turn_end")]
    TurnEnd { turn: usize, usage: TokenUsage },

    /// Agent completed
    #[serde(rename = "agent_end")]
    End {
        text: String,
        usage: TokenUsage,
        /// Optional LLM response metadata; omitted from the serialized form
        /// when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        meta: Option<crate::llm::LlmResponseMeta>,
    },

    /// Error occurred
    #[serde(rename = "error")]
    Error { message: String },

    /// Tool execution requires confirmation (HITL)
    #[serde(rename = "confirmation_required")]
    ConfirmationRequired {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        timeout_ms: u64,
    },

    /// Confirmation received from user (HITL)
    #[serde(rename = "confirmation_received")]
    ConfirmationReceived {
        tool_id: String,
        approved: bool,
        // NOTE(review): unlike `metadata`/`meta` above, this option has no
        // `skip_serializing_if`, so `None` is serialized explicitly.
        reason: Option<String>,
    },

    /// Confirmation timed out (HITL)
    #[serde(rename = "confirmation_timeout")]
    ConfirmationTimeout {
        tool_id: String,
        action_taken: String, // "rejected" or "auto_approved"
    },

    /// External task pending (needs SDK processing)
    #[serde(rename = "external_task_pending")]
    ExternalTaskPending {
        task_id: String,
        session_id: String,
        lane: crate::hitl::SessionLane,
        command_type: String,
        payload: serde_json::Value,
        timeout_ms: u64,
    },

    /// External task completed
    #[serde(rename = "external_task_completed")]
    ExternalTaskCompleted {
        task_id: String,
        session_id: String,
        success: bool,
    },

    /// Tool execution denied by permission policy
    #[serde(rename = "permission_denied")]
    PermissionDenied {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        reason: String,
    },

    /// Context resolution started
    #[serde(rename = "context_resolving")]
    ContextResolving { providers: Vec<String> },

    /// Context resolution completed
    #[serde(rename = "context_resolved")]
    ContextResolved {
        total_items: usize,
        total_tokens: usize,
    },

    // ========================================================================
    // a3s-lane integration events
    // ========================================================================
    /// Command moved to dead letter queue after exhausting retries
    #[serde(rename = "command_dead_lettered")]
    CommandDeadLettered {
        command_id: String,
        command_type: String,
        lane: String,
        error: String,
        attempts: u32,
    },

    /// Command retry attempt
    #[serde(rename = "command_retry")]
    CommandRetry {
        command_id: String,
        command_type: String,
        lane: String,
        attempt: u32,
        delay_ms: u64,
    },

    /// Queue alert (depth warning, latency alert, etc.)
    #[serde(rename = "queue_alert")]
    QueueAlert {
        level: String,
        alert_type: String,
        message: String,
    },

    // ========================================================================
    // Task tracking events
    // ========================================================================
    /// Task list updated
    #[serde(rename = "task_updated")]
    TaskUpdated {
        session_id: String,
        tasks: Vec<crate::planning::Task>,
    },

    // ========================================================================
    // Memory System events (Phase 3)
    // ========================================================================
    /// Memory stored
    #[serde(rename = "memory_stored")]
    MemoryStored {
        memory_id: String,
        memory_type: String,
        importance: f32,
        tags: Vec<String>,
    },

    /// Memory recalled
    #[serde(rename = "memory_recalled")]
    MemoryRecalled {
        memory_id: String,
        content: String,
        relevance: f32,
    },

    /// Memories searched
    #[serde(rename = "memories_searched")]
    MemoriesSearched {
        query: Option<String>,
        tags: Vec<String>,
        result_count: usize,
    },

    /// Memory cleared
    #[serde(rename = "memory_cleared")]
    MemoryCleared {
        tier: String, // "long_term", "short_term", "working"
        count: u64,
    },

    // ========================================================================
    // Subagent events
    // ========================================================================
    /// Subagent task started
    #[serde(rename = "subagent_start")]
    SubagentStart {
        /// Unique task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Parent session ID
        parent_session_id: String,
        /// Agent type (e.g., "explore", "general")
        agent: String,
        /// Short description of the task
        description: String,
    },

    /// Subagent task progress update
    #[serde(rename = "subagent_progress")]
    SubagentProgress {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Progress status message
        status: String,
        /// Additional metadata
        metadata: serde_json::Value,
    },

    /// Subagent task completed
    #[serde(rename = "subagent_end")]
    SubagentEnd {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Agent type
        agent: String,
        /// Task output/result
        output: String,
        /// Whether the task succeeded
        success: bool,
    },

    // ========================================================================
    // Planning and Goal Tracking Events (Phase 1)
    // ========================================================================
    /// Planning phase started
    #[serde(rename = "planning_start")]
    PlanningStart { prompt: String },

    /// Planning phase completed
    #[serde(rename = "planning_end")]
    PlanningEnd {
        plan: ExecutionPlan,
        estimated_steps: usize,
    },

    /// Step execution started
    #[serde(rename = "step_start")]
    StepStart {
        step_id: String,
        description: String,
        step_number: usize,
        total_steps: usize,
    },

    /// Step execution completed
    #[serde(rename = "step_end")]
    StepEnd {
        step_id: String,
        status: TaskStatus,
        step_number: usize,
        total_steps: usize,
    },

    /// Goal extracted from prompt
    #[serde(rename = "goal_extracted")]
    GoalExtracted { goal: AgentGoal },

    /// Goal progress update
    #[serde(rename = "goal_progress")]
    GoalProgress {
        goal: String,
        progress: f32,
        completed_steps: usize,
        total_steps: usize,
    },

    /// Goal achieved
    #[serde(rename = "goal_achieved")]
    GoalAchieved {
        goal: String,
        total_steps: usize,
        duration_ms: i64,
    },

    // ========================================================================
    // Context Compaction events
    // ========================================================================
    /// Context automatically compacted due to high usage
    #[serde(rename = "context_compacted")]
    ContextCompacted {
        session_id: String,
        before_messages: usize,
        after_messages: usize,
        percent_before: f32,
    },

    // ========================================================================
    // Persistence events
    // ========================================================================
    /// Session persistence failed — SDK clients should handle this
    #[serde(rename = "persistence_failed")]
    PersistenceFailed {
        session_id: String,
        operation: String,
        error: String,
    },

    // ========================================================================
    // Side question (btw)
    // ========================================================================
    /// Ephemeral side question answered.
    ///
    /// Emitted by [`crate::agent_api::AgentSession::btw()`] in streaming mode.
    /// The answer is never added to conversation history.
    #[serde(rename = "btw_answer")]
    BtwAnswer {
        question: String,
        answer: String,
        usage: TokenUsage,
    },
}
527
/// Result of agent execution
///
/// NOTE(review): the code that populates this struct is outside the visible
/// part of the file; the field notes below are inferred from names and types
/// and should be confirmed against the loop's return site.
#[derive(Debug, Clone)]
pub struct AgentResult {
    /// Response text — presumably the final assistant text; TODO confirm.
    pub text: String,
    /// Conversation messages — presumably the full history after the run; TODO confirm.
    pub messages: Vec<Message>,
    /// Token usage reported for the execution.
    pub usage: TokenUsage,
    /// Number of tool calls counted during the execution.
    pub tool_calls_count: usize,
}
536
537// ============================================================================
538// ToolCommand — bridges ToolExecutor to SessionCommand for queue submission
539// ============================================================================
540
/// Adapter that implements `SessionCommand` for tool execution via the queue.
///
/// Wraps a `ToolExecutor` call so it can be submitted to `SessionLaneQueue`.
pub struct ToolCommand {
    /// Executor that actually runs the tool.
    tool_executor: Arc<ToolExecutor>,
    /// Name of the tool to run; also reported as the command type.
    tool_name: String,
    /// JSON arguments passed to the tool; also reported as the command payload.
    tool_args: Value,
    /// Context handed to the executor alongside the name and arguments.
    tool_context: ToolContext,
    /// Optional skill registry consulted for tool restrictions before executing.
    skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
}
551
552impl ToolCommand {
553    /// Create a new ToolCommand
554    pub fn new(
555        tool_executor: Arc<ToolExecutor>,
556        tool_name: String,
557        tool_args: Value,
558        tool_context: ToolContext,
559        skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
560    ) -> Self {
561        Self {
562            tool_executor,
563            tool_name,
564            tool_args,
565            tool_context,
566            skill_registry,
567        }
568    }
569}
570
571#[async_trait]
572impl SessionCommand for ToolCommand {
573    async fn execute(&self) -> Result<Value> {
574        // Check skill-based tool permissions
575        if let Some(registry) = &self.skill_registry {
576            let instruction_skills = registry.by_kind(crate::skills::SkillKind::Instruction);
577
578            // If there are instruction skills with tool restrictions, check permissions
579            let has_restrictions = instruction_skills.iter().any(|s| s.allowed_tools.is_some());
580
581            if has_restrictions {
582                let mut allowed = false;
583
584                for skill in &instruction_skills {
585                    if skill.is_tool_allowed(&self.tool_name) {
586                        allowed = true;
587                        break;
588                    }
589                }
590
591                if !allowed {
592                    return Err(anyhow::anyhow!(
593                        "Tool '{}' is not allowed by any active skill. Active skills restrict tools to their allowed-tools lists.",
594                        self.tool_name
595                    ));
596                }
597            }
598        }
599
600        // Execute the tool
601        let result = self
602            .tool_executor
603            .execute_with_context(&self.tool_name, &self.tool_args, &self.tool_context)
604            .await?;
605        Ok(serde_json::json!({
606            "output": result.output,
607            "exit_code": result.exit_code,
608            "metadata": result.metadata,
609        }))
610    }
611
612    fn command_type(&self) -> &str {
613        &self.tool_name
614    }
615
616    fn payload(&self) -> Value {
617        self.tool_args.clone()
618    }
619}
620
621// ============================================================================
622// AgentLoop
623// ============================================================================
624
/// Agent loop executor
///
/// Built with [`AgentLoop::new`] from the four required parts; the optional
/// parts start as `None` and are attached with the `with_*` builder methods.
#[derive(Clone)]
pub struct AgentLoop {
    /// LLM client used by the loop.
    llm_client: Arc<dyn LlmClient>,
    /// Executor that runs tools (see `execute_tool_timed`).
    tool_executor: Arc<ToolExecutor>,
    /// Tool context stored for this loop.
    tool_context: ToolContext,
    /// Loop configuration (e.g. `tool_timeout_ms` is read when executing tools).
    config: AgentConfig,
    /// Optional per-session tool metrics collector
    tool_metrics: Option<Arc<RwLock<crate::telemetry::ToolMetrics>>>,
    /// Optional lane queue for priority-based tool execution
    command_queue: Option<Arc<SessionLaneQueue>>,
    /// Optional progress tracker for real-time tool/token usage tracking
    progress_tracker: Option<Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>>,
    /// Optional task manager for centralized task lifecycle tracking
    task_manager: Option<Arc<crate::task::TaskManager>>,
}
641
642impl AgentLoop {
643    pub fn new(
644        llm_client: Arc<dyn LlmClient>,
645        tool_executor: Arc<ToolExecutor>,
646        tool_context: ToolContext,
647        config: AgentConfig,
648    ) -> Self {
649        Self {
650            llm_client,
651            tool_executor,
652            tool_context,
653            config,
654            tool_metrics: None,
655            command_queue: None,
656            progress_tracker: None,
657            task_manager: None,
658        }
659    }
660
661    /// Set the progress tracker for real-time tool/token usage tracking.
662    pub fn with_progress_tracker(
663        mut self,
664        tracker: Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>,
665    ) -> Self {
666        self.progress_tracker = Some(tracker);
667        self
668    }
669
670    /// Set the task manager for centralized task lifecycle tracking.
671    pub fn with_task_manager(mut self, manager: Arc<crate::task::TaskManager>) -> Self {
672        self.task_manager = Some(manager);
673        self
674    }
675
676    /// Set the tool metrics collector for this agent loop
677    pub fn with_tool_metrics(
678        mut self,
679        metrics: Arc<RwLock<crate::telemetry::ToolMetrics>>,
680    ) -> Self {
681        self.tool_metrics = Some(metrics);
682        self
683    }
684
685    /// Set the lane queue for priority-based tool execution.
686    ///
687    /// When set, tools are routed through the lane queue which supports
688    /// External task handling for multi-machine parallel processing.
689    pub fn with_queue(mut self, queue: Arc<SessionLaneQueue>) -> Self {
690        self.command_queue = Some(queue);
691        self
692    }
693
694    /// Track a tool call result in the progress tracker.
695    fn track_tool_result(&self, tool_name: &str, args: &serde_json::Value, exit_code: i32) {
696        if let Some(ref tracker) = self.progress_tracker {
697            let args_summary = Self::compact_json_args(args);
698            let success = exit_code == 0;
699            if let Ok(mut guard) = tracker.try_write() {
700                guard.track_tool_call(tool_name, args_summary, success);
701            }
702        }
703    }
704
705    /// Compact JSON arguments to a short summary string for tracking.
706    fn compact_json_args(args: &serde_json::Value) -> String {
707        let raw = match args {
708            serde_json::Value::Null => String::new(),
709            serde_json::Value::String(s) => s.clone(),
710            _ => serde_json::to_string(args).unwrap_or_default(),
711        };
712        let compact = raw.split_whitespace().collect::<Vec<_>>().join(" ");
713        if compact.len() > 180 {
714            format!("{}...", &compact[..180])
715        } else {
716            compact
717        }
718    }
719
720    /// Execute a tool, applying the configured timeout if set.
721    ///
722    /// On timeout, returns an error describing which tool timed out and after
723    /// how many milliseconds. The caller converts this to a tool-result error
724    /// message that is fed back to the LLM.
725    async fn execute_tool_timed(
726        &self,
727        name: &str,
728        args: &serde_json::Value,
729        ctx: &ToolContext,
730    ) -> anyhow::Result<crate::tools::ToolResult> {
731        let fut = self.tool_executor.execute_with_context(name, args, ctx);
732        if let Some(timeout_ms) = self.config.tool_timeout_ms {
733            match tokio::time::timeout(Duration::from_millis(timeout_ms), fut).await {
734                Ok(result) => result,
735                Err(_) => Err(anyhow::anyhow!(
736                    "Tool '{}' timed out after {}ms",
737                    name,
738                    timeout_ms
739                )),
740            }
741        } else {
742            fut.await
743        }
744    }
745
746    /// Convert a tool execution result into the (output, exit_code, is_error, metadata, images) tuple.
747    fn tool_result_to_tuple(
748        result: anyhow::Result<crate::tools::ToolResult>,
749    ) -> (
750        String,
751        i32,
752        bool,
753        Option<serde_json::Value>,
754        Vec<crate::llm::Attachment>,
755    ) {
756        match result {
757            Ok(r) => (
758                r.output,
759                r.exit_code,
760                r.exit_code != 0,
761                r.metadata,
762                r.images,
763            ),
764            Err(e) => {
765                let msg = e.to_string();
766                // Classify the error so the LLM knows whether retrying makes sense.
767                let hint = if Self::is_transient_error(&msg) {
768                    " [transient — you may retry this tool call]"
769                } else {
770                    " [permanent — do not retry without changing the arguments]"
771                };
772                (
773                    format!("Tool execution error: {}{}", msg, hint),
774                    1,
775                    true,
776                    None,
777                    Vec::new(),
778                )
779            }
780        }
781    }
782
783    /// Inspect the workspace for well-known project marker files and return a short
784    /// `## Project Context` section that the agent can use without any manual configuration.
785    /// Returns an empty string when the workspace type cannot be determined.
786    fn detect_project_hint(workspace: &std::path::Path) -> String {
787        struct Marker {
788            file: &'static str,
789            lang: &'static str,
790            tip: &'static str,
791        }
792
793        let markers = [
794            Marker {
795                file: "Cargo.toml",
796                lang: "Rust",
797                tip: "Use `cargo build`, `cargo test`, `cargo clippy`, and `cargo fmt`. \
798                  Prefer `anyhow` / `thiserror` for error handling. \
799                  Follow the Microsoft Rust Guidelines (no panics in library code, \
800                  async-first with Tokio).",
801            },
802            Marker {
803                file: "package.json",
804                lang: "Node.js / TypeScript",
805                tip: "Check `package.json` for the package manager (npm/yarn/pnpm/bun) \
806                  and available scripts. Prefer TypeScript with strict mode. \
807                  Use ESM imports unless the project is CommonJS.",
808            },
809            Marker {
810                file: "pyproject.toml",
811                lang: "Python",
812                tip: "Use the package manager declared in `pyproject.toml` \
813                  (uv, poetry, hatch, etc.). Prefer type hints and async/await for I/O.",
814            },
815            Marker {
816                file: "setup.py",
817                lang: "Python",
818                tip: "Legacy Python project. Prefer type hints and async/await for I/O.",
819            },
820            Marker {
821                file: "requirements.txt",
822                lang: "Python",
823                tip: "Python project with pip-style dependencies. \
824                  Prefer type hints and async/await for I/O.",
825            },
826            Marker {
827                file: "go.mod",
828                lang: "Go",
829                tip: "Use `go build ./...` and `go test ./...`. \
830                  Follow standard Go project layout. Use `gofmt` for formatting.",
831            },
832            Marker {
833                file: "pom.xml",
834                lang: "Java / Maven",
835                tip: "Use `mvn compile`, `mvn test`, `mvn package`. \
836                  Follow standard Maven project structure.",
837            },
838            Marker {
839                file: "build.gradle",
840                lang: "Java / Gradle",
841                tip: "Use `./gradlew build` and `./gradlew test`. \
842                  Follow standard Gradle project structure.",
843            },
844            Marker {
845                file: "build.gradle.kts",
846                lang: "Kotlin / Gradle",
847                tip: "Use `./gradlew build` and `./gradlew test`. \
848                  Prefer Kotlin coroutines for async work.",
849            },
850            Marker {
851                file: "CMakeLists.txt",
852                lang: "C / C++",
853                tip: "Use `cmake -B build && cmake --build build`. \
854                  Check for `compile_commands.json` for IDE tooling.",
855            },
856            Marker {
857                file: "Makefile",
858                lang: "C / C++ (or generic)",
859                tip: "Use `make` or `make <target>`. \
860                  Check available targets with `make help` or by reading the Makefile.",
861            },
862        ];
863
864        // Check for C# / .NET — no single fixed filename, so glob for *.csproj / *.sln
865        let is_dotnet = workspace.join("*.csproj").exists() || {
866            // Fast check: look for any .csproj or .sln in the workspace root
867            std::fs::read_dir(workspace)
868                .map(|entries| {
869                    entries.flatten().any(|e| {
870                        let name = e.file_name();
871                        let s = name.to_string_lossy();
872                        s.ends_with(".csproj") || s.ends_with(".sln")
873                    })
874                })
875                .unwrap_or(false)
876        };
877
878        if is_dotnet {
879            return "## Project Context\n\nThis is a **C# / .NET** project. \
880             Use `dotnet build`, `dotnet test`, and `dotnet run`. \
881             Follow C# coding conventions and async/await patterns."
882                .to_string();
883        }
884
885        for marker in &markers {
886            if workspace.join(marker.file).exists() {
887                return format!(
888                    "## Project Context\n\nThis is a **{}** project. {}",
889                    marker.lang, marker.tip
890                );
891            }
892        }
893
894        String::new()
895    }
896
897    /// Returns `true` for errors that are likely transient (network, timeout, I/O contention).
898    /// Used to annotate tool error messages so the LLM knows whether retrying is safe.
899    fn is_transient_error(msg: &str) -> bool {
900        let lower = msg.to_lowercase();
901        lower.contains("timeout")
902        || lower.contains("timed out")
903        || lower.contains("connection refused")
904        || lower.contains("connection reset")
905        || lower.contains("broken pipe")
906        || lower.contains("temporarily unavailable")
907        || lower.contains("resource temporarily unavailable")
908        || lower.contains("os error 11")  // EAGAIN
909        || lower.contains("os error 35")  // EAGAIN on macOS
910        || lower.contains("rate limit")
911        || lower.contains("too many requests")
912        || lower.contains("service unavailable")
913        || lower.contains("network unreachable")
914    }
915
916    /// Returns `true` when a tool writes a file and is safe to run concurrently with other
917    /// independent writes (no ordering dependencies, no side-channel output).
918    fn is_parallel_safe_write(name: &str, _args: &serde_json::Value) -> bool {
919        matches!(
920            name,
921            "write_file" | "edit_file" | "create_file" | "append_to_file" | "replace_in_file"
922        )
923    }
924
925    /// Extract the target file path from write-tool arguments so we can check for conflicts.
926    fn extract_write_path(args: &serde_json::Value) -> Option<String> {
927        // write_file / create_file / append_to_file / replace_in_file use "path"
928        // edit_file uses "path" as well
929        args.get("path")
930            .and_then(|v| v.as_str())
931            .map(|s| s.to_string())
932    }
933
934    /// Execute a tool through the lane queue (if configured) or directly.
935    /// Wraps execution in task lifecycle if task_manager is configured.
936    async fn execute_tool_queued_or_direct(
937        &self,
938        name: &str,
939        args: &serde_json::Value,
940        ctx: &ToolContext,
941    ) -> anyhow::Result<crate::tools::ToolResult> {
942        // Create task for this tool execution if task_manager is available
943        let task_id = if let Some(ref tm) = self.task_manager {
944            let task = crate::task::Task::tool(name, args.clone());
945            let id = task.id;
946            tm.spawn(task);
947            // Start the task immediately
948            let _ = tm.start(id);
949            Some(id)
950        } else {
951            None
952        };
953
954        let result = self
955            .execute_tool_queued_or_direct_inner(name, args, ctx)
956            .await;
957
958        // Complete or fail the task based on result
959        if let Some(ref tm) = self.task_manager {
960            if let Some(tid) = task_id {
961                match &result {
962                    Ok(r) => {
963                        let output = serde_json::json!({
964                            "output": r.output.clone(),
965                            "exit_code": r.exit_code,
966                        });
967                        let _ = tm.complete(tid, Some(output));
968                    }
969                    Err(e) => {
970                        let _ = tm.fail(tid, e.to_string());
971                    }
972                }
973            }
974        }
975
976        result
977    }
978
979    /// Inner execution without task lifecycle wrapping.
980    async fn execute_tool_queued_or_direct_inner(
981        &self,
982        name: &str,
983        args: &serde_json::Value,
984        ctx: &ToolContext,
985    ) -> anyhow::Result<crate::tools::ToolResult> {
986        if let Some(ref queue) = self.command_queue {
987            let command = ToolCommand::new(
988                Arc::clone(&self.tool_executor),
989                name.to_string(),
990                args.clone(),
991                ctx.clone(),
992                self.config.skill_registry.clone(),
993            );
994            let rx = queue.submit_by_tool(name, Box::new(command)).await;
995            match rx.await {
996                Ok(Ok(value)) => {
997                    let output = value["output"]
998                        .as_str()
999                        .ok_or_else(|| {
1000                            anyhow::anyhow!(
1001                                "Queue result missing 'output' field for tool '{}'",
1002                                name
1003                            )
1004                        })?
1005                        .to_string();
1006                    let exit_code = value["exit_code"].as_i64().unwrap_or(0) as i32;
1007                    return Ok(crate::tools::ToolResult {
1008                        name: name.to_string(),
1009                        output,
1010                        exit_code,
1011                        metadata: None,
1012                        images: Vec::new(),
1013                    });
1014                }
1015                Ok(Err(e)) => {
1016                    tracing::warn!(
1017                        "Queue execution failed for tool '{}', falling back to direct: {}",
1018                        name,
1019                        e
1020                    );
1021                }
1022                Err(_) => {
1023                    tracing::warn!(
1024                        "Queue channel closed for tool '{}', falling back to direct",
1025                        name
1026                    );
1027                }
1028            }
1029        }
1030        self.execute_tool_timed(name, args, ctx).await
1031    }
1032
1033    /// Call the LLM, handling streaming vs non-streaming internally.
1034    ///
1035    /// Streaming events (`TextDelta`, `ToolStart`) are forwarded to `event_tx`
1036    /// as they arrive. Non-streaming mode simply awaits the complete response.
1037    ///
1038    /// When a `ToolIndex` is configured, tools are filtered per-turn based on
1039    /// the last user message, reducing context usage with large tool sets.
1040    ///
1041    /// Returns `Err` on any LLM API failure. The circuit breaker in
1042    /// `execute_loop` wraps this call with retry logic for non-streaming mode.
1043    async fn call_llm(
1044        &self,
1045        messages: &[Message],
1046        system: Option<&str>,
1047        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1048        cancel_token: &tokio_util::sync::CancellationToken,
1049    ) -> anyhow::Result<LlmResponse> {
1050        // Filter tools through ToolIndex if configured
1051        let tools = if let Some(ref index) = self.config.tool_index {
1052            let query = messages
1053                .iter()
1054                .rev()
1055                .find(|m| m.role == "user")
1056                .and_then(|m| {
1057                    m.content.iter().find_map(|b| match b {
1058                        crate::llm::ContentBlock::Text { text } => Some(text.as_str()),
1059                        _ => None,
1060                    })
1061                })
1062                .unwrap_or("");
1063            let matches = index.search(query, index.len());
1064            let matched_names: std::collections::HashSet<&str> =
1065                matches.iter().map(|m| m.name.as_str()).collect();
1066            self.config
1067                .tools
1068                .iter()
1069                .filter(|t| matched_names.contains(t.name.as_str()))
1070                .cloned()
1071                .collect::<Vec<_>>()
1072        } else {
1073            self.config.tools.clone()
1074        };
1075
1076        if event_tx.is_some() {
1077            let mut stream_rx = match self
1078                .llm_client
1079                .complete_streaming(messages, system, &tools)
1080                .await
1081            {
1082                Ok(rx) => rx,
1083                Err(stream_error) => {
1084                    tracing::warn!(
1085                        error = %stream_error,
1086                        "LLM streaming setup failed; falling back to non-streaming completion"
1087                    );
1088                    return self
1089                        .llm_client
1090                        .complete(messages, system, &tools)
1091                        .await
1092                        .with_context(|| {
1093                            format!(
1094                                "LLM streaming call failed ({stream_error}); non-streaming fallback also failed"
1095                            )
1096                        });
1097                }
1098            };
1099
1100            let mut final_response: Option<LlmResponse> = None;
1101            loop {
1102                tokio::select! {
1103                    _ = cancel_token.cancelled() => {
1104                        tracing::info!("🛑 LLM streaming cancelled by CancellationToken");
1105                        anyhow::bail!("Operation cancelled by user");
1106                    }
1107                    event = stream_rx.recv() => {
1108                        match event {
1109                            Some(crate::llm::StreamEvent::TextDelta(text)) => {
1110                                if let Some(tx) = event_tx {
1111                                    tx.send(AgentEvent::TextDelta { text }).await.ok();
1112                                }
1113                            }
1114                            Some(crate::llm::StreamEvent::ToolUseStart { id, name }) => {
1115                                if let Some(tx) = event_tx {
1116                                    tx.send(AgentEvent::ToolStart { id, name }).await.ok();
1117                                }
1118                            }
1119                            Some(crate::llm::StreamEvent::ToolUseInputDelta(delta)) => {
1120                                if let Some(tx) = event_tx {
1121                                    tx.send(AgentEvent::ToolInputDelta { delta }).await.ok();
1122                                }
1123                            }
1124                            Some(crate::llm::StreamEvent::Done(resp)) => {
1125                                final_response = Some(resp);
1126                                break;
1127                            }
1128                            None => break,
1129                        }
1130                    }
1131                }
1132            }
1133            final_response.context("Stream ended without final response")
1134        } else {
1135            self.llm_client
1136                .complete(messages, system, &tools)
1137                .await
1138                .context("LLM call failed")
1139        }
1140    }
1141
1142    /// Create a tool context with streaming support.
1143    ///
1144    /// When `event_tx` is Some, spawns a forwarder task that converts
1145    /// `ToolStreamEvent::OutputDelta` into `AgentEvent::ToolOutputDelta`
1146    /// and sends them to the agent event channel.
1147    ///
1148    /// Returns the augmented `ToolContext`. The forwarder task runs until
1149    /// the tool-side sender is dropped (i.e., tool execution finishes).
1150    fn streaming_tool_context(
1151        &self,
1152        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1153        tool_id: &str,
1154        tool_name: &str,
1155    ) -> ToolContext {
1156        let mut ctx = self.tool_context.clone();
1157        if let Some(agent_tx) = event_tx {
1158            let (tool_tx, mut tool_rx) = mpsc::channel::<ToolStreamEvent>(64);
1159            ctx.event_tx = Some(tool_tx);
1160
1161            let agent_tx = agent_tx.clone();
1162            let tool_id = tool_id.to_string();
1163            let tool_name = tool_name.to_string();
1164            tokio::spawn(async move {
1165                while let Some(event) = tool_rx.recv().await {
1166                    match event {
1167                        ToolStreamEvent::OutputDelta(delta) => {
1168                            agent_tx
1169                                .send(AgentEvent::ToolOutputDelta {
1170                                    id: tool_id.clone(),
1171                                    name: tool_name.clone(),
1172                                    delta,
1173                                })
1174                                .await
1175                                .ok();
1176                        }
1177                    }
1178                }
1179            });
1180        }
1181        ctx
1182    }
1183
1184    /// Resolve context from all providers for a given prompt
1185    ///
1186    /// Returns aggregated context results from all configured providers.
1187    async fn resolve_context(&self, prompt: &str, session_id: Option<&str>) -> Vec<ContextResult> {
1188        if self.config.context_providers.is_empty() {
1189            return Vec::new();
1190        }
1191
1192        let query = ContextQuery::new(prompt).with_session_id(session_id.unwrap_or(""));
1193
1194        let futures = self
1195            .config
1196            .context_providers
1197            .iter()
1198            .map(|p| p.query(&query));
1199        let outcomes = join_all(futures).await;
1200
1201        outcomes
1202            .into_iter()
1203            .enumerate()
1204            .filter_map(|(i, r)| match r {
1205                Ok(result) if !result.is_empty() => Some(result),
1206                Ok(_) => None,
1207                Err(e) => {
1208                    tracing::warn!(
1209                        "Context provider '{}' failed: {}",
1210                        self.config.context_providers[i].name(),
1211                        e
1212                    );
1213                    None
1214                }
1215            })
1216            .collect()
1217    }
1218
1219    /// Detect whether the LLM's no-tool-call response looks like an intermediate
1220    /// step rather than a genuine final answer.
1221    ///
1222    /// Returns `true` when continuation should be injected. Heuristics:
1223    /// - Response ends with a colon or ellipsis (mid-thought)
1224    /// - Response contains phrases that signal incomplete work
1225    /// - Response is very short (< 80 chars) and doesn't look like a summary
1226    fn looks_incomplete(text: &str) -> bool {
1227        let t = text.trim();
1228        if t.is_empty() {
1229            return true;
1230        }
1231        // Very short responses that aren't clearly a final answer
1232        if t.len() < 80 && !t.contains('\n') {
1233            // Short single-line — could be a genuine one-liner answer, keep going
1234            // only if it ends with punctuation that signals continuation
1235            let ends_continuation =
1236                t.ends_with(':') || t.ends_with("...") || t.ends_with('…') || t.ends_with(',');
1237            if ends_continuation {
1238                return true;
1239            }
1240        }
1241        // Phrases that signal the LLM is describing what it will do rather than doing it
1242        let incomplete_phrases = [
1243            "i'll ",
1244            "i will ",
1245            "let me ",
1246            "i need to ",
1247            "i should ",
1248            "next, i",
1249            "first, i",
1250            "now i",
1251            "i'll start",
1252            "i'll begin",
1253            "i'll now",
1254            "let's start",
1255            "let's begin",
1256            "to do this",
1257            "i'm going to",
1258        ];
1259        let lower = t.to_lowercase();
1260        for phrase in &incomplete_phrases {
1261            if lower.contains(phrase) {
1262                return true;
1263            }
1264        }
1265        false
1266    }
1267
1268    /// Get the assembled system prompt from slots.
1269    fn system_prompt(&self) -> String {
1270        self.config.prompt_slots.build()
1271    }
1272
1273    /// Build augmented system prompt with context
1274    fn build_augmented_system_prompt(&self, context_results: &[ContextResult]) -> Option<String> {
1275        let base = self.system_prompt();
1276
1277        // Use live tool executor definitions so tools added via add_mcp_server() are included
1278        let live_tools = self.tool_executor.definitions();
1279        let mcp_tools: Vec<&ToolDefinition> = live_tools
1280            .iter()
1281            .filter(|t| t.name.starts_with("mcp__"))
1282            .collect();
1283
1284        let mcp_section = if mcp_tools.is_empty() {
1285            String::new()
1286        } else {
1287            let mut lines = vec![
1288                "## MCP Tools".to_string(),
1289                String::new(),
1290                "The following MCP (Model Context Protocol) tools are available. Use them when the task requires external capabilities beyond the built-in tools:".to_string(),
1291                String::new(),
1292            ];
1293            for tool in &mcp_tools {
1294                let display = format!("- `{}` — {}", tool.name, tool.description);
1295                lines.push(display);
1296            }
1297            lines.join("\n")
1298        };
1299
1300        let parts: Vec<&str> = [base.as_str(), mcp_section.as_str()]
1301            .iter()
1302            .filter(|s| !s.is_empty())
1303            .copied()
1304            .collect();
1305
1306        // Auto-detect project type from workspace and inject language-specific guidelines,
1307        // but only when the user hasn't already set a custom `guidelines` slot.
1308        let project_hint = if self.config.prompt_slots.guidelines.is_none() {
1309            Self::detect_project_hint(&self.tool_context.workspace)
1310        } else {
1311            String::new()
1312        };
1313
1314        if context_results.is_empty() {
1315            if project_hint.is_empty() {
1316                return Some(parts.join("\n\n"));
1317            }
1318            return Some(format!("{}\n\n{}", parts.join("\n\n"), project_hint));
1319        }
1320
1321        // Build context XML block
1322        let context_xml: String = context_results
1323            .iter()
1324            .map(|r| r.to_xml())
1325            .collect::<Vec<_>>()
1326            .join("\n\n");
1327
1328        if project_hint.is_empty() {
1329            Some(format!("{}\n\n{}", parts.join("\n\n"), context_xml))
1330        } else {
1331            Some(format!(
1332                "{}\n\n{}\n\n{}",
1333                parts.join("\n\n"),
1334                project_hint,
1335                context_xml
1336            ))
1337        }
1338    }
1339
1340    /// Notify providers of turn completion for memory extraction
1341    async fn notify_turn_complete(&self, session_id: &str, prompt: &str, response: &str) {
1342        let futures = self
1343            .config
1344            .context_providers
1345            .iter()
1346            .map(|p| p.on_turn_complete(session_id, prompt, response));
1347        let outcomes = join_all(futures).await;
1348
1349        for (i, result) in outcomes.into_iter().enumerate() {
1350            if let Err(e) = result {
1351                tracing::warn!(
1352                    "Context provider '{}' on_turn_complete failed: {}",
1353                    self.config.context_providers[i].name(),
1354                    e
1355                );
1356            }
1357        }
1358    }
1359
1360    /// Fire PreToolUse hook event before tool execution.
1361    /// Returns the HookResult which may block the tool call.
1362    async fn fire_pre_tool_use(
1363        &self,
1364        session_id: &str,
1365        tool_name: &str,
1366        args: &serde_json::Value,
1367        recent_tools: Vec<String>,
1368    ) -> Option<HookResult> {
1369        if let Some(he) = &self.config.hook_engine {
1370            let event = HookEvent::PreToolUse(PreToolUseEvent {
1371                session_id: session_id.to_string(),
1372                tool: tool_name.to_string(),
1373                args: args.clone(),
1374                working_directory: self.tool_context.workspace.to_string_lossy().to_string(),
1375                recent_tools,
1376            });
1377            let result = he.fire(&event).await;
1378            if result.is_block() {
1379                return Some(result);
1380            }
1381        }
1382        None
1383    }
1384
1385    /// Fire PostToolUse hook event after tool execution (fire-and-forget).
1386    async fn fire_post_tool_use(
1387        &self,
1388        session_id: &str,
1389        tool_name: &str,
1390        args: &serde_json::Value,
1391        output: &str,
1392        success: bool,
1393        duration_ms: u64,
1394    ) {
1395        if let Some(he) = &self.config.hook_engine {
1396            let event = HookEvent::PostToolUse(PostToolUseEvent {
1397                session_id: session_id.to_string(),
1398                tool: tool_name.to_string(),
1399                args: args.clone(),
1400                result: ToolResultData {
1401                    success,
1402                    output: output.to_string(),
1403                    exit_code: if success { Some(0) } else { Some(1) },
1404                    duration_ms,
1405                },
1406            });
1407            let he = Arc::clone(he);
1408            tokio::spawn(async move {
1409                let _ = he.fire(&event).await;
1410            });
1411        }
1412    }
1413
1414    /// Fire GenerateStart hook event before an LLM call.
1415    async fn fire_generate_start(
1416        &self,
1417        session_id: &str,
1418        prompt: &str,
1419        system_prompt: &Option<String>,
1420    ) {
1421        if let Some(he) = &self.config.hook_engine {
1422            let event = HookEvent::GenerateStart(GenerateStartEvent {
1423                session_id: session_id.to_string(),
1424                prompt: prompt.to_string(),
1425                system_prompt: system_prompt.clone(),
1426                model_provider: String::new(),
1427                model_name: String::new(),
1428                available_tools: self.config.tools.iter().map(|t| t.name.clone()).collect(),
1429            });
1430            let _ = he.fire(&event).await;
1431        }
1432    }
1433
1434    /// Fire GenerateEnd hook event after an LLM call.
1435    async fn fire_generate_end(
1436        &self,
1437        session_id: &str,
1438        prompt: &str,
1439        response: &LlmResponse,
1440        duration_ms: u64,
1441    ) {
1442        if let Some(he) = &self.config.hook_engine {
1443            let tool_calls: Vec<ToolCallInfo> = response
1444                .tool_calls()
1445                .iter()
1446                .map(|tc| ToolCallInfo {
1447                    name: tc.name.clone(),
1448                    args: tc.args.clone(),
1449                })
1450                .collect();
1451
1452            let event = HookEvent::GenerateEnd(GenerateEndEvent {
1453                session_id: session_id.to_string(),
1454                prompt: prompt.to_string(),
1455                response_text: response.text().to_string(),
1456                tool_calls,
1457                usage: TokenUsageInfo {
1458                    prompt_tokens: response.usage.prompt_tokens as i32,
1459                    completion_tokens: response.usage.completion_tokens as i32,
1460                    total_tokens: response.usage.total_tokens as i32,
1461                },
1462                duration_ms,
1463            });
1464            let _ = he.fire(&event).await;
1465        }
1466    }
1467
1468    /// Fire PrePrompt hook event before prompt augmentation.
1469    /// Returns optional modified prompt text from the hook.
1470    async fn fire_pre_prompt(
1471        &self,
1472        session_id: &str,
1473        prompt: &str,
1474        system_prompt: &Option<String>,
1475        message_count: usize,
1476    ) -> Option<String> {
1477        if let Some(he) = &self.config.hook_engine {
1478            let event = HookEvent::PrePrompt(PrePromptEvent {
1479                session_id: session_id.to_string(),
1480                prompt: prompt.to_string(),
1481                system_prompt: system_prompt.clone(),
1482                message_count,
1483            });
1484            let result = he.fire(&event).await;
1485            if let HookResult::Continue(Some(modified)) = result {
1486                // Extract modified prompt from hook response
1487                if let Some(new_prompt) = modified.get("prompt").and_then(|v| v.as_str()) {
1488                    return Some(new_prompt.to_string());
1489                }
1490            }
1491        }
1492        None
1493    }
1494
1495    /// Fire PostResponse hook event after the agent loop completes.
1496    async fn fire_post_response(
1497        &self,
1498        session_id: &str,
1499        response_text: &str,
1500        tool_calls_count: usize,
1501        usage: &TokenUsage,
1502        duration_ms: u64,
1503    ) {
1504        if let Some(he) = &self.config.hook_engine {
1505            let event = HookEvent::PostResponse(PostResponseEvent {
1506                session_id: session_id.to_string(),
1507                response_text: response_text.to_string(),
1508                tool_calls_count,
1509                usage: TokenUsageInfo {
1510                    prompt_tokens: usage.prompt_tokens as i32,
1511                    completion_tokens: usage.completion_tokens as i32,
1512                    total_tokens: usage.total_tokens as i32,
1513                },
1514                duration_ms,
1515            });
1516            let he = Arc::clone(he);
1517            tokio::spawn(async move {
1518                let _ = he.fire(&event).await;
1519            });
1520        }
1521    }
1522
1523    /// Fire OnError hook event when an error occurs.
1524    async fn fire_on_error(
1525        &self,
1526        session_id: &str,
1527        error_type: ErrorType,
1528        error_message: &str,
1529        context: serde_json::Value,
1530    ) {
1531        if let Some(he) = &self.config.hook_engine {
1532            let event = HookEvent::OnError(OnErrorEvent {
1533                session_id: session_id.to_string(),
1534                error_type,
1535                error_message: error_message.to_string(),
1536                context,
1537            });
1538            let he = Arc::clone(he);
1539            tokio::spawn(async move {
1540                let _ = he.fire(&event).await;
1541            });
1542        }
1543    }
1544
1545    /// Execute the agent loop for a prompt
1546    ///
1547    /// Takes the conversation history and a new user prompt.
1548    /// Returns the agent result and updated message history.
1549    /// When event_tx is provided, uses streaming LLM API for real-time text output.
1550    pub async fn execute(
1551        &self,
1552        history: &[Message],
1553        prompt: &str,
1554        event_tx: Option<mpsc::Sender<AgentEvent>>,
1555    ) -> Result<AgentResult> {
1556        self.execute_with_session(history, prompt, None, event_tx, None)
1557            .await
1558    }
1559
1560    /// Execute the agent loop with pre-built messages (user message already included).
1561    ///
1562    /// Used by `send_with_attachments` / `stream_with_attachments` where the
1563    /// user message contains multi-modal content and is already appended to
1564    /// the messages vec.
1565    pub async fn execute_from_messages(
1566        &self,
1567        messages: Vec<Message>,
1568        session_id: Option<&str>,
1569        event_tx: Option<mpsc::Sender<AgentEvent>>,
1570        cancel_token: Option<&tokio_util::sync::CancellationToken>,
1571    ) -> Result<AgentResult> {
1572        let default_token = tokio_util::sync::CancellationToken::new();
1573        let token = cancel_token.unwrap_or(&default_token);
1574        tracing::info!(
1575            a3s.session.id = session_id.unwrap_or("none"),
1576            a3s.agent.max_turns = self.config.max_tool_rounds,
1577            "a3s.agent.execute_from_messages started"
1578        );
1579
1580        // Extract the last user message text for hooks, memory recall, and events.
1581        // Pass empty prompt so execute_loop skips adding a duplicate user message,
1582        // but provide effective_prompt for hook/memory/event purposes.
1583        let effective_prompt = messages
1584            .iter()
1585            .rev()
1586            .find(|m| m.role == "user")
1587            .map(|m| m.text())
1588            .unwrap_or_default();
1589
1590        let result = self
1591            .execute_loop_inner(
1592                &messages,
1593                "",
1594                &effective_prompt,
1595                session_id,
1596                event_tx,
1597                token,
1598            )
1599            .await;
1600
1601        match &result {
1602            Ok(r) => tracing::info!(
1603                a3s.agent.tool_calls_count = r.tool_calls_count,
1604                a3s.llm.total_tokens = r.usage.total_tokens,
1605                "a3s.agent.execute_from_messages completed"
1606            ),
1607            Err(e) => tracing::warn!(
1608                error = %e,
1609                "a3s.agent.execute_from_messages failed"
1610            ),
1611        }
1612
1613        result
1614    }
1615
1616    /// Execute the agent loop for a prompt with session context
1617    ///
1618    /// Takes the conversation history, user prompt, and optional session ID.
1619    /// When session_id is provided, context providers can use it for session-specific context.
1620    pub async fn execute_with_session(
1621        &self,
1622        history: &[Message],
1623        prompt: &str,
1624        session_id: Option<&str>,
1625        event_tx: Option<mpsc::Sender<AgentEvent>>,
1626        cancel_token: Option<&tokio_util::sync::CancellationToken>,
1627    ) -> Result<AgentResult> {
1628        let default_token = tokio_util::sync::CancellationToken::new();
1629        let token = cancel_token.unwrap_or(&default_token);
1630        tracing::info!(
1631            a3s.session.id = session_id.unwrap_or("none"),
1632            a3s.agent.max_turns = self.config.max_tool_rounds,
1633            "a3s.agent.execute started"
1634        );
1635
1636        // Determine whether to use planning mode
1637        let use_planning = if self.config.planning_mode == PlanningMode::Auto {
1638            // Auto mode: use keyword confidence first, fallback to LLM classification
1639            let (style, confidence) = AgentStyle::detect_with_confidence(prompt);
1640            if confidence == DetectionConfidence::Low {
1641                // Low confidence — use LLM classification if available
1642                if let Some(ref llm) = self.config.llm_client {
1643                    match AgentStyle::detect_with_llm(llm.as_ref(), prompt).await {
1644                        Ok(classified_style) => {
1645                            tracing::debug!(
1646                                intent.classification = ?classified_style,
1647                                intent.source = "llm",
1648                                "Intent classified via LLM"
1649                            );
1650                            classified_style.requires_planning()
1651                        }
1652                        Err(e) => {
1653                            tracing::warn!(error = %e, "LLM intent classification failed, using keyword detection");
1654                            style.requires_planning()
1655                        }
1656                    }
1657                } else {
1658                    // No LLM client available — use keyword detection
1659                    style.requires_planning()
1660                }
1661            } else {
1662                // High/Medium confidence — use keyword detection directly
1663                style.requires_planning()
1664            }
1665        } else {
1666            // Explicit mode: Enabled or Disabled
1667            self.config.planning_mode.should_plan(prompt)
1668        };
1669
1670        // Create agent task if task_manager is available
1671        let task_id = if let Some(ref tm) = self.task_manager {
1672            let workspace = self.tool_context.workspace.display().to_string();
1673            let task = crate::task::Task::agent("agent", &workspace, prompt);
1674            let id = task.id;
1675            tm.spawn(task);
1676            let _ = tm.start(id);
1677            Some(id)
1678        } else {
1679            None
1680        };
1681
1682        let result = if use_planning {
1683            self.execute_with_planning(history, prompt, event_tx).await
1684        } else {
1685            self.execute_loop(history, prompt, session_id, event_tx, token)
1686                .await
1687        };
1688
1689        // Complete or fail agent task based on result
1690        if let Some(ref tm) = self.task_manager {
1691            if let Some(tid) = task_id {
1692                match &result {
1693                    Ok(r) => {
1694                        let output = serde_json::json!({
1695                            "text": r.text,
1696                            "tool_calls_count": r.tool_calls_count,
1697                            "usage": r.usage,
1698                        });
1699                        let _ = tm.complete(tid, Some(output));
1700                    }
1701                    Err(e) => {
1702                        let _ = tm.fail(tid, e.to_string());
1703                    }
1704                }
1705            }
1706        }
1707
1708        match &result {
1709            Ok(r) => {
1710                tracing::info!(
1711                    a3s.agent.tool_calls_count = r.tool_calls_count,
1712                    a3s.llm.total_tokens = r.usage.total_tokens,
1713                    "a3s.agent.execute completed"
1714                );
1715                // Fire PostResponse hook
1716                self.fire_post_response(
1717                    session_id.unwrap_or(""),
1718                    &r.text,
1719                    r.tool_calls_count,
1720                    &r.usage,
1721                    0, // duration tracked externally
1722                )
1723                .await;
1724            }
1725            Err(e) => {
1726                tracing::warn!(
1727                    error = %e,
1728                    "a3s.agent.execute failed"
1729                );
1730                // Fire OnError hook
1731                self.fire_on_error(
1732                    session_id.unwrap_or(""),
1733                    ErrorType::Other,
1734                    &e.to_string(),
1735                    serde_json::json!({"phase": "execute"}),
1736                )
1737                .await;
1738            }
1739        }
1740
1741        result
1742    }
1743
1744    /// Core execution loop (without planning routing).
1745    ///
1746    /// This is the inner loop that runs LLM calls and tool executions.
1747    /// Called directly by `execute_with_session` (after planning check)
1748    /// and by `execute_plan` (for individual steps, bypassing planning).
1749    async fn execute_loop(
1750        &self,
1751        history: &[Message],
1752        prompt: &str,
1753        session_id: Option<&str>,
1754        event_tx: Option<mpsc::Sender<AgentEvent>>,
1755        cancel_token: &tokio_util::sync::CancellationToken,
1756    ) -> Result<AgentResult> {
1757        // When called via execute_loop, the prompt is used for both
1758        // message-adding and hook/memory/event purposes.
1759        self.execute_loop_inner(history, prompt, prompt, session_id, event_tx, cancel_token)
1760            .await
1761    }
1762
1763    /// Inner execution loop.
1764    ///
1765    /// `msg_prompt` controls whether a user message is appended (empty = skip).
1766    /// `effective_prompt` is used for hooks, memory recall, taint tracking, and events.
1767    async fn execute_loop_inner(
1768        &self,
1769        history: &[Message],
1770        msg_prompt: &str,
1771        effective_prompt: &str,
1772        session_id: Option<&str>,
1773        event_tx: Option<mpsc::Sender<AgentEvent>>,
1774        cancel_token: &tokio_util::sync::CancellationToken,
1775    ) -> Result<AgentResult> {
1776        let mut messages = history.to_vec();
1777        let mut total_usage = TokenUsage::default();
1778        let mut tool_calls_count = 0;
1779        let mut turn = 0;
1780        // Consecutive malformed-tool-args errors (4.1 parse error recovery)
1781        let mut parse_error_count: u32 = 0;
1782        // Continuation injection counter
1783        let mut continuation_count: u32 = 0;
1784        let mut recent_tool_signatures: Vec<String> = Vec::new();
1785
1786        // Send start event
1787        if let Some(tx) = &event_tx {
1788            tx.send(AgentEvent::Start {
1789                prompt: effective_prompt.to_string(),
1790            })
1791            .await
1792            .ok();
1793        }
1794
1795        // Forward queue events (CommandDeadLettered, CommandRetry, QueueAlert) to event stream
1796        let _queue_forward_handle =
1797            if let (Some(ref queue), Some(ref tx)) = (&self.command_queue, &event_tx) {
1798                let mut rx = queue.subscribe();
1799                let tx = tx.clone();
1800                Some(tokio::spawn(async move {
1801                    while let Ok(event) = rx.recv().await {
1802                        if tx.send(event).await.is_err() {
1803                            break;
1804                        }
1805                    }
1806                }))
1807            } else {
1808                None
1809            };
1810
1811        // Fire PrePrompt hook (may modify the prompt)
1812        let built_system_prompt = Some(self.system_prompt());
1813        let hooked_prompt = if let Some(modified) = self
1814            .fire_pre_prompt(
1815                session_id.unwrap_or(""),
1816                effective_prompt,
1817                &built_system_prompt,
1818                messages.len(),
1819            )
1820            .await
1821        {
1822            modified
1823        } else {
1824            effective_prompt.to_string()
1825        };
1826        let effective_prompt = hooked_prompt.as_str();
1827
1828        // Taint-track the incoming prompt for sensitive data detection
1829        if let Some(ref sp) = self.config.security_provider {
1830            sp.taint_input(effective_prompt);
1831        }
1832
1833        // Recall relevant memories and inject into system prompt
1834        let system_with_memory = if let Some(ref memory) = self.config.memory {
1835            match memory.recall_similar(effective_prompt, 5).await {
1836                Ok(items) if !items.is_empty() => {
1837                    if let Some(tx) = &event_tx {
1838                        for item in &items {
1839                            tx.send(AgentEvent::MemoryRecalled {
1840                                memory_id: item.id.clone(),
1841                                content: item.content.clone(),
1842                                relevance: item.relevance_score(),
1843                            })
1844                            .await
1845                            .ok();
1846                        }
1847                        tx.send(AgentEvent::MemoriesSearched {
1848                            query: Some(effective_prompt.to_string()),
1849                            tags: Vec::new(),
1850                            result_count: items.len(),
1851                        })
1852                        .await
1853                        .ok();
1854                    }
1855                    let memory_context = items
1856                        .iter()
1857                        .map(|i| format!("- {}", i.content))
1858                        .collect::<Vec<_>>()
1859                        .join(
1860                            "
1861",
1862                        );
1863                    let base = self.system_prompt();
1864                    Some(format!(
1865                        "{}
1866
1867## Relevant past experience
1868{}",
1869                        base, memory_context
1870                    ))
1871                }
1872                _ => Some(self.system_prompt()),
1873            }
1874        } else {
1875            Some(self.system_prompt())
1876        };
1877
1878        // Resolve context from providers on first turn (before adding user message)
1879        let augmented_system = if !self.config.context_providers.is_empty() {
1880            // Send context resolving event
1881            if let Some(tx) = &event_tx {
1882                let provider_names: Vec<String> = self
1883                    .config
1884                    .context_providers
1885                    .iter()
1886                    .map(|p| p.name().to_string())
1887                    .collect();
1888                tx.send(AgentEvent::ContextResolving {
1889                    providers: provider_names,
1890                })
1891                .await
1892                .ok();
1893            }
1894
1895            tracing::info!(
1896                a3s.context.providers = self.config.context_providers.len() as i64,
1897                "Context resolution started"
1898            );
1899            let context_results = self.resolve_context(effective_prompt, session_id).await;
1900
1901            // Send context resolved event
1902            if let Some(tx) = &event_tx {
1903                let total_items: usize = context_results.iter().map(|r| r.items.len()).sum();
1904                let total_tokens: usize = context_results.iter().map(|r| r.total_tokens).sum();
1905
1906                tracing::info!(
1907                    context_items = total_items,
1908                    context_tokens = total_tokens,
1909                    "Context resolution completed"
1910                );
1911
1912                tx.send(AgentEvent::ContextResolved {
1913                    total_items,
1914                    total_tokens,
1915                })
1916                .await
1917                .ok();
1918            }
1919
1920            self.build_augmented_system_prompt(&context_results)
1921        } else {
1922            Some(self.system_prompt())
1923        };
1924
1925        // Merge memory context into system prompt
1926        let base_prompt = self.system_prompt();
1927        let augmented_system = match (augmented_system, system_with_memory) {
1928            (Some(ctx), Some(mem)) if ctx != mem => Some(ctx.replacen(&base_prompt, &mem, 1)),
1929            (Some(ctx), _) => Some(ctx),
1930            (None, mem) => mem,
1931        };
1932
1933        // Add user message
1934        if !msg_prompt.is_empty() {
1935            messages.push(Message::user(msg_prompt));
1936        }
1937
1938        loop {
1939            turn += 1;
1940
1941            if turn > self.config.max_tool_rounds {
1942                let error = format!("Max tool rounds ({}) exceeded", self.config.max_tool_rounds);
1943                if let Some(tx) = &event_tx {
1944                    tx.send(AgentEvent::Error {
1945                        message: error.clone(),
1946                    })
1947                    .await
1948                    .ok();
1949                }
1950                anyhow::bail!(error);
1951            }
1952
1953            // Send turn start event
1954            if let Some(tx) = &event_tx {
1955                tx.send(AgentEvent::TurnStart { turn }).await.ok();
1956            }
1957
1958            tracing::info!(
1959                turn = turn,
1960                max_turns = self.config.max_tool_rounds,
1961                "Agent turn started"
1962            );
1963
1964            // Call LLM - use streaming if we have an event channel
1965            tracing::info!(
1966                a3s.llm.streaming = event_tx.is_some(),
1967                "LLM completion started"
1968            );
1969
1970            // Fire GenerateStart hook
1971            self.fire_generate_start(
1972                session_id.unwrap_or(""),
1973                effective_prompt,
1974                &augmented_system,
1975            )
1976            .await;
1977
1978            let llm_start = std::time::Instant::now();
1979            // Circuit breaker (4.3): retry non-streaming LLM calls on transient failures.
1980            // Each failure increments `consecutive_llm_errors`; on success it resets to 0.
1981            // Streaming mode bails immediately on failure (events can't be replayed).
1982            let response = {
1983                let threshold = self.config.circuit_breaker_threshold.max(1);
1984                let mut attempt = 0u32;
1985                loop {
1986                    attempt += 1;
1987                    let result = self
1988                        .call_llm(
1989                            &messages,
1990                            augmented_system.as_deref(),
1991                            &event_tx,
1992                            cancel_token,
1993                        )
1994                        .await;
1995                    match result {
1996                        Ok(r) => {
1997                            break r;
1998                        }
1999                        // Never retry if cancelled
2000                        Err(e) if cancel_token.is_cancelled() => {
2001                            anyhow::bail!(e);
2002                        }
2003                        // Retry when: non-streaming under threshold, OR first streaming attempt
2004                        Err(e) if attempt < threshold && (event_tx.is_none() || attempt == 1) => {
2005                            tracing::warn!(
2006                                turn = turn,
2007                                attempt = attempt,
2008                                threshold = threshold,
2009                                error = %e,
2010                                "LLM call failed, will retry"
2011                            );
2012                            tokio::time::sleep(Duration::from_millis(100 * attempt as u64)).await;
2013                        }
2014                        // Threshold exceeded or streaming mid-stream: bail
2015                        Err(e) => {
2016                            let msg = if attempt > 1 {
2017                                format!(
2018                                    "LLM circuit breaker triggered: failed after {} attempt(s): {}",
2019                                    attempt, e
2020                                )
2021                            } else {
2022                                format!("LLM call failed: {}", e)
2023                            };
2024                            tracing::error!(turn = turn, attempt = attempt, "{}", msg);
2025                            // Fire OnError hook for LLM failure
2026                            self.fire_on_error(
2027                                session_id.unwrap_or(""),
2028                                ErrorType::LlmFailure,
2029                                &msg,
2030                                serde_json::json!({"turn": turn, "attempt": attempt}),
2031                            )
2032                            .await;
2033                            if let Some(tx) = &event_tx {
2034                                tx.send(AgentEvent::Error {
2035                                    message: msg.clone(),
2036                                })
2037                                .await
2038                                .ok();
2039                            }
2040                            anyhow::bail!(msg);
2041                        }
2042                    }
2043                }
2044            };
2045
2046            // Update usage
2047            total_usage.prompt_tokens += response.usage.prompt_tokens;
2048            total_usage.completion_tokens += response.usage.completion_tokens;
2049            total_usage.total_tokens += response.usage.total_tokens;
2050
2051            // Track token usage in progress tracker
2052            if let Some(ref tracker) = self.progress_tracker {
2053                let token_usage = crate::task::TaskTokenUsage {
2054                    input_tokens: response.usage.prompt_tokens as u64,
2055                    output_tokens: response.usage.completion_tokens as u64,
2056                    cache_read_tokens: response.usage.cache_read_tokens.unwrap_or(0) as u64,
2057                    cache_write_tokens: response.usage.cache_write_tokens.unwrap_or(0) as u64,
2058                };
2059                if let Ok(mut guard) = tracker.try_write() {
2060                    guard.track_tokens(token_usage);
2061                }
2062            }
2063
2064            // Record LLM completion telemetry
2065            let llm_duration = llm_start.elapsed();
2066            tracing::info!(
2067                turn = turn,
2068                streaming = event_tx.is_some(),
2069                prompt_tokens = response.usage.prompt_tokens,
2070                completion_tokens = response.usage.completion_tokens,
2071                total_tokens = response.usage.total_tokens,
2072                stop_reason = response.stop_reason.as_deref().unwrap_or("unknown"),
2073                duration_ms = llm_duration.as_millis() as u64,
2074                "LLM completion finished"
2075            );
2076
2077            // Fire GenerateEnd hook
2078            self.fire_generate_end(
2079                session_id.unwrap_or(""),
2080                effective_prompt,
2081                &response,
2082                llm_duration.as_millis() as u64,
2083            )
2084            .await;
2085
2086            // Record LLM usage on the llm span
2087            crate::telemetry::record_llm_usage(
2088                response.usage.prompt_tokens,
2089                response.usage.completion_tokens,
2090                response.usage.total_tokens,
2091                response.stop_reason.as_deref(),
2092            );
2093            // Log turn token usage
2094            tracing::info!(
2095                turn = turn,
2096                a3s.llm.total_tokens = response.usage.total_tokens,
2097                "Turn token usage"
2098            );
2099
2100            // Add assistant message to history
2101            messages.push(response.message.clone());
2102
2103            // Check for tool calls
2104            let tool_calls = response.tool_calls();
2105
2106            // Send turn end event
2107            if let Some(tx) = &event_tx {
2108                tx.send(AgentEvent::TurnEnd {
2109                    turn,
2110                    usage: response.usage.clone(),
2111                })
2112                .await
2113                .ok();
2114            }
2115
2116            // Auto-compact: check if context usage exceeds threshold
2117            if self.config.auto_compact {
2118                let used = response.usage.prompt_tokens;
2119                let max = self.config.max_context_tokens;
2120                let threshold = self.config.auto_compact_threshold;
2121
2122                if crate::session::compaction::should_auto_compact(used, max, threshold) {
2123                    let before_len = messages.len();
2124                    let percent_before = used as f32 / max as f32;
2125
2126                    tracing::info!(
2127                        used_tokens = used,
2128                        max_tokens = max,
2129                        percent = percent_before,
2130                        threshold = threshold,
2131                        "Auto-compact triggered"
2132                    );
2133
2134                    // Step 1: Prune large tool outputs first (cheap, no LLM call)
2135                    if let Some(pruned) = crate::session::compaction::prune_tool_outputs(&messages)
2136                    {
2137                        messages = pruned;
2138                        tracing::info!("Tool output pruning applied");
2139                    }
2140
2141                    // Step 2: Full summarization using the agent's LLM client
2142                    if let Ok(Some(compacted)) = crate::session::compaction::compact_messages(
2143                        session_id.unwrap_or(""),
2144                        &messages,
2145                        &self.llm_client,
2146                    )
2147                    .await
2148                    {
2149                        messages = compacted;
2150                    }
2151
2152                    // Emit compaction event
2153                    if let Some(tx) = &event_tx {
2154                        tx.send(AgentEvent::ContextCompacted {
2155                            session_id: session_id.unwrap_or("").to_string(),
2156                            before_messages: before_len,
2157                            after_messages: messages.len(),
2158                            percent_before,
2159                        })
2160                        .await
2161                        .ok();
2162                    }
2163                }
2164            }
2165
2166            if tool_calls.is_empty() {
2167                // No tool calls — check if we should inject a continuation message
2168                // before treating this as a final answer.
2169                let final_text = response.text();
2170
2171                if self.config.continuation_enabled
2172                    && continuation_count < self.config.max_continuation_turns
2173                    && turn < self.config.max_tool_rounds  // never inject past the turn limit
2174                    && Self::looks_incomplete(&final_text)
2175                {
2176                    continuation_count += 1;
2177                    tracing::info!(
2178                        turn = turn,
2179                        continuation = continuation_count,
2180                        max_continuation = self.config.max_continuation_turns,
2181                        "Injecting continuation message — response looks incomplete"
2182                    );
2183                    // Inject continuation as a user message and keep looping
2184                    messages.push(Message::user(crate::prompts::CONTINUATION));
2185                    continue;
2186                }
2187
2188                // Sanitize output to redact any sensitive data before returning
2189                let final_text = if let Some(ref sp) = self.config.security_provider {
2190                    sp.sanitize_output(&final_text)
2191                } else {
2192                    final_text
2193                };
2194
2195                // Record final totals
2196                tracing::info!(
2197                    tool_calls_count = tool_calls_count,
2198                    total_prompt_tokens = total_usage.prompt_tokens,
2199                    total_completion_tokens = total_usage.completion_tokens,
2200                    total_tokens = total_usage.total_tokens,
2201                    turns = turn,
2202                    "Agent execution completed"
2203                );
2204
2205                if let Some(tx) = &event_tx {
2206                    tx.send(AgentEvent::End {
2207                        text: final_text.clone(),
2208                        usage: total_usage.clone(),
2209                        meta: response.meta.clone(),
2210                    })
2211                    .await
2212                    .ok();
2213                }
2214
2215                // Notify context providers of turn completion for memory extraction
2216                if let Some(sid) = session_id {
2217                    self.notify_turn_complete(sid, effective_prompt, &final_text)
2218                        .await;
2219                }
2220
2221                return Ok(AgentResult {
2222                    text: final_text,
2223                    messages,
2224                    usage: total_usage,
2225                    tool_calls_count,
2226                });
2227            }
2228
2229            // Execute tools sequentially
2230            // Fast path: when all tool calls are independent file writes and no hooks/HITL
2231            // are configured, execute them concurrently to avoid serial I/O bottleneck.
2232            let tool_calls = if self.config.hook_engine.is_none()
2233                && self.config.confirmation_manager.is_none()
2234                && tool_calls.len() > 1
2235                && tool_calls
2236                    .iter()
2237                    .all(|tc| Self::is_parallel_safe_write(&tc.name, &tc.args))
2238                && {
2239                    // All target paths must be distinct (no write-write conflicts)
2240                    let paths: Vec<_> = tool_calls
2241                        .iter()
2242                        .filter_map(|tc| Self::extract_write_path(&tc.args))
2243                        .collect();
2244                    paths.len() == tool_calls.len()
2245                        && paths.iter().collect::<std::collections::HashSet<_>>().len()
2246                            == paths.len()
2247                } {
2248                tracing::info!(
2249                    count = tool_calls.len(),
2250                    "Parallel write batch: executing {} independent file writes concurrently",
2251                    tool_calls.len()
2252                );
2253
2254                let futures: Vec<_> = tool_calls
2255                    .iter()
2256                    .map(|tc| {
2257                        let ctx = self.tool_context.clone();
2258                        let executor = Arc::clone(&self.tool_executor);
2259                        let name = tc.name.clone();
2260                        let args = tc.args.clone();
2261                        async move { executor.execute_with_context(&name, &args, &ctx).await }
2262                    })
2263                    .collect();
2264
2265                let results = join_all(futures).await;
2266
2267                // Post-process results in original order (sequential, preserves message ordering)
2268                for (tc, result) in tool_calls.iter().zip(results) {
2269                    tool_calls_count += 1;
2270                    let (output, exit_code, is_error, metadata, images) =
2271                        Self::tool_result_to_tuple(result);
2272
2273                    // Track tool call in progress tracker
2274                    self.track_tool_result(&tc.name, &tc.args, exit_code);
2275
2276                    let output = if let Some(ref sp) = self.config.security_provider {
2277                        sp.sanitize_output(&output)
2278                    } else {
2279                        output
2280                    };
2281
2282                    if let Some(tx) = &event_tx {
2283                        tx.send(AgentEvent::ToolEnd {
2284                            id: tc.id.clone(),
2285                            name: tc.name.clone(),
2286                            output: output.clone(),
2287                            exit_code,
2288                            metadata,
2289                        })
2290                        .await
2291                        .ok();
2292                    }
2293
2294                    if images.is_empty() {
2295                        messages.push(Message::tool_result(&tc.id, &output, is_error));
2296                    } else {
2297                        messages.push(Message::tool_result_with_images(
2298                            &tc.id, &output, &images, is_error,
2299                        ));
2300                    }
2301                }
2302
2303                // Skip the sequential loop below
2304                continue;
2305            } else {
2306                tool_calls
2307            };
2308
2309            for tool_call in tool_calls {
2310                tool_calls_count += 1;
2311
2312                let tool_start = std::time::Instant::now();
2313
2314                tracing::info!(
2315                    tool_name = tool_call.name.as_str(),
2316                    tool_id = tool_call.id.as_str(),
2317                    "Tool execution started"
2318                );
2319
2320                // Send tool start event (only if not already sent during streaming)
2321                // In streaming mode, ToolStart is sent when we receive ToolUseStart from LLM
2322                // But we still need to send ToolEnd after execution
2323
2324                // Check for malformed tool arguments from LLM (4.1 parse error recovery)
2325                if let Some(parse_error) =
2326                    tool_call.args.get("__parse_error").and_then(|v| v.as_str())
2327                {
2328                    parse_error_count += 1;
2329                    let error_msg = format!("Error: {}", parse_error);
2330                    tracing::warn!(
2331                        tool = tool_call.name.as_str(),
2332                        parse_error_count = parse_error_count,
2333                        max_parse_retries = self.config.max_parse_retries,
2334                        "Malformed tool arguments from LLM"
2335                    );
2336
2337                    if let Some(tx) = &event_tx {
2338                        tx.send(AgentEvent::ToolEnd {
2339                            id: tool_call.id.clone(),
2340                            name: tool_call.name.clone(),
2341                            output: error_msg.clone(),
2342                            exit_code: 1,
2343                            metadata: None,
2344                        })
2345                        .await
2346                        .ok();
2347                    }
2348
2349                    messages.push(Message::tool_result(&tool_call.id, &error_msg, true));
2350
2351                    if parse_error_count > self.config.max_parse_retries {
2352                        let msg = format!(
2353                            "LLM produced malformed tool arguments {} time(s) in a row \
2354                             (max_parse_retries={}); giving up",
2355                            parse_error_count, self.config.max_parse_retries
2356                        );
2357                        tracing::error!("{}", msg);
2358                        if let Some(tx) = &event_tx {
2359                            tx.send(AgentEvent::Error {
2360                                message: msg.clone(),
2361                            })
2362                            .await
2363                            .ok();
2364                        }
2365                        anyhow::bail!(msg);
2366                    }
2367                    continue;
2368                }
2369
2370                // Tool args are valid — reset parse error counter
2371                parse_error_count = 0;
2372
2373                // Check skill-based tool permissions
2374                if let Some(ref registry) = self.config.skill_registry {
2375                    let instruction_skills =
2376                        registry.by_kind(crate::skills::SkillKind::Instruction);
2377                    let has_restrictions =
2378                        instruction_skills.iter().any(|s| s.allowed_tools.is_some());
2379                    if has_restrictions {
2380                        let allowed = instruction_skills
2381                            .iter()
2382                            .any(|s| s.is_tool_allowed(&tool_call.name));
2383                        if !allowed {
2384                            let msg = format!(
2385                                "Tool '{}' is not allowed by any active skill.",
2386                                tool_call.name
2387                            );
2388                            tracing::info!(
2389                                tool_name = tool_call.name.as_str(),
2390                                "Tool blocked by skill registry"
2391                            );
2392                            if let Some(tx) = &event_tx {
2393                                tx.send(AgentEvent::PermissionDenied {
2394                                    tool_id: tool_call.id.clone(),
2395                                    tool_name: tool_call.name.clone(),
2396                                    args: tool_call.args.clone(),
2397                                    reason: msg.clone(),
2398                                })
2399                                .await
2400                                .ok();
2401                            }
2402                            messages.push(Message::tool_result(&tool_call.id, &msg, true));
2403                            continue;
2404                        }
2405                    }
2406                }
2407
2408                // Fire PreToolUse hook (may block the tool call)
2409                if let Some(HookResult::Block(reason)) = self
2410                    .fire_pre_tool_use(
2411                        session_id.unwrap_or(""),
2412                        &tool_call.name,
2413                        &tool_call.args,
2414                        recent_tool_signatures.clone(),
2415                    )
2416                    .await
2417                {
2418                    let msg = format!("Tool '{}' blocked by hook: {}", tool_call.name, reason);
2419                    tracing::info!(
2420                        tool_name = tool_call.name.as_str(),
2421                        "Tool blocked by PreToolUse hook"
2422                    );
2423
2424                    if let Some(tx) = &event_tx {
2425                        tx.send(AgentEvent::PermissionDenied {
2426                            tool_id: tool_call.id.clone(),
2427                            tool_name: tool_call.name.clone(),
2428                            args: tool_call.args.clone(),
2429                            reason: reason.clone(),
2430                        })
2431                        .await
2432                        .ok();
2433                    }
2434
2435                    messages.push(Message::tool_result(&tool_call.id, &msg, true));
2436                    continue;
2437                }
2438
2439                // Check permission before executing tool
2440                let permission_decision = if let Some(checker) = &self.config.permission_checker {
2441                    checker.check(&tool_call.name, &tool_call.args)
2442                } else {
2443                    // No policy configured — default to Ask so HITL can still intervene
2444                    PermissionDecision::Ask
2445                };
2446
2447                let (output, exit_code, is_error, metadata, images) = match permission_decision {
2448                    PermissionDecision::Deny => {
2449                        tracing::info!(
2450                            tool_name = tool_call.name.as_str(),
2451                            permission = "deny",
2452                            "Tool permission denied"
2453                        );
2454                        // Tool execution denied by permission policy
2455                        let denial_msg = format!(
2456                            "Permission denied: Tool '{}' is blocked by permission policy.",
2457                            tool_call.name
2458                        );
2459
2460                        // Send permission denied event
2461                        if let Some(tx) = &event_tx {
2462                            tx.send(AgentEvent::PermissionDenied {
2463                                tool_id: tool_call.id.clone(),
2464                                tool_name: tool_call.name.clone(),
2465                                args: tool_call.args.clone(),
2466                                reason: "Blocked by deny rule in permission policy".to_string(),
2467                            })
2468                            .await
2469                            .ok();
2470                        }
2471
2472                        (denial_msg, 1, true, None, Vec::new())
2473                    }
2474                    PermissionDecision::Allow => {
2475                        tracing::info!(
2476                            tool_name = tool_call.name.as_str(),
2477                            permission = "allow",
2478                            "Tool permission: allow"
2479                        );
2480                        // Permission explicitly allows — execute directly, no HITL
2481                        let stream_ctx =
2482                            self.streaming_tool_context(&event_tx, &tool_call.id, &tool_call.name);
2483                        let result = self
2484                            .execute_tool_queued_or_direct(
2485                                &tool_call.name,
2486                                &tool_call.args,
2487                                &stream_ctx,
2488                            )
2489                            .await;
2490
2491                        let tuple = Self::tool_result_to_tuple(result);
2492                        // Track tool call in progress tracker
2493                        let (_, exit_code, _, _, _) = tuple;
2494                        self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
2495                        tuple
2496                    }
2497                    PermissionDecision::Ask => {
2498                        tracing::info!(
2499                            tool_name = tool_call.name.as_str(),
2500                            permission = "ask",
2501                            "Tool permission: ask"
2502                        );
2503                        // Permission says Ask — delegate to HITL confirmation manager
2504                        if let Some(cm) = &self.config.confirmation_manager {
2505                            // Check YOLO lanes: if the tool's lane is in YOLO mode, skip confirmation
2506                            if !cm.requires_confirmation(&tool_call.name).await {
2507                                let stream_ctx = self.streaming_tool_context(
2508                                    &event_tx,
2509                                    &tool_call.id,
2510                                    &tool_call.name,
2511                                );
2512                                let result = self
2513                                    .execute_tool_queued_or_direct(
2514                                        &tool_call.name,
2515                                        &tool_call.args,
2516                                        &stream_ctx,
2517                                    )
2518                                    .await;
2519
2520                                let (output, exit_code, is_error, metadata, images) =
2521                                    Self::tool_result_to_tuple(result);
2522
2523                                // Track tool call in progress tracker
2524                                self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
2525
2526                                // Add tool result to messages
2527                                if images.is_empty() {
2528                                    messages.push(Message::tool_result(
2529                                        &tool_call.id,
2530                                        &output,
2531                                        is_error,
2532                                    ));
2533                                } else {
2534                                    messages.push(Message::tool_result_with_images(
2535                                        &tool_call.id,
2536                                        &output,
2537                                        &images,
2538                                        is_error,
2539                                    ));
2540                                }
2541
2542                                // Record tool result on the tool span for early exit
2543                                let tool_duration = tool_start.elapsed();
2544                                crate::telemetry::record_tool_result(exit_code, tool_duration);
2545
2546                                // Send ToolEnd event
2547                                if let Some(tx) = &event_tx {
2548                                    tx.send(AgentEvent::ToolEnd {
2549                                        id: tool_call.id.clone(),
2550                                        name: tool_call.name.clone(),
2551                                        output: output.clone(),
2552                                        exit_code,
2553                                        metadata,
2554                                    })
2555                                    .await
2556                                    .ok();
2557                                }
2558
2559                                // Fire PostToolUse hook (fire-and-forget)
2560                                self.fire_post_tool_use(
2561                                    session_id.unwrap_or(""),
2562                                    &tool_call.name,
2563                                    &tool_call.args,
2564                                    &output,
2565                                    exit_code == 0,
2566                                    tool_duration.as_millis() as u64,
2567                                )
2568                                .await;
2569
2570                                continue; // Skip the rest, move to next tool call
2571                            }
2572
2573                            // Get timeout from policy
2574                            let policy = cm.policy().await;
2575                            let timeout_ms = policy.default_timeout_ms;
2576                            let timeout_action = policy.timeout_action;
2577
2578                            // Request confirmation (this emits ConfirmationRequired event)
2579                            let rx = cm
2580                                .request_confirmation(
2581                                    &tool_call.id,
2582                                    &tool_call.name,
2583                                    &tool_call.args,
2584                                )
2585                                .await;
2586
2587                            // Forward ConfirmationRequired to the streaming event channel
2588                            // so external consumers (e.g. SafeClaw engine) can relay it
2589                            // to the browser UI.
2590                            if let Some(tx) = &event_tx {
2591                                tx.send(AgentEvent::ConfirmationRequired {
2592                                    tool_id: tool_call.id.clone(),
2593                                    tool_name: tool_call.name.clone(),
2594                                    args: tool_call.args.clone(),
2595                                    timeout_ms,
2596                                })
2597                                .await
2598                                .ok();
2599                            }
2600
2601                            // Wait for confirmation with timeout
2602                            let confirmation_result =
2603                                tokio::time::timeout(Duration::from_millis(timeout_ms), rx).await;
2604
2605                            match confirmation_result {
2606                                Ok(Ok(response)) => {
2607                                    // Forward ConfirmationReceived
2608                                    if let Some(tx) = &event_tx {
2609                                        tx.send(AgentEvent::ConfirmationReceived {
2610                                            tool_id: tool_call.id.clone(),
2611                                            approved: response.approved,
2612                                            reason: response.reason.clone(),
2613                                        })
2614                                        .await
2615                                        .ok();
2616                                    }
2617                                    if response.approved {
2618                                        let stream_ctx = self.streaming_tool_context(
2619                                            &event_tx,
2620                                            &tool_call.id,
2621                                            &tool_call.name,
2622                                        );
2623                                        let result = self
2624                                            .execute_tool_queued_or_direct(
2625                                                &tool_call.name,
2626                                                &tool_call.args,
2627                                                &stream_ctx,
2628                                            )
2629                                            .await;
2630
2631                                        let tuple = Self::tool_result_to_tuple(result);
2632                                        // Track tool call in progress tracker
2633                                        let (_, exit_code, _, _, _) = tuple;
2634                                        self.track_tool_result(
2635                                            &tool_call.name,
2636                                            &tool_call.args,
2637                                            exit_code,
2638                                        );
2639                                        tuple
2640                                    } else {
2641                                        let rejection_msg = format!(
2642                                            "Tool '{}' execution was REJECTED by the user. Reason: {}. \
2643                                             DO NOT retry this tool call unless the user explicitly asks you to.",
2644                                            tool_call.name,
2645                                            response.reason.unwrap_or_else(|| "No reason provided".to_string())
2646                                        );
2647                                        (rejection_msg, 1, true, None, Vec::new())
2648                                    }
2649                                }
2650                                Ok(Err(_)) => {
2651                                    // Forward ConfirmationTimeout (channel closed = effectively timed out)
2652                                    if let Some(tx) = &event_tx {
2653                                        tx.send(AgentEvent::ConfirmationTimeout {
2654                                            tool_id: tool_call.id.clone(),
2655                                            action_taken: "rejected".to_string(),
2656                                        })
2657                                        .await
2658                                        .ok();
2659                                    }
2660                                    let msg = format!(
2661                                        "Tool '{}' confirmation failed: confirmation channel closed",
2662                                        tool_call.name
2663                                    );
2664                                    (msg, 1, true, None, Vec::new())
2665                                }
2666                                Err(_) => {
2667                                    cm.check_timeouts().await;
2668
2669                                    // Forward ConfirmationTimeout
2670                                    if let Some(tx) = &event_tx {
2671                                        tx.send(AgentEvent::ConfirmationTimeout {
2672                                            tool_id: tool_call.id.clone(),
2673                                            action_taken: match timeout_action {
2674                                                crate::hitl::TimeoutAction::Reject => {
2675                                                    "rejected".to_string()
2676                                                }
2677                                                crate::hitl::TimeoutAction::AutoApprove => {
2678                                                    "auto_approved".to_string()
2679                                                }
2680                                            },
2681                                        })
2682                                        .await
2683                                        .ok();
2684                                    }
2685
2686                                    match timeout_action {
2687                                        crate::hitl::TimeoutAction::Reject => {
2688                                            let msg = format!(
2689                                                "Tool '{}' execution was REJECTED: user confirmation timed out after {}ms. \
2690                                                 DO NOT retry this tool call — the user did not approve it. \
2691                                                 Inform the user that the operation requires their approval and ask them to try again.",
2692                                                tool_call.name, timeout_ms
2693                                            );
2694                                            (msg, 1, true, None, Vec::new())
2695                                        }
2696                                        crate::hitl::TimeoutAction::AutoApprove => {
2697                                            let stream_ctx = self.streaming_tool_context(
2698                                                &event_tx,
2699                                                &tool_call.id,
2700                                                &tool_call.name,
2701                                            );
2702                                            let result = self
2703                                                .execute_tool_queued_or_direct(
2704                                                    &tool_call.name,
2705                                                    &tool_call.args,
2706                                                    &stream_ctx,
2707                                                )
2708                                                .await;
2709
2710                                            let tuple = Self::tool_result_to_tuple(result);
2711                                            // Track tool call in progress tracker
2712                                            let (_, exit_code, _, _, _) = tuple;
2713                                            self.track_tool_result(
2714                                                &tool_call.name,
2715                                                &tool_call.args,
2716                                                exit_code,
2717                                            );
2718                                            tuple
2719                                        }
2720                                    }
2721                                }
2722                            }
2723                        } else {
2724                            // Ask without confirmation manager — safe deny
2725                            let msg = format!(
2726                                "Tool '{}' requires confirmation but no HITL confirmation manager is configured. \
2727                                 Configure a confirmation policy to enable tool execution.",
2728                                tool_call.name
2729                            );
2730                            tracing::warn!(
2731                                tool_name = tool_call.name.as_str(),
2732                                "Tool requires confirmation but no HITL manager configured"
2733                            );
2734                            (msg, 1, true, None, Vec::new())
2735                        }
2736                    }
2737                };
2738
2739                let tool_duration = tool_start.elapsed();
2740                crate::telemetry::record_tool_result(exit_code, tool_duration);
2741
2742                // Sanitize tool output for sensitive data before it enters the message history
2743                let output = if let Some(ref sp) = self.config.security_provider {
2744                    sp.sanitize_output(&output)
2745                } else {
2746                    output
2747                };
2748
2749                recent_tool_signatures.push(format!(
2750                    "{}:{} => {}",
2751                    tool_call.name,
2752                    serde_json::to_string(&tool_call.args).unwrap_or_default(),
2753                    if is_error { "error" } else { "ok" }
2754                ));
2755                if recent_tool_signatures.len() > 8 {
2756                    let overflow = recent_tool_signatures.len() - 8;
2757                    recent_tool_signatures.drain(0..overflow);
2758                }
2759
2760                // Fire PostToolUse hook (fire-and-forget)
2761                self.fire_post_tool_use(
2762                    session_id.unwrap_or(""),
2763                    &tool_call.name,
2764                    &tool_call.args,
2765                    &output,
2766                    exit_code == 0,
2767                    tool_duration.as_millis() as u64,
2768                )
2769                .await;
2770
2771                // Auto-remember tool result in long-term memory
2772                if let Some(ref memory) = self.config.memory {
2773                    let tools_used = [tool_call.name.clone()];
2774                    let remember_result = if exit_code == 0 {
2775                        memory
2776                            .remember_success(effective_prompt, &tools_used, &output)
2777                            .await
2778                    } else {
2779                        memory
2780                            .remember_failure(effective_prompt, &output, &tools_used)
2781                            .await
2782                    };
2783                    match remember_result {
2784                        Ok(()) => {
2785                            if let Some(tx) = &event_tx {
2786                                let item_type = if exit_code == 0 { "success" } else { "failure" };
2787                                tx.send(AgentEvent::MemoryStored {
2788                                    memory_id: uuid::Uuid::new_v4().to_string(),
2789                                    memory_type: item_type.to_string(),
2790                                    importance: if exit_code == 0 { 0.8 } else { 0.9 },
2791                                    tags: vec![item_type.to_string(), tool_call.name.clone()],
2792                                })
2793                                .await
2794                                .ok();
2795                            }
2796                        }
2797                        Err(e) => {
2798                            tracing::warn!("Failed to store memory after tool execution: {}", e);
2799                        }
2800                    }
2801                }
2802
2803                // Send tool end event
2804                if let Some(tx) = &event_tx {
2805                    tx.send(AgentEvent::ToolEnd {
2806                        id: tool_call.id.clone(),
2807                        name: tool_call.name.clone(),
2808                        output: output.clone(),
2809                        exit_code,
2810                        metadata,
2811                    })
2812                    .await
2813                    .ok();
2814                }
2815
2816                // Add tool result to messages
2817                if images.is_empty() {
2818                    messages.push(Message::tool_result(&tool_call.id, &output, is_error));
2819                } else {
2820                    messages.push(Message::tool_result_with_images(
2821                        &tool_call.id,
2822                        &output,
2823                        &images,
2824                        is_error,
2825                    ));
2826                }
2827            }
2828        }
2829    }
2830
2831    /// Execute with streaming events
2832    pub async fn execute_streaming(
2833        &self,
2834        history: &[Message],
2835        prompt: &str,
2836    ) -> Result<(
2837        mpsc::Receiver<AgentEvent>,
2838        tokio::task::JoinHandle<Result<AgentResult>>,
2839        tokio_util::sync::CancellationToken,
2840    )> {
2841        let (tx, rx) = mpsc::channel(100);
2842        let cancel_token = tokio_util::sync::CancellationToken::new();
2843
2844        let llm_client = self.llm_client.clone();
2845        let tool_executor = self.tool_executor.clone();
2846        let tool_context = self.tool_context.clone();
2847        let config = self.config.clone();
2848        let tool_metrics = self.tool_metrics.clone();
2849        let command_queue = self.command_queue.clone();
2850        let history = history.to_vec();
2851        let prompt = prompt.to_string();
2852        let token_clone = cancel_token.clone();
2853
2854        let handle = tokio::spawn(async move {
2855            let mut agent = AgentLoop::new(llm_client, tool_executor, tool_context, config);
2856            if let Some(metrics) = tool_metrics {
2857                agent = agent.with_tool_metrics(metrics);
2858            }
2859            if let Some(queue) = command_queue {
2860                agent = agent.with_queue(queue);
2861            }
2862            agent
2863                .execute_with_session(&history, &prompt, None, Some(tx), Some(&token_clone))
2864                .await
2865        });
2866
2867        Ok((rx, handle, cancel_token))
2868    }
2869
2870    /// Create an execution plan for a prompt
2871    ///
2872    /// Delegates to [`LlmPlanner`] for structured JSON plan generation,
2873    /// falling back to heuristic planning if the LLM call fails.
2874    pub async fn plan(&self, prompt: &str, _context: Option<&str>) -> Result<ExecutionPlan> {
2875        use crate::planning::LlmPlanner;
2876
2877        match LlmPlanner::create_plan(&self.llm_client, prompt).await {
2878            Ok(plan) => Ok(plan),
2879            Err(e) => {
2880                tracing::warn!("LLM plan creation failed, using fallback: {}", e);
2881                Ok(LlmPlanner::fallback_plan(prompt))
2882            }
2883        }
2884    }
2885
2886    /// Execute with planning phase
2887    pub async fn execute_with_planning(
2888        &self,
2889        history: &[Message],
2890        prompt: &str,
2891        event_tx: Option<mpsc::Sender<AgentEvent>>,
2892    ) -> Result<AgentResult> {
2893        // Send planning start event
2894        if let Some(tx) = &event_tx {
2895            tx.send(AgentEvent::PlanningStart {
2896                prompt: prompt.to_string(),
2897            })
2898            .await
2899            .ok();
2900        }
2901
2902        // Extract goal when goal_tracking is enabled
2903        let goal = if self.config.goal_tracking {
2904            let g = self.extract_goal(prompt).await?;
2905            if let Some(tx) = &event_tx {
2906                tx.send(AgentEvent::GoalExtracted { goal: g.clone() })
2907                    .await
2908                    .ok();
2909            }
2910            Some(g)
2911        } else {
2912            None
2913        };
2914
2915        // Create execution plan
2916        let plan = self.plan(prompt, None).await?;
2917
2918        // Send planning end event
2919        if let Some(tx) = &event_tx {
2920            tx.send(AgentEvent::PlanningEnd {
2921                estimated_steps: plan.steps.len(),
2922                plan: plan.clone(),
2923            })
2924            .await
2925            .ok();
2926        }
2927
2928        let plan_start = std::time::Instant::now();
2929
2930        // Execute the plan step by step
2931        let result = self.execute_plan(history, &plan, event_tx.clone()).await?;
2932
2933        // Check goal achievement when goal_tracking is enabled
2934        if self.config.goal_tracking {
2935            if let Some(ref g) = goal {
2936                let achieved = self.check_goal_achievement(g, &result.text).await?;
2937                if achieved {
2938                    if let Some(tx) = &event_tx {
2939                        tx.send(AgentEvent::GoalAchieved {
2940                            goal: g.description.clone(),
2941                            total_steps: result.messages.len(),
2942                            duration_ms: plan_start.elapsed().as_millis() as i64,
2943                        })
2944                        .await
2945                        .ok();
2946                    }
2947                }
2948            }
2949        }
2950
2951        Ok(result)
2952    }
2953
2954    /// Execute an execution plan using wave-based dependency-aware scheduling.
2955    ///
2956    /// Steps with no unmet dependencies are grouped into "waves". A wave with
2957    /// a single step executes sequentially (preserving the history chain). A
2958    /// wave with multiple independent steps executes them in parallel via
2959    /// `JoinSet`, then merges their results back into the shared history.
2960    async fn execute_plan(
2961        &self,
2962        history: &[Message],
2963        plan: &ExecutionPlan,
2964        event_tx: Option<mpsc::Sender<AgentEvent>>,
2965    ) -> Result<AgentResult> {
2966        let mut plan = plan.clone();
2967        let mut current_history = history.to_vec();
2968        let mut total_usage = TokenUsage::default();
2969        let mut tool_calls_count = 0;
2970        let total_steps = plan.steps.len();
2971
2972        // Add initial user message with the goal
2973        let steps_text = plan
2974            .steps
2975            .iter()
2976            .enumerate()
2977            .map(|(i, step)| format!("{}. {}", i + 1, step.content))
2978            .collect::<Vec<_>>()
2979            .join("\n");
2980        current_history.push(Message::user(&crate::prompts::render(
2981            crate::prompts::PLAN_EXECUTE_GOAL,
2982            &[("goal", &plan.goal), ("steps", &steps_text)],
2983        )));
2984
2985        loop {
2986            let ready: Vec<String> = plan
2987                .get_ready_steps()
2988                .iter()
2989                .map(|s| s.id.clone())
2990                .collect();
2991
2992            if ready.is_empty() {
2993                // All done or deadlock
2994                if plan.has_deadlock() {
2995                    tracing::warn!(
2996                        "Plan deadlock detected: {} pending steps with unresolvable dependencies",
2997                        plan.pending_count()
2998                    );
2999                }
3000                break;
3001            }
3002
3003            if ready.len() == 1 {
3004                // === Single step: sequential execution (preserves history chain) ===
3005                let step_id = &ready[0];
3006                let step = plan
3007                    .steps
3008                    .iter()
3009                    .find(|s| s.id == *step_id)
3010                    .ok_or_else(|| anyhow::anyhow!("step '{}' not found in plan", step_id))?
3011                    .clone();
3012                let step_number = plan
3013                    .steps
3014                    .iter()
3015                    .position(|s| s.id == *step_id)
3016                    .unwrap_or(0)
3017                    + 1;
3018
3019                // Send step start event
3020                if let Some(tx) = &event_tx {
3021                    tx.send(AgentEvent::StepStart {
3022                        step_id: step.id.clone(),
3023                        description: step.content.clone(),
3024                        step_number,
3025                        total_steps,
3026                    })
3027                    .await
3028                    .ok();
3029                }
3030
3031                plan.mark_status(&step.id, TaskStatus::InProgress);
3032
3033                let step_prompt = crate::prompts::render(
3034                    crate::prompts::PLAN_EXECUTE_STEP,
3035                    &[
3036                        ("step_num", &step_number.to_string()),
3037                        ("description", &step.content),
3038                    ],
3039                );
3040
3041                match self
3042                    .execute_loop(
3043                        &current_history,
3044                        &step_prompt,
3045                        None,
3046                        event_tx.clone(),
3047                        &tokio_util::sync::CancellationToken::new(),
3048                    )
3049                    .await
3050                {
3051                    Ok(result) => {
3052                        current_history = result.messages.clone();
3053                        total_usage.prompt_tokens += result.usage.prompt_tokens;
3054                        total_usage.completion_tokens += result.usage.completion_tokens;
3055                        total_usage.total_tokens += result.usage.total_tokens;
3056                        tool_calls_count += result.tool_calls_count;
3057                        plan.mark_status(&step.id, TaskStatus::Completed);
3058
3059                        if let Some(tx) = &event_tx {
3060                            tx.send(AgentEvent::StepEnd {
3061                                step_id: step.id.clone(),
3062                                status: TaskStatus::Completed,
3063                                step_number,
3064                                total_steps,
3065                            })
3066                            .await
3067                            .ok();
3068                        }
3069                    }
3070                    Err(e) => {
3071                        tracing::error!("Plan step '{}' failed: {}", step.id, e);
3072                        plan.mark_status(&step.id, TaskStatus::Failed);
3073
3074                        if let Some(tx) = &event_tx {
3075                            tx.send(AgentEvent::StepEnd {
3076                                step_id: step.id.clone(),
3077                                status: TaskStatus::Failed,
3078                                step_number,
3079                                total_steps,
3080                            })
3081                            .await
3082                            .ok();
3083                        }
3084                    }
3085                }
3086            } else {
3087                // === Multiple steps: parallel execution via JoinSet ===
3088                // NOTE: Each parallel branch gets a clone of the base history.
3089                // Individual branch histories (tool calls, LLM turns) are NOT merged
3090                // back — only a summary message is appended. This is a deliberate
3091                // trade-off: merging divergent histories in a deterministic order is
3092                // complex and the summary approach keeps the context window manageable.
3093                let ready_steps: Vec<_> = ready
3094                    .iter()
3095                    .filter_map(|id| {
3096                        let step = plan.steps.iter().find(|s| s.id == *id)?.clone();
3097                        let step_number =
3098                            plan.steps.iter().position(|s| s.id == *id).unwrap_or(0) + 1;
3099                        Some((step, step_number))
3100                    })
3101                    .collect();
3102
3103                // Mark all as InProgress and emit StepStart events
3104                for (step, step_number) in &ready_steps {
3105                    plan.mark_status(&step.id, TaskStatus::InProgress);
3106                    if let Some(tx) = &event_tx {
3107                        tx.send(AgentEvent::StepStart {
3108                            step_id: step.id.clone(),
3109                            description: step.content.clone(),
3110                            step_number: *step_number,
3111                            total_steps,
3112                        })
3113                        .await
3114                        .ok();
3115                    }
3116                }
3117
3118                // Spawn all into JoinSet, each with a clone of the base history
3119                let mut join_set = tokio::task::JoinSet::new();
3120                for (step, step_number) in &ready_steps {
3121                    let base_history = current_history.clone();
3122                    let agent_clone = self.clone();
3123                    let tx = event_tx.clone();
3124                    let step_clone = step.clone();
3125                    let sn = *step_number;
3126
3127                    join_set.spawn(async move {
3128                        let prompt = crate::prompts::render(
3129                            crate::prompts::PLAN_EXECUTE_STEP,
3130                            &[
3131                                ("step_num", &sn.to_string()),
3132                                ("description", &step_clone.content),
3133                            ],
3134                        );
3135                        let result = agent_clone
3136                            .execute_loop(
3137                                &base_history,
3138                                &prompt,
3139                                None,
3140                                tx,
3141                                &tokio_util::sync::CancellationToken::new(),
3142                            )
3143                            .await;
3144                        (step_clone.id, sn, result)
3145                    });
3146                }
3147
3148                // Collect results
3149                let mut parallel_summaries = Vec::new();
3150                while let Some(join_result) = join_set.join_next().await {
3151                    match join_result {
3152                        Ok((step_id, step_number, step_result)) => match step_result {
3153                            Ok(result) => {
3154                                total_usage.prompt_tokens += result.usage.prompt_tokens;
3155                                total_usage.completion_tokens += result.usage.completion_tokens;
3156                                total_usage.total_tokens += result.usage.total_tokens;
3157                                tool_calls_count += result.tool_calls_count;
3158                                plan.mark_status(&step_id, TaskStatus::Completed);
3159
3160                                // Collect the final assistant text for context merging
3161                                parallel_summaries.push(format!(
3162                                    "- Step {} ({}): {}",
3163                                    step_number, step_id, result.text
3164                                ));
3165
3166                                if let Some(tx) = &event_tx {
3167                                    tx.send(AgentEvent::StepEnd {
3168                                        step_id,
3169                                        status: TaskStatus::Completed,
3170                                        step_number,
3171                                        total_steps,
3172                                    })
3173                                    .await
3174                                    .ok();
3175                                }
3176                            }
3177                            Err(e) => {
3178                                tracing::error!("Plan step '{}' failed: {}", step_id, e);
3179                                plan.mark_status(&step_id, TaskStatus::Failed);
3180
3181                                if let Some(tx) = &event_tx {
3182                                    tx.send(AgentEvent::StepEnd {
3183                                        step_id,
3184                                        status: TaskStatus::Failed,
3185                                        step_number,
3186                                        total_steps,
3187                                    })
3188                                    .await
3189                                    .ok();
3190                                }
3191                            }
3192                        },
3193                        Err(e) => {
3194                            tracing::error!("JoinSet task panicked: {}", e);
3195                        }
3196                    }
3197                }
3198
3199                // Merge parallel results into history for subsequent steps
3200                if !parallel_summaries.is_empty() {
3201                    parallel_summaries.sort(); // Deterministic ordering
3202                    let results_text = parallel_summaries.join("\n");
3203                    current_history.push(Message::user(&crate::prompts::render(
3204                        crate::prompts::PLAN_PARALLEL_RESULTS,
3205                        &[("results", &results_text)],
3206                    )));
3207                }
3208            }
3209
3210            // Emit GoalProgress after each wave
3211            if self.config.goal_tracking {
3212                let completed = plan
3213                    .steps
3214                    .iter()
3215                    .filter(|s| s.status == TaskStatus::Completed)
3216                    .count();
3217                if let Some(tx) = &event_tx {
3218                    tx.send(AgentEvent::GoalProgress {
3219                        goal: plan.goal.clone(),
3220                        progress: plan.progress(),
3221                        completed_steps: completed,
3222                        total_steps,
3223                    })
3224                    .await
3225                    .ok();
3226                }
3227            }
3228        }
3229
3230        // Get final response
3231        let final_text = current_history
3232            .last()
3233            .map(|m| {
3234                m.content
3235                    .iter()
3236                    .filter_map(|block| {
3237                        if let crate::llm::ContentBlock::Text { text } = block {
3238                            Some(text.as_str())
3239                        } else {
3240                            None
3241                        }
3242                    })
3243                    .collect::<Vec<_>>()
3244                    .join("\n")
3245            })
3246            .unwrap_or_default();
3247
3248        Ok(AgentResult {
3249            text: final_text,
3250            messages: current_history,
3251            usage: total_usage,
3252            tool_calls_count,
3253        })
3254    }
3255
3256    /// Extract goal from prompt
3257    ///
3258    /// Delegates to [`LlmPlanner`] for structured JSON goal extraction,
3259    /// falling back to heuristic logic if the LLM call fails.
3260    pub async fn extract_goal(&self, prompt: &str) -> Result<AgentGoal> {
3261        use crate::planning::LlmPlanner;
3262
3263        match LlmPlanner::extract_goal(&self.llm_client, prompt).await {
3264            Ok(goal) => Ok(goal),
3265            Err(e) => {
3266                tracing::warn!("LLM goal extraction failed, using fallback: {}", e);
3267                Ok(LlmPlanner::fallback_goal(prompt))
3268            }
3269        }
3270    }
3271
3272    /// Check if goal is achieved
3273    ///
3274    /// Delegates to [`LlmPlanner`] for structured JSON achievement check,
3275    /// falling back to heuristic logic if the LLM call fails.
3276    pub async fn check_goal_achievement(
3277        &self,
3278        goal: &AgentGoal,
3279        current_state: &str,
3280    ) -> Result<bool> {
3281        use crate::planning::LlmPlanner;
3282
3283        match LlmPlanner::check_achievement(&self.llm_client, goal, current_state).await {
3284            Ok(result) => Ok(result.achieved),
3285            Err(e) => {
3286                tracing::warn!("LLM achievement check failed, using fallback: {}", e);
3287                let result = LlmPlanner::fallback_check_achievement(goal, current_state);
3288                Ok(result.achieved)
3289            }
3290        }
3291    }
3292}
3293
3294#[cfg(test)]
3295mod tests {
3296    use super::*;
3297    use crate::llm::{ContentBlock, StreamEvent};
3298    use crate::permissions::PermissionPolicy;
3299    use crate::tools::ToolExecutor;
3300    use std::path::PathBuf;
3301    use std::sync::atomic::{AtomicUsize, Ordering};
3302
3303    /// Create a default ToolContext for tests
3304    fn test_tool_context() -> ToolContext {
3305        ToolContext::new(PathBuf::from("/tmp"))
3306    }
3307
3308    #[test]
3309    fn test_agent_config_default() {
3310        let config = AgentConfig::default();
3311        assert!(config.prompt_slots.is_empty());
3312        assert!(config.tools.is_empty()); // Tools are provided externally
3313        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
3314        assert!(config.permission_checker.is_none());
3315        assert!(config.context_providers.is_empty());
3316        // Built-in skills are always present by default
3317        let registry = config
3318            .skill_registry
3319            .expect("skill_registry must be Some by default");
3320        assert!(registry.len() >= 7, "expected at least 7 built-in skills");
3321        assert!(registry.get("code-search").is_some());
3322        assert!(registry.get("find-bugs").is_some());
3323    }
3324
3325    // ========================================================================
3326    // Mock LLM Client for Testing
3327    // ========================================================================
3328
    /// Mock LLM client that returns predefined responses.
    ///
    /// Its `LlmClient` implementation hands out the scripted responses front
    /// to back, one per call, and returns an error once the script is empty.
    pub(crate) struct MockLlmClient {
        /// Responses to return (consumed in order, front first)
        responses: std::sync::Mutex<Vec<LlmResponse>>,
        /// Number of calls made; incremented on every call, including calls
        /// that fail because no responses are left
        pub(crate) call_count: AtomicUsize,
    }
3336
3337    impl MockLlmClient {
3338        pub(crate) fn new(responses: Vec<LlmResponse>) -> Self {
3339            Self {
3340                responses: std::sync::Mutex::new(responses),
3341                call_count: AtomicUsize::new(0),
3342            }
3343        }
3344
3345        /// Create a response with text only (no tool calls)
3346        pub(crate) fn text_response(text: &str) -> LlmResponse {
3347            LlmResponse {
3348                message: Message {
3349                    role: "assistant".to_string(),
3350                    content: vec![ContentBlock::Text {
3351                        text: text.to_string(),
3352                    }],
3353                    reasoning_content: None,
3354                },
3355                usage: TokenUsage {
3356                    prompt_tokens: 10,
3357                    completion_tokens: 5,
3358                    total_tokens: 15,
3359                    cache_read_tokens: None,
3360                    cache_write_tokens: None,
3361                },
3362                stop_reason: Some("end_turn".to_string()),
3363                meta: None,
3364            }
3365        }
3366
3367        /// Create a response with a tool call
3368        pub(crate) fn tool_call_response(
3369            tool_id: &str,
3370            tool_name: &str,
3371            args: serde_json::Value,
3372        ) -> LlmResponse {
3373            LlmResponse {
3374                message: Message {
3375                    role: "assistant".to_string(),
3376                    content: vec![ContentBlock::ToolUse {
3377                        id: tool_id.to_string(),
3378                        name: tool_name.to_string(),
3379                        input: args,
3380                    }],
3381                    reasoning_content: None,
3382                },
3383                usage: TokenUsage {
3384                    prompt_tokens: 10,
3385                    completion_tokens: 5,
3386                    total_tokens: 15,
3387                    cache_read_tokens: None,
3388                    cache_write_tokens: None,
3389                },
3390                stop_reason: Some("tool_use".to_string()),
3391                meta: None,
3392            }
3393        }
3394    }
3395
3396    #[async_trait::async_trait]
3397    impl LlmClient for MockLlmClient {
3398        async fn complete(
3399            &self,
3400            _messages: &[Message],
3401            _system: Option<&str>,
3402            _tools: &[ToolDefinition],
3403        ) -> Result<LlmResponse> {
3404            self.call_count.fetch_add(1, Ordering::SeqCst);
3405            let mut responses = self.responses.lock().unwrap();
3406            if responses.is_empty() {
3407                anyhow::bail!("No more mock responses available");
3408            }
3409            Ok(responses.remove(0))
3410        }
3411
3412        async fn complete_streaming(
3413            &self,
3414            _messages: &[Message],
3415            _system: Option<&str>,
3416            _tools: &[ToolDefinition],
3417        ) -> Result<mpsc::Receiver<StreamEvent>> {
3418            self.call_count.fetch_add(1, Ordering::SeqCst);
3419            let mut responses = self.responses.lock().unwrap();
3420            if responses.is_empty() {
3421                anyhow::bail!("No more mock responses available");
3422            }
3423            let response = responses.remove(0);
3424
3425            let (tx, rx) = mpsc::channel(10);
3426            tokio::spawn(async move {
3427                // Send text deltas if any
3428                for block in &response.message.content {
3429                    if let ContentBlock::Text { text } = block {
3430                        tx.send(StreamEvent::TextDelta(text.clone())).await.ok();
3431                    }
3432                }
3433                tx.send(StreamEvent::Done(response)).await.ok();
3434            });
3435
3436            Ok(rx)
3437        }
3438    }
3439
3440    // ========================================================================
3441    // Agent Loop Tests
3442    // ========================================================================
3443
3444    #[tokio::test]
3445    async fn test_agent_simple_response() {
3446        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
3447            "Hello, I'm an AI assistant.",
3448        )]));
3449
3450        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3451        let config = AgentConfig::default();
3452
3453        let agent = AgentLoop::new(
3454            mock_client.clone(),
3455            tool_executor,
3456            test_tool_context(),
3457            config,
3458        );
3459        let result = agent.execute(&[], "Hello", None).await.unwrap();
3460
3461        assert_eq!(result.text, "Hello, I'm an AI assistant.");
3462        assert_eq!(result.tool_calls_count, 0);
3463        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
3464    }
3465
3466    #[tokio::test]
3467    async fn test_agent_with_tool_call() {
3468        let mock_client = Arc::new(MockLlmClient::new(vec![
3469            // First response: tool call
3470            MockLlmClient::tool_call_response(
3471                "tool-1",
3472                "bash",
3473                serde_json::json!({"command": "echo hello"}),
3474            ),
3475            // Second response: final text
3476            MockLlmClient::text_response("The command output was: hello"),
3477        ]));
3478
3479        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3480        let config = AgentConfig::default();
3481
3482        let agent = AgentLoop::new(
3483            mock_client.clone(),
3484            tool_executor,
3485            test_tool_context(),
3486            config,
3487        );
3488        let result = agent.execute(&[], "Run echo hello", None).await.unwrap();
3489
3490        assert_eq!(result.text, "The command output was: hello");
3491        assert_eq!(result.tool_calls_count, 1);
3492        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
3493    }
3494
3495    #[tokio::test]
3496    async fn test_agent_permission_deny() {
3497        let mock_client = Arc::new(MockLlmClient::new(vec![
3498            // First response: tool call that will be denied
3499            MockLlmClient::tool_call_response(
3500                "tool-1",
3501                "bash",
3502                serde_json::json!({"command": "rm -rf /tmp/test"}),
3503            ),
3504            // Second response: LLM responds to the denial
3505            MockLlmClient::text_response(
3506                "I cannot execute that command due to permission restrictions.",
3507            ),
3508        ]));
3509
3510        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3511
3512        // Create permission policy that denies rm commands
3513        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
3514
3515        let config = AgentConfig {
3516            permission_checker: Some(Arc::new(permission_policy)),
3517            ..Default::default()
3518        };
3519
3520        let (tx, mut rx) = mpsc::channel(100);
3521        let agent = AgentLoop::new(
3522            mock_client.clone(),
3523            tool_executor,
3524            test_tool_context(),
3525            config,
3526        );
3527        let result = agent.execute(&[], "Delete files", Some(tx)).await.unwrap();
3528
3529        // Check that we received a PermissionDenied event
3530        let mut found_permission_denied = false;
3531        while let Ok(event) = rx.try_recv() {
3532            if let AgentEvent::PermissionDenied { tool_name, .. } = event {
3533                assert_eq!(tool_name, "bash");
3534                found_permission_denied = true;
3535            }
3536        }
3537        assert!(
3538            found_permission_denied,
3539            "Should have received PermissionDenied event"
3540        );
3541
3542        assert_eq!(result.tool_calls_count, 1);
3543    }
3544
3545    #[tokio::test]
3546    async fn test_agent_permission_allow() {
3547        let mock_client = Arc::new(MockLlmClient::new(vec![
3548            // First response: tool call that will be allowed
3549            MockLlmClient::tool_call_response(
3550                "tool-1",
3551                "bash",
3552                serde_json::json!({"command": "echo hello"}),
3553            ),
3554            // Second response: final text
3555            MockLlmClient::text_response("Done!"),
3556        ]));
3557
3558        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3559
3560        // Create permission policy that allows echo commands
3561        let permission_policy = PermissionPolicy::new()
3562            .allow("bash(echo:*)")
3563            .deny("bash(rm:*)");
3564
3565        let config = AgentConfig {
3566            permission_checker: Some(Arc::new(permission_policy)),
3567            ..Default::default()
3568        };
3569
3570        let agent = AgentLoop::new(
3571            mock_client.clone(),
3572            tool_executor,
3573            test_tool_context(),
3574            config,
3575        );
3576        let result = agent.execute(&[], "Echo hello", None).await.unwrap();
3577
3578        assert_eq!(result.text, "Done!");
3579        assert_eq!(result.tool_calls_count, 1);
3580    }
3581
3582    #[tokio::test]
3583    async fn test_agent_streaming_events() {
3584        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
3585            "Hello!",
3586        )]));
3587
3588        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3589        let config = AgentConfig::default();
3590
3591        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3592        let (mut rx, handle, _cancel_token) = agent.execute_streaming(&[], "Hi").await.unwrap();
3593
3594        // Collect events
3595        let mut events = Vec::new();
3596        while let Some(event) = rx.recv().await {
3597            events.push(event);
3598        }
3599
3600        let result = handle.await.unwrap().unwrap();
3601        assert_eq!(result.text, "Hello!");
3602
3603        // Check we received Start and End events
3604        assert!(events.iter().any(|e| matches!(e, AgentEvent::Start { .. })));
3605        assert!(events.iter().any(|e| matches!(e, AgentEvent::End { .. })));
3606    }
3607
3608    #[tokio::test]
3609    async fn test_agent_max_tool_rounds() {
3610        // Create a mock that always returns tool calls (infinite loop)
3611        let responses: Vec<LlmResponse> = (0..100)
3612            .map(|i| {
3613                MockLlmClient::tool_call_response(
3614                    &format!("tool-{}", i),
3615                    "bash",
3616                    serde_json::json!({"command": "echo loop"}),
3617                )
3618            })
3619            .collect();
3620
3621        let mock_client = Arc::new(MockLlmClient::new(responses));
3622        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3623
3624        let config = AgentConfig {
3625            max_tool_rounds: 3,
3626            ..Default::default()
3627        };
3628
3629        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3630        let result = agent.execute(&[], "Loop forever", None).await;
3631
3632        // Should fail due to max tool rounds exceeded
3633        assert!(result.is_err());
3634        assert!(result.unwrap_err().to_string().contains("Max tool rounds"));
3635    }
3636
3637    #[tokio::test]
3638    async fn test_agent_no_permission_policy_defaults_to_ask() {
3639        // When no permission policy is set, tools default to Ask.
3640        // Without a confirmation manager, Ask = safe deny.
3641        let mock_client = Arc::new(MockLlmClient::new(vec![
3642            MockLlmClient::tool_call_response(
3643                "tool-1",
3644                "bash",
3645                serde_json::json!({"command": "rm -rf /tmp/test"}),
3646            ),
3647            MockLlmClient::text_response("Denied!"),
3648        ]));
3649
3650        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3651        let config = AgentConfig {
3652            permission_checker: None, // No policy → defaults to Ask
3653            // No confirmation_manager → safe deny
3654            ..Default::default()
3655        };
3656
3657        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3658        let result = agent.execute(&[], "Delete", None).await.unwrap();
3659
3660        // Should be denied (no policy + no CM = safe deny)
3661        assert_eq!(result.text, "Denied!");
3662        assert_eq!(result.tool_calls_count, 1);
3663    }
3664
3665    #[tokio::test]
3666    async fn test_agent_permission_ask_without_cm_denies() {
3667        // When permission is Ask and no confirmation manager configured,
3668        // tool execution should be denied (safe default).
3669        let mock_client = Arc::new(MockLlmClient::new(vec![
3670            MockLlmClient::tool_call_response(
3671                "tool-1",
3672                "bash",
3673                serde_json::json!({"command": "echo test"}),
3674            ),
3675            MockLlmClient::text_response("Denied!"),
3676        ]));
3677
3678        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3679
3680        // Create policy where bash falls through to Ask (default)
3681        let permission_policy = PermissionPolicy::new(); // Default decision is Ask
3682
3683        let config = AgentConfig {
3684            permission_checker: Some(Arc::new(permission_policy)),
3685            // No confirmation_manager — safe deny
3686            ..Default::default()
3687        };
3688
3689        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3690        let result = agent.execute(&[], "Echo", None).await.unwrap();
3691
3692        // Should deny (Ask without CM = safe deny)
3693        assert_eq!(result.text, "Denied!");
3694        // The tool result should contain the denial message
3695        assert!(result.tool_calls_count >= 1);
3696    }
3697
3698    // ========================================================================
3699    // HITL (Human-in-the-Loop) Tests
3700    // ========================================================================
3701
3702    #[tokio::test]
3703    async fn test_agent_hitl_approved() {
3704        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3705        use tokio::sync::broadcast;
3706
3707        let mock_client = Arc::new(MockLlmClient::new(vec![
3708            MockLlmClient::tool_call_response(
3709                "tool-1",
3710                "bash",
3711                serde_json::json!({"command": "echo hello"}),
3712            ),
3713            MockLlmClient::text_response("Command executed!"),
3714        ]));
3715
3716        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3717
3718        // Create HITL confirmation manager with policy enabled
3719        let (event_tx, _event_rx) = broadcast::channel(100);
3720        let hitl_policy = ConfirmationPolicy {
3721            enabled: true,
3722            ..Default::default()
3723        };
3724        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3725
3726        // Create permission policy that returns Ask for bash
3727        let permission_policy = PermissionPolicy::new(); // Default is Ask
3728
3729        let config = AgentConfig {
3730            permission_checker: Some(Arc::new(permission_policy)),
3731            confirmation_manager: Some(confirmation_manager.clone()),
3732            ..Default::default()
3733        };
3734
3735        // Spawn a task to approve the confirmation
3736        let cm_clone = confirmation_manager.clone();
3737        tokio::spawn(async move {
3738            // Wait a bit for the confirmation request to be created
3739            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
3740            // Approve it
3741            cm_clone.confirm("tool-1", true, None).await.ok();
3742        });
3743
3744        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3745        let result = agent.execute(&[], "Run echo", None).await.unwrap();
3746
3747        assert_eq!(result.text, "Command executed!");
3748        assert_eq!(result.tool_calls_count, 1);
3749    }
3750
3751    #[tokio::test]
3752    async fn test_agent_hitl_rejected() {
3753        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3754        use tokio::sync::broadcast;
3755
3756        let mock_client = Arc::new(MockLlmClient::new(vec![
3757            MockLlmClient::tool_call_response(
3758                "tool-1",
3759                "bash",
3760                serde_json::json!({"command": "rm -rf /"}),
3761            ),
3762            MockLlmClient::text_response("Understood, I won't do that."),
3763        ]));
3764
3765        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3766
3767        // Create HITL confirmation manager
3768        let (event_tx, _event_rx) = broadcast::channel(100);
3769        let hitl_policy = ConfirmationPolicy {
3770            enabled: true,
3771            ..Default::default()
3772        };
3773        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3774
3775        // Permission policy returns Ask
3776        let permission_policy = PermissionPolicy::new();
3777
3778        let config = AgentConfig {
3779            permission_checker: Some(Arc::new(permission_policy)),
3780            confirmation_manager: Some(confirmation_manager.clone()),
3781            ..Default::default()
3782        };
3783
3784        // Spawn a task to reject the confirmation
3785        let cm_clone = confirmation_manager.clone();
3786        tokio::spawn(async move {
3787            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
3788            cm_clone
3789                .confirm("tool-1", false, Some("Too dangerous".to_string()))
3790                .await
3791                .ok();
3792        });
3793
3794        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3795        let result = agent.execute(&[], "Delete everything", None).await.unwrap();
3796
3797        // LLM should respond to the rejection
3798        assert_eq!(result.text, "Understood, I won't do that.");
3799    }
3800
3801    #[tokio::test]
3802    async fn test_agent_hitl_timeout_reject() {
3803        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
3804        use tokio::sync::broadcast;
3805
3806        let mock_client = Arc::new(MockLlmClient::new(vec![
3807            MockLlmClient::tool_call_response(
3808                "tool-1",
3809                "bash",
3810                serde_json::json!({"command": "echo test"}),
3811            ),
3812            MockLlmClient::text_response("Timed out, I understand."),
3813        ]));
3814
3815        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3816
3817        // Create HITL with very short timeout and Reject action
3818        let (event_tx, _event_rx) = broadcast::channel(100);
3819        let hitl_policy = ConfirmationPolicy {
3820            enabled: true,
3821            default_timeout_ms: 50, // Very short timeout
3822            timeout_action: TimeoutAction::Reject,
3823            ..Default::default()
3824        };
3825        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3826
3827        let permission_policy = PermissionPolicy::new();
3828
3829        let config = AgentConfig {
3830            permission_checker: Some(Arc::new(permission_policy)),
3831            confirmation_manager: Some(confirmation_manager),
3832            ..Default::default()
3833        };
3834
3835        // Don't approve - let it timeout
3836        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3837        let result = agent.execute(&[], "Echo", None).await.unwrap();
3838
3839        // Should get timeout rejection response from LLM
3840        assert_eq!(result.text, "Timed out, I understand.");
3841    }
3842
3843    #[tokio::test]
3844    async fn test_agent_hitl_timeout_auto_approve() {
3845        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
3846        use tokio::sync::broadcast;
3847
3848        let mock_client = Arc::new(MockLlmClient::new(vec![
3849            MockLlmClient::tool_call_response(
3850                "tool-1",
3851                "bash",
3852                serde_json::json!({"command": "echo hello"}),
3853            ),
3854            MockLlmClient::text_response("Auto-approved and executed!"),
3855        ]));
3856
3857        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3858
3859        // Create HITL with very short timeout and AutoApprove action
3860        let (event_tx, _event_rx) = broadcast::channel(100);
3861        let hitl_policy = ConfirmationPolicy {
3862            enabled: true,
3863            default_timeout_ms: 50, // Very short timeout
3864            timeout_action: TimeoutAction::AutoApprove,
3865            ..Default::default()
3866        };
3867        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3868
3869        let permission_policy = PermissionPolicy::new();
3870
3871        let config = AgentConfig {
3872            permission_checker: Some(Arc::new(permission_policy)),
3873            confirmation_manager: Some(confirmation_manager),
3874            ..Default::default()
3875        };
3876
3877        // Don't approve - let it timeout and auto-approve
3878        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3879        let result = agent.execute(&[], "Echo", None).await.unwrap();
3880
3881        // Should auto-approve on timeout and execute
3882        assert_eq!(result.text, "Auto-approved and executed!");
3883        assert_eq!(result.tool_calls_count, 1);
3884    }
3885
3886    #[tokio::test]
3887    async fn test_agent_hitl_confirmation_events() {
3888        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3889        use tokio::sync::broadcast;
3890
3891        let mock_client = Arc::new(MockLlmClient::new(vec![
3892            MockLlmClient::tool_call_response(
3893                "tool-1",
3894                "bash",
3895                serde_json::json!({"command": "echo test"}),
3896            ),
3897            MockLlmClient::text_response("Done!"),
3898        ]));
3899
3900        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3901
3902        // Create HITL confirmation manager
3903        let (event_tx, mut event_rx) = broadcast::channel(100);
3904        let hitl_policy = ConfirmationPolicy {
3905            enabled: true,
3906            default_timeout_ms: 5000, // Long enough timeout
3907            ..Default::default()
3908        };
3909        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3910
3911        let permission_policy = PermissionPolicy::new();
3912
3913        let config = AgentConfig {
3914            permission_checker: Some(Arc::new(permission_policy)),
3915            confirmation_manager: Some(confirmation_manager.clone()),
3916            ..Default::default()
3917        };
3918
3919        // Spawn task to approve and collect events
3920        let cm_clone = confirmation_manager.clone();
3921        let event_handle = tokio::spawn(async move {
3922            let mut events = Vec::new();
3923            // Wait for ConfirmationRequired event
3924            while let Ok(event) = event_rx.recv().await {
3925                events.push(event.clone());
3926                if let AgentEvent::ConfirmationRequired { tool_id, .. } = event {
3927                    // Approve it
3928                    cm_clone.confirm(&tool_id, true, None).await.ok();
3929                    // Wait for ConfirmationReceived
3930                    if let Ok(recv_event) = event_rx.recv().await {
3931                        events.push(recv_event);
3932                    }
3933                    break;
3934                }
3935            }
3936            events
3937        });
3938
3939        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3940        let _result = agent.execute(&[], "Echo", None).await.unwrap();
3941
3942        // Check events
3943        let events = event_handle.await.unwrap();
3944        assert!(
3945            events
3946                .iter()
3947                .any(|e| matches!(e, AgentEvent::ConfirmationRequired { .. })),
3948            "Should have ConfirmationRequired event"
3949        );
3950        assert!(
3951            events
3952                .iter()
3953                .any(|e| matches!(e, AgentEvent::ConfirmationReceived { approved: true, .. })),
3954            "Should have ConfirmationReceived event with approved=true"
3955        );
3956    }
3957
3958    #[tokio::test]
3959    async fn test_agent_hitl_disabled_auto_executes() {
3960        // When HITL is disabled, tools should execute automatically even with Ask permission
3961        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3962        use tokio::sync::broadcast;
3963
3964        let mock_client = Arc::new(MockLlmClient::new(vec![
3965            MockLlmClient::tool_call_response(
3966                "tool-1",
3967                "bash",
3968                serde_json::json!({"command": "echo auto"}),
3969            ),
3970            MockLlmClient::text_response("Auto executed!"),
3971        ]));
3972
3973        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3974
3975        // Create HITL with enabled=false
3976        let (event_tx, _event_rx) = broadcast::channel(100);
3977        let hitl_policy = ConfirmationPolicy {
3978            enabled: false, // HITL disabled
3979            ..Default::default()
3980        };
3981        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3982
3983        let permission_policy = PermissionPolicy::new(); // Default is Ask
3984
3985        let config = AgentConfig {
3986            permission_checker: Some(Arc::new(permission_policy)),
3987            confirmation_manager: Some(confirmation_manager),
3988            ..Default::default()
3989        };
3990
3991        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3992        let result = agent.execute(&[], "Echo", None).await.unwrap();
3993
3994        // Should execute without waiting for confirmation
3995        assert_eq!(result.text, "Auto executed!");
3996        assert_eq!(result.tool_calls_count, 1);
3997    }
3998
3999    #[tokio::test]
4000    async fn test_agent_hitl_with_permission_deny_skips_hitl() {
4001        // When permission is Deny, HITL should not be triggered
4002        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4003        use tokio::sync::broadcast;
4004
4005        let mock_client = Arc::new(MockLlmClient::new(vec![
4006            MockLlmClient::tool_call_response(
4007                "tool-1",
4008                "bash",
4009                serde_json::json!({"command": "rm -rf /"}),
4010            ),
4011            MockLlmClient::text_response("Blocked by permission."),
4012        ]));
4013
4014        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4015
4016        // Create HITL enabled
4017        let (event_tx, mut event_rx) = broadcast::channel(100);
4018        let hitl_policy = ConfirmationPolicy {
4019            enabled: true,
4020            ..Default::default()
4021        };
4022        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4023
4024        // Permission policy denies rm commands
4025        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4026
4027        let config = AgentConfig {
4028            permission_checker: Some(Arc::new(permission_policy)),
4029            confirmation_manager: Some(confirmation_manager),
4030            ..Default::default()
4031        };
4032
4033        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4034        let result = agent.execute(&[], "Delete", None).await.unwrap();
4035
4036        // Should be denied without HITL
4037        assert_eq!(result.text, "Blocked by permission.");
4038
4039        // Should NOT have any ConfirmationRequired events
4040        let mut found_confirmation = false;
4041        while let Ok(event) = event_rx.try_recv() {
4042            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4043                found_confirmation = true;
4044            }
4045        }
4046        assert!(
4047            !found_confirmation,
4048            "HITL should not be triggered when permission is Deny"
4049        );
4050    }
4051
4052    #[tokio::test]
4053    async fn test_agent_hitl_with_permission_allow_skips_hitl() {
4054        // When permission is Allow, HITL confirmation is skipped entirely.
4055        // PermissionPolicy is the declarative rule engine; Allow = execute directly.
4056        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4057        use tokio::sync::broadcast;
4058
4059        let mock_client = Arc::new(MockLlmClient::new(vec![
4060            MockLlmClient::tool_call_response(
4061                "tool-1",
4062                "bash",
4063                serde_json::json!({"command": "echo hello"}),
4064            ),
4065            MockLlmClient::text_response("Allowed!"),
4066        ]));
4067
4068        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4069
4070        // Create HITL enabled
4071        let (event_tx, mut event_rx) = broadcast::channel(100);
4072        let hitl_policy = ConfirmationPolicy {
4073            enabled: true,
4074            ..Default::default()
4075        };
4076        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4077
4078        // Permission policy allows echo commands
4079        let permission_policy = PermissionPolicy::new().allow("bash(echo:*)");
4080
4081        let config = AgentConfig {
4082            permission_checker: Some(Arc::new(permission_policy)),
4083            confirmation_manager: Some(confirmation_manager.clone()),
4084            ..Default::default()
4085        };
4086
4087        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4088        let result = agent.execute(&[], "Echo", None).await.unwrap();
4089
4090        // Should execute directly without HITL (permission Allow skips confirmation)
4091        assert_eq!(result.text, "Allowed!");
4092
4093        // Should NOT have ConfirmationRequired event (Allow bypasses HITL)
4094        let mut found_confirmation = false;
4095        while let Ok(event) = event_rx.try_recv() {
4096            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4097                found_confirmation = true;
4098            }
4099        }
4100        assert!(
4101            !found_confirmation,
4102            "Permission Allow should skip HITL confirmation"
4103        );
4104    }
4105
4106    #[tokio::test]
4107    async fn test_agent_hitl_multiple_tool_calls() {
4108        // Test multiple tool calls in sequence with HITL
4109        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4110        use tokio::sync::broadcast;
4111
4112        let mock_client = Arc::new(MockLlmClient::new(vec![
4113            // First response: two tool calls
4114            LlmResponse {
4115                message: Message {
4116                    role: "assistant".to_string(),
4117                    content: vec![
4118                        ContentBlock::ToolUse {
4119                            id: "tool-1".to_string(),
4120                            name: "bash".to_string(),
4121                            input: serde_json::json!({"command": "echo first"}),
4122                        },
4123                        ContentBlock::ToolUse {
4124                            id: "tool-2".to_string(),
4125                            name: "bash".to_string(),
4126                            input: serde_json::json!({"command": "echo second"}),
4127                        },
4128                    ],
4129                    reasoning_content: None,
4130                },
4131                usage: TokenUsage {
4132                    prompt_tokens: 10,
4133                    completion_tokens: 5,
4134                    total_tokens: 15,
4135                    cache_read_tokens: None,
4136                    cache_write_tokens: None,
4137                },
4138                stop_reason: Some("tool_use".to_string()),
4139                meta: None,
4140            },
4141            MockLlmClient::text_response("Both executed!"),
4142        ]));
4143
4144        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4145
4146        // Create HITL
4147        let (event_tx, _event_rx) = broadcast::channel(100);
4148        let hitl_policy = ConfirmationPolicy {
4149            enabled: true,
4150            default_timeout_ms: 5000,
4151            ..Default::default()
4152        };
4153        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4154
4155        let permission_policy = PermissionPolicy::new(); // Default Ask
4156
4157        let config = AgentConfig {
4158            permission_checker: Some(Arc::new(permission_policy)),
4159            confirmation_manager: Some(confirmation_manager.clone()),
4160            ..Default::default()
4161        };
4162
4163        // Spawn task to approve both tools
4164        let cm_clone = confirmation_manager.clone();
4165        tokio::spawn(async move {
4166            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4167            cm_clone.confirm("tool-1", true, None).await.ok();
4168            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4169            cm_clone.confirm("tool-2", true, None).await.ok();
4170        });
4171
4172        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4173        let result = agent.execute(&[], "Run both", None).await.unwrap();
4174
4175        assert_eq!(result.text, "Both executed!");
4176        assert_eq!(result.tool_calls_count, 2);
4177    }
4178
4179    #[tokio::test]
4180    async fn test_agent_hitl_partial_approval() {
4181        // Test: first tool approved, second rejected
4182        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4183        use tokio::sync::broadcast;
4184
4185        let mock_client = Arc::new(MockLlmClient::new(vec![
4186            // First response: two tool calls
4187            LlmResponse {
4188                message: Message {
4189                    role: "assistant".to_string(),
4190                    content: vec![
4191                        ContentBlock::ToolUse {
4192                            id: "tool-1".to_string(),
4193                            name: "bash".to_string(),
4194                            input: serde_json::json!({"command": "echo safe"}),
4195                        },
4196                        ContentBlock::ToolUse {
4197                            id: "tool-2".to_string(),
4198                            name: "bash".to_string(),
4199                            input: serde_json::json!({"command": "rm -rf /"}),
4200                        },
4201                    ],
4202                    reasoning_content: None,
4203                },
4204                usage: TokenUsage {
4205                    prompt_tokens: 10,
4206                    completion_tokens: 5,
4207                    total_tokens: 15,
4208                    cache_read_tokens: None,
4209                    cache_write_tokens: None,
4210                },
4211                stop_reason: Some("tool_use".to_string()),
4212                meta: None,
4213            },
4214            MockLlmClient::text_response("First worked, second rejected."),
4215        ]));
4216
4217        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4218
4219        let (event_tx, _event_rx) = broadcast::channel(100);
4220        let hitl_policy = ConfirmationPolicy {
4221            enabled: true,
4222            default_timeout_ms: 5000,
4223            ..Default::default()
4224        };
4225        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4226
4227        let permission_policy = PermissionPolicy::new();
4228
4229        let config = AgentConfig {
4230            permission_checker: Some(Arc::new(permission_policy)),
4231            confirmation_manager: Some(confirmation_manager.clone()),
4232            ..Default::default()
4233        };
4234
4235        // Approve first, reject second
4236        let cm_clone = confirmation_manager.clone();
4237        tokio::spawn(async move {
4238            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4239            cm_clone.confirm("tool-1", true, None).await.ok();
4240            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4241            cm_clone
4242                .confirm("tool-2", false, Some("Dangerous".to_string()))
4243                .await
4244                .ok();
4245        });
4246
4247        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4248        let result = agent.execute(&[], "Run both", None).await.unwrap();
4249
4250        assert_eq!(result.text, "First worked, second rejected.");
4251        assert_eq!(result.tool_calls_count, 2);
4252    }
4253
4254    #[tokio::test]
4255    async fn test_agent_hitl_yolo_mode_auto_approves() {
4256        // YOLO mode: specific lanes auto-approve without confirmation
4257        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, SessionLane};
4258        use tokio::sync::broadcast;
4259
4260        let mock_client = Arc::new(MockLlmClient::new(vec![
4261            MockLlmClient::tool_call_response(
4262                "tool-1",
4263                "read", // Query lane tool
4264                serde_json::json!({"path": "/tmp/test.txt"}),
4265            ),
4266            MockLlmClient::text_response("File read!"),
4267        ]));
4268
4269        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4270
4271        // YOLO mode for Query lane (read, glob, ls, grep)
4272        let (event_tx, mut event_rx) = broadcast::channel(100);
4273        let mut yolo_lanes = std::collections::HashSet::new();
4274        yolo_lanes.insert(SessionLane::Query);
4275        let hitl_policy = ConfirmationPolicy {
4276            enabled: true,
4277            yolo_lanes, // Auto-approve query operations
4278            ..Default::default()
4279        };
4280        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4281
4282        let permission_policy = PermissionPolicy::new();
4283
4284        let config = AgentConfig {
4285            permission_checker: Some(Arc::new(permission_policy)),
4286            confirmation_manager: Some(confirmation_manager),
4287            ..Default::default()
4288        };
4289
4290        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4291        let result = agent.execute(&[], "Read file", None).await.unwrap();
4292
4293        // Should auto-execute without confirmation (YOLO mode)
4294        assert_eq!(result.text, "File read!");
4295
4296        // Should NOT have ConfirmationRequired for yolo lane
4297        let mut found_confirmation = false;
4298        while let Ok(event) = event_rx.try_recv() {
4299            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4300                found_confirmation = true;
4301            }
4302        }
4303        assert!(
4304            !found_confirmation,
4305            "YOLO mode should not trigger confirmation"
4306        );
4307    }
4308
4309    #[tokio::test]
4310    async fn test_agent_config_with_all_options() {
4311        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4312        use tokio::sync::broadcast;
4313
4314        let (event_tx, _) = broadcast::channel(100);
4315        let hitl_policy = ConfirmationPolicy::default();
4316        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4317
4318        let permission_policy = PermissionPolicy::new().allow("bash(*)");
4319
4320        let config = AgentConfig {
4321            prompt_slots: SystemPromptSlots {
4322                extra: Some("Test system prompt".to_string()),
4323                ..Default::default()
4324            },
4325            tools: vec![],
4326            max_tool_rounds: 10,
4327            permission_checker: Some(Arc::new(permission_policy)),
4328            confirmation_manager: Some(confirmation_manager),
4329            context_providers: vec![],
4330            planning_mode: PlanningMode::default(),
4331            goal_tracking: false,
4332            hook_engine: None,
4333            skill_registry: None,
4334            ..AgentConfig::default()
4335        };
4336
4337        assert!(config.prompt_slots.build().contains("Test system prompt"));
4338        assert_eq!(config.max_tool_rounds, 10);
4339        assert!(config.permission_checker.is_some());
4340        assert!(config.confirmation_manager.is_some());
4341        assert!(config.context_providers.is_empty());
4342
4343        // Test Debug trait
4344        let debug_str = format!("{:?}", config);
4345        assert!(debug_str.contains("AgentConfig"));
4346        assert!(debug_str.contains("permission_checker: true"));
4347        assert!(debug_str.contains("confirmation_manager: true"));
4348        assert!(debug_str.contains("context_providers: 0"));
4349    }
4350
4351    // ========================================================================
4352    // Context Provider Tests
4353    // ========================================================================
4354
4355    use crate::context::{ContextItem, ContextType};
4356
4357    /// Mock context provider for testing
4358    struct MockContextProvider {
4359        name: String,
4360        items: Vec<ContextItem>,
4361        on_turn_calls: std::sync::Arc<tokio::sync::RwLock<Vec<(String, String, String)>>>,
4362    }
4363
4364    impl MockContextProvider {
4365        fn new(name: &str) -> Self {
4366            Self {
4367                name: name.to_string(),
4368                items: Vec::new(),
4369                on_turn_calls: std::sync::Arc::new(tokio::sync::RwLock::new(Vec::new())),
4370            }
4371        }
4372
4373        fn with_items(mut self, items: Vec<ContextItem>) -> Self {
4374            self.items = items;
4375            self
4376        }
4377    }
4378
4379    #[async_trait::async_trait]
4380    impl ContextProvider for MockContextProvider {
4381        fn name(&self) -> &str {
4382            &self.name
4383        }
4384
4385        async fn query(&self, _query: &ContextQuery) -> anyhow::Result<ContextResult> {
4386            let mut result = ContextResult::new(&self.name);
4387            for item in &self.items {
4388                result.add_item(item.clone());
4389            }
4390            Ok(result)
4391        }
4392
4393        async fn on_turn_complete(
4394            &self,
4395            session_id: &str,
4396            prompt: &str,
4397            response: &str,
4398        ) -> anyhow::Result<()> {
4399            let mut calls = self.on_turn_calls.write().await;
4400            calls.push((
4401                session_id.to_string(),
4402                prompt.to_string(),
4403                response.to_string(),
4404            ));
4405            Ok(())
4406        }
4407    }
4408
4409    #[tokio::test]
4410    async fn test_agent_with_context_provider() {
4411        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4412            "Response using context",
4413        )]));
4414
4415        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4416
4417        let provider =
4418            MockContextProvider::new("test-provider").with_items(vec![ContextItem::new(
4419                "ctx-1",
4420                ContextType::Resource,
4421                "Relevant context here",
4422            )
4423            .with_source("test://docs/example")]);
4424
4425        let config = AgentConfig {
4426            prompt_slots: SystemPromptSlots {
4427                extra: Some("You are helpful.".to_string()),
4428                ..Default::default()
4429            },
4430            context_providers: vec![Arc::new(provider)],
4431            ..Default::default()
4432        };
4433
4434        let agent = AgentLoop::new(
4435            mock_client.clone(),
4436            tool_executor,
4437            test_tool_context(),
4438            config,
4439        );
4440        let result = agent.execute(&[], "What is X?", None).await.unwrap();
4441
4442        assert_eq!(result.text, "Response using context");
4443        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
4444    }
4445
4446    #[tokio::test]
4447    async fn test_agent_context_provider_events() {
4448        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4449            "Answer",
4450        )]));
4451
4452        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4453
4454        let provider =
4455            MockContextProvider::new("event-provider").with_items(vec![ContextItem::new(
4456                "item-1",
4457                ContextType::Memory,
4458                "Memory content",
4459            )
4460            .with_token_count(50)]);
4461
4462        let config = AgentConfig {
4463            context_providers: vec![Arc::new(provider)],
4464            ..Default::default()
4465        };
4466
4467        let (tx, mut rx) = mpsc::channel(100);
4468        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4469        let _result = agent.execute(&[], "Test prompt", Some(tx)).await.unwrap();
4470
4471        // Collect events
4472        let mut events = Vec::new();
4473        while let Ok(event) = rx.try_recv() {
4474            events.push(event);
4475        }
4476
4477        // Should have ContextResolving and ContextResolved events
4478        assert!(
4479            events
4480                .iter()
4481                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
4482            "Should have ContextResolving event"
4483        );
4484        assert!(
4485            events
4486                .iter()
4487                .any(|e| matches!(e, AgentEvent::ContextResolved { .. })),
4488            "Should have ContextResolved event"
4489        );
4490
4491        // Check context resolved values
4492        for event in &events {
4493            if let AgentEvent::ContextResolved {
4494                total_items,
4495                total_tokens,
4496            } = event
4497            {
4498                assert_eq!(*total_items, 1);
4499                assert_eq!(*total_tokens, 50);
4500            }
4501        }
4502    }
4503
4504    #[tokio::test]
4505    async fn test_agent_multiple_context_providers() {
4506        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4507            "Combined response",
4508        )]));
4509
4510        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4511
4512        let provider1 = MockContextProvider::new("provider-1").with_items(vec![ContextItem::new(
4513            "p1-1",
4514            ContextType::Resource,
4515            "Resource from P1",
4516        )
4517        .with_token_count(100)]);
4518
4519        let provider2 = MockContextProvider::new("provider-2").with_items(vec![
4520            ContextItem::new("p2-1", ContextType::Memory, "Memory from P2").with_token_count(50),
4521            ContextItem::new("p2-2", ContextType::Skill, "Skill from P2").with_token_count(75),
4522        ]);
4523
4524        let config = AgentConfig {
4525            prompt_slots: SystemPromptSlots {
4526                extra: Some("Base system prompt.".to_string()),
4527                ..Default::default()
4528            },
4529            context_providers: vec![Arc::new(provider1), Arc::new(provider2)],
4530            ..Default::default()
4531        };
4532
4533        let (tx, mut rx) = mpsc::channel(100);
4534        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4535        let result = agent.execute(&[], "Query", Some(tx)).await.unwrap();
4536
4537        assert_eq!(result.text, "Combined response");
4538
4539        // Check context resolved event has combined totals
4540        while let Ok(event) = rx.try_recv() {
4541            if let AgentEvent::ContextResolved {
4542                total_items,
4543                total_tokens,
4544            } = event
4545            {
4546                assert_eq!(total_items, 3); // 1 + 2
4547                assert_eq!(total_tokens, 225); // 100 + 50 + 75
4548            }
4549        }
4550    }
4551
4552    #[tokio::test]
4553    async fn test_agent_no_context_providers() {
4554        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4555            "No context",
4556        )]));
4557
4558        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4559
4560        // No context providers
4561        let config = AgentConfig::default();
4562
4563        let (tx, mut rx) = mpsc::channel(100);
4564        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4565        let result = agent.execute(&[], "Simple prompt", Some(tx)).await.unwrap();
4566
4567        assert_eq!(result.text, "No context");
4568
4569        // Should NOT have context events when no providers
4570        let mut events = Vec::new();
4571        while let Ok(event) = rx.try_recv() {
4572            events.push(event);
4573        }
4574
4575        assert!(
4576            !events
4577                .iter()
4578                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
4579            "Should NOT have ContextResolving event"
4580        );
4581    }
4582
4583    #[tokio::test]
4584    async fn test_agent_context_on_turn_complete() {
4585        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4586            "Final response",
4587        )]));
4588
4589        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4590
4591        let provider = Arc::new(MockContextProvider::new("memory-provider"));
4592        let on_turn_calls = provider.on_turn_calls.clone();
4593
4594        let config = AgentConfig {
4595            context_providers: vec![provider],
4596            ..Default::default()
4597        };
4598
4599        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4600
4601        // Execute with session ID
4602        let result = agent
4603            .execute_with_session(&[], "User prompt", Some("sess-123"), None, None)
4604            .await
4605            .unwrap();
4606
4607        assert_eq!(result.text, "Final response");
4608
4609        // Check on_turn_complete was called
4610        let calls = on_turn_calls.read().await;
4611        assert_eq!(calls.len(), 1);
4612        assert_eq!(calls[0].0, "sess-123");
4613        assert_eq!(calls[0].1, "User prompt");
4614        assert_eq!(calls[0].2, "Final response");
4615    }
4616
4617    #[tokio::test]
4618    async fn test_agent_context_on_turn_complete_no_session() {
4619        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4620            "Response",
4621        )]));
4622
4623        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4624
4625        let provider = Arc::new(MockContextProvider::new("memory-provider"));
4626        let on_turn_calls = provider.on_turn_calls.clone();
4627
4628        let config = AgentConfig {
4629            context_providers: vec![provider],
4630            ..Default::default()
4631        };
4632
4633        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4634
4635        // Execute without session ID (uses execute() which passes None)
4636        let _result = agent.execute(&[], "Prompt", None).await.unwrap();
4637
4638        // on_turn_complete should NOT be called when session_id is None
4639        let calls = on_turn_calls.read().await;
4640        assert!(calls.is_empty());
4641    }
4642
4643    #[tokio::test]
4644    async fn test_agent_build_augmented_system_prompt() {
4645        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response("OK")]));
4646
4647        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4648
4649        let provider = MockContextProvider::new("test").with_items(vec![ContextItem::new(
4650            "doc-1",
4651            ContextType::Resource,
4652            "Auth uses JWT tokens.",
4653        )
4654        .with_source("viking://docs/auth")]);
4655
4656        let config = AgentConfig {
4657            prompt_slots: SystemPromptSlots {
4658                extra: Some("You are helpful.".to_string()),
4659                ..Default::default()
4660            },
4661            context_providers: vec![Arc::new(provider)],
4662            ..Default::default()
4663        };
4664
4665        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4666
4667        // Test building augmented prompt
4668        let context_results = agent.resolve_context("test", None).await;
4669        let augmented = agent.build_augmented_system_prompt(&context_results);
4670
4671        let augmented_str = augmented.unwrap();
4672        assert!(augmented_str.contains("You are helpful."));
4673        assert!(augmented_str.contains("<context source=\"viking://docs/auth\" type=\"Resource\">"));
4674        assert!(augmented_str.contains("Auth uses JWT tokens."));
4675    }
4676
4677    // ========================================================================
4678    // Agentic Loop Integration Tests
4679    // ========================================================================
4680
4681    /// Helper: collect all events from a channel
4682    async fn collect_events(mut rx: mpsc::Receiver<AgentEvent>) -> Vec<AgentEvent> {
4683        let mut events = Vec::new();
4684        while let Ok(event) = rx.try_recv() {
4685            events.push(event);
4686        }
4687        // Drain remaining
4688        while let Some(event) = rx.recv().await {
4689            events.push(event);
4690        }
4691        events
4692    }
4693
4694    #[tokio::test]
4695    async fn test_agent_multi_turn_tool_chain() {
4696        // LLM calls tool A → sees result → calls tool B → sees result → final answer
4697        let mock_client = Arc::new(MockLlmClient::new(vec![
4698            // Turn 1: call ls
4699            MockLlmClient::tool_call_response(
4700                "t1",
4701                "bash",
4702                serde_json::json!({"command": "echo step1"}),
4703            ),
4704            // Turn 2: call another tool based on first result
4705            MockLlmClient::tool_call_response(
4706                "t2",
4707                "bash",
4708                serde_json::json!({"command": "echo step2"}),
4709            ),
4710            // Turn 3: final answer
4711            MockLlmClient::text_response("Completed both steps: step1 then step2"),
4712        ]));
4713
4714        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4715        let config = AgentConfig::default();
4716
4717        let agent = AgentLoop::new(
4718            mock_client.clone(),
4719            tool_executor,
4720            test_tool_context(),
4721            config,
4722        );
4723        let result = agent.execute(&[], "Run two steps", None).await.unwrap();
4724
4725        assert_eq!(result.text, "Completed both steps: step1 then step2");
4726        assert_eq!(result.tool_calls_count, 2);
4727        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 3);
4728
4729        // Verify message history: user → assistant(tool_use) → user(tool_result) → assistant(tool_use) → user(tool_result) → assistant(text)
4730        assert_eq!(result.messages[0].role, "user");
4731        assert_eq!(result.messages[1].role, "assistant"); // tool call 1
4732        assert_eq!(result.messages[2].role, "user"); // tool result 1 (Anthropic convention)
4733        assert_eq!(result.messages[3].role, "assistant"); // tool call 2
4734        assert_eq!(result.messages[4].role, "user"); // tool result 2
4735        assert_eq!(result.messages[5].role, "assistant"); // final text
4736        assert_eq!(result.messages.len(), 6);
4737    }
4738
4739    #[tokio::test]
4740    async fn test_agent_conversation_history_preserved() {
4741        // Pass existing history, verify it's preserved in output
4742        let existing_history = vec![
4743            Message::user("What is Rust?"),
4744            Message {
4745                role: "assistant".to_string(),
4746                content: vec![ContentBlock::Text {
4747                    text: "Rust is a systems programming language.".to_string(),
4748                }],
4749                reasoning_content: None,
4750            },
4751        ];
4752
4753        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4754            "Rust was created by Graydon Hoare at Mozilla.",
4755        )]));
4756
4757        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4758        let agent = AgentLoop::new(
4759            mock_client.clone(),
4760            tool_executor,
4761            test_tool_context(),
4762            AgentConfig::default(),
4763        );
4764
4765        let result = agent
4766            .execute(&existing_history, "Who created it?", None)
4767            .await
4768            .unwrap();
4769
4770        // History should contain: old user + old assistant + new user + new assistant
4771        assert_eq!(result.messages.len(), 4);
4772        assert_eq!(result.messages[0].text(), "What is Rust?");
4773        assert_eq!(
4774            result.messages[1].text(),
4775            "Rust is a systems programming language."
4776        );
4777        assert_eq!(result.messages[2].text(), "Who created it?");
4778        assert_eq!(
4779            result.messages[3].text(),
4780            "Rust was created by Graydon Hoare at Mozilla."
4781        );
4782    }
4783
4784    #[tokio::test]
4785    async fn test_agent_event_stream_completeness() {
4786        // Verify full event sequence for a single tool call loop
4787        let mock_client = Arc::new(MockLlmClient::new(vec![
4788            MockLlmClient::tool_call_response(
4789                "t1",
4790                "bash",
4791                serde_json::json!({"command": "echo hi"}),
4792            ),
4793            MockLlmClient::text_response("Done"),
4794        ]));
4795
4796        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4797        let agent = AgentLoop::new(
4798            mock_client,
4799            tool_executor,
4800            test_tool_context(),
4801            AgentConfig::default(),
4802        );
4803
4804        let (tx, rx) = mpsc::channel(100);
4805        let result = agent.execute(&[], "Say hi", Some(tx)).await.unwrap();
4806        assert_eq!(result.text, "Done");
4807
4808        let events = collect_events(rx).await;
4809
4810        // Verify event sequence
4811        let event_types: Vec<&str> = events
4812            .iter()
4813            .map(|e| match e {
4814                AgentEvent::Start { .. } => "Start",
4815                AgentEvent::TurnStart { .. } => "TurnStart",
4816                AgentEvent::TurnEnd { .. } => "TurnEnd",
4817                AgentEvent::ToolEnd { .. } => "ToolEnd",
4818                AgentEvent::End { .. } => "End",
4819                _ => "Other",
4820            })
4821            .collect();
4822
4823        // Must start with Start, end with End
4824        assert_eq!(event_types.first(), Some(&"Start"));
4825        assert_eq!(event_types.last(), Some(&"End"));
4826
4827        // Must have 2 TurnStarts (tool call turn + final answer turn)
4828        let turn_starts = event_types.iter().filter(|&&t| t == "TurnStart").count();
4829        assert_eq!(turn_starts, 2);
4830
4831        // Must have 1 ToolEnd
4832        let tool_ends = event_types.iter().filter(|&&t| t == "ToolEnd").count();
4833        assert_eq!(tool_ends, 1);
4834    }
4835
4836    #[tokio::test]
4837    async fn test_agent_multiple_tools_single_turn() {
4838        // LLM returns 2 tool calls in one response
4839        let mock_client = Arc::new(MockLlmClient::new(vec![
4840            LlmResponse {
4841                message: Message {
4842                    role: "assistant".to_string(),
4843                    content: vec![
4844                        ContentBlock::ToolUse {
4845                            id: "t1".to_string(),
4846                            name: "bash".to_string(),
4847                            input: serde_json::json!({"command": "echo first"}),
4848                        },
4849                        ContentBlock::ToolUse {
4850                            id: "t2".to_string(),
4851                            name: "bash".to_string(),
4852                            input: serde_json::json!({"command": "echo second"}),
4853                        },
4854                    ],
4855                    reasoning_content: None,
4856                },
4857                usage: TokenUsage {
4858                    prompt_tokens: 10,
4859                    completion_tokens: 5,
4860                    total_tokens: 15,
4861                    cache_read_tokens: None,
4862                    cache_write_tokens: None,
4863                },
4864                stop_reason: Some("tool_use".to_string()),
4865                meta: None,
4866            },
4867            MockLlmClient::text_response("Both commands ran"),
4868        ]));
4869
4870        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4871        let agent = AgentLoop::new(
4872            mock_client.clone(),
4873            tool_executor,
4874            test_tool_context(),
4875            AgentConfig::default(),
4876        );
4877
4878        let result = agent.execute(&[], "Run both", None).await.unwrap();
4879
4880        assert_eq!(result.text, "Both commands ran");
4881        assert_eq!(result.tool_calls_count, 2);
4882        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2); // Only 2 LLM calls
4883
4884        // Messages: user → assistant(2 tools) → user(tool_result) → user(tool_result) → assistant(text)
4885        assert_eq!(result.messages[0].role, "user");
4886        assert_eq!(result.messages[1].role, "assistant");
4887        assert_eq!(result.messages[2].role, "user"); // tool result 1
4888        assert_eq!(result.messages[3].role, "user"); // tool result 2
4889        assert_eq!(result.messages[4].role, "assistant");
4890    }
4891
4892    #[tokio::test]
4893    async fn test_agent_token_usage_accumulation() {
4894        // Verify usage sums across multiple turns
4895        let mock_client = Arc::new(MockLlmClient::new(vec![
4896            MockLlmClient::tool_call_response(
4897                "t1",
4898                "bash",
4899                serde_json::json!({"command": "echo x"}),
4900            ),
4901            MockLlmClient::text_response("Done"),
4902        ]));
4903
4904        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4905        let agent = AgentLoop::new(
4906            mock_client,
4907            tool_executor,
4908            test_tool_context(),
4909            AgentConfig::default(),
4910        );
4911
4912        let result = agent.execute(&[], "test", None).await.unwrap();
4913
4914        // Each mock response has prompt=10, completion=5, total=15
4915        // 2 LLM calls → 20 prompt, 10 completion, 30 total
4916        assert_eq!(result.usage.prompt_tokens, 20);
4917        assert_eq!(result.usage.completion_tokens, 10);
4918        assert_eq!(result.usage.total_tokens, 30);
4919    }
4920
4921    #[tokio::test]
4922    async fn test_agent_system_prompt_passed() {
4923        // Verify system prompt is used (MockLlmClient captures calls)
4924        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4925            "I am a coding assistant.",
4926        )]));
4927
4928        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4929        let config = AgentConfig {
4930            prompt_slots: SystemPromptSlots {
4931                extra: Some("You are a coding assistant.".to_string()),
4932                ..Default::default()
4933            },
4934            ..Default::default()
4935        };
4936
4937        let agent = AgentLoop::new(
4938            mock_client.clone(),
4939            tool_executor,
4940            test_tool_context(),
4941            config,
4942        );
4943        let result = agent.execute(&[], "What are you?", None).await.unwrap();
4944
4945        assert_eq!(result.text, "I am a coding assistant.");
4946        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
4947    }
4948
4949    #[tokio::test]
4950    async fn test_agent_max_rounds_with_persistent_tool_calls() {
4951        // LLM keeps calling tools forever — should hit max_tool_rounds
4952        let mut responses = Vec::new();
4953        for i in 0..15 {
4954            responses.push(MockLlmClient::tool_call_response(
4955                &format!("t{}", i),
4956                "bash",
4957                serde_json::json!({"command": format!("echo round{}", i)}),
4958            ));
4959        }
4960
4961        let mock_client = Arc::new(MockLlmClient::new(responses));
4962        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4963        let config = AgentConfig {
4964            max_tool_rounds: 5,
4965            ..Default::default()
4966        };
4967
4968        let agent = AgentLoop::new(
4969            mock_client.clone(),
4970            tool_executor,
4971            test_tool_context(),
4972            config,
4973        );
4974        let result = agent.execute(&[], "Loop forever", None).await;
4975
4976        assert!(result.is_err());
4977        let err = result.unwrap_err().to_string();
4978        assert!(err.contains("Max tool rounds (5) exceeded"));
4979    }
4980
4981    #[tokio::test]
4982    async fn test_agent_end_event_contains_final_text() {
4983        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4984            "Final answer here",
4985        )]));
4986
4987        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4988        let agent = AgentLoop::new(
4989            mock_client,
4990            tool_executor,
4991            test_tool_context(),
4992            AgentConfig::default(),
4993        );
4994
4995        let (tx, rx) = mpsc::channel(100);
4996        agent.execute(&[], "test", Some(tx)).await.unwrap();
4997
4998        let events = collect_events(rx).await;
4999        let end_event = events.iter().find(|e| matches!(e, AgentEvent::End { .. }));
5000        assert!(end_event.is_some());
5001
5002        if let AgentEvent::End { text, usage, .. } = end_event.unwrap() {
5003            assert_eq!(text, "Final answer here");
5004            assert_eq!(usage.total_tokens, 15);
5005        }
5006    }
5007}
5008
5009#[cfg(test)]
5010mod extra_agent_tests {
5011    use super::*;
5012    use crate::agent::tests::MockLlmClient;
5013    use crate::queue::SessionQueueConfig;
5014    use crate::tools::ToolExecutor;
5015    use std::path::PathBuf;
5016    use std::sync::atomic::{AtomicUsize, Ordering};
5017
5018    fn test_tool_context() -> ToolContext {
5019        ToolContext::new(PathBuf::from("/tmp"))
5020    }
5021
5022    // ========================================================================
5023    // AgentConfig
5024    // ========================================================================
5025
5026    #[test]
5027    fn test_agent_config_debug() {
5028        let config = AgentConfig {
5029            prompt_slots: SystemPromptSlots {
5030                extra: Some("You are helpful".to_string()),
5031                ..Default::default()
5032            },
5033            tools: vec![],
5034            max_tool_rounds: 10,
5035            permission_checker: None,
5036            confirmation_manager: None,
5037            context_providers: vec![],
5038            planning_mode: PlanningMode::Enabled,
5039            goal_tracking: false,
5040            hook_engine: None,
5041            skill_registry: None,
5042            ..AgentConfig::default()
5043        };
5044        let debug = format!("{:?}", config);
5045        assert!(debug.contains("AgentConfig"));
5046        assert!(debug.contains("planning_mode"));
5047    }
5048
5049    #[test]
5050    fn test_agent_config_default_values() {
5051        let config = AgentConfig::default();
5052        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
5053        assert_eq!(config.planning_mode, PlanningMode::Auto);
5054        assert!(!config.goal_tracking);
5055        assert!(config.context_providers.is_empty());
5056    }
5057
5058    // ========================================================================
5059    // AgentEvent serialization
5060    // ========================================================================
5061
5062    #[test]
5063    fn test_agent_event_serialize_start() {
5064        let event = AgentEvent::Start {
5065            prompt: "Hello".to_string(),
5066        };
5067        let json = serde_json::to_string(&event).unwrap();
5068        assert!(json.contains("agent_start"));
5069        assert!(json.contains("Hello"));
5070    }
5071
5072    #[test]
5073    fn test_agent_event_serialize_text_delta() {
5074        let event = AgentEvent::TextDelta {
5075            text: "chunk".to_string(),
5076        };
5077        let json = serde_json::to_string(&event).unwrap();
5078        assert!(json.contains("text_delta"));
5079    }
5080
5081    #[test]
5082    fn test_agent_event_serialize_tool_start() {
5083        let event = AgentEvent::ToolStart {
5084            id: "t1".to_string(),
5085            name: "bash".to_string(),
5086        };
5087        let json = serde_json::to_string(&event).unwrap();
5088        assert!(json.contains("tool_start"));
5089        assert!(json.contains("bash"));
5090    }
5091
5092    #[test]
5093    fn test_agent_event_serialize_tool_end() {
5094        let event = AgentEvent::ToolEnd {
5095            id: "t1".to_string(),
5096            name: "bash".to_string(),
5097            output: "hello".to_string(),
5098            exit_code: 0,
5099            metadata: None,
5100        };
5101        let json = serde_json::to_string(&event).unwrap();
5102        assert!(json.contains("tool_end"));
5103    }
5104
5105    #[test]
5106    fn test_agent_event_tool_end_has_metadata_field() {
5107        let event = AgentEvent::ToolEnd {
5108            id: "t1".to_string(),
5109            name: "write".to_string(),
5110            output: "Wrote 5 bytes".to_string(),
5111            exit_code: 0,
5112            metadata: Some(
5113                serde_json::json!({ "before": "old", "after": "new", "file_path": "f.txt" }),
5114            ),
5115        };
5116        let json = serde_json::to_string(&event).unwrap();
5117        assert!(json.contains("\"before\""));
5118    }
5119
5120    #[test]
5121    fn test_agent_event_serialize_error() {
5122        let event = AgentEvent::Error {
5123            message: "oops".to_string(),
5124        };
5125        let json = serde_json::to_string(&event).unwrap();
5126        assert!(json.contains("error"));
5127        assert!(json.contains("oops"));
5128    }
5129
5130    #[test]
5131    fn test_agent_event_serialize_confirmation_required() {
5132        let event = AgentEvent::ConfirmationRequired {
5133            tool_id: "t1".to_string(),
5134            tool_name: "bash".to_string(),
5135            args: serde_json::json!({"cmd": "rm"}),
5136            timeout_ms: 30000,
5137        };
5138        let json = serde_json::to_string(&event).unwrap();
5139        assert!(json.contains("confirmation_required"));
5140    }
5141
5142    #[test]
5143    fn test_agent_event_serialize_confirmation_received() {
5144        let event = AgentEvent::ConfirmationReceived {
5145            tool_id: "t1".to_string(),
5146            approved: true,
5147            reason: Some("safe".to_string()),
5148        };
5149        let json = serde_json::to_string(&event).unwrap();
5150        assert!(json.contains("confirmation_received"));
5151    }
5152
5153    #[test]
5154    fn test_agent_event_serialize_confirmation_timeout() {
5155        let event = AgentEvent::ConfirmationTimeout {
5156            tool_id: "t1".to_string(),
5157            action_taken: "rejected".to_string(),
5158        };
5159        let json = serde_json::to_string(&event).unwrap();
5160        assert!(json.contains("confirmation_timeout"));
5161    }
5162
5163    #[test]
5164    fn test_agent_event_serialize_external_task_pending() {
5165        let event = AgentEvent::ExternalTaskPending {
5166            task_id: "task-1".to_string(),
5167            session_id: "sess-1".to_string(),
5168            lane: crate::hitl::SessionLane::Execute,
5169            command_type: "bash".to_string(),
5170            payload: serde_json::json!({}),
5171            timeout_ms: 60000,
5172        };
5173        let json = serde_json::to_string(&event).unwrap();
5174        assert!(json.contains("external_task_pending"));
5175    }
5176
5177    #[test]
5178    fn test_agent_event_serialize_external_task_completed() {
5179        let event = AgentEvent::ExternalTaskCompleted {
5180            task_id: "task-1".to_string(),
5181            session_id: "sess-1".to_string(),
5182            success: false,
5183        };
5184        let json = serde_json::to_string(&event).unwrap();
5185        assert!(json.contains("external_task_completed"));
5186    }
5187
5188    #[test]
5189    fn test_agent_event_serialize_permission_denied() {
5190        let event = AgentEvent::PermissionDenied {
5191            tool_id: "t1".to_string(),
5192            tool_name: "bash".to_string(),
5193            args: serde_json::json!({}),
5194            reason: "denied".to_string(),
5195        };
5196        let json = serde_json::to_string(&event).unwrap();
5197        assert!(json.contains("permission_denied"));
5198    }
5199
5200    #[test]
5201    fn test_agent_event_serialize_context_compacted() {
5202        let event = AgentEvent::ContextCompacted {
5203            session_id: "sess-1".to_string(),
5204            before_messages: 100,
5205            after_messages: 20,
5206            percent_before: 0.85,
5207        };
5208        let json = serde_json::to_string(&event).unwrap();
5209        assert!(json.contains("context_compacted"));
5210    }
5211
5212    #[test]
5213    fn test_agent_event_serialize_turn_start() {
5214        let event = AgentEvent::TurnStart { turn: 3 };
5215        let json = serde_json::to_string(&event).unwrap();
5216        assert!(json.contains("turn_start"));
5217    }
5218
5219    #[test]
5220    fn test_agent_event_serialize_turn_end() {
5221        let event = AgentEvent::TurnEnd {
5222            turn: 3,
5223            usage: TokenUsage::default(),
5224        };
5225        let json = serde_json::to_string(&event).unwrap();
5226        assert!(json.contains("turn_end"));
5227    }
5228
5229    #[test]
5230    fn test_agent_event_serialize_end() {
5231        let event = AgentEvent::End {
5232            text: "Done".to_string(),
5233            usage: TokenUsage {
5234                prompt_tokens: 100,
5235                completion_tokens: 50,
5236                total_tokens: 150,
5237                cache_read_tokens: None,
5238                cache_write_tokens: None,
5239            },
5240            meta: None,
5241        };
5242        let json = serde_json::to_string(&event).unwrap();
5243        assert!(json.contains("agent_end"));
5244    }
5245
5246    // ========================================================================
5247    // AgentResult
5248    // ========================================================================
5249
5250    #[test]
5251    fn test_agent_result_fields() {
5252        let result = AgentResult {
5253            text: "output".to_string(),
5254            messages: vec![Message::user("hello")],
5255            usage: TokenUsage::default(),
5256            tool_calls_count: 3,
5257        };
5258        assert_eq!(result.text, "output");
5259        assert_eq!(result.messages.len(), 1);
5260        assert_eq!(result.tool_calls_count, 3);
5261    }
5262
5263    // ========================================================================
5264    // Missing AgentEvent serialization tests
5265    // ========================================================================
5266
5267    #[test]
5268    fn test_agent_event_serialize_context_resolving() {
5269        let event = AgentEvent::ContextResolving {
5270            providers: vec!["provider1".to_string(), "provider2".to_string()],
5271        };
5272        let json = serde_json::to_string(&event).unwrap();
5273        assert!(json.contains("context_resolving"));
5274        assert!(json.contains("provider1"));
5275    }
5276
5277    #[test]
5278    fn test_agent_event_serialize_context_resolved() {
5279        let event = AgentEvent::ContextResolved {
5280            total_items: 5,
5281            total_tokens: 1000,
5282        };
5283        let json = serde_json::to_string(&event).unwrap();
5284        assert!(json.contains("context_resolved"));
5285        assert!(json.contains("1000"));
5286    }
5287
5288    #[test]
5289    fn test_agent_event_serialize_command_dead_lettered() {
5290        let event = AgentEvent::CommandDeadLettered {
5291            command_id: "cmd-1".to_string(),
5292            command_type: "bash".to_string(),
5293            lane: "execute".to_string(),
5294            error: "timeout".to_string(),
5295            attempts: 3,
5296        };
5297        let json = serde_json::to_string(&event).unwrap();
5298        assert!(json.contains("command_dead_lettered"));
5299        assert!(json.contains("cmd-1"));
5300    }
5301
5302    #[test]
5303    fn test_agent_event_serialize_command_retry() {
5304        let event = AgentEvent::CommandRetry {
5305            command_id: "cmd-2".to_string(),
5306            command_type: "read".to_string(),
5307            lane: "query".to_string(),
5308            attempt: 2,
5309            delay_ms: 1000,
5310        };
5311        let json = serde_json::to_string(&event).unwrap();
5312        assert!(json.contains("command_retry"));
5313        assert!(json.contains("cmd-2"));
5314    }
5315
5316    #[test]
5317    fn test_agent_event_serialize_queue_alert() {
5318        let event = AgentEvent::QueueAlert {
5319            level: "warning".to_string(),
5320            alert_type: "depth".to_string(),
5321            message: "Queue depth exceeded".to_string(),
5322        };
5323        let json = serde_json::to_string(&event).unwrap();
5324        assert!(json.contains("queue_alert"));
5325        assert!(json.contains("warning"));
5326    }
5327
5328    #[test]
5329    fn test_agent_event_serialize_task_updated() {
5330        let event = AgentEvent::TaskUpdated {
5331            session_id: "sess-1".to_string(),
5332            tasks: vec![],
5333        };
5334        let json = serde_json::to_string(&event).unwrap();
5335        assert!(json.contains("task_updated"));
5336        assert!(json.contains("sess-1"));
5337    }
5338
5339    #[test]
5340    fn test_agent_event_serialize_memory_stored() {
5341        let event = AgentEvent::MemoryStored {
5342            memory_id: "mem-1".to_string(),
5343            memory_type: "conversation".to_string(),
5344            importance: 0.8,
5345            tags: vec!["important".to_string()],
5346        };
5347        let json = serde_json::to_string(&event).unwrap();
5348        assert!(json.contains("memory_stored"));
5349        assert!(json.contains("mem-1"));
5350    }
5351
5352    #[test]
5353    fn test_agent_event_serialize_memory_recalled() {
5354        let event = AgentEvent::MemoryRecalled {
5355            memory_id: "mem-2".to_string(),
5356            content: "Previous conversation".to_string(),
5357            relevance: 0.9,
5358        };
5359        let json = serde_json::to_string(&event).unwrap();
5360        assert!(json.contains("memory_recalled"));
5361        assert!(json.contains("mem-2"));
5362    }
5363
5364    #[test]
5365    fn test_agent_event_serialize_memories_searched() {
5366        let event = AgentEvent::MemoriesSearched {
5367            query: Some("search term".to_string()),
5368            tags: vec!["tag1".to_string()],
5369            result_count: 5,
5370        };
5371        let json = serde_json::to_string(&event).unwrap();
5372        assert!(json.contains("memories_searched"));
5373        assert!(json.contains("search term"));
5374    }
5375
5376    #[test]
5377    fn test_agent_event_serialize_memory_cleared() {
5378        let event = AgentEvent::MemoryCleared {
5379            tier: "short_term".to_string(),
5380            count: 10,
5381        };
5382        let json = serde_json::to_string(&event).unwrap();
5383        assert!(json.contains("memory_cleared"));
5384        assert!(json.contains("short_term"));
5385    }
5386
5387    #[test]
5388    fn test_agent_event_serialize_subagent_start() {
5389        let event = AgentEvent::SubagentStart {
5390            task_id: "task-1".to_string(),
5391            session_id: "child-sess".to_string(),
5392            parent_session_id: "parent-sess".to_string(),
5393            agent: "explore".to_string(),
5394            description: "Explore codebase".to_string(),
5395        };
5396        let json = serde_json::to_string(&event).unwrap();
5397        assert!(json.contains("subagent_start"));
5398        assert!(json.contains("explore"));
5399    }
5400
5401    #[test]
5402    fn test_agent_event_serialize_subagent_progress() {
5403        let event = AgentEvent::SubagentProgress {
5404            task_id: "task-1".to_string(),
5405            session_id: "child-sess".to_string(),
5406            status: "processing".to_string(),
5407            metadata: serde_json::json!({"progress": 50}),
5408        };
5409        let json = serde_json::to_string(&event).unwrap();
5410        assert!(json.contains("subagent_progress"));
5411        assert!(json.contains("processing"));
5412    }
5413
5414    #[test]
5415    fn test_agent_event_serialize_subagent_end() {
5416        let event = AgentEvent::SubagentEnd {
5417            task_id: "task-1".to_string(),
5418            session_id: "child-sess".to_string(),
5419            agent: "explore".to_string(),
5420            output: "Found 10 files".to_string(),
5421            success: true,
5422        };
5423        let json = serde_json::to_string(&event).unwrap();
5424        assert!(json.contains("subagent_end"));
5425        assert!(json.contains("Found 10 files"));
5426    }
5427
5428    #[test]
5429    fn test_agent_event_serialize_planning_start() {
5430        let event = AgentEvent::PlanningStart {
5431            prompt: "Build a web app".to_string(),
5432        };
5433        let json = serde_json::to_string(&event).unwrap();
5434        assert!(json.contains("planning_start"));
5435        assert!(json.contains("Build a web app"));
5436    }
5437
5438    #[test]
5439    fn test_agent_event_serialize_planning_end() {
5440        use crate::planning::{Complexity, ExecutionPlan};
5441        let plan = ExecutionPlan::new("Test goal".to_string(), Complexity::Simple);
5442        let event = AgentEvent::PlanningEnd {
5443            plan,
5444            estimated_steps: 3,
5445        };
5446        let json = serde_json::to_string(&event).unwrap();
5447        assert!(json.contains("planning_end"));
5448        assert!(json.contains("estimated_steps"));
5449    }
5450
5451    #[test]
5452    fn test_agent_event_serialize_step_start() {
5453        let event = AgentEvent::StepStart {
5454            step_id: "step-1".to_string(),
5455            description: "Initialize project".to_string(),
5456            step_number: 1,
5457            total_steps: 5,
5458        };
5459        let json = serde_json::to_string(&event).unwrap();
5460        assert!(json.contains("step_start"));
5461        assert!(json.contains("Initialize project"));
5462    }
5463
5464    #[test]
5465    fn test_agent_event_serialize_step_end() {
5466        let event = AgentEvent::StepEnd {
5467            step_id: "step-1".to_string(),
5468            status: TaskStatus::Completed,
5469            step_number: 1,
5470            total_steps: 5,
5471        };
5472        let json = serde_json::to_string(&event).unwrap();
5473        assert!(json.contains("step_end"));
5474        assert!(json.contains("step-1"));
5475    }
5476
5477    #[test]
5478    fn test_agent_event_serialize_goal_extracted() {
5479        use crate::planning::AgentGoal;
5480        let goal = AgentGoal::new("Complete the task".to_string());
5481        let event = AgentEvent::GoalExtracted { goal };
5482        let json = serde_json::to_string(&event).unwrap();
5483        assert!(json.contains("goal_extracted"));
5484    }
5485
5486    #[test]
5487    fn test_agent_event_serialize_goal_progress() {
5488        let event = AgentEvent::GoalProgress {
5489            goal: "Build app".to_string(),
5490            progress: 0.5,
5491            completed_steps: 2,
5492            total_steps: 4,
5493        };
5494        let json = serde_json::to_string(&event).unwrap();
5495        assert!(json.contains("goal_progress"));
5496        assert!(json.contains("0.5"));
5497    }
5498
5499    #[test]
5500    fn test_agent_event_serialize_goal_achieved() {
5501        let event = AgentEvent::GoalAchieved {
5502            goal: "Build app".to_string(),
5503            total_steps: 4,
5504            duration_ms: 5000,
5505        };
5506        let json = serde_json::to_string(&event).unwrap();
5507        assert!(json.contains("goal_achieved"));
5508        assert!(json.contains("5000"));
5509    }
5510
5511    #[tokio::test]
5512    async fn test_extract_goal_with_json_response() {
5513        // LlmPlanner expects JSON with "description" and "success_criteria" fields
5514        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5515            r#"{"description": "Build web app", "success_criteria": ["App runs on port 3000", "Has login page"]}"#,
5516        )]));
5517        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5518        let agent = AgentLoop::new(
5519            mock_client,
5520            tool_executor,
5521            test_tool_context(),
5522            AgentConfig::default(),
5523        );
5524
5525        let goal = agent.extract_goal("Build a web app").await.unwrap();
5526        assert_eq!(goal.description, "Build web app");
5527        assert_eq!(goal.success_criteria.len(), 2);
5528        assert_eq!(goal.success_criteria[0], "App runs on port 3000");
5529    }
5530
5531    #[tokio::test]
5532    async fn test_extract_goal_fallback_on_non_json() {
5533        // Non-JSON response triggers fallback: returns the original prompt as goal
5534        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5535            "Some non-JSON response",
5536        )]));
5537        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5538        let agent = AgentLoop::new(
5539            mock_client,
5540            tool_executor,
5541            test_tool_context(),
5542            AgentConfig::default(),
5543        );
5544
5545        let goal = agent.extract_goal("Do something").await.unwrap();
5546        // Fallback uses the original prompt as description
5547        assert_eq!(goal.description, "Do something");
5548        // Fallback adds 2 generic criteria
5549        assert_eq!(goal.success_criteria.len(), 2);
5550    }
5551
5552    #[tokio::test]
5553    async fn test_check_goal_achievement_json_yes() {
5554        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5555            r#"{"achieved": true, "progress": 1.0, "remaining_criteria": []}"#,
5556        )]));
5557        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5558        let agent = AgentLoop::new(
5559            mock_client,
5560            tool_executor,
5561            test_tool_context(),
5562            AgentConfig::default(),
5563        );
5564
5565        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
5566        let achieved = agent
5567            .check_goal_achievement(&goal, "All done")
5568            .await
5569            .unwrap();
5570        assert!(achieved);
5571    }
5572
5573    #[tokio::test]
5574    async fn test_check_goal_achievement_fallback_not_done() {
5575        // Non-JSON response triggers heuristic fallback
5576        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5577            "invalid json",
5578        )]));
5579        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5580        let agent = AgentLoop::new(
5581            mock_client,
5582            tool_executor,
5583            test_tool_context(),
5584            AgentConfig::default(),
5585        );
5586
5587        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
5588        // "still working" doesn't contain "complete"/"done"/"finished"
5589        let achieved = agent
5590            .check_goal_achievement(&goal, "still working")
5591            .await
5592            .unwrap();
5593        assert!(!achieved);
5594    }
5595
5596    // ========================================================================
5597    // build_augmented_system_prompt Tests
5598    // ========================================================================
5599
5600    #[test]
5601    fn test_build_augmented_system_prompt_empty_context() {
5602        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5603        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5604        let config = AgentConfig {
5605            prompt_slots: SystemPromptSlots {
5606                extra: Some("Base prompt".to_string()),
5607                ..Default::default()
5608            },
5609            ..Default::default()
5610        };
5611        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5612
5613        let result = agent.build_augmented_system_prompt(&[]);
5614        assert!(result.unwrap().contains("Base prompt"));
5615    }
5616
5617    #[test]
5618    fn test_build_augmented_system_prompt_no_custom_slots() {
5619        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5620        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5621        let agent = AgentLoop::new(
5622            mock_client,
5623            tool_executor,
5624            test_tool_context(),
5625            AgentConfig::default(),
5626        );
5627
5628        let result = agent.build_augmented_system_prompt(&[]);
5629        // Default slots still produce the default agentic prompt
5630        assert!(result.is_some());
5631        assert!(result.unwrap().contains("Core Behaviour"));
5632    }
5633
5634    #[test]
5635    fn test_build_augmented_system_prompt_with_context_no_base() {
5636        use crate::context::{ContextItem, ContextResult, ContextType};
5637
5638        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5639        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5640        let agent = AgentLoop::new(
5641            mock_client,
5642            tool_executor,
5643            test_tool_context(),
5644            AgentConfig::default(),
5645        );
5646
5647        let context = vec![ContextResult {
5648            provider: "test".to_string(),
5649            items: vec![ContextItem::new("id1", ContextType::Resource, "Content")],
5650            total_tokens: 10,
5651            truncated: false,
5652        }];
5653
5654        let result = agent.build_augmented_system_prompt(&context);
5655        assert!(result.is_some());
5656        let text = result.unwrap();
5657        assert!(text.contains("<context"));
5658        assert!(text.contains("Content"));
5659    }
5660
5661    // ========================================================================
5662    // AgentResult Clone and Debug
5663    // ========================================================================
5664
5665    #[test]
5666    fn test_agent_result_clone() {
5667        let result = AgentResult {
5668            text: "output".to_string(),
5669            messages: vec![Message::user("hello")],
5670            usage: TokenUsage::default(),
5671            tool_calls_count: 3,
5672        };
5673        let cloned = result.clone();
5674        assert_eq!(cloned.text, result.text);
5675        assert_eq!(cloned.tool_calls_count, result.tool_calls_count);
5676    }
5677
5678    #[test]
5679    fn test_agent_result_debug() {
5680        let result = AgentResult {
5681            text: "output".to_string(),
5682            messages: vec![Message::user("hello")],
5683            usage: TokenUsage::default(),
5684            tool_calls_count: 3,
5685        };
5686        let debug = format!("{:?}", result);
5687        assert!(debug.contains("AgentResult"));
5688        assert!(debug.contains("output"));
5689    }
5690
5691    // ========================================================================
5692    // handle_post_execution_metadata Tests
5693    // ========================================================================
5694
5695    // ========================================================================
5696    // ToolCommand adapter tests
5697    // ========================================================================
5698
5699    #[tokio::test]
5700    async fn test_tool_command_command_type() {
5701        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5702        let cmd = ToolCommand {
5703            tool_executor: executor,
5704            tool_name: "read".to_string(),
5705            tool_args: serde_json::json!({"file": "test.rs"}),
5706            skill_registry: None,
5707            tool_context: test_tool_context(),
5708        };
5709        assert_eq!(cmd.command_type(), "read");
5710    }
5711
5712    #[tokio::test]
5713    async fn test_tool_command_payload() {
5714        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5715        let args = serde_json::json!({"file": "test.rs", "offset": 10});
5716        let cmd = ToolCommand {
5717            tool_executor: executor,
5718            tool_name: "read".to_string(),
5719            tool_args: args.clone(),
5720            skill_registry: None,
5721            tool_context: test_tool_context(),
5722        };
5723        assert_eq!(cmd.payload(), args);
5724    }
5725
5726    // ========================================================================
5727    // AgentLoop with queue builder tests
5728    // ========================================================================
5729
5730    #[tokio::test(flavor = "multi_thread")]
5731    async fn test_agent_loop_with_queue() {
5732        use tokio::sync::broadcast;
5733
5734        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5735            "Hello",
5736        )]));
5737        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5738        let config = AgentConfig::default();
5739
5740        let (event_tx, _) = broadcast::channel(100);
5741        let queue = SessionLaneQueue::new("test-session", SessionQueueConfig::default(), event_tx)
5742            .await
5743            .unwrap();
5744
5745        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config)
5746            .with_queue(Arc::new(queue));
5747
5748        assert!(agent.command_queue.is_some());
5749    }
5750
5751    #[tokio::test]
5752    async fn test_agent_loop_without_queue() {
5753        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5754            "Hello",
5755        )]));
5756        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5757        let config = AgentConfig::default();
5758
5759        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5760
5761        assert!(agent.command_queue.is_none());
5762    }
5763
5764    // ========================================================================
5765    // Parallel Plan Execution Tests
5766    // ========================================================================
5767
5768    #[tokio::test]
5769    async fn test_execute_plan_parallel_independent() {
5770        use crate::planning::{Complexity, ExecutionPlan, Task};
5771
5772        // 3 independent steps (no dependencies) — should all execute.
5773        // MockLlmClient needs one response per execute_loop call per step.
5774        let mock_client = Arc::new(MockLlmClient::new(vec![
5775            MockLlmClient::text_response("Step 1 done"),
5776            MockLlmClient::text_response("Step 2 done"),
5777            MockLlmClient::text_response("Step 3 done"),
5778        ]));
5779
5780        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5781        let config = AgentConfig::default();
5782        let agent = AgentLoop::new(
5783            mock_client.clone(),
5784            tool_executor,
5785            test_tool_context(),
5786            config,
5787        );
5788
5789        let mut plan = ExecutionPlan::new("Test parallel", Complexity::Simple);
5790        plan.add_step(Task::new("s1", "First step"));
5791        plan.add_step(Task::new("s2", "Second step"));
5792        plan.add_step(Task::new("s3", "Third step"));
5793
5794        let (tx, mut rx) = mpsc::channel(100);
5795        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
5796
5797        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
5798        assert_eq!(result.usage.total_tokens, 45);
5799
5800        // Verify we received StepStart and StepEnd events for all 3 steps
5801        let mut step_starts = Vec::new();
5802        let mut step_ends = Vec::new();
5803        rx.close();
5804        while let Some(event) = rx.recv().await {
5805            match event {
5806                AgentEvent::StepStart { step_id, .. } => step_starts.push(step_id),
5807                AgentEvent::StepEnd {
5808                    step_id, status, ..
5809                } => {
5810                    assert_eq!(status, TaskStatus::Completed);
5811                    step_ends.push(step_id);
5812                }
5813                _ => {}
5814            }
5815        }
5816        assert_eq!(step_starts.len(), 3);
5817        assert_eq!(step_ends.len(), 3);
5818    }
5819
5820    #[tokio::test]
5821    async fn test_execute_plan_respects_dependencies() {
5822        use crate::planning::{Complexity, ExecutionPlan, Task};
5823
5824        // s1 and s2 are independent (wave 1), s3 depends on both (wave 2).
5825        // This requires 3 responses total.
5826        let mock_client = Arc::new(MockLlmClient::new(vec![
5827            MockLlmClient::text_response("Step 1 done"),
5828            MockLlmClient::text_response("Step 2 done"),
5829            MockLlmClient::text_response("Step 3 done"),
5830        ]));
5831
5832        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5833        let config = AgentConfig::default();
5834        let agent = AgentLoop::new(
5835            mock_client.clone(),
5836            tool_executor,
5837            test_tool_context(),
5838            config,
5839        );
5840
5841        let mut plan = ExecutionPlan::new("Test deps", Complexity::Medium);
5842        plan.add_step(Task::new("s1", "Independent A"));
5843        plan.add_step(Task::new("s2", "Independent B"));
5844        plan.add_step(
5845            Task::new("s3", "Depends on A+B")
5846                .with_dependencies(vec!["s1".to_string(), "s2".to_string()]),
5847        );
5848
5849        let (tx, mut rx) = mpsc::channel(100);
5850        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
5851
5852        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
5853        assert_eq!(result.usage.total_tokens, 45);
5854
5855        // Verify ordering: s3's StepStart must come after s1 and s2's StepEnd
5856        let mut events = Vec::new();
5857        rx.close();
5858        while let Some(event) = rx.recv().await {
5859            match &event {
5860                AgentEvent::StepStart { step_id, .. } => {
5861                    events.push(format!("start:{}", step_id));
5862                }
5863                AgentEvent::StepEnd { step_id, .. } => {
5864                    events.push(format!("end:{}", step_id));
5865                }
5866                _ => {}
5867            }
5868        }
5869
5870        // s3 start must occur after both s1 end and s2 end
5871        let s1_end = events.iter().position(|e| e == "end:s1").unwrap();
5872        let s2_end = events.iter().position(|e| e == "end:s2").unwrap();
5873        let s3_start = events.iter().position(|e| e == "start:s3").unwrap();
5874        assert!(
5875            s3_start > s1_end,
5876            "s3 started before s1 ended: {:?}",
5877            events
5878        );
5879        assert!(
5880            s3_start > s2_end,
5881            "s3 started before s2 ended: {:?}",
5882            events
5883        );
5884
5885        // Final result should reflect step 3 (last sequential step)
5886        assert!(result.text.contains("Step 3 done") || !result.text.is_empty());
5887    }
5888
5889    #[tokio::test]
5890    async fn test_execute_plan_handles_step_failure() {
5891        use crate::planning::{Complexity, ExecutionPlan, Task};
5892
5893        // s1 succeeds, s2 depends on s1 (succeeds), s3 depends on nothing (succeeds),
5894        // s4 depends on a step that will fail (s_fail).
5895        // We simulate failure by providing no responses for s_fail's execute_loop.
5896        //
5897        // Simpler approach: s1 succeeds, s2 depends on s1 (will fail because no
5898        // mock response left), s3 is independent.
5899        // Layout: s1 (independent), s3 (independent) → wave 1 parallel
5900        //         s2 depends on s1 → wave 2
5901        //         s4 depends on s2 → wave 3 (should deadlock since s2 fails)
5902        let mock_client = Arc::new(MockLlmClient::new(vec![
5903            // Wave 1: s1 and s3 execute in parallel
5904            MockLlmClient::text_response("s1 done"),
5905            MockLlmClient::text_response("s3 done"),
5906            // Wave 2: s2 executes — but we give it no response, causing failure
5907            // Actually the MockLlmClient will fail with "No more mock responses"
5908        ]));
5909
5910        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5911        let config = AgentConfig::default();
5912        let agent = AgentLoop::new(
5913            mock_client.clone(),
5914            tool_executor,
5915            test_tool_context(),
5916            config,
5917        );
5918
5919        let mut plan = ExecutionPlan::new("Test failure", Complexity::Medium);
5920        plan.add_step(Task::new("s1", "Independent step"));
5921        plan.add_step(Task::new("s2", "Depends on s1").with_dependencies(vec!["s1".to_string()]));
5922        plan.add_step(Task::new("s3", "Another independent"));
5923        plan.add_step(Task::new("s4", "Depends on s2").with_dependencies(vec!["s2".to_string()]));
5924
5925        let (tx, mut rx) = mpsc::channel(100);
5926        let _result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
5927
5928        // s1 and s3 should succeed (wave 1), s2 should fail (wave 2),
5929        // s4 should never execute (deadlock — dep s2 failed, not completed)
5930        let mut completed_steps = Vec::new();
5931        let mut failed_steps = Vec::new();
5932        rx.close();
5933        while let Some(event) = rx.recv().await {
5934            if let AgentEvent::StepEnd {
5935                step_id, status, ..
5936            } = event
5937            {
5938                match status {
5939                    TaskStatus::Completed => completed_steps.push(step_id),
5940                    TaskStatus::Failed => failed_steps.push(step_id),
5941                    _ => {}
5942                }
5943            }
5944        }
5945
5946        assert!(
5947            completed_steps.contains(&"s1".to_string()),
5948            "s1 should complete"
5949        );
5950        assert!(
5951            completed_steps.contains(&"s3".to_string()),
5952            "s3 should complete"
5953        );
5954        assert!(failed_steps.contains(&"s2".to_string()), "s2 should fail");
5955        // s4 should NOT appear in either list — it was never started
5956        assert!(
5957            !completed_steps.contains(&"s4".to_string()),
5958            "s4 should not complete"
5959        );
5960        assert!(
5961            !failed_steps.contains(&"s4".to_string()),
5962            "s4 should not fail (never started)"
5963        );
5964    }
5965
5966    // ========================================================================
5967    // Phase 4: Error Recovery & Resilience Tests
5968    // ========================================================================
5969
5970    #[test]
5971    fn test_agent_config_resilience_defaults() {
5972        let config = AgentConfig::default();
5973        assert_eq!(config.max_parse_retries, 2);
5974        assert_eq!(config.tool_timeout_ms, None);
5975        assert_eq!(config.circuit_breaker_threshold, 3);
5976    }
5977
5978    /// 4.1 — Parse error recovery: bails after max_parse_retries exceeded
5979    #[tokio::test]
5980    async fn test_parse_error_recovery_bails_after_threshold() {
5981        // 3 parse errors with max_parse_retries=2: count reaches 3 > 2 → bail
5982        let mock_client = Arc::new(MockLlmClient::new(vec![
5983            MockLlmClient::tool_call_response(
5984                "c1",
5985                "bash",
5986                serde_json::json!({"__parse_error": "unexpected token at position 5"}),
5987            ),
5988            MockLlmClient::tool_call_response(
5989                "c2",
5990                "bash",
5991                serde_json::json!({"__parse_error": "missing closing brace"}),
5992            ),
5993            MockLlmClient::tool_call_response(
5994                "c3",
5995                "bash",
5996                serde_json::json!({"__parse_error": "still broken"}),
5997            ),
5998            MockLlmClient::text_response("Done"), // never reached
5999        ]));
6000
6001        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6002        let config = AgentConfig {
6003            max_parse_retries: 2,
6004            ..AgentConfig::default()
6005        };
6006        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6007        let result = agent.execute(&[], "Do something", None).await;
6008        assert!(result.is_err(), "should bail after parse error threshold");
6009        let err = result.unwrap_err().to_string();
6010        assert!(
6011            err.contains("malformed tool arguments"),
6012            "error should mention malformed tool arguments, got: {}",
6013            err
6014        );
6015    }
6016
6017    /// 4.1 — Parse error recovery: counter resets after a valid tool execution
6018    #[tokio::test]
6019    async fn test_parse_error_counter_resets_on_success() {
6020        // 2 parse errors (= max_parse_retries, not yet exceeded)
6021        // Then a valid tool call (resets counter)
6022        // Then final text — should NOT bail
6023        let mock_client = Arc::new(MockLlmClient::new(vec![
6024            MockLlmClient::tool_call_response(
6025                "c1",
6026                "bash",
6027                serde_json::json!({"__parse_error": "bad args"}),
6028            ),
6029            MockLlmClient::tool_call_response(
6030                "c2",
6031                "bash",
6032                serde_json::json!({"__parse_error": "bad args again"}),
6033            ),
6034            // Valid call — resets parse_error_count to 0
6035            MockLlmClient::tool_call_response(
6036                "c3",
6037                "bash",
6038                serde_json::json!({"command": "echo ok"}),
6039            ),
6040            MockLlmClient::text_response("All done"),
6041        ]));
6042
6043        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6044        let config = AgentConfig {
6045            max_parse_retries: 2,
6046            ..AgentConfig::default()
6047        };
6048        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6049        let result = agent.execute(&[], "Do something", None).await;
6050        assert!(
6051            result.is_ok(),
6052            "should not bail — counter reset after successful tool, got: {:?}",
6053            result.err()
6054        );
6055        assert_eq!(result.unwrap().text, "All done");
6056    }
6057
6058    /// 4.2 — Tool timeout: slow tool produces a timeout error result; session continues
6059    #[tokio::test]
6060    async fn test_tool_timeout_produces_error_result() {
6061        let mock_client = Arc::new(MockLlmClient::new(vec![
6062            MockLlmClient::tool_call_response(
6063                "t1",
6064                "bash",
6065                serde_json::json!({"command": "sleep 10"}),
6066            ),
6067            MockLlmClient::text_response("The command timed out."),
6068        ]));
6069
6070        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6071        let config = AgentConfig {
6072            // 50ms — sleep 10 will never finish
6073            tool_timeout_ms: Some(50),
6074            ..AgentConfig::default()
6075        };
6076        let agent = AgentLoop::new(
6077            mock_client.clone(),
6078            tool_executor,
6079            test_tool_context(),
6080            config,
6081        );
6082        let result = agent.execute(&[], "Run sleep", None).await;
6083        assert!(
6084            result.is_ok(),
6085            "session should continue after tool timeout: {:?}",
6086            result.err()
6087        );
6088        assert_eq!(result.unwrap().text, "The command timed out.");
6089        // LLM called twice: initial request + response after timeout error
6090        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
6091    }
6092
6093    /// 4.2 — Tool timeout: tool that finishes before the deadline succeeds normally
6094    #[tokio::test]
6095    async fn test_tool_within_timeout_succeeds() {
6096        let mock_client = Arc::new(MockLlmClient::new(vec![
6097            MockLlmClient::tool_call_response(
6098                "t1",
6099                "bash",
6100                serde_json::json!({"command": "echo fast"}),
6101            ),
6102            MockLlmClient::text_response("Command succeeded."),
6103        ]));
6104
6105        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6106        let config = AgentConfig {
6107            tool_timeout_ms: Some(5_000), // 5 s — echo completes in <100ms
6108            ..AgentConfig::default()
6109        };
6110        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6111        let result = agent.execute(&[], "Run something fast", None).await;
6112        assert!(
6113            result.is_ok(),
6114            "fast tool should succeed: {:?}",
6115            result.err()
6116        );
6117        assert_eq!(result.unwrap().text, "Command succeeded.");
6118    }
6119
6120    /// 4.3 — Circuit breaker: retries non-streaming LLM failures up to threshold
6121    #[tokio::test]
6122    async fn test_circuit_breaker_retries_non_streaming() {
6123        // Empty response list → every call bails with "No more mock responses"
6124        // threshold=2 → tries twice, then bails with circuit-breaker message
6125        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6126
6127        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6128        let config = AgentConfig {
6129            circuit_breaker_threshold: 2,
6130            ..AgentConfig::default()
6131        };
6132        let agent = AgentLoop::new(
6133            mock_client.clone(),
6134            tool_executor,
6135            test_tool_context(),
6136            config,
6137        );
6138        let result = agent.execute(&[], "Hello", None).await;
6139        assert!(result.is_err(), "should fail when LLM always errors");
6140        let err = result.unwrap_err().to_string();
6141        assert!(
6142            err.contains("circuit breaker"),
6143            "error should mention circuit breaker, got: {}",
6144            err
6145        );
6146        assert_eq!(
6147            mock_client.call_count.load(Ordering::SeqCst),
6148            2,
6149            "should make exactly threshold=2 LLM calls"
6150        );
6151    }
6152
6153    /// 4.3 — Circuit breaker: threshold=1 bails on the very first failure
6154    #[tokio::test]
6155    async fn test_circuit_breaker_threshold_one_no_retry() {
6156        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6157
6158        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6159        let config = AgentConfig {
6160            circuit_breaker_threshold: 1,
6161            ..AgentConfig::default()
6162        };
6163        let agent = AgentLoop::new(
6164            mock_client.clone(),
6165            tool_executor,
6166            test_tool_context(),
6167            config,
6168        );
6169        let result = agent.execute(&[], "Hello", None).await;
6170        assert!(result.is_err());
6171        assert_eq!(
6172            mock_client.call_count.load(Ordering::SeqCst),
6173            1,
6174            "with threshold=1 exactly one attempt should be made"
6175        );
6176    }
6177
6178    /// 4.3 — Circuit breaker: succeeds when LLM recovers before hitting threshold
6179    #[tokio::test]
6180    async fn test_circuit_breaker_succeeds_if_llm_recovers() {
6181        // First call fails, second call succeeds; threshold=3 — recovery within threshold
6182        struct FailOnceThenSucceed {
6183            inner: MockLlmClient,
6184            failed_once: std::sync::atomic::AtomicBool,
6185            call_count: AtomicUsize,
6186        }
6187
6188        #[async_trait::async_trait]
6189        impl LlmClient for FailOnceThenSucceed {
6190            async fn complete(
6191                &self,
6192                messages: &[Message],
6193                system: Option<&str>,
6194                tools: &[ToolDefinition],
6195            ) -> Result<LlmResponse> {
6196                self.call_count.fetch_add(1, Ordering::SeqCst);
6197                let already_failed = self
6198                    .failed_once
6199                    .swap(true, std::sync::atomic::Ordering::SeqCst);
6200                if !already_failed {
6201                    anyhow::bail!("transient network error");
6202                }
6203                self.inner.complete(messages, system, tools).await
6204            }
6205
6206            async fn complete_streaming(
6207                &self,
6208                messages: &[Message],
6209                system: Option<&str>,
6210                tools: &[ToolDefinition],
6211            ) -> Result<tokio::sync::mpsc::Receiver<crate::llm::StreamEvent>> {
6212                self.inner.complete_streaming(messages, system, tools).await
6213            }
6214        }
6215
6216        let mock = Arc::new(FailOnceThenSucceed {
6217            inner: MockLlmClient::new(vec![MockLlmClient::text_response("Recovered!")]),
6218            failed_once: std::sync::atomic::AtomicBool::new(false),
6219            call_count: AtomicUsize::new(0),
6220        });
6221
6222        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6223        let config = AgentConfig {
6224            circuit_breaker_threshold: 3,
6225            ..AgentConfig::default()
6226        };
6227        let agent = AgentLoop::new(mock.clone(), tool_executor, test_tool_context(), config);
6228        let result = agent.execute(&[], "Hello", None).await;
6229        assert!(
6230            result.is_ok(),
6231            "should succeed when LLM recovers within threshold: {:?}",
6232            result.err()
6233        );
6234        assert_eq!(result.unwrap().text, "Recovered!");
6235        assert_eq!(
6236            mock.call_count.load(Ordering::SeqCst),
6237            2,
6238            "should have made exactly 2 calls (1 fail + 1 success)"
6239        );
6240    }
6241
6242    // ── Continuation detection tests ─────────────────────────────────────────
6243
6244    #[test]
6245    fn test_looks_incomplete_empty() {
6246        assert!(AgentLoop::looks_incomplete(""));
6247        assert!(AgentLoop::looks_incomplete("   "));
6248    }
6249
6250    #[test]
6251    fn test_looks_incomplete_trailing_colon() {
6252        assert!(AgentLoop::looks_incomplete("Let me check the file:"));
6253        assert!(AgentLoop::looks_incomplete("Next steps:"));
6254    }
6255
6256    #[test]
6257    fn test_looks_incomplete_ellipsis() {
6258        assert!(AgentLoop::looks_incomplete("Working on it..."));
6259        assert!(AgentLoop::looks_incomplete("Processing…"));
6260    }
6261
6262    #[test]
6263    fn test_looks_incomplete_intent_phrases() {
6264        assert!(AgentLoop::looks_incomplete(
6265            "I'll start by reading the file."
6266        ));
6267        assert!(AgentLoop::looks_incomplete(
6268            "Let me check the configuration."
6269        ));
6270        assert!(AgentLoop::looks_incomplete("I will now run the tests."));
6271        assert!(AgentLoop::looks_incomplete(
6272            "I need to update the Cargo.toml."
6273        ));
6274    }
6275
6276    #[test]
6277    fn test_looks_complete_final_answer() {
6278        // Clear final answers should NOT trigger continuation
6279        assert!(!AgentLoop::looks_incomplete(
6280            "The tests pass. All changes have been applied successfully."
6281        ));
6282        assert!(!AgentLoop::looks_incomplete(
6283            "Done. I've updated the three files and verified the build succeeds."
6284        ));
6285        assert!(!AgentLoop::looks_incomplete("42"));
6286        assert!(!AgentLoop::looks_incomplete("Yes."));
6287    }
6288
6289    #[test]
6290    fn test_looks_incomplete_multiline_complete() {
6291        let text = "Here is the summary:\n\n- Fixed the bug in agent.rs\n- All tests pass\n- Build succeeds";
6292        assert!(!AgentLoop::looks_incomplete(text));
6293    }
6294}