a3s_code_core/
agent.rs

1//! Agent Loop Implementation
2//!
3//! The agent loop handles the core conversation cycle:
4//! 1. User sends a prompt
5//! 2. LLM generates a response (possibly with tool calls)
6//! 3. If tool calls present, execute them and send results back
7//! 4. Repeat until LLM returns without tool calls
8//!
//! This gives the LLM agentic behavior: it can call tools on its own
//! initiative to accomplish a task.
11
12use crate::context::{ContextProvider, ContextQuery, ContextResult};
13use crate::hitl::ConfirmationProvider;
14use crate::hooks::{
15    ErrorType, GenerateEndEvent, GenerateStartEvent, HookEvent, HookExecutor, HookResult,
16    OnErrorEvent, PostResponseEvent, PostToolUseEvent, PrePromptEvent, PreToolUseEvent,
17    TokenUsageInfo, ToolCallInfo, ToolResultData,
18};
19use crate::llm::{LlmClient, LlmResponse, Message, TokenUsage, ToolDefinition};
20use crate::permissions::{PermissionChecker, PermissionDecision};
21use crate::planning::{AgentGoal, ExecutionPlan, TaskStatus};
22use crate::prompts::{AgentStyle, DetectionConfidence, PlanningMode, SystemPromptSlots};
23use crate::queue::SessionCommand;
24use crate::session_lane_queue::SessionLaneQueue;
25use crate::tool_search::ToolIndex;
26use crate::tools::{ToolContext, ToolExecutor, ToolStreamEvent};
27use anyhow::{Context, Result};
28use async_trait::async_trait;
29use futures::future::join_all;
30use serde::{Deserialize, Serialize};
31use serde_json::Value;
32use std::sync::Arc;
33use std::time::Duration;
34use tokio::sync::{mpsc, RwLock};
35
/// Maximum number of tool execution rounds before stopping.
///
/// Used as the default value of [`AgentConfig::max_tool_rounds`].
const MAX_TOOL_ROUNDS: usize = 50;
38
/// Agent configuration
///
/// Bundles the tunables and optional collaborators (security, permissions,
/// HITL confirmation, hooks, memory, ...) consulted by the agent loop.
/// `Debug` is implemented by hand for this type, and `Default` supplies the
/// baseline values noted on the individual fields.
#[derive(Clone)]
pub struct AgentConfig {
    /// Slot-based system prompt customization.
    ///
    /// Users can customize specific parts (role, guidelines, response style, extra)
    /// without overriding the core agentic capabilities. The default agentic core
    /// (tool usage, autonomous behavior, completion criteria) is always preserved.
    pub prompt_slots: SystemPromptSlots,
    /// Tool definitions carried by this configuration (empty by default).
    pub tools: Vec<ToolDefinition>,
    /// Upper bound on tool execution rounds (default: `MAX_TOOL_ROUNDS`, i.e. 50).
    pub max_tool_rounds: usize,
    /// Optional security provider for input taint tracking and output sanitization
    pub security_provider: Option<Arc<dyn crate::security::SecurityProvider>>,
    /// Optional permission checker for tool execution control
    pub permission_checker: Option<Arc<dyn PermissionChecker>>,
    /// Optional confirmation manager for HITL (Human-in-the-Loop)
    pub confirmation_manager: Option<Arc<dyn ConfirmationProvider>>,
    /// Context providers for augmenting prompts with external context
    pub context_providers: Vec<Arc<dyn ContextProvider>>,
    /// Planning mode — Auto (detect from message), Enabled, or Disabled.
    pub planning_mode: PlanningMode,
    /// Enable goal tracking (default: `false`).
    pub goal_tracking: bool,
    /// Optional hook engine for firing lifecycle events (PreToolUse, PostToolUse, etc.)
    pub hook_engine: Option<Arc<dyn HookExecutor>>,
    /// Optional skill registry for tool permission enforcement
    ///
    /// The default configuration installs a registry created with
    /// `SkillRegistry::with_builtins()`.
    pub skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    /// Max consecutive malformed-tool-args errors before aborting (default: 2).
    ///
    /// When the LLM returns tool arguments with `__parse_error`, the error is
    /// fed back as a tool result. After this many consecutive parse errors the
    /// loop bails instead of retrying indefinitely.
    pub max_parse_retries: u32,
    /// Per-tool execution timeout in milliseconds (`None` = no timeout).
    ///
    /// When set, each tool execution is wrapped in `tokio::time::timeout`.
    /// A timeout produces an error result sent back to the LLM rather than
    /// crashing the session.
    pub tool_timeout_ms: Option<u64>,
    /// Circuit-breaker threshold: max consecutive LLM API failures before
    /// aborting (default: 3).
    ///
    /// In non-streaming mode, transient LLM failures are retried up to this
    /// many times (with short exponential backoff) before the loop bails.
    /// In streaming mode, any failure is fatal (events cannot be replayed).
    pub circuit_breaker_threshold: u32,
    /// Max consecutive identical tool signatures before aborting (default: 3).
    ///
    /// A tool signature is the exact combination of tool name + compact JSON
    /// arguments. This prevents the agent from getting stuck repeating the same
    /// tool call in a loop, for example repeatedly fetching the same URL.
    pub duplicate_tool_call_threshold: u32,
    /// Enable auto-compaction when context usage exceeds threshold
    /// (default: `false`).
    pub auto_compact: bool,
    /// Context usage percentage threshold to trigger auto-compaction (0.0 - 1.0).
    /// Default: 0.80 (80%).
    pub auto_compact_threshold: f32,
    /// Maximum context window size in tokens (used for auto-compact calculation).
    /// Default: 200_000.
    pub max_context_tokens: usize,
    /// LLM client reference for auto-compaction (needs to call LLM for summarization).
    pub llm_client: Option<Arc<dyn LlmClient>>,
    /// Optional agent memory for auto-remember after tool execution and recall before prompts.
    pub memory: Option<Arc<crate::memory::AgentMemory>>,
    /// Inject a continuation message when the LLM stops calling tools before the
    /// task is complete. Enabled by default. Set to `false` to disable.
    ///
    /// When enabled, if the LLM produces a response with no tool calls but the
    /// response text looks like an intermediate step (not a final answer), the
    /// loop injects [`crate::prompts::CONTINUATION`] as a user message and
    /// continues for up to `max_continuation_turns` additional turns.
    pub continuation_enabled: bool,
    /// Maximum number of continuation injections per execution (default: 3).
    ///
    /// Prevents infinite loops when the LLM repeatedly stops without completing.
    pub max_continuation_turns: u32,
    /// Optional tool search index for filtering tools per-turn.
    ///
    /// When set, only tools matching the user prompt are sent to the LLM,
    /// reducing context usage when many MCP tools are registered.
    pub tool_index: Option<ToolIndex>,
}
121
122impl std::fmt::Debug for AgentConfig {
123    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124        f.debug_struct("AgentConfig")
125            .field("prompt_slots", &self.prompt_slots)
126            .field("tools", &self.tools)
127            .field("max_tool_rounds", &self.max_tool_rounds)
128            .field("security_provider", &self.security_provider.is_some())
129            .field("permission_checker", &self.permission_checker.is_some())
130            .field("confirmation_manager", &self.confirmation_manager.is_some())
131            .field("context_providers", &self.context_providers.len())
132            .field("planning_mode", &self.planning_mode)
133            .field("goal_tracking", &self.goal_tracking)
134            .field("hook_engine", &self.hook_engine.is_some())
135            .field(
136                "skill_registry",
137                &self.skill_registry.as_ref().map(|r| r.len()),
138            )
139            .field("max_parse_retries", &self.max_parse_retries)
140            .field("tool_timeout_ms", &self.tool_timeout_ms)
141            .field("circuit_breaker_threshold", &self.circuit_breaker_threshold)
142            .field(
143                "duplicate_tool_call_threshold",
144                &self.duplicate_tool_call_threshold,
145            )
146            .field("auto_compact", &self.auto_compact)
147            .field("auto_compact_threshold", &self.auto_compact_threshold)
148            .field("max_context_tokens", &self.max_context_tokens)
149            .field("continuation_enabled", &self.continuation_enabled)
150            .field("max_continuation_turns", &self.max_continuation_turns)
151            .field("memory", &self.memory.is_some())
152            .field("tool_index", &self.tool_index.as_ref().map(|i| i.len()))
153            .finish()
154    }
155}
156
impl Default for AgentConfig {
    /// Build the baseline configuration.
    ///
    /// All optional collaborators start unset except `skill_registry`, which is
    /// populated with `SkillRegistry::with_builtins()`. Auto-compaction and
    /// goal tracking are off; continuation injection is on.
    fn default() -> Self {
        Self {
            prompt_slots: SystemPromptSlots::default(),
            tools: Vec::new(), // Tools are provided by ToolExecutor
            max_tool_rounds: MAX_TOOL_ROUNDS,
            security_provider: None,
            permission_checker: None,
            confirmation_manager: None,
            context_providers: Vec::new(),
            planning_mode: PlanningMode::default(),
            goal_tracking: false,
            hook_engine: None,
            // The only optional collaborator that is set by default.
            skill_registry: Some(Arc::new(crate::skills::SkillRegistry::with_builtins())),
            max_parse_retries: 2,
            tool_timeout_ms: None, // no per-tool timeout
            circuit_breaker_threshold: 3,
            duplicate_tool_call_threshold: 3,
            auto_compact: false,
            auto_compact_threshold: 0.80,
            max_context_tokens: 200_000,
            llm_client: None,
            memory: None,
            continuation_enabled: true,
            max_continuation_turns: 3,
            tool_index: None,
        }
    }
}
186
/// Events emitted during agent execution
///
/// Subscribe via [`Session::subscribe_events()`](crate::session::Session::subscribe_events).
/// New variants may be added in minor releases — always include a wildcard arm
/// (`_ => {}`) when matching.
///
/// # Serialization
///
/// The enum is internally tagged: the serialized form carries a `type` field
/// holding the variant's renamed tag (for example `Start` serializes with
/// `"type": "agent_start"`), alongside the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum AgentEvent {
    /// Agent started processing
    #[serde(rename = "agent_start")]
    Start { prompt: String },

    /// Runtime agent style/mode selected for the current execution.
    #[serde(rename = "agent_mode_changed")]
    AgentModeChanged {
        /// Stable UI/runtime mode label, e.g. "general", "planning", "explore".
        mode: String,
        /// Canonical built-in agent name associated with this mode.
        agent: String,
        /// Human-readable explanation of the selected style.
        description: String,
    },

    /// LLM turn started
    #[serde(rename = "turn_start")]
    TurnStart { turn: usize },

    /// Text delta from streaming
    #[serde(rename = "text_delta")]
    TextDelta { text: String },

    /// Tool execution started
    #[serde(rename = "tool_start")]
    ToolStart { id: String, name: String },

    /// Tool input delta from streaming (partial JSON arguments)
    #[serde(rename = "tool_input_delta")]
    ToolInputDelta { delta: String },

    /// Tool execution completed
    #[serde(rename = "tool_end")]
    ToolEnd {
        id: String,
        name: String,
        output: String,
        exit_code: i32,
        /// Left out of the serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        metadata: Option<serde_json::Value>,
    },

    /// Intermediate tool output (streaming delta)
    #[serde(rename = "tool_output_delta")]
    ToolOutputDelta {
        id: String,
        name: String,
        delta: String,
    },

    /// LLM turn completed
    #[serde(rename = "turn_end")]
    TurnEnd { turn: usize, usage: TokenUsage },

    /// Agent completed
    #[serde(rename = "agent_end")]
    End {
        text: String,
        usage: TokenUsage,
        /// Left out of the serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        meta: Option<crate::llm::LlmResponseMeta>,
    },

    /// Error occurred
    #[serde(rename = "error")]
    Error { message: String },

    /// Tool execution requires confirmation (HITL)
    #[serde(rename = "confirmation_required")]
    ConfirmationRequired {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        timeout_ms: u64,
    },

    /// Confirmation received from user (HITL)
    #[serde(rename = "confirmation_received")]
    ConfirmationReceived {
        tool_id: String,
        approved: bool,
        reason: Option<String>,
    },

    /// Confirmation timed out (HITL)
    #[serde(rename = "confirmation_timeout")]
    ConfirmationTimeout {
        tool_id: String,
        action_taken: String, // "rejected" or "auto_approved"
    },

    /// External task pending (needs SDK processing)
    #[serde(rename = "external_task_pending")]
    ExternalTaskPending {
        task_id: String,
        session_id: String,
        lane: crate::hitl::SessionLane,
        command_type: String,
        payload: serde_json::Value,
        timeout_ms: u64,
    },

    /// External task completed
    #[serde(rename = "external_task_completed")]
    ExternalTaskCompleted {
        task_id: String,
        session_id: String,
        success: bool,
    },

    /// Tool execution denied by permission policy
    #[serde(rename = "permission_denied")]
    PermissionDenied {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        reason: String,
    },

    /// Context resolution started
    #[serde(rename = "context_resolving")]
    ContextResolving { providers: Vec<String> },

    /// Context resolution completed
    #[serde(rename = "context_resolved")]
    ContextResolved {
        total_items: usize,
        total_tokens: usize,
    },

    // ========================================================================
    // a3s-lane integration events
    // ========================================================================
    /// Command moved to dead letter queue after exhausting retries
    #[serde(rename = "command_dead_lettered")]
    CommandDeadLettered {
        command_id: String,
        command_type: String,
        lane: String,
        error: String,
        attempts: u32,
    },

    /// Command retry attempt
    #[serde(rename = "command_retry")]
    CommandRetry {
        command_id: String,
        command_type: String,
        lane: String,
        attempt: u32,
        delay_ms: u64,
    },

    /// Queue alert (depth warning, latency alert, etc.)
    #[serde(rename = "queue_alert")]
    QueueAlert {
        level: String,
        alert_type: String,
        message: String,
    },

    // ========================================================================
    // Task tracking events
    // ========================================================================
    /// Task list updated
    #[serde(rename = "task_updated")]
    TaskUpdated {
        session_id: String,
        tasks: Vec<crate::planning::Task>,
    },

    // ========================================================================
    // Memory System events (Phase 3)
    // ========================================================================
    /// Memory stored
    #[serde(rename = "memory_stored")]
    MemoryStored {
        memory_id: String,
        memory_type: String,
        importance: f32,
        tags: Vec<String>,
    },

    /// Memory recalled
    #[serde(rename = "memory_recalled")]
    MemoryRecalled {
        memory_id: String,
        content: String,
        relevance: f32,
    },

    /// Memories searched
    #[serde(rename = "memories_searched")]
    MemoriesSearched {
        query: Option<String>,
        tags: Vec<String>,
        result_count: usize,
    },

    /// Memory cleared
    #[serde(rename = "memory_cleared")]
    MemoryCleared {
        tier: String, // "long_term", "short_term", "working"
        count: u64,
    },

    // ========================================================================
    // Subagent events
    // ========================================================================
    /// Subagent task started
    #[serde(rename = "subagent_start")]
    SubagentStart {
        /// Unique task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Parent session ID
        parent_session_id: String,
        /// Agent type (e.g., "explore", "general")
        agent: String,
        /// Short description of the task
        description: String,
    },

    /// Subagent task progress update
    #[serde(rename = "subagent_progress")]
    SubagentProgress {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Progress status message
        status: String,
        /// Additional metadata
        metadata: serde_json::Value,
    },

    /// Subagent task completed
    #[serde(rename = "subagent_end")]
    SubagentEnd {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Agent type
        agent: String,
        /// Task output/result
        output: String,
        /// Whether the task succeeded
        success: bool,
    },

    // ========================================================================
    // Planning and Goal Tracking Events (Phase 1)
    // ========================================================================
    /// Planning phase started
    #[serde(rename = "planning_start")]
    PlanningStart { prompt: String },

    /// Planning phase completed
    #[serde(rename = "planning_end")]
    PlanningEnd {
        plan: ExecutionPlan,
        estimated_steps: usize,
    },

    /// Step execution started
    #[serde(rename = "step_start")]
    StepStart {
        step_id: String,
        description: String,
        step_number: usize,
        total_steps: usize,
    },

    /// Step execution completed
    #[serde(rename = "step_end")]
    StepEnd {
        step_id: String,
        status: TaskStatus,
        step_number: usize,
        total_steps: usize,
    },

    /// Goal extracted from prompt
    #[serde(rename = "goal_extracted")]
    GoalExtracted { goal: AgentGoal },

    /// Goal progress update
    #[serde(rename = "goal_progress")]
    GoalProgress {
        goal: String,
        progress: f32,
        completed_steps: usize,
        total_steps: usize,
    },

    /// Goal achieved
    #[serde(rename = "goal_achieved")]
    GoalAchieved {
        goal: String,
        total_steps: usize,
        duration_ms: i64,
    },

    // ========================================================================
    // Context Compaction events
    // ========================================================================
    /// Context automatically compacted due to high usage
    #[serde(rename = "context_compacted")]
    ContextCompacted {
        session_id: String,
        before_messages: usize,
        after_messages: usize,
        percent_before: f32,
    },

    // ========================================================================
    // Persistence events
    // ========================================================================
    /// Session persistence failed — SDK clients should handle this
    #[serde(rename = "persistence_failed")]
    PersistenceFailed {
        session_id: String,
        operation: String,
        error: String,
    },

    // ========================================================================
    // Side question (btw)
    // ========================================================================
    /// Ephemeral side question answered.
    ///
    /// Emitted by [`crate::agent_api::AgentSession::btw()`] in streaming mode.
    /// The answer is never added to conversation history.
    #[serde(rename = "btw_answer")]
    BtwAnswer {
        question: String,
        answer: String,
        usage: TokenUsage,
    },
}
538
/// Result of agent execution
#[derive(Debug, Clone)]
pub struct AgentResult {
    /// Response text (presumably the final assistant text — confirm against the loop).
    pub text: String,
    /// Conversation messages associated with this execution.
    pub messages: Vec<Message>,
    /// Token usage reported for the execution.
    pub usage: TokenUsage,
    /// Number of tool calls counted during the execution.
    pub tool_calls_count: usize,
}
547
548// ============================================================================
549// ToolCommand — bridges ToolExecutor to SessionCommand for queue submission
550// ============================================================================
551
/// Adapter that implements `SessionCommand` for tool execution via the queue.
///
/// Wraps a `ToolExecutor` call so it can be submitted to `SessionLaneQueue`.
pub struct ToolCommand {
    /// Executor that actually runs the tool.
    tool_executor: Arc<ToolExecutor>,
    /// Name of the tool to run; also reported as the command type.
    tool_name: String,
    /// JSON arguments passed to the tool; also reported as the command payload.
    tool_args: Value,
    /// Context handed to the executor alongside the name and arguments.
    tool_context: ToolContext,
    /// When present, consulted before execution to enforce skill-based tool restrictions.
    skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
}
562
impl ToolCommand {
    /// Create a new ToolCommand
    ///
    /// Stores the arguments as given; nothing is executed or validated until
    /// the command's `execute` is called.
    pub fn new(
        tool_executor: Arc<ToolExecutor>,
        tool_name: String,
        tool_args: Value,
        tool_context: ToolContext,
        skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    ) -> Self {
        Self {
            tool_executor,
            tool_name,
            tool_args,
            tool_context,
            skill_registry,
        }
    }
}
581
582#[async_trait]
583impl SessionCommand for ToolCommand {
584    async fn execute(&self) -> Result<Value> {
585        // Check skill-based tool permissions
586        if let Some(registry) = &self.skill_registry {
587            let instruction_skills = registry.by_kind(crate::skills::SkillKind::Instruction);
588
589            // If there are instruction skills with tool restrictions, check permissions
590            let has_restrictions = instruction_skills.iter().any(|s| s.allowed_tools.is_some());
591
592            if has_restrictions {
593                let mut allowed = false;
594
595                for skill in &instruction_skills {
596                    if skill.is_tool_allowed(&self.tool_name) {
597                        allowed = true;
598                        break;
599                    }
600                }
601
602                if !allowed {
603                    return Err(anyhow::anyhow!(
604                        "Tool '{}' is not allowed by any active skill. Active skills restrict tools to their allowed-tools lists.",
605                        self.tool_name
606                    ));
607                }
608            }
609        }
610
611        // Execute the tool
612        let result = self
613            .tool_executor
614            .execute_with_context(&self.tool_name, &self.tool_args, &self.tool_context)
615            .await?;
616        Ok(serde_json::json!({
617            "output": result.output,
618            "exit_code": result.exit_code,
619            "metadata": result.metadata,
620        }))
621    }
622
623    fn command_type(&self) -> &str {
624        &self.tool_name
625    }
626
627    fn payload(&self) -> Value {
628        self.tool_args.clone()
629    }
630}
631
632// ============================================================================
633// AgentLoop
634// ============================================================================
635
/// Agent loop executor
///
/// Holds the LLM client, tool executor and configuration for one loop, plus
/// optional collaborators that are attached through the `with_*` builders.
#[derive(Clone)]
pub struct AgentLoop {
    /// LLM client supplied at construction.
    llm_client: Arc<dyn LlmClient>,
    /// Executor used to run tools by name with JSON arguments.
    tool_executor: Arc<ToolExecutor>,
    /// Tool context supplied at construction.
    tool_context: ToolContext,
    /// Loop configuration (timeouts, thresholds, optional providers).
    config: AgentConfig,
    /// Optional per-session tool metrics collector
    tool_metrics: Option<Arc<RwLock<crate::telemetry::ToolMetrics>>>,
    /// Optional lane queue for priority-based tool execution
    command_queue: Option<Arc<SessionLaneQueue>>,
    /// Optional progress tracker for real-time tool/token usage tracking
    progress_tracker: Option<Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>>,
    /// Optional task manager for centralized task lifecycle tracking
    task_manager: Option<Arc<crate::task::TaskManager>>,
}
652
653impl AgentLoop {
    /// Create an agent loop from its required collaborators.
    ///
    /// Tool metrics, the lane queue, the progress tracker and the task manager
    /// all start unset; attach them with the corresponding `with_*` builder.
    pub fn new(
        llm_client: Arc<dyn LlmClient>,
        tool_executor: Arc<ToolExecutor>,
        tool_context: ToolContext,
        config: AgentConfig,
    ) -> Self {
        Self {
            llm_client,
            tool_executor,
            tool_context,
            config,
            tool_metrics: None,
            command_queue: None,
            progress_tracker: None,
            task_manager: None,
        }
    }
671
672    /// Set the progress tracker for real-time tool/token usage tracking.
673    pub fn with_progress_tracker(
674        mut self,
675        tracker: Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>,
676    ) -> Self {
677        self.progress_tracker = Some(tracker);
678        self
679    }
680
681    /// Set the task manager for centralized task lifecycle tracking.
682    pub fn with_task_manager(mut self, manager: Arc<crate::task::TaskManager>) -> Self {
683        self.task_manager = Some(manager);
684        self
685    }
686
687    /// Set the tool metrics collector for this agent loop
688    pub fn with_tool_metrics(
689        mut self,
690        metrics: Arc<RwLock<crate::telemetry::ToolMetrics>>,
691    ) -> Self {
692        self.tool_metrics = Some(metrics);
693        self
694    }
695
696    /// Set the lane queue for priority-based tool execution.
697    ///
698    /// When set, tools are routed through the lane queue which supports
699    /// External task handling for multi-machine parallel processing.
700    pub fn with_queue(mut self, queue: Arc<SessionLaneQueue>) -> Self {
701        self.command_queue = Some(queue);
702        self
703    }
704
705    /// Track a tool call result in the progress tracker.
706    fn track_tool_result(&self, tool_name: &str, args: &serde_json::Value, exit_code: i32) {
707        if let Some(ref tracker) = self.progress_tracker {
708            let args_summary = Self::compact_json_args(args);
709            let success = exit_code == 0;
710            if let Ok(mut guard) = tracker.try_write() {
711                guard.track_tool_call(tool_name, args_summary, success);
712            }
713        }
714    }
715
716    /// Compact JSON arguments to a short summary string for tracking.
717    fn compact_json_args(args: &serde_json::Value) -> String {
718        let raw = match args {
719            serde_json::Value::Null => String::new(),
720            serde_json::Value::String(s) => s.clone(),
721            _ => serde_json::to_string(args).unwrap_or_default(),
722        };
723        let compact = raw.split_whitespace().collect::<Vec<_>>().join(" ");
724        if compact.len() > 180 {
725            format!("{}...", &compact[..180])
726        } else {
727            compact
728        }
729    }
730
731    /// Execute a tool, applying the configured timeout if set.
732    ///
733    /// On timeout, returns an error describing which tool timed out and after
734    /// how many milliseconds. The caller converts this to a tool-result error
735    /// message that is fed back to the LLM.
736    async fn execute_tool_timed(
737        &self,
738        name: &str,
739        args: &serde_json::Value,
740        ctx: &ToolContext,
741    ) -> anyhow::Result<crate::tools::ToolResult> {
742        let fut = self.tool_executor.execute_with_context(name, args, ctx);
743        if let Some(timeout_ms) = self.config.tool_timeout_ms {
744            match tokio::time::timeout(Duration::from_millis(timeout_ms), fut).await {
745                Ok(result) => result,
746                Err(_) => Err(anyhow::anyhow!(
747                    "Tool '{}' timed out after {}ms",
748                    name,
749                    timeout_ms
750                )),
751            }
752        } else {
753            fut.await
754        }
755    }
756
757    /// Convert a tool execution result into the (output, exit_code, is_error, metadata, images) tuple.
758    fn tool_result_to_tuple(
759        result: anyhow::Result<crate::tools::ToolResult>,
760    ) -> (
761        String,
762        i32,
763        bool,
764        Option<serde_json::Value>,
765        Vec<crate::llm::Attachment>,
766    ) {
767        match result {
768            Ok(r) => (
769                r.output,
770                r.exit_code,
771                r.exit_code != 0,
772                r.metadata,
773                r.images,
774            ),
775            Err(e) => {
776                let msg = e.to_string();
777                // Classify the error so the LLM knows whether retrying makes sense.
778                let hint = if Self::is_transient_error(&msg) {
779                    " [transient — you may retry this tool call]"
780                } else {
781                    " [permanent — do not retry without changing the arguments]"
782                };
783                (
784                    format!("Tool execution error: {}{}", msg, hint),
785                    1,
786                    true,
787                    None,
788                    Vec::new(),
789                )
790            }
791        }
792    }
793
794    /// Inspect the workspace for well-known project marker files and return a short
795    /// `## Project Context` section that the agent can use without any manual configuration.
796    /// Returns an empty string when the workspace type cannot be determined.
797    fn detect_project_hint(workspace: &std::path::Path) -> String {
798        struct Marker {
799            file: &'static str,
800            lang: &'static str,
801            tip: &'static str,
802        }
803
804        let markers = [
805            Marker {
806                file: "Cargo.toml",
807                lang: "Rust",
808                tip: "Use `cargo build`, `cargo test`, `cargo clippy`, and `cargo fmt`. \
809                  Prefer `anyhow` / `thiserror` for error handling. \
810                  Follow the Microsoft Rust Guidelines (no panics in library code, \
811                  async-first with Tokio).",
812            },
813            Marker {
814                file: "package.json",
815                lang: "Node.js / TypeScript",
816                tip: "Check `package.json` for the package manager (npm/yarn/pnpm/bun) \
817                  and available scripts. Prefer TypeScript with strict mode. \
818                  Use ESM imports unless the project is CommonJS.",
819            },
820            Marker {
821                file: "pyproject.toml",
822                lang: "Python",
823                tip: "Use the package manager declared in `pyproject.toml` \
824                  (uv, poetry, hatch, etc.). Prefer type hints and async/await for I/O.",
825            },
826            Marker {
827                file: "setup.py",
828                lang: "Python",
829                tip: "Legacy Python project. Prefer type hints and async/await for I/O.",
830            },
831            Marker {
832                file: "requirements.txt",
833                lang: "Python",
834                tip: "Python project with pip-style dependencies. \
835                  Prefer type hints and async/await for I/O.",
836            },
837            Marker {
838                file: "go.mod",
839                lang: "Go",
840                tip: "Use `go build ./...` and `go test ./...`. \
841                  Follow standard Go project layout. Use `gofmt` for formatting.",
842            },
843            Marker {
844                file: "pom.xml",
845                lang: "Java / Maven",
846                tip: "Use `mvn compile`, `mvn test`, `mvn package`. \
847                  Follow standard Maven project structure.",
848            },
849            Marker {
850                file: "build.gradle",
851                lang: "Java / Gradle",
852                tip: "Use `./gradlew build` and `./gradlew test`. \
853                  Follow standard Gradle project structure.",
854            },
855            Marker {
856                file: "build.gradle.kts",
857                lang: "Kotlin / Gradle",
858                tip: "Use `./gradlew build` and `./gradlew test`. \
859                  Prefer Kotlin coroutines for async work.",
860            },
861            Marker {
862                file: "CMakeLists.txt",
863                lang: "C / C++",
864                tip: "Use `cmake -B build && cmake --build build`. \
865                  Check for `compile_commands.json` for IDE tooling.",
866            },
867            Marker {
868                file: "Makefile",
869                lang: "C / C++ (or generic)",
870                tip: "Use `make` or `make <target>`. \
871                  Check available targets with `make help` or by reading the Makefile.",
872            },
873        ];
874
875        // Check for C# / .NET — no single fixed filename, so glob for *.csproj / *.sln
876        let is_dotnet = workspace.join("*.csproj").exists() || {
877            // Fast check: look for any .csproj or .sln in the workspace root
878            std::fs::read_dir(workspace)
879                .map(|entries| {
880                    entries.flatten().any(|e| {
881                        let name = e.file_name();
882                        let s = name.to_string_lossy();
883                        s.ends_with(".csproj") || s.ends_with(".sln")
884                    })
885                })
886                .unwrap_or(false)
887        };
888
889        if is_dotnet {
890            return "## Project Context\n\nThis is a **C# / .NET** project. \
891             Use `dotnet build`, `dotnet test`, and `dotnet run`. \
892             Follow C# coding conventions and async/await patterns."
893                .to_string();
894        }
895
896        for marker in &markers {
897            if workspace.join(marker.file).exists() {
898                return format!(
899                    "## Project Context\n\nThis is a **{}** project. {}",
900                    marker.lang, marker.tip
901                );
902            }
903        }
904
905        String::new()
906    }
907
908    /// Returns `true` for errors that are likely transient (network, timeout, I/O contention).
909    /// Used to annotate tool error messages so the LLM knows whether retrying is safe.
910    fn is_transient_error(msg: &str) -> bool {
911        let lower = msg.to_lowercase();
912        lower.contains("timeout")
913        || lower.contains("timed out")
914        || lower.contains("connection refused")
915        || lower.contains("connection reset")
916        || lower.contains("broken pipe")
917        || lower.contains("temporarily unavailable")
918        || lower.contains("resource temporarily unavailable")
919        || lower.contains("os error 11")  // EAGAIN
920        || lower.contains("os error 35")  // EAGAIN on macOS
921        || lower.contains("rate limit")
922        || lower.contains("too many requests")
923        || lower.contains("service unavailable")
924        || lower.contains("network unreachable")
925    }
926
927    /// Returns `true` when a tool writes a file and is safe to run concurrently with other
928    /// independent writes (no ordering dependencies, no side-channel output).
929    fn is_parallel_safe_write(name: &str, _args: &serde_json::Value) -> bool {
930        matches!(
931            name,
932            "write_file" | "edit_file" | "create_file" | "append_to_file" | "replace_in_file"
933        )
934    }
935
936    /// Extract the target file path from write-tool arguments so we can check for conflicts.
937    fn extract_write_path(args: &serde_json::Value) -> Option<String> {
938        // write_file / create_file / append_to_file / replace_in_file use "path"
939        // edit_file uses "path" as well
940        args.get("path")
941            .and_then(|v| v.as_str())
942            .map(|s| s.to_string())
943    }
944
945    /// Execute a tool through the lane queue (if configured) or directly.
946    /// Wraps execution in task lifecycle if task_manager is configured.
947    async fn execute_tool_queued_or_direct(
948        &self,
949        name: &str,
950        args: &serde_json::Value,
951        ctx: &ToolContext,
952    ) -> anyhow::Result<crate::tools::ToolResult> {
953        // Create task for this tool execution if task_manager is available
954        let task_id = if let Some(ref tm) = self.task_manager {
955            let task = crate::task::Task::tool(name, args.clone());
956            let id = task.id;
957            tm.spawn(task);
958            // Start the task immediately
959            let _ = tm.start(id);
960            Some(id)
961        } else {
962            None
963        };
964
965        let result = self
966            .execute_tool_queued_or_direct_inner(name, args, ctx)
967            .await;
968
969        // Complete or fail the task based on result
970        if let Some(ref tm) = self.task_manager {
971            if let Some(tid) = task_id {
972                match &result {
973                    Ok(r) => {
974                        let output = serde_json::json!({
975                            "output": r.output.clone(),
976                            "exit_code": r.exit_code,
977                        });
978                        let _ = tm.complete(tid, Some(output));
979                    }
980                    Err(e) => {
981                        let _ = tm.fail(tid, e.to_string());
982                    }
983                }
984            }
985        }
986
987        result
988    }
989
990    /// Inner execution without task lifecycle wrapping.
991    async fn execute_tool_queued_or_direct_inner(
992        &self,
993        name: &str,
994        args: &serde_json::Value,
995        ctx: &ToolContext,
996    ) -> anyhow::Result<crate::tools::ToolResult> {
997        if let Some(ref queue) = self.command_queue {
998            let command = ToolCommand::new(
999                Arc::clone(&self.tool_executor),
1000                name.to_string(),
1001                args.clone(),
1002                ctx.clone(),
1003                self.config.skill_registry.clone(),
1004            );
1005            let rx = queue.submit_by_tool(name, Box::new(command)).await;
1006            match rx.await {
1007                Ok(Ok(value)) => {
1008                    let output = value["output"]
1009                        .as_str()
1010                        .ok_or_else(|| {
1011                            anyhow::anyhow!(
1012                                "Queue result missing 'output' field for tool '{}'",
1013                                name
1014                            )
1015                        })?
1016                        .to_string();
1017                    let exit_code = value["exit_code"].as_i64().unwrap_or(0) as i32;
1018                    return Ok(crate::tools::ToolResult {
1019                        name: name.to_string(),
1020                        output,
1021                        exit_code,
1022                        metadata: None,
1023                        images: Vec::new(),
1024                    });
1025                }
1026                Ok(Err(e)) => {
1027                    tracing::warn!(
1028                        "Queue execution failed for tool '{}', falling back to direct: {}",
1029                        name,
1030                        e
1031                    );
1032                }
1033                Err(_) => {
1034                    tracing::warn!(
1035                        "Queue channel closed for tool '{}', falling back to direct",
1036                        name
1037                    );
1038                }
1039            }
1040        }
1041        self.execute_tool_timed(name, args, ctx).await
1042    }
1043
1044    /// Call the LLM, handling streaming vs non-streaming internally.
1045    ///
1046    /// Streaming events (`TextDelta`, `ToolStart`) are forwarded to `event_tx`
1047    /// as they arrive. Non-streaming mode simply awaits the complete response.
1048    ///
1049    /// When a `ToolIndex` is configured, tools are filtered per-turn based on
1050    /// the last user message, reducing context usage with large tool sets.
1051    ///
1052    /// Returns `Err` on any LLM API failure. The circuit breaker in
1053    /// `execute_loop` wraps this call with retry logic for non-streaming mode.
1054    async fn call_llm(
1055        &self,
1056        messages: &[Message],
1057        system: Option<&str>,
1058        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1059        cancel_token: &tokio_util::sync::CancellationToken,
1060    ) -> anyhow::Result<LlmResponse> {
1061        // Filter tools through ToolIndex if configured
1062        let tools = if let Some(ref index) = self.config.tool_index {
1063            let query = messages
1064                .iter()
1065                .rev()
1066                .find(|m| m.role == "user")
1067                .and_then(|m| {
1068                    m.content.iter().find_map(|b| match b {
1069                        crate::llm::ContentBlock::Text { text } => Some(text.as_str()),
1070                        _ => None,
1071                    })
1072                })
1073                .unwrap_or("");
1074            let matches = index.search(query, index.len());
1075            let matched_names: std::collections::HashSet<&str> =
1076                matches.iter().map(|m| m.name.as_str()).collect();
1077            self.config
1078                .tools
1079                .iter()
1080                .filter(|t| matched_names.contains(t.name.as_str()))
1081                .cloned()
1082                .collect::<Vec<_>>()
1083        } else {
1084            self.config.tools.clone()
1085        };
1086
1087        if event_tx.is_some() {
1088            let mut stream_rx = match self
1089                .llm_client
1090                .complete_streaming(messages, system, &tools)
1091                .await
1092            {
1093                Ok(rx) => rx,
1094                Err(stream_error) => {
1095                    tracing::warn!(
1096                        error = %stream_error,
1097                        "LLM streaming setup failed; falling back to non-streaming completion"
1098                    );
1099                    return self
1100                        .llm_client
1101                        .complete(messages, system, &tools)
1102                        .await
1103                        .with_context(|| {
1104                            format!(
1105                                "LLM streaming call failed ({stream_error}); non-streaming fallback also failed"
1106                            )
1107                        });
1108                }
1109            };
1110
1111            let mut final_response: Option<LlmResponse> = None;
1112            loop {
1113                tokio::select! {
1114                    _ = cancel_token.cancelled() => {
1115                        tracing::info!("🛑 LLM streaming cancelled by CancellationToken");
1116                        anyhow::bail!("Operation cancelled by user");
1117                    }
1118                    event = stream_rx.recv() => {
1119                        match event {
1120                            Some(crate::llm::StreamEvent::TextDelta(text)) => {
1121                                if let Some(tx) = event_tx {
1122                                    tx.send(AgentEvent::TextDelta { text }).await.ok();
1123                                }
1124                            }
1125                            Some(crate::llm::StreamEvent::ToolUseStart { id, name }) => {
1126                                if let Some(tx) = event_tx {
1127                                    tx.send(AgentEvent::ToolStart { id, name }).await.ok();
1128                                }
1129                            }
1130                            Some(crate::llm::StreamEvent::ToolUseInputDelta(delta)) => {
1131                                if let Some(tx) = event_tx {
1132                                    tx.send(AgentEvent::ToolInputDelta { delta }).await.ok();
1133                                }
1134                            }
1135                            Some(crate::llm::StreamEvent::Done(resp)) => {
1136                                final_response = Some(resp);
1137                                break;
1138                            }
1139                            None => break,
1140                        }
1141                    }
1142                }
1143            }
1144            final_response.context("Stream ended without final response")
1145        } else {
1146            self.llm_client
1147                .complete(messages, system, &tools)
1148                .await
1149                .context("LLM call failed")
1150        }
1151    }
1152
1153    /// Create a tool context with streaming support.
1154    ///
1155    /// When `event_tx` is Some, spawns a forwarder task that converts
1156    /// `ToolStreamEvent::OutputDelta` into `AgentEvent::ToolOutputDelta`
1157    /// and sends them to the agent event channel.
1158    ///
1159    /// Returns the augmented `ToolContext`. The forwarder task runs until
1160    /// the tool-side sender is dropped (i.e., tool execution finishes).
1161    fn streaming_tool_context(
1162        &self,
1163        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1164        tool_id: &str,
1165        tool_name: &str,
1166    ) -> ToolContext {
1167        let mut ctx = self.tool_context.clone();
1168        if let Some(agent_tx) = event_tx {
1169            let (tool_tx, mut tool_rx) = mpsc::channel::<ToolStreamEvent>(64);
1170            ctx.event_tx = Some(tool_tx);
1171
1172            let agent_tx = agent_tx.clone();
1173            let tool_id = tool_id.to_string();
1174            let tool_name = tool_name.to_string();
1175            tokio::spawn(async move {
1176                while let Some(event) = tool_rx.recv().await {
1177                    match event {
1178                        ToolStreamEvent::OutputDelta(delta) => {
1179                            agent_tx
1180                                .send(AgentEvent::ToolOutputDelta {
1181                                    id: tool_id.clone(),
1182                                    name: tool_name.clone(),
1183                                    delta,
1184                                })
1185                                .await
1186                                .ok();
1187                        }
1188                    }
1189                }
1190            });
1191        }
1192        ctx
1193    }
1194
1195    /// Resolve context from all providers for a given prompt
1196    ///
1197    /// Returns aggregated context results from all configured providers.
1198    async fn resolve_context(&self, prompt: &str, session_id: Option<&str>) -> Vec<ContextResult> {
1199        if self.config.context_providers.is_empty() {
1200            return Vec::new();
1201        }
1202
1203        let query = ContextQuery::new(prompt).with_session_id(session_id.unwrap_or(""));
1204
1205        let futures = self
1206            .config
1207            .context_providers
1208            .iter()
1209            .map(|p| p.query(&query));
1210        let outcomes = join_all(futures).await;
1211
1212        outcomes
1213            .into_iter()
1214            .enumerate()
1215            .filter_map(|(i, r)| match r {
1216                Ok(result) if !result.is_empty() => Some(result),
1217                Ok(_) => None,
1218                Err(e) => {
1219                    tracing::warn!(
1220                        "Context provider '{}' failed: {}",
1221                        self.config.context_providers[i].name(),
1222                        e
1223                    );
1224                    None
1225                }
1226            })
1227            .collect()
1228    }
1229
1230    /// Detect whether the LLM's no-tool-call response looks like an intermediate
1231    /// step rather than a genuine final answer.
1232    ///
1233    /// Returns `true` when continuation should be injected. Heuristics:
1234    /// - Response ends with a colon or ellipsis (mid-thought)
1235    /// - Response contains phrases that signal incomplete work
1236    /// - Response is very short (< 80 chars) and doesn't look like a summary
1237    fn looks_incomplete(text: &str) -> bool {
1238        let t = text.trim();
1239        if t.is_empty() {
1240            return true;
1241        }
1242        // Very short responses that aren't clearly a final answer
1243        if t.len() < 80 && !t.contains('\n') {
1244            // Short single-line — could be a genuine one-liner answer, keep going
1245            // only if it ends with punctuation that signals continuation
1246            let ends_continuation =
1247                t.ends_with(':') || t.ends_with("...") || t.ends_with('…') || t.ends_with(',');
1248            if ends_continuation {
1249                return true;
1250            }
1251        }
1252        // Phrases that signal the LLM is describing what it will do rather than doing it
1253        let incomplete_phrases = [
1254            "i'll ",
1255            "i will ",
1256            "let me ",
1257            "i need to ",
1258            "i should ",
1259            "next, i",
1260            "first, i",
1261            "now i",
1262            "i'll start",
1263            "i'll begin",
1264            "i'll now",
1265            "let's start",
1266            "let's begin",
1267            "to do this",
1268            "i'm going to",
1269        ];
1270        let lower = t.to_lowercase();
1271        for phrase in &incomplete_phrases {
1272            if lower.contains(phrase) {
1273                return true;
1274            }
1275        }
1276        false
1277    }
1278
1279    /// Get the assembled system prompt from slots.
1280    #[allow(dead_code)]
1281    fn system_prompt(&self) -> String {
1282        self.config.prompt_slots.build()
1283    }
1284
1285    /// Get the assembled system prompt from slots with an explicit style.
1286    fn system_prompt_for_style(&self, style: AgentStyle) -> String {
1287        let mut slots = self.config.prompt_slots.clone();
1288        slots.style = Some(style);
1289        slots.build()
1290    }
1291
1292    async fn resolve_effective_style(&self, prompt: &str) -> AgentStyle {
1293        if let Some(style) = self.config.prompt_slots.style {
1294            return style;
1295        }
1296
1297        let (style, confidence) = AgentStyle::detect_with_confidence(prompt);
1298        if confidence != DetectionConfidence::Low {
1299            return style;
1300        }
1301
1302        match AgentStyle::detect_with_llm(self.llm_client.as_ref(), prompt).await {
1303            Ok(classified_style) => {
1304                tracing::debug!(
1305                    intent.classification = ?classified_style,
1306                    intent.source = "llm",
1307                    "Intent classified via LLM"
1308                );
1309                classified_style
1310            }
1311            Err(e) => {
1312                tracing::warn!(error = %e, "LLM intent classification failed, using keyword detection");
1313                style
1314            }
1315        }
1316    }
1317
1318    /// Build augmented system prompt with context
1319    #[allow(dead_code)]
1320    fn build_augmented_system_prompt(&self, context_results: &[ContextResult]) -> Option<String> {
1321        let base = self.system_prompt();
1322        self.build_augmented_system_prompt_with_base(&base, context_results)
1323    }
1324
1325    fn build_augmented_system_prompt_with_base(
1326        &self,
1327        base: &str,
1328        context_results: &[ContextResult],
1329    ) -> Option<String> {
1330        let base = base.to_string();
1331
1332        // Use live tool executor definitions so tools added via add_mcp_server() are included
1333        let live_tools = self.tool_executor.definitions();
1334        let mcp_tools: Vec<&ToolDefinition> = live_tools
1335            .iter()
1336            .filter(|t| t.name.starts_with("mcp__"))
1337            .collect();
1338
1339        let mcp_section = if mcp_tools.is_empty() {
1340            String::new()
1341        } else {
1342            let mut lines = vec![
1343                "## MCP Tools".to_string(),
1344                String::new(),
1345                "The following MCP (Model Context Protocol) tools are available. Use them when the task requires external capabilities beyond the built-in tools:".to_string(),
1346                String::new(),
1347            ];
1348            for tool in &mcp_tools {
1349                let display = format!("- `{}` — {}", tool.name, tool.description);
1350                lines.push(display);
1351            }
1352            lines.join("\n")
1353        };
1354
1355        let parts: Vec<&str> = [base.as_str(), mcp_section.as_str()]
1356            .iter()
1357            .filter(|s| !s.is_empty())
1358            .copied()
1359            .collect();
1360
1361        // Auto-detect project type from workspace and inject language-specific guidelines,
1362        // but only when the user hasn't already set a custom `guidelines` slot.
1363        let project_hint = if self.config.prompt_slots.guidelines.is_none() {
1364            Self::detect_project_hint(&self.tool_context.workspace)
1365        } else {
1366            String::new()
1367        };
1368
1369        if context_results.is_empty() {
1370            if project_hint.is_empty() {
1371                return Some(parts.join("\n\n"));
1372            }
1373            return Some(format!("{}\n\n{}", parts.join("\n\n"), project_hint));
1374        }
1375
1376        // Build context XML block
1377        let context_xml: String = context_results
1378            .iter()
1379            .map(|r| r.to_xml())
1380            .collect::<Vec<_>>()
1381            .join("\n\n");
1382
1383        if project_hint.is_empty() {
1384            Some(format!("{}\n\n{}", parts.join("\n\n"), context_xml))
1385        } else {
1386            Some(format!(
1387                "{}\n\n{}\n\n{}",
1388                parts.join("\n\n"),
1389                project_hint,
1390                context_xml
1391            ))
1392        }
1393    }
1394
1395    /// Notify providers of turn completion for memory extraction
1396    async fn notify_turn_complete(&self, session_id: &str, prompt: &str, response: &str) {
1397        let futures = self
1398            .config
1399            .context_providers
1400            .iter()
1401            .map(|p| p.on_turn_complete(session_id, prompt, response));
1402        let outcomes = join_all(futures).await;
1403
1404        for (i, result) in outcomes.into_iter().enumerate() {
1405            if let Err(e) = result {
1406                tracing::warn!(
1407                    "Context provider '{}' on_turn_complete failed: {}",
1408                    self.config.context_providers[i].name(),
1409                    e
1410                );
1411            }
1412        }
1413    }
1414
1415    /// Fire PreToolUse hook event before tool execution.
1416    /// Returns the HookResult which may block the tool call.
1417    async fn fire_pre_tool_use(
1418        &self,
1419        session_id: &str,
1420        tool_name: &str,
1421        args: &serde_json::Value,
1422        recent_tools: Vec<String>,
1423    ) -> Option<HookResult> {
1424        if let Some(he) = &self.config.hook_engine {
1425            let event = HookEvent::PreToolUse(PreToolUseEvent {
1426                session_id: session_id.to_string(),
1427                tool: tool_name.to_string(),
1428                args: args.clone(),
1429                working_directory: self.tool_context.workspace.to_string_lossy().to_string(),
1430                recent_tools,
1431            });
1432            let result = he.fire(&event).await;
1433            if result.is_block() {
1434                return Some(result);
1435            }
1436        }
1437        None
1438    }
1439
1440    /// Fire PostToolUse hook event after tool execution (fire-and-forget).
1441    async fn fire_post_tool_use(
1442        &self,
1443        session_id: &str,
1444        tool_name: &str,
1445        args: &serde_json::Value,
1446        output: &str,
1447        success: bool,
1448        duration_ms: u64,
1449    ) {
1450        if let Some(he) = &self.config.hook_engine {
1451            let event = HookEvent::PostToolUse(PostToolUseEvent {
1452                session_id: session_id.to_string(),
1453                tool: tool_name.to_string(),
1454                args: args.clone(),
1455                result: ToolResultData {
1456                    success,
1457                    output: output.to_string(),
1458                    exit_code: if success { Some(0) } else { Some(1) },
1459                    duration_ms,
1460                },
1461            });
1462            let he = Arc::clone(he);
1463            tokio::spawn(async move {
1464                let _ = he.fire(&event).await;
1465            });
1466        }
1467    }
1468
1469    /// Fire GenerateStart hook event before an LLM call.
1470    async fn fire_generate_start(
1471        &self,
1472        session_id: &str,
1473        prompt: &str,
1474        system_prompt: &Option<String>,
1475    ) {
1476        if let Some(he) = &self.config.hook_engine {
1477            let event = HookEvent::GenerateStart(GenerateStartEvent {
1478                session_id: session_id.to_string(),
1479                prompt: prompt.to_string(),
1480                system_prompt: system_prompt.clone(),
1481                model_provider: String::new(),
1482                model_name: String::new(),
1483                available_tools: self.config.tools.iter().map(|t| t.name.clone()).collect(),
1484            });
1485            let _ = he.fire(&event).await;
1486        }
1487    }
1488
1489    /// Fire GenerateEnd hook event after an LLM call.
1490    async fn fire_generate_end(
1491        &self,
1492        session_id: &str,
1493        prompt: &str,
1494        response: &LlmResponse,
1495        duration_ms: u64,
1496    ) {
1497        if let Some(he) = &self.config.hook_engine {
1498            let tool_calls: Vec<ToolCallInfo> = response
1499                .tool_calls()
1500                .iter()
1501                .map(|tc| ToolCallInfo {
1502                    name: tc.name.clone(),
1503                    args: tc.args.clone(),
1504                })
1505                .collect();
1506
1507            let event = HookEvent::GenerateEnd(GenerateEndEvent {
1508                session_id: session_id.to_string(),
1509                prompt: prompt.to_string(),
1510                response_text: response.text().to_string(),
1511                tool_calls,
1512                usage: TokenUsageInfo {
1513                    prompt_tokens: response.usage.prompt_tokens as i32,
1514                    completion_tokens: response.usage.completion_tokens as i32,
1515                    total_tokens: response.usage.total_tokens as i32,
1516                },
1517                duration_ms,
1518            });
1519            let _ = he.fire(&event).await;
1520        }
1521    }
1522
1523    /// Fire PrePrompt hook event before prompt augmentation.
1524    /// Returns optional modified prompt text from the hook.
1525    async fn fire_pre_prompt(
1526        &self,
1527        session_id: &str,
1528        prompt: &str,
1529        system_prompt: &Option<String>,
1530        message_count: usize,
1531    ) -> Option<String> {
1532        if let Some(he) = &self.config.hook_engine {
1533            let event = HookEvent::PrePrompt(PrePromptEvent {
1534                session_id: session_id.to_string(),
1535                prompt: prompt.to_string(),
1536                system_prompt: system_prompt.clone(),
1537                message_count,
1538            });
1539            let result = he.fire(&event).await;
1540            if let HookResult::Continue(Some(modified)) = result {
1541                // Extract modified prompt from hook response
1542                if let Some(new_prompt) = modified.get("prompt").and_then(|v| v.as_str()) {
1543                    return Some(new_prompt.to_string());
1544                }
1545            }
1546        }
1547        None
1548    }
1549
1550    /// Fire PostResponse hook event after the agent loop completes.
1551    async fn fire_post_response(
1552        &self,
1553        session_id: &str,
1554        response_text: &str,
1555        tool_calls_count: usize,
1556        usage: &TokenUsage,
1557        duration_ms: u64,
1558    ) {
1559        if let Some(he) = &self.config.hook_engine {
1560            let event = HookEvent::PostResponse(PostResponseEvent {
1561                session_id: session_id.to_string(),
1562                response_text: response_text.to_string(),
1563                tool_calls_count,
1564                usage: TokenUsageInfo {
1565                    prompt_tokens: usage.prompt_tokens as i32,
1566                    completion_tokens: usage.completion_tokens as i32,
1567                    total_tokens: usage.total_tokens as i32,
1568                },
1569                duration_ms,
1570            });
1571            let he = Arc::clone(he);
1572            tokio::spawn(async move {
1573                let _ = he.fire(&event).await;
1574            });
1575        }
1576    }
1577
1578    /// Fire OnError hook event when an error occurs.
1579    async fn fire_on_error(
1580        &self,
1581        session_id: &str,
1582        error_type: ErrorType,
1583        error_message: &str,
1584        context: serde_json::Value,
1585    ) {
1586        if let Some(he) = &self.config.hook_engine {
1587            let event = HookEvent::OnError(OnErrorEvent {
1588                session_id: session_id.to_string(),
1589                error_type,
1590                error_message: error_message.to_string(),
1591                context,
1592            });
1593            let he = Arc::clone(he);
1594            tokio::spawn(async move {
1595                let _ = he.fire(&event).await;
1596            });
1597        }
1598    }
1599
1600    /// Execute the agent loop for a prompt
1601    ///
1602    /// Takes the conversation history and a new user prompt.
1603    /// Returns the agent result and updated message history.
1604    /// When event_tx is provided, uses streaming LLM API for real-time text output.
1605    pub async fn execute(
1606        &self,
1607        history: &[Message],
1608        prompt: &str,
1609        event_tx: Option<mpsc::Sender<AgentEvent>>,
1610    ) -> Result<AgentResult> {
1611        self.execute_with_session(history, prompt, None, event_tx, None)
1612            .await
1613    }
1614
1615    /// Execute the agent loop with pre-built messages (user message already included).
1616    ///
1617    /// Used by `send_with_attachments` / `stream_with_attachments` where the
1618    /// user message contains multi-modal content and is already appended to
1619    /// the messages vec.
1620    pub async fn execute_from_messages(
1621        &self,
1622        messages: Vec<Message>,
1623        session_id: Option<&str>,
1624        event_tx: Option<mpsc::Sender<AgentEvent>>,
1625        cancel_token: Option<&tokio_util::sync::CancellationToken>,
1626    ) -> Result<AgentResult> {
1627        let default_token = tokio_util::sync::CancellationToken::new();
1628        let token = cancel_token.unwrap_or(&default_token);
1629        tracing::info!(
1630            a3s.session.id = session_id.unwrap_or("none"),
1631            a3s.agent.max_turns = self.config.max_tool_rounds,
1632            "a3s.agent.execute_from_messages started"
1633        );
1634
1635        // Extract the last user message text for hooks, memory recall, and events.
1636        // Pass empty prompt so execute_loop skips adding a duplicate user message,
1637        // but provide effective_prompt for hook/memory/event purposes.
1638        let effective_prompt = messages
1639            .iter()
1640            .rev()
1641            .find(|m| m.role == "user")
1642            .map(|m| m.text())
1643            .unwrap_or_default();
1644
1645        let result = self
1646            .execute_loop_inner(
1647                &messages,
1648                "",
1649                &effective_prompt,
1650                session_id,
1651                event_tx,
1652                token,
1653                true, // emit_end: this is a standalone execution
1654            )
1655            .await;
1656
1657        match &result {
1658            Ok(r) => tracing::info!(
1659                a3s.agent.tool_calls_count = r.tool_calls_count,
1660                a3s.llm.total_tokens = r.usage.total_tokens,
1661                "a3s.agent.execute_from_messages completed"
1662            ),
1663            Err(e) => tracing::warn!(
1664                error = %e,
1665                "a3s.agent.execute_from_messages failed"
1666            ),
1667        }
1668
1669        result
1670    }
1671
1672    /// Execute the agent loop for a prompt with session context
1673    ///
1674    /// Takes the conversation history, user prompt, and optional session ID.
1675    /// When session_id is provided, context providers can use it for session-specific context.
1676    pub async fn execute_with_session(
1677        &self,
1678        history: &[Message],
1679        prompt: &str,
1680        session_id: Option<&str>,
1681        event_tx: Option<mpsc::Sender<AgentEvent>>,
1682        cancel_token: Option<&tokio_util::sync::CancellationToken>,
1683    ) -> Result<AgentResult> {
1684        let default_token = tokio_util::sync::CancellationToken::new();
1685        let token = cancel_token.unwrap_or(&default_token);
1686        tracing::info!(
1687            a3s.session.id = session_id.unwrap_or("none"),
1688            a3s.agent.max_turns = self.config.max_tool_rounds,
1689            "a3s.agent.execute started"
1690        );
1691
1692        let effective_style = self.resolve_effective_style(prompt).await;
1693
1694        // Determine whether to use planning mode
1695        let use_planning = if self.config.planning_mode == PlanningMode::Auto {
1696            effective_style.requires_planning()
1697        } else {
1698            // Explicit mode: Enabled or Disabled
1699            self.config.planning_mode.should_plan(prompt)
1700        };
1701
1702        // Create agent task if task_manager is available
1703        let task_id = if let Some(ref tm) = self.task_manager {
1704            let workspace = self.tool_context.workspace.display().to_string();
1705            let task = crate::task::Task::agent("agent", &workspace, prompt);
1706            let id = task.id;
1707            tm.spawn(task);
1708            let _ = tm.start(id);
1709            Some(id)
1710        } else {
1711            None
1712        };
1713
1714        let result = if use_planning {
1715            self.execute_with_planning(history, prompt, event_tx).await
1716        } else {
1717            self.execute_loop(history, prompt, session_id, event_tx, token, true)
1718                .await
1719        };
1720
1721        // Complete or fail agent task based on result
1722        if let Some(ref tm) = self.task_manager {
1723            if let Some(tid) = task_id {
1724                match &result {
1725                    Ok(r) => {
1726                        let output = serde_json::json!({
1727                            "text": r.text,
1728                            "tool_calls_count": r.tool_calls_count,
1729                            "usage": r.usage,
1730                        });
1731                        let _ = tm.complete(tid, Some(output));
1732                    }
1733                    Err(e) => {
1734                        let _ = tm.fail(tid, e.to_string());
1735                    }
1736                }
1737            }
1738        }
1739
1740        match &result {
1741            Ok(r) => {
1742                tracing::info!(
1743                    a3s.agent.tool_calls_count = r.tool_calls_count,
1744                    a3s.llm.total_tokens = r.usage.total_tokens,
1745                    "a3s.agent.execute completed"
1746                );
1747                // Fire PostResponse hook
1748                self.fire_post_response(
1749                    session_id.unwrap_or(""),
1750                    &r.text,
1751                    r.tool_calls_count,
1752                    &r.usage,
1753                    0, // duration tracked externally
1754                )
1755                .await;
1756            }
1757            Err(e) => {
1758                tracing::warn!(
1759                    error = %e,
1760                    "a3s.agent.execute failed"
1761                );
1762                // Fire OnError hook
1763                self.fire_on_error(
1764                    session_id.unwrap_or(""),
1765                    ErrorType::Other,
1766                    &e.to_string(),
1767                    serde_json::json!({"phase": "execute"}),
1768                )
1769                .await;
1770            }
1771        }
1772
1773        result
1774    }
1775
1776    /// Core execution loop (without planning routing).
1777    ///
1778    /// This is the inner loop that runs LLM calls and tool executions.
1779    /// Called directly by `execute_with_session` (after planning check)
1780    /// and by `execute_plan` (for individual steps, bypassing planning).
1781    async fn execute_loop(
1782        &self,
1783        history: &[Message],
1784        prompt: &str,
1785        session_id: Option<&str>,
1786        event_tx: Option<mpsc::Sender<AgentEvent>>,
1787        cancel_token: &tokio_util::sync::CancellationToken,
1788        emit_end: bool,
1789    ) -> Result<AgentResult> {
1790        // When called via execute_loop, the prompt is used for both
1791        // message-adding and hook/memory/event purposes.
1792        self.execute_loop_inner(
1793            history,
1794            prompt,
1795            prompt,
1796            session_id,
1797            event_tx,
1798            cancel_token,
1799            emit_end,
1800        )
1801        .await
1802    }
1803
1804    /// Inner execution loop.
1805    ///
1806    /// `msg_prompt` controls whether a user message is appended (empty = skip).
1807    /// `effective_prompt` is used for hooks, memory recall, taint tracking, and events.
1808    /// `emit_end` controls whether to send `AgentEvent::End` when the loop completes
1809    /// (should be false when called from `execute_plan` to avoid duplicate End events).
1810    #[allow(clippy::too_many_arguments)]
1811    async fn execute_loop_inner(
1812        &self,
1813        history: &[Message],
1814        msg_prompt: &str,
1815        effective_prompt: &str,
1816        session_id: Option<&str>,
1817        event_tx: Option<mpsc::Sender<AgentEvent>>,
1818        cancel_token: &tokio_util::sync::CancellationToken,
1819        emit_end: bool,
1820    ) -> Result<AgentResult> {
1821        let mut messages = history.to_vec();
1822        let mut total_usage = TokenUsage::default();
1823        let mut tool_calls_count = 0;
1824        let mut turn = 0;
1825        // Consecutive malformed-tool-args errors (4.1 parse error recovery)
1826        let mut parse_error_count: u32 = 0;
1827        // Continuation injection counter
1828        let mut continuation_count: u32 = 0;
1829        let mut recent_tool_signatures: Vec<String> = Vec::new();
1830        let style_prompt = if effective_prompt.is_empty() {
1831            msg_prompt
1832        } else {
1833            effective_prompt
1834        };
1835        let effective_style = self.resolve_effective_style(style_prompt).await;
1836        let effective_system_prompt = self.system_prompt_for_style(effective_style);
1837        if let Some(tx) = &event_tx {
1838            tx.send(AgentEvent::AgentModeChanged {
1839                mode: effective_style.runtime_mode().to_string(),
1840                agent: effective_style.builtin_agent_name().to_string(),
1841                description: effective_style.description().to_string(),
1842            })
1843            .await
1844            .ok();
1845        }
1846
1847        // Send start event
1848        if let Some(tx) = &event_tx {
1849            tx.send(AgentEvent::Start {
1850                prompt: effective_prompt.to_string(),
1851            })
1852            .await
1853            .ok();
1854        }
1855
1856        // Forward queue events (CommandDeadLettered, CommandRetry, QueueAlert) to event stream
1857        let _queue_forward_handle =
1858            if let (Some(ref queue), Some(ref tx)) = (&self.command_queue, &event_tx) {
1859                let mut rx = queue.subscribe();
1860                let tx = tx.clone();
1861                Some(tokio::spawn(async move {
1862                    while let Ok(event) = rx.recv().await {
1863                        if tx.send(event).await.is_err() {
1864                            break;
1865                        }
1866                    }
1867                }))
1868            } else {
1869                None
1870            };
1871
1872        // Fire PrePrompt hook (may modify the prompt)
1873        let built_system_prompt = Some(effective_system_prompt.clone());
1874        let hooked_prompt = if let Some(modified) = self
1875            .fire_pre_prompt(
1876                session_id.unwrap_or(""),
1877                effective_prompt,
1878                &built_system_prompt,
1879                messages.len(),
1880            )
1881            .await
1882        {
1883            modified
1884        } else {
1885            effective_prompt.to_string()
1886        };
1887        let effective_prompt = hooked_prompt.as_str();
1888
1889        // Taint-track the incoming prompt for sensitive data detection
1890        if let Some(ref sp) = self.config.security_provider {
1891            sp.taint_input(effective_prompt);
1892        }
1893
1894        // Recall relevant memories and inject into system prompt
1895        let system_with_memory = if let Some(ref memory) = self.config.memory {
1896            match memory.recall_similar(effective_prompt, 5).await {
1897                Ok(items) if !items.is_empty() => {
1898                    if let Some(tx) = &event_tx {
1899                        for item in &items {
1900                            tx.send(AgentEvent::MemoryRecalled {
1901                                memory_id: item.id.clone(),
1902                                content: item.content.clone(),
1903                                relevance: item.relevance_score(),
1904                            })
1905                            .await
1906                            .ok();
1907                        }
1908                        tx.send(AgentEvent::MemoriesSearched {
1909                            query: Some(effective_prompt.to_string()),
1910                            tags: Vec::new(),
1911                            result_count: items.len(),
1912                        })
1913                        .await
1914                        .ok();
1915                    }
1916                    let memory_context = items
1917                        .iter()
1918                        .map(|i| format!("- {}", i.content))
1919                        .collect::<Vec<_>>()
1920                        .join(
1921                            "
1922",
1923                        );
1924                    let base = effective_system_prompt.clone();
1925                    Some(format!(
1926                        "{}
1927
1928## Relevant past experience
1929{}",
1930                        base, memory_context
1931                    ))
1932                }
1933                _ => Some(effective_system_prompt.clone()),
1934            }
1935        } else {
1936            Some(effective_system_prompt.clone())
1937        };
1938
1939        // Resolve context from providers on first turn (before adding user message)
1940        let augmented_system = if !self.config.context_providers.is_empty() {
1941            // Send context resolving event
1942            if let Some(tx) = &event_tx {
1943                let provider_names: Vec<String> = self
1944                    .config
1945                    .context_providers
1946                    .iter()
1947                    .map(|p| p.name().to_string())
1948                    .collect();
1949                tx.send(AgentEvent::ContextResolving {
1950                    providers: provider_names,
1951                })
1952                .await
1953                .ok();
1954            }
1955
1956            tracing::info!(
1957                a3s.context.providers = self.config.context_providers.len() as i64,
1958                "Context resolution started"
1959            );
1960            let context_results = self.resolve_context(effective_prompt, session_id).await;
1961
1962            // Send context resolved event
1963            if let Some(tx) = &event_tx {
1964                let total_items: usize = context_results.iter().map(|r| r.items.len()).sum();
1965                let total_tokens: usize = context_results.iter().map(|r| r.total_tokens).sum();
1966
1967                tracing::info!(
1968                    context_items = total_items,
1969                    context_tokens = total_tokens,
1970                    "Context resolution completed"
1971                );
1972
1973                tx.send(AgentEvent::ContextResolved {
1974                    total_items,
1975                    total_tokens,
1976                })
1977                .await
1978                .ok();
1979            }
1980
1981            self.build_augmented_system_prompt_with_base(&effective_system_prompt, &context_results)
1982        } else {
1983            Some(effective_system_prompt.clone())
1984        };
1985
1986        // Merge memory context into system prompt
1987        let base_prompt = effective_system_prompt.clone();
1988        let augmented_system = match (augmented_system, system_with_memory) {
1989            (Some(ctx), Some(mem)) if ctx != mem => Some(ctx.replacen(&base_prompt, &mem, 1)),
1990            (Some(ctx), _) => Some(ctx),
1991            (None, mem) => mem,
1992        };
1993
1994        // Add user message
1995        if !msg_prompt.is_empty() {
1996            messages.push(Message::user(msg_prompt));
1997        }
1998
1999        loop {
2000            turn += 1;
2001
2002            if turn > self.config.max_tool_rounds {
2003                let error = format!("Max tool rounds ({}) exceeded", self.config.max_tool_rounds);
2004                if let Some(tx) = &event_tx {
2005                    tx.send(AgentEvent::Error {
2006                        message: error.clone(),
2007                    })
2008                    .await
2009                    .ok();
2010                }
2011                anyhow::bail!(error);
2012            }
2013
2014            // Send turn start event
2015            if let Some(tx) = &event_tx {
2016                tx.send(AgentEvent::TurnStart { turn }).await.ok();
2017            }
2018
2019            tracing::info!(
2020                turn = turn,
2021                max_turns = self.config.max_tool_rounds,
2022                "Agent turn started"
2023            );
2024
2025            // Call LLM - use streaming if we have an event channel
2026            tracing::info!(
2027                a3s.llm.streaming = event_tx.is_some(),
2028                "LLM completion started"
2029            );
2030
2031            // Fire GenerateStart hook
2032            self.fire_generate_start(
2033                session_id.unwrap_or(""),
2034                effective_prompt,
2035                &augmented_system,
2036            )
2037            .await;
2038
2039            let llm_start = std::time::Instant::now();
2040            // Circuit breaker (4.3): retry non-streaming LLM calls on transient failures.
2041            // Each failure increments `consecutive_llm_errors`; on success it resets to 0.
2042            // Streaming mode bails immediately on failure (events can't be replayed).
2043            let response = {
2044                let threshold = self.config.circuit_breaker_threshold.max(1);
2045                let mut attempt = 0u32;
2046                loop {
2047                    attempt += 1;
2048                    let result = self
2049                        .call_llm(
2050                            &messages,
2051                            augmented_system.as_deref(),
2052                            &event_tx,
2053                            cancel_token,
2054                        )
2055                        .await;
2056                    match result {
2057                        Ok(r) => {
2058                            break r;
2059                        }
2060                        // Never retry if cancelled
2061                        Err(e) if cancel_token.is_cancelled() => {
2062                            anyhow::bail!(e);
2063                        }
2064                        // Retry when: non-streaming under threshold, OR first streaming attempt
2065                        Err(e) if attempt < threshold && (event_tx.is_none() || attempt == 1) => {
2066                            tracing::warn!(
2067                                turn = turn,
2068                                attempt = attempt,
2069                                threshold = threshold,
2070                                error = %e,
2071                                "LLM call failed, will retry"
2072                            );
2073                            tokio::time::sleep(Duration::from_millis(100 * attempt as u64)).await;
2074                        }
2075                        // Threshold exceeded or streaming mid-stream: bail
2076                        Err(e) => {
2077                            let msg = if attempt > 1 {
2078                                format!(
2079                                    "LLM circuit breaker triggered: failed after {} attempt(s): {}",
2080                                    attempt, e
2081                                )
2082                            } else {
2083                                format!("LLM call failed: {}", e)
2084                            };
2085                            tracing::error!(turn = turn, attempt = attempt, "{}", msg);
2086                            // Fire OnError hook for LLM failure
2087                            self.fire_on_error(
2088                                session_id.unwrap_or(""),
2089                                ErrorType::LlmFailure,
2090                                &msg,
2091                                serde_json::json!({"turn": turn, "attempt": attempt}),
2092                            )
2093                            .await;
2094                            if let Some(tx) = &event_tx {
2095                                tx.send(AgentEvent::Error {
2096                                    message: msg.clone(),
2097                                })
2098                                .await
2099                                .ok();
2100                            }
2101                            anyhow::bail!(msg);
2102                        }
2103                    }
2104                }
2105            };
2106
2107            // Update usage
2108            total_usage.prompt_tokens += response.usage.prompt_tokens;
2109            total_usage.completion_tokens += response.usage.completion_tokens;
2110            total_usage.total_tokens += response.usage.total_tokens;
2111
2112            // Track token usage in progress tracker
2113            if let Some(ref tracker) = self.progress_tracker {
2114                let token_usage = crate::task::TaskTokenUsage {
2115                    input_tokens: response.usage.prompt_tokens as u64,
2116                    output_tokens: response.usage.completion_tokens as u64,
2117                    cache_read_tokens: response.usage.cache_read_tokens.unwrap_or(0) as u64,
2118                    cache_write_tokens: response.usage.cache_write_tokens.unwrap_or(0) as u64,
2119                };
2120                if let Ok(mut guard) = tracker.try_write() {
2121                    guard.track_tokens(token_usage);
2122                }
2123            }
2124
2125            // Record LLM completion telemetry
2126            let llm_duration = llm_start.elapsed();
2127            tracing::info!(
2128                turn = turn,
2129                streaming = event_tx.is_some(),
2130                prompt_tokens = response.usage.prompt_tokens,
2131                completion_tokens = response.usage.completion_tokens,
2132                total_tokens = response.usage.total_tokens,
2133                stop_reason = response.stop_reason.as_deref().unwrap_or("unknown"),
2134                duration_ms = llm_duration.as_millis() as u64,
2135                "LLM completion finished"
2136            );
2137
2138            // Fire GenerateEnd hook
2139            self.fire_generate_end(
2140                session_id.unwrap_or(""),
2141                effective_prompt,
2142                &response,
2143                llm_duration.as_millis() as u64,
2144            )
2145            .await;
2146
2147            // Record LLM usage on the llm span
2148            crate::telemetry::record_llm_usage(
2149                response.usage.prompt_tokens,
2150                response.usage.completion_tokens,
2151                response.usage.total_tokens,
2152                response.stop_reason.as_deref(),
2153            );
2154            // Log turn token usage
2155            tracing::info!(
2156                turn = turn,
2157                a3s.llm.total_tokens = response.usage.total_tokens,
2158                "Turn token usage"
2159            );
2160
2161            // Add assistant message to history
2162            messages.push(response.message.clone());
2163
2164            // Check for tool calls
2165            let tool_calls = response.tool_calls();
2166
2167            // Send turn end event
2168            if let Some(tx) = &event_tx {
2169                tx.send(AgentEvent::TurnEnd {
2170                    turn,
2171                    usage: response.usage.clone(),
2172                })
2173                .await
2174                .ok();
2175            }
2176
2177            // Auto-compact: check if context usage exceeds threshold
2178            if self.config.auto_compact {
2179                let used = response.usage.prompt_tokens;
2180                let max = self.config.max_context_tokens;
2181                let threshold = self.config.auto_compact_threshold;
2182
2183                if crate::session::compaction::should_auto_compact(used, max, threshold) {
2184                    let before_len = messages.len();
2185                    let percent_before = used as f32 / max as f32;
2186
2187                    tracing::info!(
2188                        used_tokens = used,
2189                        max_tokens = max,
2190                        percent = percent_before,
2191                        threshold = threshold,
2192                        "Auto-compact triggered"
2193                    );
2194
2195                    // Step 1: Prune large tool outputs first (cheap, no LLM call)
2196                    if let Some(pruned) = crate::session::compaction::prune_tool_outputs(&messages)
2197                    {
2198                        messages = pruned;
2199                        tracing::info!("Tool output pruning applied");
2200                    }
2201
2202                    // Step 2: Full summarization using the agent's LLM client
2203                    if let Ok(Some(compacted)) = crate::session::compaction::compact_messages(
2204                        session_id.unwrap_or(""),
2205                        &messages,
2206                        &self.llm_client,
2207                    )
2208                    .await
2209                    {
2210                        messages = compacted;
2211                    }
2212
2213                    // Emit compaction event
2214                    if let Some(tx) = &event_tx {
2215                        tx.send(AgentEvent::ContextCompacted {
2216                            session_id: session_id.unwrap_or("").to_string(),
2217                            before_messages: before_len,
2218                            after_messages: messages.len(),
2219                            percent_before,
2220                        })
2221                        .await
2222                        .ok();
2223                    }
2224                }
2225            }
2226
2227            if tool_calls.is_empty() {
2228                // No tool calls — check if we should inject a continuation message
2229                // before treating this as a final answer.
2230                let final_text = response.text();
2231
2232                if self.config.continuation_enabled
2233                    && continuation_count < self.config.max_continuation_turns
2234                    && turn < self.config.max_tool_rounds  // never inject past the turn limit
2235                    && Self::looks_incomplete(&final_text)
2236                {
2237                    continuation_count += 1;
2238                    tracing::info!(
2239                        turn = turn,
2240                        continuation = continuation_count,
2241                        max_continuation = self.config.max_continuation_turns,
2242                        "Injecting continuation message — response looks incomplete"
2243                    );
2244                    // Inject continuation as a user message and keep looping
2245                    messages.push(Message::user(crate::prompts::CONTINUATION));
2246                    continue;
2247                }
2248
2249                // Sanitize output to redact any sensitive data before returning
2250                let final_text = if let Some(ref sp) = self.config.security_provider {
2251                    sp.sanitize_output(&final_text)
2252                } else {
2253                    final_text
2254                };
2255
2256                // Record final totals
2257                tracing::info!(
2258                    tool_calls_count = tool_calls_count,
2259                    total_prompt_tokens = total_usage.prompt_tokens,
2260                    total_completion_tokens = total_usage.completion_tokens,
2261                    total_tokens = total_usage.total_tokens,
2262                    turns = turn,
2263                    "Agent execution completed"
2264                );
2265
2266                if emit_end {
2267                    if let Some(tx) = &event_tx {
2268                        tx.send(AgentEvent::End {
2269                            text: final_text.clone(),
2270                            usage: total_usage.clone(),
2271                            meta: response.meta.clone(),
2272                        })
2273                        .await
2274                        .ok();
2275                    }
2276                }
2277
2278                // Notify context providers of turn completion for memory extraction
2279                if let Some(sid) = session_id {
2280                    self.notify_turn_complete(sid, effective_prompt, &final_text)
2281                        .await;
2282                }
2283
2284                return Ok(AgentResult {
2285                    text: final_text,
2286                    messages,
2287                    usage: total_usage,
2288                    tool_calls_count,
2289                });
2290            }
2291
2292            // Execute tools sequentially
2293            // Fast path: when all tool calls are independent file writes and no hooks/HITL
2294            // are configured, execute them concurrently to avoid serial I/O bottleneck.
2295            let tool_calls = if self.config.hook_engine.is_none()
2296                && self.config.confirmation_manager.is_none()
2297                && tool_calls.len() > 1
2298                && tool_calls
2299                    .iter()
2300                    .all(|tc| Self::is_parallel_safe_write(&tc.name, &tc.args))
2301                && {
2302                    // All target paths must be distinct (no write-write conflicts)
2303                    let paths: Vec<_> = tool_calls
2304                        .iter()
2305                        .filter_map(|tc| Self::extract_write_path(&tc.args))
2306                        .collect();
2307                    paths.len() == tool_calls.len()
2308                        && paths.iter().collect::<std::collections::HashSet<_>>().len()
2309                            == paths.len()
2310                } {
2311                tracing::info!(
2312                    count = tool_calls.len(),
2313                    "Parallel write batch: executing {} independent file writes concurrently",
2314                    tool_calls.len()
2315                );
2316
2317                let futures: Vec<_> = tool_calls
2318                    .iter()
2319                    .map(|tc| {
2320                        let ctx = self.tool_context.clone();
2321                        let executor = Arc::clone(&self.tool_executor);
2322                        let name = tc.name.clone();
2323                        let args = tc.args.clone();
2324                        async move { executor.execute_with_context(&name, &args, &ctx).await }
2325                    })
2326                    .collect();
2327
2328                let results = join_all(futures).await;
2329
2330                // Post-process results in original order (sequential, preserves message ordering)
2331                for (tc, result) in tool_calls.iter().zip(results) {
2332                    tool_calls_count += 1;
2333                    let (output, exit_code, is_error, metadata, images) =
2334                        Self::tool_result_to_tuple(result);
2335
2336                    // Track tool call in progress tracker
2337                    self.track_tool_result(&tc.name, &tc.args, exit_code);
2338
2339                    let output = if let Some(ref sp) = self.config.security_provider {
2340                        sp.sanitize_output(&output)
2341                    } else {
2342                        output
2343                    };
2344
2345                    if let Some(tx) = &event_tx {
2346                        tx.send(AgentEvent::ToolEnd {
2347                            id: tc.id.clone(),
2348                            name: tc.name.clone(),
2349                            output: output.clone(),
2350                            exit_code,
2351                            metadata,
2352                        })
2353                        .await
2354                        .ok();
2355                    }
2356
2357                    if images.is_empty() {
2358                        messages.push(Message::tool_result(&tc.id, &output, is_error));
2359                    } else {
2360                        messages.push(Message::tool_result_with_images(
2361                            &tc.id, &output, &images, is_error,
2362                        ));
2363                    }
2364                }
2365
2366                // Skip the sequential loop below
2367                continue;
2368            } else {
2369                tool_calls
2370            };
2371
2372            for tool_call in tool_calls {
2373                tool_calls_count += 1;
2374
2375                let tool_start = std::time::Instant::now();
2376
2377                tracing::info!(
2378                    tool_name = tool_call.name.as_str(),
2379                    tool_id = tool_call.id.as_str(),
2380                    "Tool execution started"
2381                );
2382
2383                // Send tool start event (only if not already sent during streaming)
2384                // In streaming mode, ToolStart is sent when we receive ToolUseStart from LLM
2385                // But we still need to send ToolEnd after execution
2386
2387                // Check for malformed tool arguments from LLM (4.1 parse error recovery)
2388                if let Some(parse_error) =
2389                    tool_call.args.get("__parse_error").and_then(|v| v.as_str())
2390                {
2391                    parse_error_count += 1;
2392                    let error_msg = format!("Error: {}", parse_error);
2393                    tracing::warn!(
2394                        tool = tool_call.name.as_str(),
2395                        parse_error_count = parse_error_count,
2396                        max_parse_retries = self.config.max_parse_retries,
2397                        "Malformed tool arguments from LLM"
2398                    );
2399
2400                    if let Some(tx) = &event_tx {
2401                        tx.send(AgentEvent::ToolEnd {
2402                            id: tool_call.id.clone(),
2403                            name: tool_call.name.clone(),
2404                            output: error_msg.clone(),
2405                            exit_code: 1,
2406                            metadata: None,
2407                        })
2408                        .await
2409                        .ok();
2410                    }
2411
2412                    messages.push(Message::tool_result(&tool_call.id, &error_msg, true));
2413
2414                    if parse_error_count > self.config.max_parse_retries {
2415                        let msg = format!(
2416                            "LLM produced malformed tool arguments {} time(s) in a row \
2417                             (max_parse_retries={}); giving up",
2418                            parse_error_count, self.config.max_parse_retries
2419                        );
2420                        tracing::error!("{}", msg);
2421                        if let Some(tx) = &event_tx {
2422                            tx.send(AgentEvent::Error {
2423                                message: msg.clone(),
2424                            })
2425                            .await
2426                            .ok();
2427                        }
2428                        anyhow::bail!(msg);
2429                    }
2430                    continue;
2431                }
2432
2433                // Tool args are valid — reset parse error counter
2434                parse_error_count = 0;
2435
2436                // Check skill-based tool permissions
2437                if let Some(ref registry) = self.config.skill_registry {
2438                    let instruction_skills =
2439                        registry.by_kind(crate::skills::SkillKind::Instruction);
2440                    let has_restrictions =
2441                        instruction_skills.iter().any(|s| s.allowed_tools.is_some());
2442                    if has_restrictions {
2443                        let allowed = instruction_skills
2444                            .iter()
2445                            .any(|s| s.is_tool_allowed(&tool_call.name));
2446                        if !allowed {
2447                            let msg = format!(
2448                                "Tool '{}' is not allowed by any active skill.",
2449                                tool_call.name
2450                            );
2451                            tracing::info!(
2452                                tool_name = tool_call.name.as_str(),
2453                                "Tool blocked by skill registry"
2454                            );
2455                            if let Some(tx) = &event_tx {
2456                                tx.send(AgentEvent::PermissionDenied {
2457                                    tool_id: tool_call.id.clone(),
2458                                    tool_name: tool_call.name.clone(),
2459                                    args: tool_call.args.clone(),
2460                                    reason: msg.clone(),
2461                                })
2462                                .await
2463                                .ok();
2464                            }
2465                            messages.push(Message::tool_result(&tool_call.id, &msg, true));
2466                            continue;
2467                        }
2468                    }
2469                }
2470
2471                // Fire PreToolUse hook (may block the tool call)
2472                if let Some(HookResult::Block(reason)) = self
2473                    .fire_pre_tool_use(
2474                        session_id.unwrap_or(""),
2475                        &tool_call.name,
2476                        &tool_call.args,
2477                        recent_tool_signatures.clone(),
2478                    )
2479                    .await
2480                {
2481                    let msg = format!("Tool '{}' blocked by hook: {}", tool_call.name, reason);
2482                    tracing::info!(
2483                        tool_name = tool_call.name.as_str(),
2484                        "Tool blocked by PreToolUse hook"
2485                    );
2486
2487                    if let Some(tx) = &event_tx {
2488                        tx.send(AgentEvent::PermissionDenied {
2489                            tool_id: tool_call.id.clone(),
2490                            tool_name: tool_call.name.clone(),
2491                            args: tool_call.args.clone(),
2492                            reason: reason.clone(),
2493                        })
2494                        .await
2495                        .ok();
2496                    }
2497
2498                    messages.push(Message::tool_result(&tool_call.id, &msg, true));
2499                    continue;
2500                }
2501
2502                // Check permission before executing tool
2503                let permission_decision = if let Some(checker) = &self.config.permission_checker {
2504                    checker.check(&tool_call.name, &tool_call.args)
2505                } else {
2506                    // No policy configured — default to Ask so HITL can still intervene
2507                    PermissionDecision::Ask
2508                };
2509
2510                let (output, exit_code, is_error, metadata, images) = match permission_decision {
2511                    PermissionDecision::Deny => {
2512                        tracing::info!(
2513                            tool_name = tool_call.name.as_str(),
2514                            permission = "deny",
2515                            "Tool permission denied"
2516                        );
2517                        // Tool execution denied by permission policy
2518                        let denial_msg = format!(
2519                            "Permission denied: Tool '{}' is blocked by permission policy.",
2520                            tool_call.name
2521                        );
2522
2523                        // Send permission denied event
2524                        if let Some(tx) = &event_tx {
2525                            tx.send(AgentEvent::PermissionDenied {
2526                                tool_id: tool_call.id.clone(),
2527                                tool_name: tool_call.name.clone(),
2528                                args: tool_call.args.clone(),
2529                                reason: "Blocked by deny rule in permission policy".to_string(),
2530                            })
2531                            .await
2532                            .ok();
2533                        }
2534
2535                        (denial_msg, 1, true, None, Vec::new())
2536                    }
2537                    PermissionDecision::Allow => {
2538                        tracing::info!(
2539                            tool_name = tool_call.name.as_str(),
2540                            permission = "allow",
2541                            "Tool permission: allow"
2542                        );
2543                        // Permission explicitly allows — execute directly, no HITL
2544                        let stream_ctx =
2545                            self.streaming_tool_context(&event_tx, &tool_call.id, &tool_call.name);
2546                        let result = self
2547                            .execute_tool_queued_or_direct(
2548                                &tool_call.name,
2549                                &tool_call.args,
2550                                &stream_ctx,
2551                            )
2552                            .await;
2553
2554                        let tuple = Self::tool_result_to_tuple(result);
2555                        // Track tool call in progress tracker
2556                        let (_, exit_code, _, _, _) = tuple;
2557                        self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
2558                        tuple
2559                    }
2560                    PermissionDecision::Ask => {
2561                        tracing::info!(
2562                            tool_name = tool_call.name.as_str(),
2563                            permission = "ask",
2564                            "Tool permission: ask"
2565                        );
2566                        // Permission says Ask — delegate to HITL confirmation manager
2567                        if let Some(cm) = &self.config.confirmation_manager {
2568                            // Check YOLO lanes: if the tool's lane is in YOLO mode, skip confirmation
2569                            if !cm.requires_confirmation(&tool_call.name).await {
2570                                let stream_ctx = self.streaming_tool_context(
2571                                    &event_tx,
2572                                    &tool_call.id,
2573                                    &tool_call.name,
2574                                );
2575                                let result = self
2576                                    .execute_tool_queued_or_direct(
2577                                        &tool_call.name,
2578                                        &tool_call.args,
2579                                        &stream_ctx,
2580                                    )
2581                                    .await;
2582
2583                                let (output, exit_code, is_error, metadata, images) =
2584                                    Self::tool_result_to_tuple(result);
2585
2586                                // Track tool call in progress tracker
2587                                self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
2588
2589                                // Add tool result to messages
2590                                if images.is_empty() {
2591                                    messages.push(Message::tool_result(
2592                                        &tool_call.id,
2593                                        &output,
2594                                        is_error,
2595                                    ));
2596                                } else {
2597                                    messages.push(Message::tool_result_with_images(
2598                                        &tool_call.id,
2599                                        &output,
2600                                        &images,
2601                                        is_error,
2602                                    ));
2603                                }
2604
2605                                // Record tool result on the tool span for early exit
2606                                let tool_duration = tool_start.elapsed();
2607                                crate::telemetry::record_tool_result(exit_code, tool_duration);
2608
2609                                // Send ToolEnd event
2610                                if let Some(tx) = &event_tx {
2611                                    tx.send(AgentEvent::ToolEnd {
2612                                        id: tool_call.id.clone(),
2613                                        name: tool_call.name.clone(),
2614                                        output: output.clone(),
2615                                        exit_code,
2616                                        metadata,
2617                                    })
2618                                    .await
2619                                    .ok();
2620                                }
2621
2622                                // Fire PostToolUse hook (fire-and-forget)
2623                                self.fire_post_tool_use(
2624                                    session_id.unwrap_or(""),
2625                                    &tool_call.name,
2626                                    &tool_call.args,
2627                                    &output,
2628                                    exit_code == 0,
2629                                    tool_duration.as_millis() as u64,
2630                                )
2631                                .await;
2632
2633                                continue; // Skip the rest, move to next tool call
2634                            }
2635
2636                            // Get timeout from policy
2637                            let policy = cm.policy().await;
2638                            let timeout_ms = policy.default_timeout_ms;
2639                            let timeout_action = policy.timeout_action;
2640
2641                            // Request confirmation (this emits ConfirmationRequired event)
2642                            let rx = cm
2643                                .request_confirmation(
2644                                    &tool_call.id,
2645                                    &tool_call.name,
2646                                    &tool_call.args,
2647                                )
2648                                .await;
2649
2650                            // Forward ConfirmationRequired to the streaming event channel
2651                            // so external consumers (e.g. SafeClaw engine) can relay it
2652                            // to the browser UI.
2653                            if let Some(tx) = &event_tx {
2654                                tx.send(AgentEvent::ConfirmationRequired {
2655                                    tool_id: tool_call.id.clone(),
2656                                    tool_name: tool_call.name.clone(),
2657                                    args: tool_call.args.clone(),
2658                                    timeout_ms,
2659                                })
2660                                .await
2661                                .ok();
2662                            }
2663
2664                            // Wait for confirmation with timeout
2665                            let confirmation_result =
2666                                tokio::time::timeout(Duration::from_millis(timeout_ms), rx).await;
2667
2668                            match confirmation_result {
2669                                Ok(Ok(response)) => {
2670                                    // Forward ConfirmationReceived
2671                                    if let Some(tx) = &event_tx {
2672                                        tx.send(AgentEvent::ConfirmationReceived {
2673                                            tool_id: tool_call.id.clone(),
2674                                            approved: response.approved,
2675                                            reason: response.reason.clone(),
2676                                        })
2677                                        .await
2678                                        .ok();
2679                                    }
2680                                    if response.approved {
2681                                        let stream_ctx = self.streaming_tool_context(
2682                                            &event_tx,
2683                                            &tool_call.id,
2684                                            &tool_call.name,
2685                                        );
2686                                        let result = self
2687                                            .execute_tool_queued_or_direct(
2688                                                &tool_call.name,
2689                                                &tool_call.args,
2690                                                &stream_ctx,
2691                                            )
2692                                            .await;
2693
2694                                        let tuple = Self::tool_result_to_tuple(result);
2695                                        // Track tool call in progress tracker
2696                                        let (_, exit_code, _, _, _) = tuple;
2697                                        self.track_tool_result(
2698                                            &tool_call.name,
2699                                            &tool_call.args,
2700                                            exit_code,
2701                                        );
2702                                        tuple
2703                                    } else {
2704                                        let rejection_msg = format!(
2705                                            "Tool '{}' execution was REJECTED by the user. Reason: {}. \
2706                                             DO NOT retry this tool call unless the user explicitly asks you to.",
2707                                            tool_call.name,
2708                                            response.reason.unwrap_or_else(|| "No reason provided".to_string())
2709                                        );
2710                                        (rejection_msg, 1, true, None, Vec::new())
2711                                    }
2712                                }
2713                                Ok(Err(_)) => {
2714                                    // Forward ConfirmationTimeout (channel closed = effectively timed out)
2715                                    if let Some(tx) = &event_tx {
2716                                        tx.send(AgentEvent::ConfirmationTimeout {
2717                                            tool_id: tool_call.id.clone(),
2718                                            action_taken: "rejected".to_string(),
2719                                        })
2720                                        .await
2721                                        .ok();
2722                                    }
2723                                    let msg = format!(
2724                                        "Tool '{}' confirmation failed: confirmation channel closed",
2725                                        tool_call.name
2726                                    );
2727                                    (msg, 1, true, None, Vec::new())
2728                                }
2729                                Err(_) => {
2730                                    cm.check_timeouts().await;
2731
2732                                    // Forward ConfirmationTimeout
2733                                    if let Some(tx) = &event_tx {
2734                                        tx.send(AgentEvent::ConfirmationTimeout {
2735                                            tool_id: tool_call.id.clone(),
2736                                            action_taken: match timeout_action {
2737                                                crate::hitl::TimeoutAction::Reject => {
2738                                                    "rejected".to_string()
2739                                                }
2740                                                crate::hitl::TimeoutAction::AutoApprove => {
2741                                                    "auto_approved".to_string()
2742                                                }
2743                                            },
2744                                        })
2745                                        .await
2746                                        .ok();
2747                                    }
2748
2749                                    match timeout_action {
2750                                        crate::hitl::TimeoutAction::Reject => {
2751                                            let msg = format!(
2752                                                "Tool '{}' execution was REJECTED: user confirmation timed out after {}ms. \
2753                                                 DO NOT retry this tool call — the user did not approve it. \
2754                                                 Inform the user that the operation requires their approval and ask them to try again.",
2755                                                tool_call.name, timeout_ms
2756                                            );
2757                                            (msg, 1, true, None, Vec::new())
2758                                        }
2759                                        crate::hitl::TimeoutAction::AutoApprove => {
2760                                            let stream_ctx = self.streaming_tool_context(
2761                                                &event_tx,
2762                                                &tool_call.id,
2763                                                &tool_call.name,
2764                                            );
2765                                            let result = self
2766                                                .execute_tool_queued_or_direct(
2767                                                    &tool_call.name,
2768                                                    &tool_call.args,
2769                                                    &stream_ctx,
2770                                                )
2771                                                .await;
2772
2773                                            let tuple = Self::tool_result_to_tuple(result);
2774                                            // Track tool call in progress tracker
2775                                            let (_, exit_code, _, _, _) = tuple;
2776                                            self.track_tool_result(
2777                                                &tool_call.name,
2778                                                &tool_call.args,
2779                                                exit_code,
2780                                            );
2781                                            tuple
2782                                        }
2783                                    }
2784                                }
2785                            }
2786                        } else {
2787                            // Ask without confirmation manager — safe deny
2788                            let msg = format!(
2789                                "Tool '{}' requires confirmation but no HITL confirmation manager is configured. \
2790                                 Configure a confirmation policy to enable tool execution.",
2791                                tool_call.name
2792                            );
2793                            tracing::warn!(
2794                                tool_name = tool_call.name.as_str(),
2795                                "Tool requires confirmation but no HITL manager configured"
2796                            );
2797                            (msg, 1, true, None, Vec::new())
2798                        }
2799                    }
2800                };
2801
2802                let tool_duration = tool_start.elapsed();
2803                crate::telemetry::record_tool_result(exit_code, tool_duration);
2804
2805                // Sanitize tool output for sensitive data before it enters the message history
2806                let output = if let Some(ref sp) = self.config.security_provider {
2807                    sp.sanitize_output(&output)
2808                } else {
2809                    output
2810                };
2811
2812                recent_tool_signatures.push(format!(
2813                    "{}:{} => {}",
2814                    tool_call.name,
2815                    serde_json::to_string(&tool_call.args).unwrap_or_default(),
2816                    if is_error { "error" } else { "ok" }
2817                ));
2818                if recent_tool_signatures.len() > 8 {
2819                    let overflow = recent_tool_signatures.len() - 8;
2820                    recent_tool_signatures.drain(0..overflow);
2821                }
2822
2823                // Fire PostToolUse hook (fire-and-forget)
2824                self.fire_post_tool_use(
2825                    session_id.unwrap_or(""),
2826                    &tool_call.name,
2827                    &tool_call.args,
2828                    &output,
2829                    exit_code == 0,
2830                    tool_duration.as_millis() as u64,
2831                )
2832                .await;
2833
2834                // Auto-remember tool result in long-term memory
2835                if let Some(ref memory) = self.config.memory {
2836                    let tools_used = [tool_call.name.clone()];
2837                    let remember_result = if exit_code == 0 {
2838                        memory
2839                            .remember_success(effective_prompt, &tools_used, &output)
2840                            .await
2841                    } else {
2842                        memory
2843                            .remember_failure(effective_prompt, &output, &tools_used)
2844                            .await
2845                    };
2846                    match remember_result {
2847                        Ok(()) => {
2848                            if let Some(tx) = &event_tx {
2849                                let item_type = if exit_code == 0 { "success" } else { "failure" };
2850                                tx.send(AgentEvent::MemoryStored {
2851                                    memory_id: uuid::Uuid::new_v4().to_string(),
2852                                    memory_type: item_type.to_string(),
2853                                    importance: if exit_code == 0 { 0.8 } else { 0.9 },
2854                                    tags: vec![item_type.to_string(), tool_call.name.clone()],
2855                                })
2856                                .await
2857                                .ok();
2858                            }
2859                        }
2860                        Err(e) => {
2861                            tracing::warn!("Failed to store memory after tool execution: {}", e);
2862                        }
2863                    }
2864                }
2865
2866                // Send tool end event
2867                if let Some(tx) = &event_tx {
2868                    tx.send(AgentEvent::ToolEnd {
2869                        id: tool_call.id.clone(),
2870                        name: tool_call.name.clone(),
2871                        output: output.clone(),
2872                        exit_code,
2873                        metadata,
2874                    })
2875                    .await
2876                    .ok();
2877                }
2878
2879                // Add tool result to messages
2880                if images.is_empty() {
2881                    messages.push(Message::tool_result(&tool_call.id, &output, is_error));
2882                } else {
2883                    messages.push(Message::tool_result_with_images(
2884                        &tool_call.id,
2885                        &output,
2886                        &images,
2887                        is_error,
2888                    ));
2889                }
2890            }
2891        }
2892    }
2893
2894    /// Execute with streaming events
2895    pub async fn execute_streaming(
2896        &self,
2897        history: &[Message],
2898        prompt: &str,
2899    ) -> Result<(
2900        mpsc::Receiver<AgentEvent>,
2901        tokio::task::JoinHandle<Result<AgentResult>>,
2902        tokio_util::sync::CancellationToken,
2903    )> {
2904        let (tx, rx) = mpsc::channel(100);
2905        let cancel_token = tokio_util::sync::CancellationToken::new();
2906
2907        let llm_client = self.llm_client.clone();
2908        let tool_executor = self.tool_executor.clone();
2909        let tool_context = self.tool_context.clone();
2910        let config = self.config.clone();
2911        let tool_metrics = self.tool_metrics.clone();
2912        let command_queue = self.command_queue.clone();
2913        let history = history.to_vec();
2914        let prompt = prompt.to_string();
2915        let token_clone = cancel_token.clone();
2916
2917        let handle = tokio::spawn(async move {
2918            let mut agent = AgentLoop::new(llm_client, tool_executor, tool_context, config);
2919            if let Some(metrics) = tool_metrics {
2920                agent = agent.with_tool_metrics(metrics);
2921            }
2922            if let Some(queue) = command_queue {
2923                agent = agent.with_queue(queue);
2924            }
2925            agent
2926                .execute_with_session(&history, &prompt, None, Some(tx), Some(&token_clone))
2927                .await
2928        });
2929
2930        Ok((rx, handle, cancel_token))
2931    }
2932
2933    /// Create an execution plan for a prompt
2934    ///
2935    /// Delegates to [`LlmPlanner`] for structured JSON plan generation,
2936    /// falling back to heuristic planning if the LLM call fails.
2937    pub async fn plan(&self, prompt: &str, _context: Option<&str>) -> Result<ExecutionPlan> {
2938        use crate::planning::LlmPlanner;
2939
2940        match LlmPlanner::create_plan(&self.llm_client, prompt).await {
2941            Ok(plan) => Ok(plan),
2942            Err(e) => {
2943                tracing::warn!("LLM plan creation failed, using fallback: {}", e);
2944                Ok(LlmPlanner::fallback_plan(prompt))
2945            }
2946        }
2947    }
2948
2949    /// Execute with planning phase
2950    pub async fn execute_with_planning(
2951        &self,
2952        history: &[Message],
2953        prompt: &str,
2954        event_tx: Option<mpsc::Sender<AgentEvent>>,
2955    ) -> Result<AgentResult> {
2956        // Send planning start event
2957        if let Some(tx) = &event_tx {
2958            tx.send(AgentEvent::PlanningStart {
2959                prompt: prompt.to_string(),
2960            })
2961            .await
2962            .ok();
2963        }
2964
2965        // Extract goal when goal_tracking is enabled
2966        let goal = if self.config.goal_tracking {
2967            let g = self.extract_goal(prompt).await?;
2968            if let Some(tx) = &event_tx {
2969                tx.send(AgentEvent::GoalExtracted { goal: g.clone() })
2970                    .await
2971                    .ok();
2972            }
2973            Some(g)
2974        } else {
2975            None
2976        };
2977
2978        // Create execution plan
2979        let plan = self.plan(prompt, None).await?;
2980
2981        // Send planning end event
2982        if let Some(tx) = &event_tx {
2983            tx.send(AgentEvent::PlanningEnd {
2984                estimated_steps: plan.steps.len(),
2985                plan: plan.clone(),
2986            })
2987            .await
2988            .ok();
2989        }
2990
2991        let plan_start = std::time::Instant::now();
2992
2993        // Execute the plan step by step
2994        let result = self.execute_plan(history, &plan, event_tx.clone()).await?;
2995
2996        // Emit the final End event (execute_loop_inner does not emit End in planning mode)
2997        if let Some(tx) = &event_tx {
2998            tx.send(AgentEvent::End {
2999                text: result.text.clone(),
3000                usage: result.usage.clone(),
3001                meta: None,
3002            })
3003            .await
3004            .ok();
3005        }
3006
3007        // Check goal achievement when goal_tracking is enabled
3008        if self.config.goal_tracking {
3009            if let Some(ref g) = goal {
3010                let achieved = self.check_goal_achievement(g, &result.text).await?;
3011                if achieved {
3012                    if let Some(tx) = &event_tx {
3013                        tx.send(AgentEvent::GoalAchieved {
3014                            goal: g.description.clone(),
3015                            total_steps: result.messages.len(),
3016                            duration_ms: plan_start.elapsed().as_millis() as i64,
3017                        })
3018                        .await
3019                        .ok();
3020                    }
3021                }
3022            }
3023        }
3024
3025        Ok(result)
3026    }
3027
3028    /// Execute an execution plan using wave-based dependency-aware scheduling.
3029    ///
3030    /// Steps with no unmet dependencies are grouped into "waves". A wave with
3031    /// a single step executes sequentially (preserving the history chain). A
3032    /// wave with multiple independent steps executes them in parallel via
3033    /// `JoinSet`, then merges their results back into the shared history.
3034    async fn execute_plan(
3035        &self,
3036        history: &[Message],
3037        plan: &ExecutionPlan,
3038        event_tx: Option<mpsc::Sender<AgentEvent>>,
3039    ) -> Result<AgentResult> {
3040        let mut plan = plan.clone();
3041        let mut current_history = history.to_vec();
3042        let mut total_usage = TokenUsage::default();
3043        let mut tool_calls_count = 0;
3044        let total_steps = plan.steps.len();
3045
3046        // Add initial user message with the goal
3047        let steps_text = plan
3048            .steps
3049            .iter()
3050            .enumerate()
3051            .map(|(i, step)| format!("{}. {}", i + 1, step.content))
3052            .collect::<Vec<_>>()
3053            .join("\n");
3054        current_history.push(Message::user(&crate::prompts::render(
3055            crate::prompts::PLAN_EXECUTE_GOAL,
3056            &[("goal", &plan.goal), ("steps", &steps_text)],
3057        )));
3058
3059        loop {
3060            let ready: Vec<String> = plan
3061                .get_ready_steps()
3062                .iter()
3063                .map(|s| s.id.clone())
3064                .collect();
3065
3066            if ready.is_empty() {
3067                // All done or deadlock
3068                if plan.has_deadlock() {
3069                    tracing::warn!(
3070                        "Plan deadlock detected: {} pending steps with unresolvable dependencies",
3071                        plan.pending_count()
3072                    );
3073                }
3074                break;
3075            }
3076
3077            if ready.len() == 1 {
3078                // === Single step: sequential execution (preserves history chain) ===
3079                let step_id = &ready[0];
3080                let step = plan
3081                    .steps
3082                    .iter()
3083                    .find(|s| s.id == *step_id)
3084                    .ok_or_else(|| anyhow::anyhow!("step '{}' not found in plan", step_id))?
3085                    .clone();
3086                let step_number = plan
3087                    .steps
3088                    .iter()
3089                    .position(|s| s.id == *step_id)
3090                    .unwrap_or(0)
3091                    + 1;
3092
3093                // Send step start event
3094                if let Some(tx) = &event_tx {
3095                    tx.send(AgentEvent::StepStart {
3096                        step_id: step.id.clone(),
3097                        description: step.content.clone(),
3098                        step_number,
3099                        total_steps,
3100                    })
3101                    .await
3102                    .ok();
3103                }
3104
3105                plan.mark_status(&step.id, TaskStatus::InProgress);
3106
3107                let step_prompt = crate::prompts::render(
3108                    crate::prompts::PLAN_EXECUTE_STEP,
3109                    &[
3110                        ("step_num", &step_number.to_string()),
3111                        ("description", &step.content),
3112                    ],
3113                );
3114
3115                match self
3116                    .execute_loop(
3117                        &current_history,
3118                        &step_prompt,
3119                        None,
3120                        event_tx.clone(),
3121                        &tokio_util::sync::CancellationToken::new(),
3122                        false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
3123                    )
3124                    .await
3125                {
3126                    Ok(result) => {
3127                        current_history = result.messages.clone();
3128                        total_usage.prompt_tokens += result.usage.prompt_tokens;
3129                        total_usage.completion_tokens += result.usage.completion_tokens;
3130                        total_usage.total_tokens += result.usage.total_tokens;
3131                        tool_calls_count += result.tool_calls_count;
3132                        plan.mark_status(&step.id, TaskStatus::Completed);
3133
3134                        if let Some(tx) = &event_tx {
3135                            tx.send(AgentEvent::StepEnd {
3136                                step_id: step.id.clone(),
3137                                status: TaskStatus::Completed,
3138                                step_number,
3139                                total_steps,
3140                            })
3141                            .await
3142                            .ok();
3143                        }
3144                    }
3145                    Err(e) => {
3146                        tracing::error!("Plan step '{}' failed: {}", step.id, e);
3147                        plan.mark_status(&step.id, TaskStatus::Failed);
3148
3149                        if let Some(tx) = &event_tx {
3150                            tx.send(AgentEvent::StepEnd {
3151                                step_id: step.id.clone(),
3152                                status: TaskStatus::Failed,
3153                                step_number,
3154                                total_steps,
3155                            })
3156                            .await
3157                            .ok();
3158                        }
3159                    }
3160                }
3161            } else {
3162                // === Multiple steps: parallel execution via JoinSet ===
3163                // NOTE: Each parallel branch gets a clone of the base history.
3164                // Individual branch histories (tool calls, LLM turns) are NOT merged
3165                // back — only a summary message is appended. This is a deliberate
3166                // trade-off: merging divergent histories in a deterministic order is
3167                // complex and the summary approach keeps the context window manageable.
3168                let ready_steps: Vec<_> = ready
3169                    .iter()
3170                    .filter_map(|id| {
3171                        let step = plan.steps.iter().find(|s| s.id == *id)?.clone();
3172                        let step_number =
3173                            plan.steps.iter().position(|s| s.id == *id).unwrap_or(0) + 1;
3174                        Some((step, step_number))
3175                    })
3176                    .collect();
3177
3178                // Mark all as InProgress and emit StepStart events
3179                for (step, step_number) in &ready_steps {
3180                    plan.mark_status(&step.id, TaskStatus::InProgress);
3181                    if let Some(tx) = &event_tx {
3182                        tx.send(AgentEvent::StepStart {
3183                            step_id: step.id.clone(),
3184                            description: step.content.clone(),
3185                            step_number: *step_number,
3186                            total_steps,
3187                        })
3188                        .await
3189                        .ok();
3190                    }
3191                }
3192
3193                // Spawn all into JoinSet, each with a clone of the base history
3194                let mut join_set = tokio::task::JoinSet::new();
3195                for (step, step_number) in &ready_steps {
3196                    let base_history = current_history.clone();
3197                    let agent_clone = self.clone();
3198                    let tx = event_tx.clone();
3199                    let step_clone = step.clone();
3200                    let sn = *step_number;
3201
3202                    join_set.spawn(async move {
3203                        let prompt = crate::prompts::render(
3204                            crate::prompts::PLAN_EXECUTE_STEP,
3205                            &[
3206                                ("step_num", &sn.to_string()),
3207                                ("description", &step_clone.content),
3208                            ],
3209                        );
3210                        let result = agent_clone
3211                            .execute_loop(
3212                                &base_history,
3213                                &prompt,
3214                                None,
3215                                tx,
3216                                &tokio_util::sync::CancellationToken::new(),
3217                                false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
3218                            )
3219                            .await;
3220                        (step_clone.id, sn, result)
3221                    });
3222                }
3223
3224                // Collect results
3225                let mut parallel_summaries = Vec::new();
3226                while let Some(join_result) = join_set.join_next().await {
3227                    match join_result {
3228                        Ok((step_id, step_number, step_result)) => match step_result {
3229                            Ok(result) => {
3230                                total_usage.prompt_tokens += result.usage.prompt_tokens;
3231                                total_usage.completion_tokens += result.usage.completion_tokens;
3232                                total_usage.total_tokens += result.usage.total_tokens;
3233                                tool_calls_count += result.tool_calls_count;
3234                                plan.mark_status(&step_id, TaskStatus::Completed);
3235
3236                                // Collect the final assistant text for context merging
3237                                parallel_summaries.push(format!(
3238                                    "- Step {} ({}): {}",
3239                                    step_number, step_id, result.text
3240                                ));
3241
3242                                if let Some(tx) = &event_tx {
3243                                    tx.send(AgentEvent::StepEnd {
3244                                        step_id,
3245                                        status: TaskStatus::Completed,
3246                                        step_number,
3247                                        total_steps,
3248                                    })
3249                                    .await
3250                                    .ok();
3251                                }
3252                            }
3253                            Err(e) => {
3254                                tracing::error!("Plan step '{}' failed: {}", step_id, e);
3255                                plan.mark_status(&step_id, TaskStatus::Failed);
3256
3257                                if let Some(tx) = &event_tx {
3258                                    tx.send(AgentEvent::StepEnd {
3259                                        step_id,
3260                                        status: TaskStatus::Failed,
3261                                        step_number,
3262                                        total_steps,
3263                                    })
3264                                    .await
3265                                    .ok();
3266                                }
3267                            }
3268                        },
3269                        Err(e) => {
3270                            tracing::error!("JoinSet task panicked: {}", e);
3271                        }
3272                    }
3273                }
3274
3275                // Merge parallel results into history for subsequent steps
3276                if !parallel_summaries.is_empty() {
3277                    parallel_summaries.sort(); // Deterministic ordering
3278                    let results_text = parallel_summaries.join("\n");
3279                    current_history.push(Message::user(&crate::prompts::render(
3280                        crate::prompts::PLAN_PARALLEL_RESULTS,
3281                        &[("results", &results_text)],
3282                    )));
3283                }
3284            }
3285
3286            // Emit GoalProgress after each wave
3287            if self.config.goal_tracking {
3288                let completed = plan
3289                    .steps
3290                    .iter()
3291                    .filter(|s| s.status == TaskStatus::Completed)
3292                    .count();
3293                if let Some(tx) = &event_tx {
3294                    tx.send(AgentEvent::GoalProgress {
3295                        goal: plan.goal.clone(),
3296                        progress: plan.progress(),
3297                        completed_steps: completed,
3298                        total_steps,
3299                    })
3300                    .await
3301                    .ok();
3302                }
3303            }
3304        }
3305
3306        // Get final response
3307        let final_text = current_history
3308            .last()
3309            .map(|m| {
3310                m.content
3311                    .iter()
3312                    .filter_map(|block| {
3313                        if let crate::llm::ContentBlock::Text { text } = block {
3314                            Some(text.as_str())
3315                        } else {
3316                            None
3317                        }
3318                    })
3319                    .collect::<Vec<_>>()
3320                    .join("\n")
3321            })
3322            .unwrap_or_default();
3323
3324        Ok(AgentResult {
3325            text: final_text,
3326            messages: current_history,
3327            usage: total_usage,
3328            tool_calls_count,
3329        })
3330    }
3331
3332    /// Extract goal from prompt
3333    ///
3334    /// Delegates to [`LlmPlanner`] for structured JSON goal extraction,
3335    /// falling back to heuristic logic if the LLM call fails.
3336    pub async fn extract_goal(&self, prompt: &str) -> Result<AgentGoal> {
3337        use crate::planning::LlmPlanner;
3338
3339        match LlmPlanner::extract_goal(&self.llm_client, prompt).await {
3340            Ok(goal) => Ok(goal),
3341            Err(e) => {
3342                tracing::warn!("LLM goal extraction failed, using fallback: {}", e);
3343                Ok(LlmPlanner::fallback_goal(prompt))
3344            }
3345        }
3346    }
3347
3348    /// Check if goal is achieved
3349    ///
3350    /// Delegates to [`LlmPlanner`] for structured JSON achievement check,
3351    /// falling back to heuristic logic if the LLM call fails.
3352    pub async fn check_goal_achievement(
3353        &self,
3354        goal: &AgentGoal,
3355        current_state: &str,
3356    ) -> Result<bool> {
3357        use crate::planning::LlmPlanner;
3358
3359        match LlmPlanner::check_achievement(&self.llm_client, goal, current_state).await {
3360            Ok(result) => Ok(result.achieved),
3361            Err(e) => {
3362                tracing::warn!("LLM achievement check failed, using fallback: {}", e);
3363                let result = LlmPlanner::fallback_check_achievement(goal, current_state);
3364                Ok(result.achieved)
3365            }
3366        }
3367    }
3368}
3369
3370#[cfg(test)]
3371mod tests {
3372    use super::*;
3373    use crate::llm::{ContentBlock, StreamEvent};
3374    use crate::permissions::PermissionPolicy;
3375    use crate::tools::ToolExecutor;
3376    use std::path::PathBuf;
3377    use std::sync::atomic::{AtomicUsize, Ordering};
3378
3379    /// Create a default ToolContext for tests
3380    fn test_tool_context() -> ToolContext {
3381        ToolContext::new(PathBuf::from("/tmp"))
3382    }
3383
3384    #[test]
3385    fn test_agent_config_default() {
3386        let config = AgentConfig::default();
3387        assert!(config.prompt_slots.is_empty());
3388        assert!(config.tools.is_empty()); // Tools are provided externally
3389        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
3390        assert!(config.permission_checker.is_none());
3391        assert!(config.context_providers.is_empty());
3392        // Built-in skills are always present by default
3393        let registry = config
3394            .skill_registry
3395            .expect("skill_registry must be Some by default");
3396        assert!(registry.len() >= 7, "expected at least 7 built-in skills");
3397        assert!(registry.get("code-search").is_some());
3398        assert!(registry.get("find-bugs").is_some());
3399    }
3400
3401    // ========================================================================
3402    // Mock LLM Client for Testing
3403    // ========================================================================
3404
    /// Mock LLM client that returns predefined responses.
    ///
    /// Every `complete` / `complete_streaming` call removes and returns the
    /// front entry of `responses`; once the list is empty, further calls fail
    /// with an error.
    pub(crate) struct MockLlmClient {
        /// Responses to return (consumed in order, one per call)
        responses: std::sync::Mutex<Vec<LlmResponse>>,
        /// Number of completion calls made, including calls that failed
        /// because no response was left
        pub(crate) call_count: AtomicUsize,
    }
3412
3413    impl MockLlmClient {
3414        pub(crate) fn new(responses: Vec<LlmResponse>) -> Self {
3415            Self {
3416                responses: std::sync::Mutex::new(responses),
3417                call_count: AtomicUsize::new(0),
3418            }
3419        }
3420
3421        /// Create a response with text only (no tool calls)
3422        pub(crate) fn text_response(text: &str) -> LlmResponse {
3423            LlmResponse {
3424                message: Message {
3425                    role: "assistant".to_string(),
3426                    content: vec![ContentBlock::Text {
3427                        text: text.to_string(),
3428                    }],
3429                    reasoning_content: None,
3430                },
3431                usage: TokenUsage {
3432                    prompt_tokens: 10,
3433                    completion_tokens: 5,
3434                    total_tokens: 15,
3435                    cache_read_tokens: None,
3436                    cache_write_tokens: None,
3437                },
3438                stop_reason: Some("end_turn".to_string()),
3439                meta: None,
3440            }
3441        }
3442
3443        /// Create a response with a tool call
3444        pub(crate) fn tool_call_response(
3445            tool_id: &str,
3446            tool_name: &str,
3447            args: serde_json::Value,
3448        ) -> LlmResponse {
3449            LlmResponse {
3450                message: Message {
3451                    role: "assistant".to_string(),
3452                    content: vec![ContentBlock::ToolUse {
3453                        id: tool_id.to_string(),
3454                        name: tool_name.to_string(),
3455                        input: args,
3456                    }],
3457                    reasoning_content: None,
3458                },
3459                usage: TokenUsage {
3460                    prompt_tokens: 10,
3461                    completion_tokens: 5,
3462                    total_tokens: 15,
3463                    cache_read_tokens: None,
3464                    cache_write_tokens: None,
3465                },
3466                stop_reason: Some("tool_use".to_string()),
3467                meta: None,
3468            }
3469        }
3470    }
3471
3472    #[async_trait::async_trait]
3473    impl LlmClient for MockLlmClient {
3474        async fn complete(
3475            &self,
3476            _messages: &[Message],
3477            _system: Option<&str>,
3478            _tools: &[ToolDefinition],
3479        ) -> Result<LlmResponse> {
3480            self.call_count.fetch_add(1, Ordering::SeqCst);
3481            let mut responses = self.responses.lock().unwrap();
3482            if responses.is_empty() {
3483                anyhow::bail!("No more mock responses available");
3484            }
3485            Ok(responses.remove(0))
3486        }
3487
3488        async fn complete_streaming(
3489            &self,
3490            _messages: &[Message],
3491            _system: Option<&str>,
3492            _tools: &[ToolDefinition],
3493        ) -> Result<mpsc::Receiver<StreamEvent>> {
3494            self.call_count.fetch_add(1, Ordering::SeqCst);
3495            let mut responses = self.responses.lock().unwrap();
3496            if responses.is_empty() {
3497                anyhow::bail!("No more mock responses available");
3498            }
3499            let response = responses.remove(0);
3500
3501            let (tx, rx) = mpsc::channel(10);
3502            tokio::spawn(async move {
3503                // Send text deltas if any
3504                for block in &response.message.content {
3505                    if let ContentBlock::Text { text } = block {
3506                        tx.send(StreamEvent::TextDelta(text.clone())).await.ok();
3507                    }
3508                }
3509                tx.send(StreamEvent::Done(response)).await.ok();
3510            });
3511
3512            Ok(rx)
3513        }
3514    }
3515
3516    // ========================================================================
3517    // Agent Loop Tests
3518    // ========================================================================
3519
3520    #[tokio::test]
3521    async fn test_agent_simple_response() {
3522        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
3523            "Hello, I'm an AI assistant.",
3524        )]));
3525
3526        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3527        let config = AgentConfig::default();
3528
3529        let agent = AgentLoop::new(
3530            mock_client.clone(),
3531            tool_executor,
3532            test_tool_context(),
3533            config,
3534        );
3535        let result = agent.execute(&[], "Hello", None).await.unwrap();
3536
3537        assert_eq!(result.text, "Hello, I'm an AI assistant.");
3538        assert_eq!(result.tool_calls_count, 0);
3539        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
3540    }
3541
3542    #[tokio::test]
3543    async fn test_agent_with_tool_call() {
3544        let mock_client = Arc::new(MockLlmClient::new(vec![
3545            // First response: tool call
3546            MockLlmClient::tool_call_response(
3547                "tool-1",
3548                "bash",
3549                serde_json::json!({"command": "echo hello"}),
3550            ),
3551            // Second response: final text
3552            MockLlmClient::text_response("The command output was: hello"),
3553        ]));
3554
3555        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3556        let config = AgentConfig::default();
3557
3558        let agent = AgentLoop::new(
3559            mock_client.clone(),
3560            tool_executor,
3561            test_tool_context(),
3562            config,
3563        );
3564        let result = agent.execute(&[], "Run echo hello", None).await.unwrap();
3565
3566        assert_eq!(result.text, "The command output was: hello");
3567        assert_eq!(result.tool_calls_count, 1);
3568        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
3569    }
3570
3571    #[tokio::test]
3572    async fn test_agent_permission_deny() {
3573        let mock_client = Arc::new(MockLlmClient::new(vec![
3574            // First response: tool call that will be denied
3575            MockLlmClient::tool_call_response(
3576                "tool-1",
3577                "bash",
3578                serde_json::json!({"command": "rm -rf /tmp/test"}),
3579            ),
3580            // Second response: LLM responds to the denial
3581            MockLlmClient::text_response(
3582                "I cannot execute that command due to permission restrictions.",
3583            ),
3584        ]));
3585
3586        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3587
3588        // Create permission policy that denies rm commands
3589        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
3590
3591        let config = AgentConfig {
3592            permission_checker: Some(Arc::new(permission_policy)),
3593            ..Default::default()
3594        };
3595
3596        let (tx, mut rx) = mpsc::channel(100);
3597        let agent = AgentLoop::new(
3598            mock_client.clone(),
3599            tool_executor,
3600            test_tool_context(),
3601            config,
3602        );
3603        let result = agent.execute(&[], "Delete files", Some(tx)).await.unwrap();
3604
3605        // Check that we received a PermissionDenied event
3606        let mut found_permission_denied = false;
3607        while let Ok(event) = rx.try_recv() {
3608            if let AgentEvent::PermissionDenied { tool_name, .. } = event {
3609                assert_eq!(tool_name, "bash");
3610                found_permission_denied = true;
3611            }
3612        }
3613        assert!(
3614            found_permission_denied,
3615            "Should have received PermissionDenied event"
3616        );
3617
3618        assert_eq!(result.tool_calls_count, 1);
3619    }
3620
3621    #[tokio::test]
3622    async fn test_agent_permission_allow() {
3623        let mock_client = Arc::new(MockLlmClient::new(vec![
3624            // First response: tool call that will be allowed
3625            MockLlmClient::tool_call_response(
3626                "tool-1",
3627                "bash",
3628                serde_json::json!({"command": "echo hello"}),
3629            ),
3630            // Second response: final text
3631            MockLlmClient::text_response("Done!"),
3632        ]));
3633
3634        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3635
3636        // Create permission policy that allows echo commands
3637        let permission_policy = PermissionPolicy::new()
3638            .allow("bash(echo:*)")
3639            .deny("bash(rm:*)");
3640
3641        let config = AgentConfig {
3642            permission_checker: Some(Arc::new(permission_policy)),
3643            ..Default::default()
3644        };
3645
3646        let agent = AgentLoop::new(
3647            mock_client.clone(),
3648            tool_executor,
3649            test_tool_context(),
3650            config,
3651        );
3652        let result = agent.execute(&[], "Echo hello", None).await.unwrap();
3653
3654        assert_eq!(result.text, "Done!");
3655        assert_eq!(result.tool_calls_count, 1);
3656    }
3657
3658    #[tokio::test]
3659    async fn test_agent_streaming_events() {
3660        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
3661            "Hello!",
3662        )]));
3663
3664        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3665        let config = AgentConfig::default();
3666
3667        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3668        let (mut rx, handle, _cancel_token) = agent.execute_streaming(&[], "Hi").await.unwrap();
3669
3670        // Collect events
3671        let mut events = Vec::new();
3672        while let Some(event) = rx.recv().await {
3673            events.push(event);
3674        }
3675
3676        let result = handle.await.unwrap().unwrap();
3677        assert_eq!(result.text, "Hello!");
3678
3679        // Check we received Start and End events
3680        assert!(events.iter().any(|e| matches!(e, AgentEvent::Start { .. })));
3681        assert!(events.iter().any(|e| matches!(e, AgentEvent::End { .. })));
3682    }
3683
3684    #[tokio::test]
3685    async fn test_agent_max_tool_rounds() {
3686        // Create a mock that always returns tool calls (infinite loop)
3687        let responses: Vec<LlmResponse> = (0..100)
3688            .map(|i| {
3689                MockLlmClient::tool_call_response(
3690                    &format!("tool-{}", i),
3691                    "bash",
3692                    serde_json::json!({"command": "echo loop"}),
3693                )
3694            })
3695            .collect();
3696
3697        let mock_client = Arc::new(MockLlmClient::new(responses));
3698        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3699
3700        let config = AgentConfig {
3701            max_tool_rounds: 3,
3702            ..Default::default()
3703        };
3704
3705        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3706        let result = agent.execute(&[], "Loop forever", None).await;
3707
3708        // Should fail due to max tool rounds exceeded
3709        assert!(result.is_err());
3710        assert!(result.unwrap_err().to_string().contains("Max tool rounds"));
3711    }
3712
3713    #[tokio::test]
3714    async fn test_agent_no_permission_policy_defaults_to_ask() {
3715        // When no permission policy is set, tools default to Ask.
3716        // Without a confirmation manager, Ask = safe deny.
3717        let mock_client = Arc::new(MockLlmClient::new(vec![
3718            MockLlmClient::tool_call_response(
3719                "tool-1",
3720                "bash",
3721                serde_json::json!({"command": "rm -rf /tmp/test"}),
3722            ),
3723            MockLlmClient::text_response("Denied!"),
3724        ]));
3725
3726        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3727        let config = AgentConfig {
3728            permission_checker: None, // No policy → defaults to Ask
3729            // No confirmation_manager → safe deny
3730            ..Default::default()
3731        };
3732
3733        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3734        let result = agent.execute(&[], "Delete", None).await.unwrap();
3735
3736        // Should be denied (no policy + no CM = safe deny)
3737        assert_eq!(result.text, "Denied!");
3738        assert_eq!(result.tool_calls_count, 1);
3739    }
3740
3741    #[tokio::test]
3742    async fn test_agent_permission_ask_without_cm_denies() {
3743        // When permission is Ask and no confirmation manager configured,
3744        // tool execution should be denied (safe default).
3745        let mock_client = Arc::new(MockLlmClient::new(vec![
3746            MockLlmClient::tool_call_response(
3747                "tool-1",
3748                "bash",
3749                serde_json::json!({"command": "echo test"}),
3750            ),
3751            MockLlmClient::text_response("Denied!"),
3752        ]));
3753
3754        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3755
3756        // Create policy where bash falls through to Ask (default)
3757        let permission_policy = PermissionPolicy::new(); // Default decision is Ask
3758
3759        let config = AgentConfig {
3760            permission_checker: Some(Arc::new(permission_policy)),
3761            // No confirmation_manager — safe deny
3762            ..Default::default()
3763        };
3764
3765        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3766        let result = agent.execute(&[], "Echo", None).await.unwrap();
3767
3768        // Should deny (Ask without CM = safe deny)
3769        assert_eq!(result.text, "Denied!");
3770        // The tool result should contain the denial message
3771        assert!(result.tool_calls_count >= 1);
3772    }
3773
3774    // ========================================================================
3775    // HITL (Human-in-the-Loop) Tests
3776    // ========================================================================
3777
3778    #[tokio::test]
3779    async fn test_agent_hitl_approved() {
3780        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3781        use tokio::sync::broadcast;
3782
3783        let mock_client = Arc::new(MockLlmClient::new(vec![
3784            MockLlmClient::tool_call_response(
3785                "tool-1",
3786                "bash",
3787                serde_json::json!({"command": "echo hello"}),
3788            ),
3789            MockLlmClient::text_response("Command executed!"),
3790        ]));
3791
3792        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3793
3794        // Create HITL confirmation manager with policy enabled
3795        let (event_tx, _event_rx) = broadcast::channel(100);
3796        let hitl_policy = ConfirmationPolicy {
3797            enabled: true,
3798            ..Default::default()
3799        };
3800        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3801
3802        // Create permission policy that returns Ask for bash
3803        let permission_policy = PermissionPolicy::new(); // Default is Ask
3804
3805        let config = AgentConfig {
3806            permission_checker: Some(Arc::new(permission_policy)),
3807            confirmation_manager: Some(confirmation_manager.clone()),
3808            ..Default::default()
3809        };
3810
3811        // Spawn a task to approve the confirmation
3812        let cm_clone = confirmation_manager.clone();
3813        tokio::spawn(async move {
3814            // Wait a bit for the confirmation request to be created
3815            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
3816            // Approve it
3817            cm_clone.confirm("tool-1", true, None).await.ok();
3818        });
3819
3820        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3821        let result = agent.execute(&[], "Run echo", None).await.unwrap();
3822
3823        assert_eq!(result.text, "Command executed!");
3824        assert_eq!(result.tool_calls_count, 1);
3825    }
3826
3827    #[tokio::test]
3828    async fn test_agent_hitl_rejected() {
3829        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3830        use tokio::sync::broadcast;
3831
3832        let mock_client = Arc::new(MockLlmClient::new(vec![
3833            MockLlmClient::tool_call_response(
3834                "tool-1",
3835                "bash",
3836                serde_json::json!({"command": "rm -rf /"}),
3837            ),
3838            MockLlmClient::text_response("Understood, I won't do that."),
3839        ]));
3840
3841        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3842
3843        // Create HITL confirmation manager
3844        let (event_tx, _event_rx) = broadcast::channel(100);
3845        let hitl_policy = ConfirmationPolicy {
3846            enabled: true,
3847            ..Default::default()
3848        };
3849        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3850
3851        // Permission policy returns Ask
3852        let permission_policy = PermissionPolicy::new();
3853
3854        let config = AgentConfig {
3855            permission_checker: Some(Arc::new(permission_policy)),
3856            confirmation_manager: Some(confirmation_manager.clone()),
3857            ..Default::default()
3858        };
3859
3860        // Spawn a task to reject the confirmation
3861        let cm_clone = confirmation_manager.clone();
3862        tokio::spawn(async move {
3863            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
3864            cm_clone
3865                .confirm("tool-1", false, Some("Too dangerous".to_string()))
3866                .await
3867                .ok();
3868        });
3869
3870        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3871        let result = agent.execute(&[], "Delete everything", None).await.unwrap();
3872
3873        // LLM should respond to the rejection
3874        assert_eq!(result.text, "Understood, I won't do that.");
3875    }
3876
3877    #[tokio::test]
3878    async fn test_agent_hitl_timeout_reject() {
3879        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
3880        use tokio::sync::broadcast;
3881
3882        let mock_client = Arc::new(MockLlmClient::new(vec![
3883            MockLlmClient::tool_call_response(
3884                "tool-1",
3885                "bash",
3886                serde_json::json!({"command": "echo test"}),
3887            ),
3888            MockLlmClient::text_response("Timed out, I understand."),
3889        ]));
3890
3891        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3892
3893        // Create HITL with very short timeout and Reject action
3894        let (event_tx, _event_rx) = broadcast::channel(100);
3895        let hitl_policy = ConfirmationPolicy {
3896            enabled: true,
3897            default_timeout_ms: 50, // Very short timeout
3898            timeout_action: TimeoutAction::Reject,
3899            ..Default::default()
3900        };
3901        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3902
3903        let permission_policy = PermissionPolicy::new();
3904
3905        let config = AgentConfig {
3906            permission_checker: Some(Arc::new(permission_policy)),
3907            confirmation_manager: Some(confirmation_manager),
3908            ..Default::default()
3909        };
3910
3911        // Don't approve - let it timeout
3912        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3913        let result = agent.execute(&[], "Echo", None).await.unwrap();
3914
3915        // Should get timeout rejection response from LLM
3916        assert_eq!(result.text, "Timed out, I understand.");
3917    }
3918
3919    #[tokio::test]
3920    async fn test_agent_hitl_timeout_auto_approve() {
3921        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
3922        use tokio::sync::broadcast;
3923
3924        let mock_client = Arc::new(MockLlmClient::new(vec![
3925            MockLlmClient::tool_call_response(
3926                "tool-1",
3927                "bash",
3928                serde_json::json!({"command": "echo hello"}),
3929            ),
3930            MockLlmClient::text_response("Auto-approved and executed!"),
3931        ]));
3932
3933        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3934
3935        // Create HITL with very short timeout and AutoApprove action
3936        let (event_tx, _event_rx) = broadcast::channel(100);
3937        let hitl_policy = ConfirmationPolicy {
3938            enabled: true,
3939            default_timeout_ms: 50, // Very short timeout
3940            timeout_action: TimeoutAction::AutoApprove,
3941            ..Default::default()
3942        };
3943        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3944
3945        let permission_policy = PermissionPolicy::new();
3946
3947        let config = AgentConfig {
3948            permission_checker: Some(Arc::new(permission_policy)),
3949            confirmation_manager: Some(confirmation_manager),
3950            ..Default::default()
3951        };
3952
3953        // Don't approve - let it timeout and auto-approve
3954        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
3955        let result = agent.execute(&[], "Echo", None).await.unwrap();
3956
3957        // Should auto-approve on timeout and execute
3958        assert_eq!(result.text, "Auto-approved and executed!");
3959        assert_eq!(result.tool_calls_count, 1);
3960    }
3961
3962    #[tokio::test]
3963    async fn test_agent_hitl_confirmation_events() {
3964        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
3965        use tokio::sync::broadcast;
3966
3967        let mock_client = Arc::new(MockLlmClient::new(vec![
3968            MockLlmClient::tool_call_response(
3969                "tool-1",
3970                "bash",
3971                serde_json::json!({"command": "echo test"}),
3972            ),
3973            MockLlmClient::text_response("Done!"),
3974        ]));
3975
3976        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
3977
3978        // Create HITL confirmation manager
3979        let (event_tx, mut event_rx) = broadcast::channel(100);
3980        let hitl_policy = ConfirmationPolicy {
3981            enabled: true,
3982            default_timeout_ms: 5000, // Long enough timeout
3983            ..Default::default()
3984        };
3985        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
3986
3987        let permission_policy = PermissionPolicy::new();
3988
3989        let config = AgentConfig {
3990            permission_checker: Some(Arc::new(permission_policy)),
3991            confirmation_manager: Some(confirmation_manager.clone()),
3992            ..Default::default()
3993        };
3994
3995        // Spawn task to approve and collect events
3996        let cm_clone = confirmation_manager.clone();
3997        let event_handle = tokio::spawn(async move {
3998            let mut events = Vec::new();
3999            // Wait for ConfirmationRequired event
4000            while let Ok(event) = event_rx.recv().await {
4001                events.push(event.clone());
4002                if let AgentEvent::ConfirmationRequired { tool_id, .. } = event {
4003                    // Approve it
4004                    cm_clone.confirm(&tool_id, true, None).await.ok();
4005                    // Wait for ConfirmationReceived
4006                    if let Ok(recv_event) = event_rx.recv().await {
4007                        events.push(recv_event);
4008                    }
4009                    break;
4010                }
4011            }
4012            events
4013        });
4014
4015        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4016        let _result = agent.execute(&[], "Echo", None).await.unwrap();
4017
4018        // Check events
4019        let events = event_handle.await.unwrap();
4020        assert!(
4021            events
4022                .iter()
4023                .any(|e| matches!(e, AgentEvent::ConfirmationRequired { .. })),
4024            "Should have ConfirmationRequired event"
4025        );
4026        assert!(
4027            events
4028                .iter()
4029                .any(|e| matches!(e, AgentEvent::ConfirmationReceived { approved: true, .. })),
4030            "Should have ConfirmationReceived event with approved=true"
4031        );
4032    }
4033
4034    #[tokio::test]
4035    async fn test_agent_hitl_disabled_auto_executes() {
4036        // When HITL is disabled, tools should execute automatically even with Ask permission
4037        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4038        use tokio::sync::broadcast;
4039
4040        let mock_client = Arc::new(MockLlmClient::new(vec![
4041            MockLlmClient::tool_call_response(
4042                "tool-1",
4043                "bash",
4044                serde_json::json!({"command": "echo auto"}),
4045            ),
4046            MockLlmClient::text_response("Auto executed!"),
4047        ]));
4048
4049        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4050
4051        // Create HITL with enabled=false
4052        let (event_tx, _event_rx) = broadcast::channel(100);
4053        let hitl_policy = ConfirmationPolicy {
4054            enabled: false, // HITL disabled
4055            ..Default::default()
4056        };
4057        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4058
4059        let permission_policy = PermissionPolicy::new(); // Default is Ask
4060
4061        let config = AgentConfig {
4062            permission_checker: Some(Arc::new(permission_policy)),
4063            confirmation_manager: Some(confirmation_manager),
4064            ..Default::default()
4065        };
4066
4067        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4068        let result = agent.execute(&[], "Echo", None).await.unwrap();
4069
4070        // Should execute without waiting for confirmation
4071        assert_eq!(result.text, "Auto executed!");
4072        assert_eq!(result.tool_calls_count, 1);
4073    }
4074
4075    #[tokio::test]
4076    async fn test_agent_hitl_with_permission_deny_skips_hitl() {
4077        // When permission is Deny, HITL should not be triggered
4078        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4079        use tokio::sync::broadcast;
4080
4081        let mock_client = Arc::new(MockLlmClient::new(vec![
4082            MockLlmClient::tool_call_response(
4083                "tool-1",
4084                "bash",
4085                serde_json::json!({"command": "rm -rf /"}),
4086            ),
4087            MockLlmClient::text_response("Blocked by permission."),
4088        ]));
4089
4090        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4091
4092        // Create HITL enabled
4093        let (event_tx, mut event_rx) = broadcast::channel(100);
4094        let hitl_policy = ConfirmationPolicy {
4095            enabled: true,
4096            ..Default::default()
4097        };
4098        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4099
4100        // Permission policy denies rm commands
4101        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4102
4103        let config = AgentConfig {
4104            permission_checker: Some(Arc::new(permission_policy)),
4105            confirmation_manager: Some(confirmation_manager),
4106            ..Default::default()
4107        };
4108
4109        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4110        let result = agent.execute(&[], "Delete", None).await.unwrap();
4111
4112        // Should be denied without HITL
4113        assert_eq!(result.text, "Blocked by permission.");
4114
4115        // Should NOT have any ConfirmationRequired events
4116        let mut found_confirmation = false;
4117        while let Ok(event) = event_rx.try_recv() {
4118            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4119                found_confirmation = true;
4120            }
4121        }
4122        assert!(
4123            !found_confirmation,
4124            "HITL should not be triggered when permission is Deny"
4125        );
4126    }
4127
4128    #[tokio::test]
4129    async fn test_agent_hitl_with_permission_allow_skips_hitl() {
4130        // When permission is Allow, HITL confirmation is skipped entirely.
4131        // PermissionPolicy is the declarative rule engine; Allow = execute directly.
4132        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4133        use tokio::sync::broadcast;
4134
4135        let mock_client = Arc::new(MockLlmClient::new(vec![
4136            MockLlmClient::tool_call_response(
4137                "tool-1",
4138                "bash",
4139                serde_json::json!({"command": "echo hello"}),
4140            ),
4141            MockLlmClient::text_response("Allowed!"),
4142        ]));
4143
4144        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4145
4146        // Create HITL enabled
4147        let (event_tx, mut event_rx) = broadcast::channel(100);
4148        let hitl_policy = ConfirmationPolicy {
4149            enabled: true,
4150            ..Default::default()
4151        };
4152        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4153
4154        // Permission policy allows echo commands
4155        let permission_policy = PermissionPolicy::new().allow("bash(echo:*)");
4156
4157        let config = AgentConfig {
4158            permission_checker: Some(Arc::new(permission_policy)),
4159            confirmation_manager: Some(confirmation_manager.clone()),
4160            ..Default::default()
4161        };
4162
4163        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4164        let result = agent.execute(&[], "Echo", None).await.unwrap();
4165
4166        // Should execute directly without HITL (permission Allow skips confirmation)
4167        assert_eq!(result.text, "Allowed!");
4168
4169        // Should NOT have ConfirmationRequired event (Allow bypasses HITL)
4170        let mut found_confirmation = false;
4171        while let Ok(event) = event_rx.try_recv() {
4172            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4173                found_confirmation = true;
4174            }
4175        }
4176        assert!(
4177            !found_confirmation,
4178            "Permission Allow should skip HITL confirmation"
4179        );
4180    }
4181
4182    #[tokio::test]
4183    async fn test_agent_hitl_multiple_tool_calls() {
4184        // Test multiple tool calls in sequence with HITL
4185        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4186        use tokio::sync::broadcast;
4187
4188        let mock_client = Arc::new(MockLlmClient::new(vec![
4189            // First response: two tool calls
4190            LlmResponse {
4191                message: Message {
4192                    role: "assistant".to_string(),
4193                    content: vec![
4194                        ContentBlock::ToolUse {
4195                            id: "tool-1".to_string(),
4196                            name: "bash".to_string(),
4197                            input: serde_json::json!({"command": "echo first"}),
4198                        },
4199                        ContentBlock::ToolUse {
4200                            id: "tool-2".to_string(),
4201                            name: "bash".to_string(),
4202                            input: serde_json::json!({"command": "echo second"}),
4203                        },
4204                    ],
4205                    reasoning_content: None,
4206                },
4207                usage: TokenUsage {
4208                    prompt_tokens: 10,
4209                    completion_tokens: 5,
4210                    total_tokens: 15,
4211                    cache_read_tokens: None,
4212                    cache_write_tokens: None,
4213                },
4214                stop_reason: Some("tool_use".to_string()),
4215                meta: None,
4216            },
4217            MockLlmClient::text_response("Both executed!"),
4218        ]));
4219
4220        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4221
4222        // Create HITL
4223        let (event_tx, _event_rx) = broadcast::channel(100);
4224        let hitl_policy = ConfirmationPolicy {
4225            enabled: true,
4226            default_timeout_ms: 5000,
4227            ..Default::default()
4228        };
4229        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4230
4231        let permission_policy = PermissionPolicy::new(); // Default Ask
4232
4233        let config = AgentConfig {
4234            permission_checker: Some(Arc::new(permission_policy)),
4235            confirmation_manager: Some(confirmation_manager.clone()),
4236            ..Default::default()
4237        };
4238
4239        // Spawn task to approve both tools
4240        let cm_clone = confirmation_manager.clone();
4241        tokio::spawn(async move {
4242            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4243            cm_clone.confirm("tool-1", true, None).await.ok();
4244            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4245            cm_clone.confirm("tool-2", true, None).await.ok();
4246        });
4247
4248        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4249        let result = agent.execute(&[], "Run both", None).await.unwrap();
4250
4251        assert_eq!(result.text, "Both executed!");
4252        assert_eq!(result.tool_calls_count, 2);
4253    }
4254
4255    #[tokio::test]
4256    async fn test_agent_hitl_partial_approval() {
4257        // Test: first tool approved, second rejected
4258        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4259        use tokio::sync::broadcast;
4260
4261        let mock_client = Arc::new(MockLlmClient::new(vec![
4262            // First response: two tool calls
4263            LlmResponse {
4264                message: Message {
4265                    role: "assistant".to_string(),
4266                    content: vec![
4267                        ContentBlock::ToolUse {
4268                            id: "tool-1".to_string(),
4269                            name: "bash".to_string(),
4270                            input: serde_json::json!({"command": "echo safe"}),
4271                        },
4272                        ContentBlock::ToolUse {
4273                            id: "tool-2".to_string(),
4274                            name: "bash".to_string(),
4275                            input: serde_json::json!({"command": "rm -rf /"}),
4276                        },
4277                    ],
4278                    reasoning_content: None,
4279                },
4280                usage: TokenUsage {
4281                    prompt_tokens: 10,
4282                    completion_tokens: 5,
4283                    total_tokens: 15,
4284                    cache_read_tokens: None,
4285                    cache_write_tokens: None,
4286                },
4287                stop_reason: Some("tool_use".to_string()),
4288                meta: None,
4289            },
4290            MockLlmClient::text_response("First worked, second rejected."),
4291        ]));
4292
4293        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4294
4295        let (event_tx, _event_rx) = broadcast::channel(100);
4296        let hitl_policy = ConfirmationPolicy {
4297            enabled: true,
4298            default_timeout_ms: 5000,
4299            ..Default::default()
4300        };
4301        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4302
4303        let permission_policy = PermissionPolicy::new();
4304
4305        let config = AgentConfig {
4306            permission_checker: Some(Arc::new(permission_policy)),
4307            confirmation_manager: Some(confirmation_manager.clone()),
4308            ..Default::default()
4309        };
4310
4311        // Approve first, reject second
4312        let cm_clone = confirmation_manager.clone();
4313        tokio::spawn(async move {
4314            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4315            cm_clone.confirm("tool-1", true, None).await.ok();
4316            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4317            cm_clone
4318                .confirm("tool-2", false, Some("Dangerous".to_string()))
4319                .await
4320                .ok();
4321        });
4322
4323        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4324        let result = agent.execute(&[], "Run both", None).await.unwrap();
4325
4326        assert_eq!(result.text, "First worked, second rejected.");
4327        assert_eq!(result.tool_calls_count, 2);
4328    }
4329
4330    #[tokio::test]
4331    async fn test_agent_hitl_yolo_mode_auto_approves() {
4332        // YOLO mode: specific lanes auto-approve without confirmation
4333        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, SessionLane};
4334        use tokio::sync::broadcast;
4335
4336        let mock_client = Arc::new(MockLlmClient::new(vec![
4337            MockLlmClient::tool_call_response(
4338                "tool-1",
4339                "read", // Query lane tool
4340                serde_json::json!({"path": "/tmp/test.txt"}),
4341            ),
4342            MockLlmClient::text_response("File read!"),
4343        ]));
4344
4345        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4346
4347        // YOLO mode for Query lane (read, glob, ls, grep)
4348        let (event_tx, mut event_rx) = broadcast::channel(100);
4349        let mut yolo_lanes = std::collections::HashSet::new();
4350        yolo_lanes.insert(SessionLane::Query);
4351        let hitl_policy = ConfirmationPolicy {
4352            enabled: true,
4353            yolo_lanes, // Auto-approve query operations
4354            ..Default::default()
4355        };
4356        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4357
4358        let permission_policy = PermissionPolicy::new();
4359
4360        let config = AgentConfig {
4361            permission_checker: Some(Arc::new(permission_policy)),
4362            confirmation_manager: Some(confirmation_manager),
4363            ..Default::default()
4364        };
4365
4366        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4367        let result = agent.execute(&[], "Read file", None).await.unwrap();
4368
4369        // Should auto-execute without confirmation (YOLO mode)
4370        assert_eq!(result.text, "File read!");
4371
4372        // Should NOT have ConfirmationRequired for yolo lane
4373        let mut found_confirmation = false;
4374        while let Ok(event) = event_rx.try_recv() {
4375            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4376                found_confirmation = true;
4377            }
4378        }
4379        assert!(
4380            !found_confirmation,
4381            "YOLO mode should not trigger confirmation"
4382        );
4383    }
4384
4385    #[tokio::test]
4386    async fn test_agent_config_with_all_options() {
4387        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4388        use tokio::sync::broadcast;
4389
4390        let (event_tx, _) = broadcast::channel(100);
4391        let hitl_policy = ConfirmationPolicy::default();
4392        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4393
4394        let permission_policy = PermissionPolicy::new().allow("bash(*)");
4395
4396        let config = AgentConfig {
4397            prompt_slots: SystemPromptSlots {
4398                extra: Some("Test system prompt".to_string()),
4399                ..Default::default()
4400            },
4401            tools: vec![],
4402            max_tool_rounds: 10,
4403            permission_checker: Some(Arc::new(permission_policy)),
4404            confirmation_manager: Some(confirmation_manager),
4405            context_providers: vec![],
4406            planning_mode: PlanningMode::default(),
4407            goal_tracking: false,
4408            hook_engine: None,
4409            skill_registry: None,
4410            ..AgentConfig::default()
4411        };
4412
4413        assert!(config.prompt_slots.build().contains("Test system prompt"));
4414        assert_eq!(config.max_tool_rounds, 10);
4415        assert!(config.permission_checker.is_some());
4416        assert!(config.confirmation_manager.is_some());
4417        assert!(config.context_providers.is_empty());
4418
4419        // Test Debug trait
4420        let debug_str = format!("{:?}", config);
4421        assert!(debug_str.contains("AgentConfig"));
4422        assert!(debug_str.contains("permission_checker: true"));
4423        assert!(debug_str.contains("confirmation_manager: true"));
4424        assert!(debug_str.contains("context_providers: 0"));
4425    }
4426
4427    // ========================================================================
4428    // Context Provider Tests
4429    // ========================================================================
4430
4431    use crate::context::{ContextItem, ContextType};
4432
4433    /// Mock context provider for testing
4434    struct MockContextProvider {
4435        name: String,
4436        items: Vec<ContextItem>,
4437        on_turn_calls: std::sync::Arc<tokio::sync::RwLock<Vec<(String, String, String)>>>,
4438    }
4439
4440    impl MockContextProvider {
4441        fn new(name: &str) -> Self {
4442            Self {
4443                name: name.to_string(),
4444                items: Vec::new(),
4445                on_turn_calls: std::sync::Arc::new(tokio::sync::RwLock::new(Vec::new())),
4446            }
4447        }
4448
4449        fn with_items(mut self, items: Vec<ContextItem>) -> Self {
4450            self.items = items;
4451            self
4452        }
4453    }
4454
4455    #[async_trait::async_trait]
4456    impl ContextProvider for MockContextProvider {
4457        fn name(&self) -> &str {
4458            &self.name
4459        }
4460
4461        async fn query(&self, _query: &ContextQuery) -> anyhow::Result<ContextResult> {
4462            let mut result = ContextResult::new(&self.name);
4463            for item in &self.items {
4464                result.add_item(item.clone());
4465            }
4466            Ok(result)
4467        }
4468
4469        async fn on_turn_complete(
4470            &self,
4471            session_id: &str,
4472            prompt: &str,
4473            response: &str,
4474        ) -> anyhow::Result<()> {
4475            let mut calls = self.on_turn_calls.write().await;
4476            calls.push((
4477                session_id.to_string(),
4478                prompt.to_string(),
4479                response.to_string(),
4480            ));
4481            Ok(())
4482        }
4483    }
4484
4485    #[tokio::test]
4486    async fn test_agent_with_context_provider() {
4487        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4488            "Response using context",
4489        )]));
4490
4491        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4492
4493        let provider =
4494            MockContextProvider::new("test-provider").with_items(vec![ContextItem::new(
4495                "ctx-1",
4496                ContextType::Resource,
4497                "Relevant context here",
4498            )
4499            .with_source("test://docs/example")]);
4500
4501        let config = AgentConfig {
4502            prompt_slots: SystemPromptSlots {
4503                extra: Some("You are helpful.".to_string()),
4504                ..Default::default()
4505            },
4506            context_providers: vec![Arc::new(provider)],
4507            ..Default::default()
4508        };
4509
4510        let agent = AgentLoop::new(
4511            mock_client.clone(),
4512            tool_executor,
4513            test_tool_context(),
4514            config,
4515        );
4516        let result = agent.execute(&[], "What is X?", None).await.unwrap();
4517
4518        assert_eq!(result.text, "Response using context");
4519        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
4520    }
4521
4522    #[tokio::test]
4523    async fn test_agent_context_provider_events() {
4524        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4525            "Answer",
4526        )]));
4527
4528        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4529
4530        let provider =
4531            MockContextProvider::new("event-provider").with_items(vec![ContextItem::new(
4532                "item-1",
4533                ContextType::Memory,
4534                "Memory content",
4535            )
4536            .with_token_count(50)]);
4537
4538        let config = AgentConfig {
4539            context_providers: vec![Arc::new(provider)],
4540            ..Default::default()
4541        };
4542
4543        let (tx, mut rx) = mpsc::channel(100);
4544        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4545        let _result = agent.execute(&[], "Test prompt", Some(tx)).await.unwrap();
4546
4547        // Collect events
4548        let mut events = Vec::new();
4549        while let Ok(event) = rx.try_recv() {
4550            events.push(event);
4551        }
4552
4553        // Should have ContextResolving and ContextResolved events
4554        assert!(
4555            events
4556                .iter()
4557                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
4558            "Should have ContextResolving event"
4559        );
4560        assert!(
4561            events
4562                .iter()
4563                .any(|e| matches!(e, AgentEvent::ContextResolved { .. })),
4564            "Should have ContextResolved event"
4565        );
4566
4567        // Check context resolved values
4568        for event in &events {
4569            if let AgentEvent::ContextResolved {
4570                total_items,
4571                total_tokens,
4572            } = event
4573            {
4574                assert_eq!(*total_items, 1);
4575                assert_eq!(*total_tokens, 50);
4576            }
4577        }
4578    }
4579
4580    #[tokio::test]
4581    async fn test_agent_multiple_context_providers() {
4582        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4583            "Combined response",
4584        )]));
4585
4586        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4587
4588        let provider1 = MockContextProvider::new("provider-1").with_items(vec![ContextItem::new(
4589            "p1-1",
4590            ContextType::Resource,
4591            "Resource from P1",
4592        )
4593        .with_token_count(100)]);
4594
4595        let provider2 = MockContextProvider::new("provider-2").with_items(vec![
4596            ContextItem::new("p2-1", ContextType::Memory, "Memory from P2").with_token_count(50),
4597            ContextItem::new("p2-2", ContextType::Skill, "Skill from P2").with_token_count(75),
4598        ]);
4599
4600        let config = AgentConfig {
4601            prompt_slots: SystemPromptSlots {
4602                extra: Some("Base system prompt.".to_string()),
4603                ..Default::default()
4604            },
4605            context_providers: vec![Arc::new(provider1), Arc::new(provider2)],
4606            ..Default::default()
4607        };
4608
4609        let (tx, mut rx) = mpsc::channel(100);
4610        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4611        let result = agent.execute(&[], "Query", Some(tx)).await.unwrap();
4612
4613        assert_eq!(result.text, "Combined response");
4614
4615        // Check context resolved event has combined totals
4616        while let Ok(event) = rx.try_recv() {
4617            if let AgentEvent::ContextResolved {
4618                total_items,
4619                total_tokens,
4620            } = event
4621            {
4622                assert_eq!(total_items, 3); // 1 + 2
4623                assert_eq!(total_tokens, 225); // 100 + 50 + 75
4624            }
4625        }
4626    }
4627
4628    #[tokio::test]
4629    async fn test_agent_no_context_providers() {
4630        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4631            "No context",
4632        )]));
4633
4634        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4635
4636        // No context providers
4637        let config = AgentConfig::default();
4638
4639        let (tx, mut rx) = mpsc::channel(100);
4640        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4641        let result = agent.execute(&[], "Simple prompt", Some(tx)).await.unwrap();
4642
4643        assert_eq!(result.text, "No context");
4644
4645        // Should NOT have context events when no providers
4646        let mut events = Vec::new();
4647        while let Ok(event) = rx.try_recv() {
4648            events.push(event);
4649        }
4650
4651        assert!(
4652            !events
4653                .iter()
4654                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
4655            "Should NOT have ContextResolving event"
4656        );
4657    }
4658
4659    #[tokio::test]
4660    async fn test_agent_context_on_turn_complete() {
4661        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4662            "Final response",
4663        )]));
4664
4665        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4666
4667        let provider = Arc::new(MockContextProvider::new("memory-provider"));
4668        let on_turn_calls = provider.on_turn_calls.clone();
4669
4670        let config = AgentConfig {
4671            context_providers: vec![provider],
4672            ..Default::default()
4673        };
4674
4675        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4676
4677        // Execute with session ID
4678        let result = agent
4679            .execute_with_session(&[], "User prompt", Some("sess-123"), None, None)
4680            .await
4681            .unwrap();
4682
4683        assert_eq!(result.text, "Final response");
4684
4685        // Check on_turn_complete was called
4686        let calls = on_turn_calls.read().await;
4687        assert_eq!(calls.len(), 1);
4688        assert_eq!(calls[0].0, "sess-123");
4689        assert_eq!(calls[0].1, "User prompt");
4690        assert_eq!(calls[0].2, "Final response");
4691    }
4692
4693    #[tokio::test]
4694    async fn test_agent_context_on_turn_complete_no_session() {
4695        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4696            "Response",
4697        )]));
4698
4699        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4700
4701        let provider = Arc::new(MockContextProvider::new("memory-provider"));
4702        let on_turn_calls = provider.on_turn_calls.clone();
4703
4704        let config = AgentConfig {
4705            context_providers: vec![provider],
4706            ..Default::default()
4707        };
4708
4709        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4710
4711        // Execute without session ID (uses execute() which passes None)
4712        let _result = agent.execute(&[], "Prompt", None).await.unwrap();
4713
4714        // on_turn_complete should NOT be called when session_id is None
4715        let calls = on_turn_calls.read().await;
4716        assert!(calls.is_empty());
4717    }
4718
4719    #[tokio::test]
4720    async fn test_agent_build_augmented_system_prompt() {
4721        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response("OK")]));
4722
4723        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4724
4725        let provider = MockContextProvider::new("test").with_items(vec![ContextItem::new(
4726            "doc-1",
4727            ContextType::Resource,
4728            "Auth uses JWT tokens.",
4729        )
4730        .with_source("viking://docs/auth")]);
4731
4732        let config = AgentConfig {
4733            prompt_slots: SystemPromptSlots {
4734                extra: Some("You are helpful.".to_string()),
4735                ..Default::default()
4736            },
4737            context_providers: vec![Arc::new(provider)],
4738            ..Default::default()
4739        };
4740
4741        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4742
4743        // Test building augmented prompt
4744        let context_results = agent.resolve_context("test", None).await;
4745        let augmented = agent.build_augmented_system_prompt(&context_results);
4746
4747        let augmented_str = augmented.unwrap();
4748        assert!(augmented_str.contains("You are helpful."));
4749        assert!(augmented_str.contains("<context source=\"viking://docs/auth\" type=\"Resource\">"));
4750        assert!(augmented_str.contains("Auth uses JWT tokens."));
4751    }
4752
4753    // ========================================================================
4754    // Agentic Loop Integration Tests
4755    // ========================================================================
4756
4757    /// Helper: collect all events from a channel
4758    async fn collect_events(mut rx: mpsc::Receiver<AgentEvent>) -> Vec<AgentEvent> {
4759        let mut events = Vec::new();
4760        while let Ok(event) = rx.try_recv() {
4761            events.push(event);
4762        }
4763        // Drain remaining
4764        while let Some(event) = rx.recv().await {
4765            events.push(event);
4766        }
4767        events
4768    }
4769
4770    #[tokio::test]
4771    async fn test_agent_multi_turn_tool_chain() {
4772        // LLM calls tool A → sees result → calls tool B → sees result → final answer
4773        let mock_client = Arc::new(MockLlmClient::new(vec![
4774            // Turn 1: call ls
4775            MockLlmClient::tool_call_response(
4776                "t1",
4777                "bash",
4778                serde_json::json!({"command": "echo step1"}),
4779            ),
4780            // Turn 2: call another tool based on first result
4781            MockLlmClient::tool_call_response(
4782                "t2",
4783                "bash",
4784                serde_json::json!({"command": "echo step2"}),
4785            ),
4786            // Turn 3: final answer
4787            MockLlmClient::text_response("Completed both steps: step1 then step2"),
4788        ]));
4789
4790        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4791        let config = AgentConfig::default();
4792
4793        let agent = AgentLoop::new(
4794            mock_client.clone(),
4795            tool_executor,
4796            test_tool_context(),
4797            config,
4798        );
4799        let result = agent.execute(&[], "Run two steps", None).await.unwrap();
4800
4801        assert_eq!(result.text, "Completed both steps: step1 then step2");
4802        assert_eq!(result.tool_calls_count, 2);
4803        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 3);
4804
4805        // Verify message history: user → assistant(tool_use) → user(tool_result) → assistant(tool_use) → user(tool_result) → assistant(text)
4806        assert_eq!(result.messages[0].role, "user");
4807        assert_eq!(result.messages[1].role, "assistant"); // tool call 1
4808        assert_eq!(result.messages[2].role, "user"); // tool result 1 (Anthropic convention)
4809        assert_eq!(result.messages[3].role, "assistant"); // tool call 2
4810        assert_eq!(result.messages[4].role, "user"); // tool result 2
4811        assert_eq!(result.messages[5].role, "assistant"); // final text
4812        assert_eq!(result.messages.len(), 6);
4813    }
4814
4815    #[tokio::test]
4816    async fn test_agent_conversation_history_preserved() {
4817        // Pass existing history, verify it's preserved in output
4818        let existing_history = vec![
4819            Message::user("What is Rust?"),
4820            Message {
4821                role: "assistant".to_string(),
4822                content: vec![ContentBlock::Text {
4823                    text: "Rust is a systems programming language.".to_string(),
4824                }],
4825                reasoning_content: None,
4826            },
4827        ];
4828
4829        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4830            "Rust was created by Graydon Hoare at Mozilla.",
4831        )]));
4832
4833        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4834        let agent = AgentLoop::new(
4835            mock_client.clone(),
4836            tool_executor,
4837            test_tool_context(),
4838            AgentConfig::default(),
4839        );
4840
4841        let result = agent
4842            .execute(&existing_history, "Who created it?", None)
4843            .await
4844            .unwrap();
4845
4846        // History should contain: old user + old assistant + new user + new assistant
4847        assert_eq!(result.messages.len(), 4);
4848        assert_eq!(result.messages[0].text(), "What is Rust?");
4849        assert_eq!(
4850            result.messages[1].text(),
4851            "Rust is a systems programming language."
4852        );
4853        assert_eq!(result.messages[2].text(), "Who created it?");
4854        assert_eq!(
4855            result.messages[3].text(),
4856            "Rust was created by Graydon Hoare at Mozilla."
4857        );
4858    }
4859
4860    #[tokio::test]
4861    async fn test_agent_event_stream_completeness() {
4862        // Verify full event sequence for a single tool call loop
4863        let mock_client = Arc::new(MockLlmClient::new(vec![
4864            MockLlmClient::tool_call_response(
4865                "t1",
4866                "bash",
4867                serde_json::json!({"command": "echo hi"}),
4868            ),
4869            MockLlmClient::text_response("Done"),
4870        ]));
4871
4872        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4873        let agent = AgentLoop::new(
4874            mock_client,
4875            tool_executor,
4876            test_tool_context(),
4877            AgentConfig::default(),
4878        );
4879
4880        let (tx, rx) = mpsc::channel(100);
4881        let result = agent.execute(&[], "Say hi", Some(tx)).await.unwrap();
4882        assert_eq!(result.text, "Done");
4883
4884        let events = collect_events(rx).await;
4885
4886        // Verify event sequence
4887        let event_types: Vec<&str> = events
4888            .iter()
4889            .map(|e| match e {
4890                AgentEvent::Start { .. } => "Start",
4891                AgentEvent::TurnStart { .. } => "TurnStart",
4892                AgentEvent::TurnEnd { .. } => "TurnEnd",
4893                AgentEvent::ToolEnd { .. } => "ToolEnd",
4894                AgentEvent::End { .. } => "End",
4895                _ => "Other",
4896            })
4897            .collect();
4898
4899        // Must start with Start, end with End
4900        assert_eq!(event_types.first(), Some(&"Start"));
4901        assert_eq!(event_types.last(), Some(&"End"));
4902
4903        // Must have 2 TurnStarts (tool call turn + final answer turn)
4904        let turn_starts = event_types.iter().filter(|&&t| t == "TurnStart").count();
4905        assert_eq!(turn_starts, 2);
4906
4907        // Must have 1 ToolEnd
4908        let tool_ends = event_types.iter().filter(|&&t| t == "ToolEnd").count();
4909        assert_eq!(tool_ends, 1);
4910    }
4911
4912    #[tokio::test]
4913    async fn test_agent_multiple_tools_single_turn() {
4914        // LLM returns 2 tool calls in one response
4915        let mock_client = Arc::new(MockLlmClient::new(vec![
4916            LlmResponse {
4917                message: Message {
4918                    role: "assistant".to_string(),
4919                    content: vec![
4920                        ContentBlock::ToolUse {
4921                            id: "t1".to_string(),
4922                            name: "bash".to_string(),
4923                            input: serde_json::json!({"command": "echo first"}),
4924                        },
4925                        ContentBlock::ToolUse {
4926                            id: "t2".to_string(),
4927                            name: "bash".to_string(),
4928                            input: serde_json::json!({"command": "echo second"}),
4929                        },
4930                    ],
4931                    reasoning_content: None,
4932                },
4933                usage: TokenUsage {
4934                    prompt_tokens: 10,
4935                    completion_tokens: 5,
4936                    total_tokens: 15,
4937                    cache_read_tokens: None,
4938                    cache_write_tokens: None,
4939                },
4940                stop_reason: Some("tool_use".to_string()),
4941                meta: None,
4942            },
4943            MockLlmClient::text_response("Both commands ran"),
4944        ]));
4945
4946        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4947        let agent = AgentLoop::new(
4948            mock_client.clone(),
4949            tool_executor,
4950            test_tool_context(),
4951            AgentConfig::default(),
4952        );
4953
4954        let result = agent.execute(&[], "Run both", None).await.unwrap();
4955
4956        assert_eq!(result.text, "Both commands ran");
4957        assert_eq!(result.tool_calls_count, 2);
4958        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2); // Only 2 LLM calls
4959
4960        // Messages: user → assistant(2 tools) → user(tool_result) → user(tool_result) → assistant(text)
4961        assert_eq!(result.messages[0].role, "user");
4962        assert_eq!(result.messages[1].role, "assistant");
4963        assert_eq!(result.messages[2].role, "user"); // tool result 1
4964        assert_eq!(result.messages[3].role, "user"); // tool result 2
4965        assert_eq!(result.messages[4].role, "assistant");
4966    }
4967
4968    #[tokio::test]
4969    async fn test_agent_token_usage_accumulation() {
4970        // Verify usage sums across multiple turns
4971        let mock_client = Arc::new(MockLlmClient::new(vec![
4972            MockLlmClient::tool_call_response(
4973                "t1",
4974                "bash",
4975                serde_json::json!({"command": "echo x"}),
4976            ),
4977            MockLlmClient::text_response("Done"),
4978        ]));
4979
4980        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4981        let agent = AgentLoop::new(
4982            mock_client,
4983            tool_executor,
4984            test_tool_context(),
4985            AgentConfig::default(),
4986        );
4987
4988        let result = agent.execute(&[], "test", None).await.unwrap();
4989
4990        // Each mock response has prompt=10, completion=5, total=15
4991        // 2 LLM calls → 20 prompt, 10 completion, 30 total
4992        assert_eq!(result.usage.prompt_tokens, 20);
4993        assert_eq!(result.usage.completion_tokens, 10);
4994        assert_eq!(result.usage.total_tokens, 30);
4995    }
4996
4997    #[tokio::test]
4998    async fn test_agent_system_prompt_passed() {
4999        // Verify system prompt is used (MockLlmClient captures calls)
5000        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5001            "I am a coding assistant.",
5002        )]));
5003
5004        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5005        let config = AgentConfig {
5006            prompt_slots: SystemPromptSlots {
5007                extra: Some("You are a coding assistant.".to_string()),
5008                ..Default::default()
5009            },
5010            ..Default::default()
5011        };
5012
5013        let agent = AgentLoop::new(
5014            mock_client.clone(),
5015            tool_executor,
5016            test_tool_context(),
5017            config,
5018        );
5019        let result = agent.execute(&[], "What are you?", None).await.unwrap();
5020
5021        assert_eq!(result.text, "I am a coding assistant.");
5022        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
5023    }
5024
5025    #[tokio::test]
5026    async fn test_agent_max_rounds_with_persistent_tool_calls() {
5027        // LLM keeps calling tools forever — should hit max_tool_rounds
5028        let mut responses = Vec::new();
5029        for i in 0..15 {
5030            responses.push(MockLlmClient::tool_call_response(
5031                &format!("t{}", i),
5032                "bash",
5033                serde_json::json!({"command": format!("echo round{}", i)}),
5034            ));
5035        }
5036
5037        let mock_client = Arc::new(MockLlmClient::new(responses));
5038        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5039        let config = AgentConfig {
5040            max_tool_rounds: 5,
5041            ..Default::default()
5042        };
5043
5044        let agent = AgentLoop::new(
5045            mock_client.clone(),
5046            tool_executor,
5047            test_tool_context(),
5048            config,
5049        );
5050        let result = agent.execute(&[], "Loop forever", None).await;
5051
5052        assert!(result.is_err());
5053        let err = result.unwrap_err().to_string();
5054        assert!(err.contains("Max tool rounds (5) exceeded"));
5055    }
5056
5057    #[tokio::test]
5058    async fn test_agent_end_event_contains_final_text() {
5059        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5060            "Final answer here",
5061        )]));
5062
5063        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5064        let agent = AgentLoop::new(
5065            mock_client,
5066            tool_executor,
5067            test_tool_context(),
5068            AgentConfig::default(),
5069        );
5070
5071        let (tx, rx) = mpsc::channel(100);
5072        agent.execute(&[], "test", Some(tx)).await.unwrap();
5073
5074        let events = collect_events(rx).await;
5075        let end_event = events.iter().find(|e| matches!(e, AgentEvent::End { .. }));
5076        assert!(end_event.is_some());
5077
5078        if let AgentEvent::End { text, usage, .. } = end_event.unwrap() {
5079            assert_eq!(text, "Final answer here");
5080            assert_eq!(usage.total_tokens, 15);
5081        }
5082    }
5083}
5084
5085#[cfg(test)]
5086mod extra_agent_tests {
5087    use super::*;
5088    use crate::agent::tests::MockLlmClient;
5089    use crate::queue::SessionQueueConfig;
5090    use crate::tools::ToolExecutor;
5091    use std::path::PathBuf;
5092    use std::sync::atomic::{AtomicUsize, Ordering};
5093
5094    fn test_tool_context() -> ToolContext {
5095        ToolContext::new(PathBuf::from("/tmp"))
5096    }
5097
5098    // ========================================================================
5099    // AgentConfig
5100    // ========================================================================
5101
5102    #[test]
5103    fn test_agent_config_debug() {
5104        let config = AgentConfig {
5105            prompt_slots: SystemPromptSlots {
5106                extra: Some("You are helpful".to_string()),
5107                ..Default::default()
5108            },
5109            tools: vec![],
5110            max_tool_rounds: 10,
5111            permission_checker: None,
5112            confirmation_manager: None,
5113            context_providers: vec![],
5114            planning_mode: PlanningMode::Enabled,
5115            goal_tracking: false,
5116            hook_engine: None,
5117            skill_registry: None,
5118            ..AgentConfig::default()
5119        };
5120        let debug = format!("{:?}", config);
5121        assert!(debug.contains("AgentConfig"));
5122        assert!(debug.contains("planning_mode"));
5123    }
5124
5125    #[test]
5126    fn test_agent_config_default_values() {
5127        let config = AgentConfig::default();
5128        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
5129        assert_eq!(config.planning_mode, PlanningMode::Auto);
5130        assert!(!config.goal_tracking);
5131        assert!(config.context_providers.is_empty());
5132    }
5133
5134    // ========================================================================
5135    // AgentEvent serialization
5136    // ========================================================================
5137
5138    #[test]
5139    fn test_agent_event_serialize_start() {
5140        let event = AgentEvent::Start {
5141            prompt: "Hello".to_string(),
5142        };
5143        let json = serde_json::to_string(&event).unwrap();
5144        assert!(json.contains("agent_start"));
5145        assert!(json.contains("Hello"));
5146    }
5147
5148    #[test]
5149    fn test_agent_event_serialize_text_delta() {
5150        let event = AgentEvent::TextDelta {
5151            text: "chunk".to_string(),
5152        };
5153        let json = serde_json::to_string(&event).unwrap();
5154        assert!(json.contains("text_delta"));
5155    }
5156
5157    #[test]
5158    fn test_agent_event_serialize_tool_start() {
5159        let event = AgentEvent::ToolStart {
5160            id: "t1".to_string(),
5161            name: "bash".to_string(),
5162        };
5163        let json = serde_json::to_string(&event).unwrap();
5164        assert!(json.contains("tool_start"));
5165        assert!(json.contains("bash"));
5166    }
5167
5168    #[test]
5169    fn test_agent_event_serialize_tool_end() {
5170        let event = AgentEvent::ToolEnd {
5171            id: "t1".to_string(),
5172            name: "bash".to_string(),
5173            output: "hello".to_string(),
5174            exit_code: 0,
5175            metadata: None,
5176        };
5177        let json = serde_json::to_string(&event).unwrap();
5178        assert!(json.contains("tool_end"));
5179    }
5180
5181    #[test]
5182    fn test_agent_event_tool_end_has_metadata_field() {
5183        let event = AgentEvent::ToolEnd {
5184            id: "t1".to_string(),
5185            name: "write".to_string(),
5186            output: "Wrote 5 bytes".to_string(),
5187            exit_code: 0,
5188            metadata: Some(
5189                serde_json::json!({ "before": "old", "after": "new", "file_path": "f.txt" }),
5190            ),
5191        };
5192        let json = serde_json::to_string(&event).unwrap();
5193        assert!(json.contains("\"before\""));
5194    }
5195
5196    #[test]
5197    fn test_agent_event_serialize_error() {
5198        let event = AgentEvent::Error {
5199            message: "oops".to_string(),
5200        };
5201        let json = serde_json::to_string(&event).unwrap();
5202        assert!(json.contains("error"));
5203        assert!(json.contains("oops"));
5204    }
5205
5206    #[test]
5207    fn test_agent_event_serialize_confirmation_required() {
5208        let event = AgentEvent::ConfirmationRequired {
5209            tool_id: "t1".to_string(),
5210            tool_name: "bash".to_string(),
5211            args: serde_json::json!({"cmd": "rm"}),
5212            timeout_ms: 30000,
5213        };
5214        let json = serde_json::to_string(&event).unwrap();
5215        assert!(json.contains("confirmation_required"));
5216    }
5217
5218    #[test]
5219    fn test_agent_event_serialize_confirmation_received() {
5220        let event = AgentEvent::ConfirmationReceived {
5221            tool_id: "t1".to_string(),
5222            approved: true,
5223            reason: Some("safe".to_string()),
5224        };
5225        let json = serde_json::to_string(&event).unwrap();
5226        assert!(json.contains("confirmation_received"));
5227    }
5228
5229    #[test]
5230    fn test_agent_event_serialize_confirmation_timeout() {
5231        let event = AgentEvent::ConfirmationTimeout {
5232            tool_id: "t1".to_string(),
5233            action_taken: "rejected".to_string(),
5234        };
5235        let json = serde_json::to_string(&event).unwrap();
5236        assert!(json.contains("confirmation_timeout"));
5237    }
5238
5239    #[test]
5240    fn test_agent_event_serialize_external_task_pending() {
5241        let event = AgentEvent::ExternalTaskPending {
5242            task_id: "task-1".to_string(),
5243            session_id: "sess-1".to_string(),
5244            lane: crate::hitl::SessionLane::Execute,
5245            command_type: "bash".to_string(),
5246            payload: serde_json::json!({}),
5247            timeout_ms: 60000,
5248        };
5249        let json = serde_json::to_string(&event).unwrap();
5250        assert!(json.contains("external_task_pending"));
5251    }
5252
5253    #[test]
5254    fn test_agent_event_serialize_external_task_completed() {
5255        let event = AgentEvent::ExternalTaskCompleted {
5256            task_id: "task-1".to_string(),
5257            session_id: "sess-1".to_string(),
5258            success: false,
5259        };
5260        let json = serde_json::to_string(&event).unwrap();
5261        assert!(json.contains("external_task_completed"));
5262    }
5263
5264    #[test]
5265    fn test_agent_event_serialize_permission_denied() {
5266        let event = AgentEvent::PermissionDenied {
5267            tool_id: "t1".to_string(),
5268            tool_name: "bash".to_string(),
5269            args: serde_json::json!({}),
5270            reason: "denied".to_string(),
5271        };
5272        let json = serde_json::to_string(&event).unwrap();
5273        assert!(json.contains("permission_denied"));
5274    }
5275
5276    #[test]
5277    fn test_agent_event_serialize_context_compacted() {
5278        let event = AgentEvent::ContextCompacted {
5279            session_id: "sess-1".to_string(),
5280            before_messages: 100,
5281            after_messages: 20,
5282            percent_before: 0.85,
5283        };
5284        let json = serde_json::to_string(&event).unwrap();
5285        assert!(json.contains("context_compacted"));
5286    }
5287
5288    #[test]
5289    fn test_agent_event_serialize_turn_start() {
5290        let event = AgentEvent::TurnStart { turn: 3 };
5291        let json = serde_json::to_string(&event).unwrap();
5292        assert!(json.contains("turn_start"));
5293    }
5294
5295    #[test]
5296    fn test_agent_event_serialize_turn_end() {
5297        let event = AgentEvent::TurnEnd {
5298            turn: 3,
5299            usage: TokenUsage::default(),
5300        };
5301        let json = serde_json::to_string(&event).unwrap();
5302        assert!(json.contains("turn_end"));
5303    }
5304
5305    #[test]
5306    fn test_agent_event_serialize_end() {
5307        let event = AgentEvent::End {
5308            text: "Done".to_string(),
5309            usage: TokenUsage {
5310                prompt_tokens: 100,
5311                completion_tokens: 50,
5312                total_tokens: 150,
5313                cache_read_tokens: None,
5314                cache_write_tokens: None,
5315            },
5316            meta: None,
5317        };
5318        let json = serde_json::to_string(&event).unwrap();
5319        assert!(json.contains("agent_end"));
5320    }
5321
5322    // ========================================================================
5323    // AgentResult
5324    // ========================================================================
5325
5326    #[test]
5327    fn test_agent_result_fields() {
5328        let result = AgentResult {
5329            text: "output".to_string(),
5330            messages: vec![Message::user("hello")],
5331            usage: TokenUsage::default(),
5332            tool_calls_count: 3,
5333        };
5334        assert_eq!(result.text, "output");
5335        assert_eq!(result.messages.len(), 1);
5336        assert_eq!(result.tool_calls_count, 3);
5337    }
5338
5339    // ========================================================================
    // Additional AgentEvent serialization tests (previously missing coverage)
5341    // ========================================================================
5342
5343    #[test]
5344    fn test_agent_event_serialize_context_resolving() {
5345        let event = AgentEvent::ContextResolving {
5346            providers: vec!["provider1".to_string(), "provider2".to_string()],
5347        };
5348        let json = serde_json::to_string(&event).unwrap();
5349        assert!(json.contains("context_resolving"));
5350        assert!(json.contains("provider1"));
5351    }
5352
5353    #[test]
5354    fn test_agent_event_serialize_context_resolved() {
5355        let event = AgentEvent::ContextResolved {
5356            total_items: 5,
5357            total_tokens: 1000,
5358        };
5359        let json = serde_json::to_string(&event).unwrap();
5360        assert!(json.contains("context_resolved"));
5361        assert!(json.contains("1000"));
5362    }
5363
5364    #[test]
5365    fn test_agent_event_serialize_command_dead_lettered() {
5366        let event = AgentEvent::CommandDeadLettered {
5367            command_id: "cmd-1".to_string(),
5368            command_type: "bash".to_string(),
5369            lane: "execute".to_string(),
5370            error: "timeout".to_string(),
5371            attempts: 3,
5372        };
5373        let json = serde_json::to_string(&event).unwrap();
5374        assert!(json.contains("command_dead_lettered"));
5375        assert!(json.contains("cmd-1"));
5376    }
5377
5378    #[test]
5379    fn test_agent_event_serialize_command_retry() {
5380        let event = AgentEvent::CommandRetry {
5381            command_id: "cmd-2".to_string(),
5382            command_type: "read".to_string(),
5383            lane: "query".to_string(),
5384            attempt: 2,
5385            delay_ms: 1000,
5386        };
5387        let json = serde_json::to_string(&event).unwrap();
5388        assert!(json.contains("command_retry"));
5389        assert!(json.contains("cmd-2"));
5390    }
5391
5392    #[test]
5393    fn test_agent_event_serialize_queue_alert() {
5394        let event = AgentEvent::QueueAlert {
5395            level: "warning".to_string(),
5396            alert_type: "depth".to_string(),
5397            message: "Queue depth exceeded".to_string(),
5398        };
5399        let json = serde_json::to_string(&event).unwrap();
5400        assert!(json.contains("queue_alert"));
5401        assert!(json.contains("warning"));
5402    }
5403
5404    #[test]
5405    fn test_agent_event_serialize_task_updated() {
5406        let event = AgentEvent::TaskUpdated {
5407            session_id: "sess-1".to_string(),
5408            tasks: vec![],
5409        };
5410        let json = serde_json::to_string(&event).unwrap();
5411        assert!(json.contains("task_updated"));
5412        assert!(json.contains("sess-1"));
5413    }
5414
5415    #[test]
5416    fn test_agent_event_serialize_memory_stored() {
5417        let event = AgentEvent::MemoryStored {
5418            memory_id: "mem-1".to_string(),
5419            memory_type: "conversation".to_string(),
5420            importance: 0.8,
5421            tags: vec!["important".to_string()],
5422        };
5423        let json = serde_json::to_string(&event).unwrap();
5424        assert!(json.contains("memory_stored"));
5425        assert!(json.contains("mem-1"));
5426    }
5427
5428    #[test]
5429    fn test_agent_event_serialize_memory_recalled() {
5430        let event = AgentEvent::MemoryRecalled {
5431            memory_id: "mem-2".to_string(),
5432            content: "Previous conversation".to_string(),
5433            relevance: 0.9,
5434        };
5435        let json = serde_json::to_string(&event).unwrap();
5436        assert!(json.contains("memory_recalled"));
5437        assert!(json.contains("mem-2"));
5438    }
5439
5440    #[test]
5441    fn test_agent_event_serialize_memories_searched() {
5442        let event = AgentEvent::MemoriesSearched {
5443            query: Some("search term".to_string()),
5444            tags: vec!["tag1".to_string()],
5445            result_count: 5,
5446        };
5447        let json = serde_json::to_string(&event).unwrap();
5448        assert!(json.contains("memories_searched"));
5449        assert!(json.contains("search term"));
5450    }
5451
5452    #[test]
5453    fn test_agent_event_serialize_memory_cleared() {
5454        let event = AgentEvent::MemoryCleared {
5455            tier: "short_term".to_string(),
5456            count: 10,
5457        };
5458        let json = serde_json::to_string(&event).unwrap();
5459        assert!(json.contains("memory_cleared"));
5460        assert!(json.contains("short_term"));
5461    }
5462
5463    #[test]
5464    fn test_agent_event_serialize_subagent_start() {
5465        let event = AgentEvent::SubagentStart {
5466            task_id: "task-1".to_string(),
5467            session_id: "child-sess".to_string(),
5468            parent_session_id: "parent-sess".to_string(),
5469            agent: "explore".to_string(),
5470            description: "Explore codebase".to_string(),
5471        };
5472        let json = serde_json::to_string(&event).unwrap();
5473        assert!(json.contains("subagent_start"));
5474        assert!(json.contains("explore"));
5475    }
5476
5477    #[test]
5478    fn test_agent_event_serialize_subagent_progress() {
5479        let event = AgentEvent::SubagentProgress {
5480            task_id: "task-1".to_string(),
5481            session_id: "child-sess".to_string(),
5482            status: "processing".to_string(),
5483            metadata: serde_json::json!({"progress": 50}),
5484        };
5485        let json = serde_json::to_string(&event).unwrap();
5486        assert!(json.contains("subagent_progress"));
5487        assert!(json.contains("processing"));
5488    }
5489
5490    #[test]
5491    fn test_agent_event_serialize_subagent_end() {
5492        let event = AgentEvent::SubagentEnd {
5493            task_id: "task-1".to_string(),
5494            session_id: "child-sess".to_string(),
5495            agent: "explore".to_string(),
5496            output: "Found 10 files".to_string(),
5497            success: true,
5498        };
5499        let json = serde_json::to_string(&event).unwrap();
5500        assert!(json.contains("subagent_end"));
5501        assert!(json.contains("Found 10 files"));
5502    }
5503
5504    #[test]
5505    fn test_agent_event_serialize_planning_start() {
5506        let event = AgentEvent::PlanningStart {
5507            prompt: "Build a web app".to_string(),
5508        };
5509        let json = serde_json::to_string(&event).unwrap();
5510        assert!(json.contains("planning_start"));
5511        assert!(json.contains("Build a web app"));
5512    }
5513
5514    #[test]
5515    fn test_agent_event_serialize_planning_end() {
5516        use crate::planning::{Complexity, ExecutionPlan};
5517        let plan = ExecutionPlan::new("Test goal".to_string(), Complexity::Simple);
5518        let event = AgentEvent::PlanningEnd {
5519            plan,
5520            estimated_steps: 3,
5521        };
5522        let json = serde_json::to_string(&event).unwrap();
5523        assert!(json.contains("planning_end"));
5524        assert!(json.contains("estimated_steps"));
5525    }
5526
5527    #[test]
5528    fn test_agent_event_serialize_step_start() {
5529        let event = AgentEvent::StepStart {
5530            step_id: "step-1".to_string(),
5531            description: "Initialize project".to_string(),
5532            step_number: 1,
5533            total_steps: 5,
5534        };
5535        let json = serde_json::to_string(&event).unwrap();
5536        assert!(json.contains("step_start"));
5537        assert!(json.contains("Initialize project"));
5538    }
5539
5540    #[test]
5541    fn test_agent_event_serialize_step_end() {
5542        let event = AgentEvent::StepEnd {
5543            step_id: "step-1".to_string(),
5544            status: TaskStatus::Completed,
5545            step_number: 1,
5546            total_steps: 5,
5547        };
5548        let json = serde_json::to_string(&event).unwrap();
5549        assert!(json.contains("step_end"));
5550        assert!(json.contains("step-1"));
5551    }
5552
5553    #[test]
5554    fn test_agent_event_serialize_goal_extracted() {
5555        use crate::planning::AgentGoal;
5556        let goal = AgentGoal::new("Complete the task".to_string());
5557        let event = AgentEvent::GoalExtracted { goal };
5558        let json = serde_json::to_string(&event).unwrap();
5559        assert!(json.contains("goal_extracted"));
5560    }
5561
5562    #[test]
5563    fn test_agent_event_serialize_goal_progress() {
5564        let event = AgentEvent::GoalProgress {
5565            goal: "Build app".to_string(),
5566            progress: 0.5,
5567            completed_steps: 2,
5568            total_steps: 4,
5569        };
5570        let json = serde_json::to_string(&event).unwrap();
5571        assert!(json.contains("goal_progress"));
5572        assert!(json.contains("0.5"));
5573    }
5574
5575    #[test]
5576    fn test_agent_event_serialize_goal_achieved() {
5577        let event = AgentEvent::GoalAchieved {
5578            goal: "Build app".to_string(),
5579            total_steps: 4,
5580            duration_ms: 5000,
5581        };
5582        let json = serde_json::to_string(&event).unwrap();
5583        assert!(json.contains("goal_achieved"));
5584        assert!(json.contains("5000"));
5585    }
5586
5587    #[tokio::test]
5588    async fn test_extract_goal_with_json_response() {
5589        // LlmPlanner expects JSON with "description" and "success_criteria" fields
5590        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5591            r#"{"description": "Build web app", "success_criteria": ["App runs on port 3000", "Has login page"]}"#,
5592        )]));
5593        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5594        let agent = AgentLoop::new(
5595            mock_client,
5596            tool_executor,
5597            test_tool_context(),
5598            AgentConfig::default(),
5599        );
5600
5601        let goal = agent.extract_goal("Build a web app").await.unwrap();
5602        assert_eq!(goal.description, "Build web app");
5603        assert_eq!(goal.success_criteria.len(), 2);
5604        assert_eq!(goal.success_criteria[0], "App runs on port 3000");
5605    }
5606
5607    #[tokio::test]
5608    async fn test_extract_goal_fallback_on_non_json() {
5609        // Non-JSON response triggers fallback: returns the original prompt as goal
5610        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5611            "Some non-JSON response",
5612        )]));
5613        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5614        let agent = AgentLoop::new(
5615            mock_client,
5616            tool_executor,
5617            test_tool_context(),
5618            AgentConfig::default(),
5619        );
5620
5621        let goal = agent.extract_goal("Do something").await.unwrap();
5622        // Fallback uses the original prompt as description
5623        assert_eq!(goal.description, "Do something");
5624        // Fallback adds 2 generic criteria
5625        assert_eq!(goal.success_criteria.len(), 2);
5626    }
5627
5628    #[tokio::test]
5629    async fn test_check_goal_achievement_json_yes() {
5630        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5631            r#"{"achieved": true, "progress": 1.0, "remaining_criteria": []}"#,
5632        )]));
5633        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5634        let agent = AgentLoop::new(
5635            mock_client,
5636            tool_executor,
5637            test_tool_context(),
5638            AgentConfig::default(),
5639        );
5640
5641        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
5642        let achieved = agent
5643            .check_goal_achievement(&goal, "All done")
5644            .await
5645            .unwrap();
5646        assert!(achieved);
5647    }
5648
5649    #[tokio::test]
5650    async fn test_check_goal_achievement_fallback_not_done() {
5651        // Non-JSON response triggers heuristic fallback
5652        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5653            "invalid json",
5654        )]));
5655        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5656        let agent = AgentLoop::new(
5657            mock_client,
5658            tool_executor,
5659            test_tool_context(),
5660            AgentConfig::default(),
5661        );
5662
5663        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
5664        // "still working" doesn't contain "complete"/"done"/"finished"
5665        let achieved = agent
5666            .check_goal_achievement(&goal, "still working")
5667            .await
5668            .unwrap();
5669        assert!(!achieved);
5670    }
5671
5672    // ========================================================================
5673    // build_augmented_system_prompt Tests
5674    // ========================================================================
5675
5676    #[test]
5677    fn test_build_augmented_system_prompt_empty_context() {
5678        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5679        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5680        let config = AgentConfig {
5681            prompt_slots: SystemPromptSlots {
5682                extra: Some("Base prompt".to_string()),
5683                ..Default::default()
5684            },
5685            ..Default::default()
5686        };
5687        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5688
5689        let result = agent.build_augmented_system_prompt(&[]);
5690        assert!(result.unwrap().contains("Base prompt"));
5691    }
5692
5693    #[test]
5694    fn test_build_augmented_system_prompt_no_custom_slots() {
5695        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5696        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5697        let agent = AgentLoop::new(
5698            mock_client,
5699            tool_executor,
5700            test_tool_context(),
5701            AgentConfig::default(),
5702        );
5703
5704        let result = agent.build_augmented_system_prompt(&[]);
5705        // Default slots still produce the default agentic prompt
5706        assert!(result.is_some());
5707        assert!(result.unwrap().contains("Core Behaviour"));
5708    }
5709
5710    #[test]
5711    fn test_build_augmented_system_prompt_with_context_no_base() {
5712        use crate::context::{ContextItem, ContextResult, ContextType};
5713
5714        let mock_client = Arc::new(MockLlmClient::new(vec![]));
5715        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5716        let agent = AgentLoop::new(
5717            mock_client,
5718            tool_executor,
5719            test_tool_context(),
5720            AgentConfig::default(),
5721        );
5722
5723        let context = vec![ContextResult {
5724            provider: "test".to_string(),
5725            items: vec![ContextItem::new("id1", ContextType::Resource, "Content")],
5726            total_tokens: 10,
5727            truncated: false,
5728        }];
5729
5730        let result = agent.build_augmented_system_prompt(&context);
5731        assert!(result.is_some());
5732        let text = result.unwrap();
5733        assert!(text.contains("<context"));
5734        assert!(text.contains("Content"));
5735    }
5736
5737    // ========================================================================
5738    // AgentResult Clone and Debug
5739    // ========================================================================
5740
5741    #[test]
5742    fn test_agent_result_clone() {
5743        let result = AgentResult {
5744            text: "output".to_string(),
5745            messages: vec![Message::user("hello")],
5746            usage: TokenUsage::default(),
5747            tool_calls_count: 3,
5748        };
5749        let cloned = result.clone();
5750        assert_eq!(cloned.text, result.text);
5751        assert_eq!(cloned.tool_calls_count, result.tool_calls_count);
5752    }
5753
5754    #[test]
5755    fn test_agent_result_debug() {
5756        let result = AgentResult {
5757            text: "output".to_string(),
5758            messages: vec![Message::user("hello")],
5759            usage: TokenUsage::default(),
5760            tool_calls_count: 3,
5761        };
5762        let debug = format!("{:?}", result);
5763        assert!(debug.contains("AgentResult"));
5764        assert!(debug.contains("output"));
5765    }
5766
5767    // ========================================================================
5768    // handle_post_execution_metadata Tests
5769    // ========================================================================
5770
5771    // ========================================================================
5772    // ToolCommand adapter tests
5773    // ========================================================================
5774
5775    #[tokio::test]
5776    async fn test_tool_command_command_type() {
5777        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5778        let cmd = ToolCommand {
5779            tool_executor: executor,
5780            tool_name: "read".to_string(),
5781            tool_args: serde_json::json!({"file": "test.rs"}),
5782            skill_registry: None,
5783            tool_context: test_tool_context(),
5784        };
5785        assert_eq!(cmd.command_type(), "read");
5786    }
5787
5788    #[tokio::test]
5789    async fn test_tool_command_payload() {
5790        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5791        let args = serde_json::json!({"file": "test.rs", "offset": 10});
5792        let cmd = ToolCommand {
5793            tool_executor: executor,
5794            tool_name: "read".to_string(),
5795            tool_args: args.clone(),
5796            skill_registry: None,
5797            tool_context: test_tool_context(),
5798        };
5799        assert_eq!(cmd.payload(), args);
5800    }
5801
5802    // ========================================================================
5803    // AgentLoop with queue builder tests
5804    // ========================================================================
5805
5806    #[tokio::test(flavor = "multi_thread")]
5807    async fn test_agent_loop_with_queue() {
5808        use tokio::sync::broadcast;
5809
5810        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5811            "Hello",
5812        )]));
5813        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5814        let config = AgentConfig::default();
5815
5816        let (event_tx, _) = broadcast::channel(100);
5817        let queue = SessionLaneQueue::new("test-session", SessionQueueConfig::default(), event_tx)
5818            .await
5819            .unwrap();
5820
5821        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config)
5822            .with_queue(Arc::new(queue));
5823
5824        assert!(agent.command_queue.is_some());
5825    }
5826
5827    #[tokio::test]
5828    async fn test_agent_loop_without_queue() {
5829        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5830            "Hello",
5831        )]));
5832        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5833        let config = AgentConfig::default();
5834
5835        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5836
5837        assert!(agent.command_queue.is_none());
5838    }
5839
5840    // ========================================================================
5841    // Parallel Plan Execution Tests
5842    // ========================================================================
5843
5844    #[tokio::test]
5845    async fn test_execute_plan_parallel_independent() {
5846        use crate::planning::{Complexity, ExecutionPlan, Task};
5847
5848        // 3 independent steps (no dependencies) — should all execute.
5849        // MockLlmClient needs one response per execute_loop call per step.
5850        let mock_client = Arc::new(MockLlmClient::new(vec![
5851            MockLlmClient::text_response("Step 1 done"),
5852            MockLlmClient::text_response("Step 2 done"),
5853            MockLlmClient::text_response("Step 3 done"),
5854        ]));
5855
5856        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5857        let config = AgentConfig::default();
5858        let agent = AgentLoop::new(
5859            mock_client.clone(),
5860            tool_executor,
5861            test_tool_context(),
5862            config,
5863        );
5864
5865        let mut plan = ExecutionPlan::new("Test parallel", Complexity::Simple);
5866        plan.add_step(Task::new("s1", "First step"));
5867        plan.add_step(Task::new("s2", "Second step"));
5868        plan.add_step(Task::new("s3", "Third step"));
5869
5870        let (tx, mut rx) = mpsc::channel(100);
5871        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
5872
5873        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
5874        assert_eq!(result.usage.total_tokens, 45);
5875
5876        // Verify we received StepStart and StepEnd events for all 3 steps
5877        let mut step_starts = Vec::new();
5878        let mut step_ends = Vec::new();
5879        rx.close();
5880        while let Some(event) = rx.recv().await {
5881            match event {
5882                AgentEvent::StepStart { step_id, .. } => step_starts.push(step_id),
5883                AgentEvent::StepEnd {
5884                    step_id, status, ..
5885                } => {
5886                    assert_eq!(status, TaskStatus::Completed);
5887                    step_ends.push(step_id);
5888                }
5889                _ => {}
5890            }
5891        }
5892        assert_eq!(step_starts.len(), 3);
5893        assert_eq!(step_ends.len(), 3);
5894    }
5895
5896    #[tokio::test]
5897    async fn test_execute_plan_respects_dependencies() {
5898        use crate::planning::{Complexity, ExecutionPlan, Task};
5899
5900        // s1 and s2 are independent (wave 1), s3 depends on both (wave 2).
5901        // This requires 3 responses total.
5902        let mock_client = Arc::new(MockLlmClient::new(vec![
5903            MockLlmClient::text_response("Step 1 done"),
5904            MockLlmClient::text_response("Step 2 done"),
5905            MockLlmClient::text_response("Step 3 done"),
5906        ]));
5907
5908        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5909        let config = AgentConfig::default();
5910        let agent = AgentLoop::new(
5911            mock_client.clone(),
5912            tool_executor,
5913            test_tool_context(),
5914            config,
5915        );
5916
5917        let mut plan = ExecutionPlan::new("Test deps", Complexity::Medium);
5918        plan.add_step(Task::new("s1", "Independent A"));
5919        plan.add_step(Task::new("s2", "Independent B"));
5920        plan.add_step(
5921            Task::new("s3", "Depends on A+B")
5922                .with_dependencies(vec!["s1".to_string(), "s2".to_string()]),
5923        );
5924
5925        let (tx, mut rx) = mpsc::channel(100);
5926        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
5927
5928        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
5929        assert_eq!(result.usage.total_tokens, 45);
5930
5931        // Verify ordering: s3's StepStart must come after s1 and s2's StepEnd
5932        let mut events = Vec::new();
5933        rx.close();
5934        while let Some(event) = rx.recv().await {
5935            match &event {
5936                AgentEvent::StepStart { step_id, .. } => {
5937                    events.push(format!("start:{}", step_id));
5938                }
5939                AgentEvent::StepEnd { step_id, .. } => {
5940                    events.push(format!("end:{}", step_id));
5941                }
5942                _ => {}
5943            }
5944        }
5945
5946        // s3 start must occur after both s1 end and s2 end
5947        let s1_end = events.iter().position(|e| e == "end:s1").unwrap();
5948        let s2_end = events.iter().position(|e| e == "end:s2").unwrap();
5949        let s3_start = events.iter().position(|e| e == "start:s3").unwrap();
5950        assert!(
5951            s3_start > s1_end,
5952            "s3 started before s1 ended: {:?}",
5953            events
5954        );
5955        assert!(
5956            s3_start > s2_end,
5957            "s3 started before s2 ended: {:?}",
5958            events
5959        );
5960
5961        // Final result should reflect step 3 (last sequential step)
5962        assert!(result.text.contains("Step 3 done") || !result.text.is_empty());
5963    }
5964
5965    #[tokio::test]
5966    async fn test_execute_plan_handles_step_failure() {
5967        use crate::planning::{Complexity, ExecutionPlan, Task};
5968
5969        // s1 succeeds, s2 depends on s1 (succeeds), s3 depends on nothing (succeeds),
5970        // s4 depends on a step that will fail (s_fail).
5971        // We simulate failure by providing no responses for s_fail's execute_loop.
5972        //
5973        // Simpler approach: s1 succeeds, s2 depends on s1 (will fail because no
5974        // mock response left), s3 is independent.
5975        // Layout: s1 (independent), s3 (independent) → wave 1 parallel
5976        //         s2 depends on s1 → wave 2
5977        //         s4 depends on s2 → wave 3 (should deadlock since s2 fails)
5978        let mock_client = Arc::new(MockLlmClient::new(vec![
5979            // Wave 1: s1 and s3 execute in parallel
5980            MockLlmClient::text_response("s1 done"),
5981            MockLlmClient::text_response("s3 done"),
5982            // Wave 2: s2 executes — but we give it no response, causing failure
5983            // Actually the MockLlmClient will fail with "No more mock responses"
5984        ]));
5985
5986        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5987        let config = AgentConfig::default();
5988        let agent = AgentLoop::new(
5989            mock_client.clone(),
5990            tool_executor,
5991            test_tool_context(),
5992            config,
5993        );
5994
5995        let mut plan = ExecutionPlan::new("Test failure", Complexity::Medium);
5996        plan.add_step(Task::new("s1", "Independent step"));
5997        plan.add_step(Task::new("s2", "Depends on s1").with_dependencies(vec!["s1".to_string()]));
5998        plan.add_step(Task::new("s3", "Another independent"));
5999        plan.add_step(Task::new("s4", "Depends on s2").with_dependencies(vec!["s2".to_string()]));
6000
6001        let (tx, mut rx) = mpsc::channel(100);
6002        let _result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
6003
6004        // s1 and s3 should succeed (wave 1), s2 should fail (wave 2),
6005        // s4 should never execute (deadlock — dep s2 failed, not completed)
6006        let mut completed_steps = Vec::new();
6007        let mut failed_steps = Vec::new();
6008        rx.close();
6009        while let Some(event) = rx.recv().await {
6010            if let AgentEvent::StepEnd {
6011                step_id, status, ..
6012            } = event
6013            {
6014                match status {
6015                    TaskStatus::Completed => completed_steps.push(step_id),
6016                    TaskStatus::Failed => failed_steps.push(step_id),
6017                    _ => {}
6018                }
6019            }
6020        }
6021
6022        assert!(
6023            completed_steps.contains(&"s1".to_string()),
6024            "s1 should complete"
6025        );
6026        assert!(
6027            completed_steps.contains(&"s3".to_string()),
6028            "s3 should complete"
6029        );
6030        assert!(failed_steps.contains(&"s2".to_string()), "s2 should fail");
6031        // s4 should NOT appear in either list — it was never started
6032        assert!(
6033            !completed_steps.contains(&"s4".to_string()),
6034            "s4 should not complete"
6035        );
6036        assert!(
6037            !failed_steps.contains(&"s4".to_string()),
6038            "s4 should not fail (never started)"
6039        );
6040    }
6041
6042    // ========================================================================
6043    // Phase 4: Error Recovery & Resilience Tests
6044    // ========================================================================
6045
6046    #[test]
6047    fn test_agent_config_resilience_defaults() {
6048        let config = AgentConfig::default();
6049        assert_eq!(config.max_parse_retries, 2);
6050        assert_eq!(config.tool_timeout_ms, None);
6051        assert_eq!(config.circuit_breaker_threshold, 3);
6052    }
6053
6054    /// 4.1 — Parse error recovery: bails after max_parse_retries exceeded
6055    #[tokio::test]
6056    async fn test_parse_error_recovery_bails_after_threshold() {
6057        // 3 parse errors with max_parse_retries=2: count reaches 3 > 2 → bail
6058        let mock_client = Arc::new(MockLlmClient::new(vec![
6059            MockLlmClient::tool_call_response(
6060                "c1",
6061                "bash",
6062                serde_json::json!({"__parse_error": "unexpected token at position 5"}),
6063            ),
6064            MockLlmClient::tool_call_response(
6065                "c2",
6066                "bash",
6067                serde_json::json!({"__parse_error": "missing closing brace"}),
6068            ),
6069            MockLlmClient::tool_call_response(
6070                "c3",
6071                "bash",
6072                serde_json::json!({"__parse_error": "still broken"}),
6073            ),
6074            MockLlmClient::text_response("Done"), // never reached
6075        ]));
6076
6077        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6078        let config = AgentConfig {
6079            max_parse_retries: 2,
6080            ..AgentConfig::default()
6081        };
6082        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6083        let result = agent.execute(&[], "Do something", None).await;
6084        assert!(result.is_err(), "should bail after parse error threshold");
6085        let err = result.unwrap_err().to_string();
6086        assert!(
6087            err.contains("malformed tool arguments"),
6088            "error should mention malformed tool arguments, got: {}",
6089            err
6090        );
6091    }
6092
6093    /// 4.1 — Parse error recovery: counter resets after a valid tool execution
6094    #[tokio::test]
6095    async fn test_parse_error_counter_resets_on_success() {
6096        // 2 parse errors (= max_parse_retries, not yet exceeded)
6097        // Then a valid tool call (resets counter)
6098        // Then final text — should NOT bail
6099        let mock_client = Arc::new(MockLlmClient::new(vec![
6100            MockLlmClient::tool_call_response(
6101                "c1",
6102                "bash",
6103                serde_json::json!({"__parse_error": "bad args"}),
6104            ),
6105            MockLlmClient::tool_call_response(
6106                "c2",
6107                "bash",
6108                serde_json::json!({"__parse_error": "bad args again"}),
6109            ),
6110            // Valid call — resets parse_error_count to 0
6111            MockLlmClient::tool_call_response(
6112                "c3",
6113                "bash",
6114                serde_json::json!({"command": "echo ok"}),
6115            ),
6116            MockLlmClient::text_response("All done"),
6117        ]));
6118
6119        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6120        let config = AgentConfig {
6121            max_parse_retries: 2,
6122            ..AgentConfig::default()
6123        };
6124        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6125        let result = agent.execute(&[], "Do something", None).await;
6126        assert!(
6127            result.is_ok(),
6128            "should not bail — counter reset after successful tool, got: {:?}",
6129            result.err()
6130        );
6131        assert_eq!(result.unwrap().text, "All done");
6132    }
6133
6134    /// 4.2 — Tool timeout: slow tool produces a timeout error result; session continues
6135    #[tokio::test]
6136    async fn test_tool_timeout_produces_error_result() {
6137        let mock_client = Arc::new(MockLlmClient::new(vec![
6138            MockLlmClient::tool_call_response(
6139                "t1",
6140                "bash",
6141                serde_json::json!({"command": "sleep 10"}),
6142            ),
6143            MockLlmClient::text_response("The command timed out."),
6144        ]));
6145
6146        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6147        let config = AgentConfig {
6148            // 50ms — sleep 10 will never finish
6149            tool_timeout_ms: Some(50),
6150            ..AgentConfig::default()
6151        };
6152        let agent = AgentLoop::new(
6153            mock_client.clone(),
6154            tool_executor,
6155            test_tool_context(),
6156            config,
6157        );
6158        let result = agent.execute(&[], "Run sleep", None).await;
6159        assert!(
6160            result.is_ok(),
6161            "session should continue after tool timeout: {:?}",
6162            result.err()
6163        );
6164        assert_eq!(result.unwrap().text, "The command timed out.");
6165        // LLM called twice: initial request + response after timeout error
6166        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
6167    }
6168
6169    /// 4.2 — Tool timeout: tool that finishes before the deadline succeeds normally
6170    #[tokio::test]
6171    async fn test_tool_within_timeout_succeeds() {
6172        let mock_client = Arc::new(MockLlmClient::new(vec![
6173            MockLlmClient::tool_call_response(
6174                "t1",
6175                "bash",
6176                serde_json::json!({"command": "echo fast"}),
6177            ),
6178            MockLlmClient::text_response("Command succeeded."),
6179        ]));
6180
6181        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6182        let config = AgentConfig {
6183            tool_timeout_ms: Some(5_000), // 5 s — echo completes in <100ms
6184            ..AgentConfig::default()
6185        };
6186        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6187        let result = agent.execute(&[], "Run something fast", None).await;
6188        assert!(
6189            result.is_ok(),
6190            "fast tool should succeed: {:?}",
6191            result.err()
6192        );
6193        assert_eq!(result.unwrap().text, "Command succeeded.");
6194    }
6195
6196    /// 4.3 — Circuit breaker: retries non-streaming LLM failures up to threshold
6197    #[tokio::test]
6198    async fn test_circuit_breaker_retries_non_streaming() {
6199        // Empty response list → every call bails with "No more mock responses"
6200        // threshold=2 → tries twice, then bails with circuit-breaker message
6201        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6202
6203        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6204        let config = AgentConfig {
6205            circuit_breaker_threshold: 2,
6206            ..AgentConfig::default()
6207        };
6208        let agent = AgentLoop::new(
6209            mock_client.clone(),
6210            tool_executor,
6211            test_tool_context(),
6212            config,
6213        );
6214        let result = agent.execute(&[], "Hello", None).await;
6215        assert!(result.is_err(), "should fail when LLM always errors");
6216        let err = result.unwrap_err().to_string();
6217        assert!(
6218            err.contains("circuit breaker"),
6219            "error should mention circuit breaker, got: {}",
6220            err
6221        );
6222        assert_eq!(
6223            mock_client.call_count.load(Ordering::SeqCst),
6224            2,
6225            "should make exactly threshold=2 LLM calls"
6226        );
6227    }
6228
6229    /// 4.3 — Circuit breaker: threshold=1 bails on the very first failure
6230    #[tokio::test]
6231    async fn test_circuit_breaker_threshold_one_no_retry() {
6232        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6233
6234        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6235        let config = AgentConfig {
6236            circuit_breaker_threshold: 1,
6237            ..AgentConfig::default()
6238        };
6239        let agent = AgentLoop::new(
6240            mock_client.clone(),
6241            tool_executor,
6242            test_tool_context(),
6243            config,
6244        );
6245        let result = agent.execute(&[], "Hello", None).await;
6246        assert!(result.is_err());
6247        assert_eq!(
6248            mock_client.call_count.load(Ordering::SeqCst),
6249            1,
6250            "with threshold=1 exactly one attempt should be made"
6251        );
6252    }
6253
6254    /// 4.3 — Circuit breaker: succeeds when LLM recovers before hitting threshold
6255    #[tokio::test]
6256    async fn test_circuit_breaker_succeeds_if_llm_recovers() {
6257        // First call fails, second call succeeds; threshold=3 — recovery within threshold
6258        struct FailOnceThenSucceed {
6259            inner: MockLlmClient,
6260            failed_once: std::sync::atomic::AtomicBool,
6261            call_count: AtomicUsize,
6262        }
6263
6264        #[async_trait::async_trait]
6265        impl LlmClient for FailOnceThenSucceed {
6266            async fn complete(
6267                &self,
6268                messages: &[Message],
6269                system: Option<&str>,
6270                tools: &[ToolDefinition],
6271            ) -> Result<LlmResponse> {
6272                self.call_count.fetch_add(1, Ordering::SeqCst);
6273                let already_failed = self
6274                    .failed_once
6275                    .swap(true, std::sync::atomic::Ordering::SeqCst);
6276                if !already_failed {
6277                    anyhow::bail!("transient network error");
6278                }
6279                self.inner.complete(messages, system, tools).await
6280            }
6281
6282            async fn complete_streaming(
6283                &self,
6284                messages: &[Message],
6285                system: Option<&str>,
6286                tools: &[ToolDefinition],
6287            ) -> Result<tokio::sync::mpsc::Receiver<crate::llm::StreamEvent>> {
6288                self.inner.complete_streaming(messages, system, tools).await
6289            }
6290        }
6291
6292        let mock = Arc::new(FailOnceThenSucceed {
6293            inner: MockLlmClient::new(vec![MockLlmClient::text_response("Recovered!")]),
6294            failed_once: std::sync::atomic::AtomicBool::new(false),
6295            call_count: AtomicUsize::new(0),
6296        });
6297
6298        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6299        let config = AgentConfig {
6300            circuit_breaker_threshold: 3,
6301            ..AgentConfig::default()
6302        };
6303        let agent = AgentLoop::new(mock.clone(), tool_executor, test_tool_context(), config);
6304        let result = agent.execute(&[], "Hello", None).await;
6305        assert!(
6306            result.is_ok(),
6307            "should succeed when LLM recovers within threshold: {:?}",
6308            result.err()
6309        );
6310        assert_eq!(result.unwrap().text, "Recovered!");
6311        assert_eq!(
6312            mock.call_count.load(Ordering::SeqCst),
6313            2,
6314            "should have made exactly 2 calls (1 fail + 1 success)"
6315        );
6316    }
6317
6318    // ── Continuation detection tests ─────────────────────────────────────────
6319
/// Empty and whitespace-only responses are expected to be flagged as incomplete.
#[test]
fn test_looks_incomplete_empty() {
    for blank in ["", "   "] {
        assert!(
            AgentLoop::looks_incomplete(blank),
            "expected {:?} to be flagged as incomplete",
            blank
        );
    }
}
6325
/// Text that ends with a colon is expected to be flagged as incomplete.
#[test]
fn test_looks_incomplete_trailing_colon() {
    for dangling in ["Let me check the file:", "Next steps:"] {
        assert!(
            AgentLoop::looks_incomplete(dangling),
            "expected {:?} to be flagged as incomplete",
            dangling
        );
    }
}
6331
/// Both the three-dot form and the single-character `…` ending are expected
/// to be flagged as incomplete.
#[test]
fn test_looks_incomplete_ellipsis() {
    for trailing_off in ["Working on it...", "Processing…"] {
        assert!(
            AgentLoop::looks_incomplete(trailing_off),
            "expected {:?} to be flagged as incomplete",
            trailing_off
        );
    }
}
6337
/// Sentences that announce an upcoming action are expected to be flagged as
/// incomplete.
#[test]
fn test_looks_incomplete_intent_phrases() {
    let announcements = [
        "I'll start by reading the file.",
        "Let me check the configuration.",
        "I will now run the tests.",
        "I need to update the Cargo.toml.",
    ];
    for announcement in announcements {
        assert!(
            AgentLoop::looks_incomplete(announcement),
            "expected {:?} to be flagged as incomplete",
            announcement
        );
    }
}
6351
/// Clear final answers should NOT trigger continuation.
#[test]
fn test_looks_complete_final_answer() {
    let final_answers = [
        "The tests pass. All changes have been applied successfully.",
        "Done. I've updated the three files and verified the build succeeds.",
        "42",
        "Yes.",
    ];
    for answer in final_answers {
        assert!(
            !AgentLoop::looks_incomplete(answer),
            "expected {:?} to be treated as a complete answer",
            answer
        );
    }
}
6364
/// A multi-line summary whose first line ends with a colon but whose last
/// line is a finished bullet must not be flagged as incomplete.
#[test]
fn test_looks_incomplete_multiline_complete() {
    // Assembled line by line; the resulting literal is identical to a single
    // string with embedded `\n` separators.
    let summary = concat!(
        "Here is the summary:\n",
        "\n",
        "- Fixed the bug in agent.rs\n",
        "- All tests pass\n",
        "- Build succeeds",
    );
    assert!(!AgentLoop::looks_incomplete(summary));
}
6370}
a3s_code_core/agent.rs

a3s_code_core/
agent.rs