// a3s_code_core/agent.rs
//! Agent Loop Implementation
//!
//! The agent loop handles the core conversation cycle:
//! 1. User sends a prompt
//! 2. LLM generates a response (possibly with tool calls)
//! 3. If tool calls present, execute them and send results back
//! 4. Repeat until LLM returns without tool calls
//!
//! This implements agentic behavior where the LLM can use tools
//! to accomplish tasks autonomously.

#[cfg(feature = "ahp")]
use crate::ahp::InjectedContext;
#[cfg(feature = "ahp")]
use crate::context::{ContextItem, ContextType};
use crate::context::{ContextProvider, ContextQuery, ContextResult};
use crate::hitl::ConfirmationProvider;
use crate::hooks::{
    ErrorType, GenerateEndEvent, GenerateStartEvent, HookEvent, HookExecutor, HookResult,
    IntentDetectionEvent, OnErrorEvent, PostResponseEvent, PostToolUseEvent,
    PreContextPerceptionEvent, PrePromptEvent, PreToolUseEvent, TokenUsageInfo, ToolCallInfo,
    ToolResultData,
};
use crate::llm::{LlmClient, LlmResponse, Message, TokenUsage, ToolDefinition};
use crate::permissions::{PermissionChecker, PermissionDecision};
use crate::planning::{AgentGoal, ExecutionPlan, TaskStatus};
use crate::prompts::{AgentStyle, DetectionConfidence, PlanningMode, SystemPromptSlots};
use crate::queue::SessionCommand;
use crate::session_lane_queue::SessionLaneQueue;
use crate::text::truncate_utf8;
use crate::tool_search::ToolIndex;
use crate::tools::{ToolContext, ToolExecutor, ToolStreamEvent};
use anyhow::{Context, Result};
use async_trait::async_trait;
use futures::future::join_all;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::{mpsc, RwLock};
41
/// Maximum number of tool execution rounds before stopping.
///
/// Used as the default for `AgentConfig::max_tool_rounds`.
const MAX_TOOL_ROUNDS: usize = 50;
44
/// Agent configuration
#[derive(Clone)]
pub struct AgentConfig {
    /// Slot-based system prompt customization.
    ///
    /// Users can customize specific parts (role, guidelines, response style, extra)
    /// without overriding the core agentic capabilities. The default agentic core
    /// (tool usage, autonomous behavior, completion criteria) is always preserved.
    pub prompt_slots: SystemPromptSlots,
    /// Tool definitions advertised to the LLM (empty by default — tools are
    /// provided by the `ToolExecutor`; see `Default`).
    pub tools: Vec<ToolDefinition>,
    /// Maximum number of tool execution rounds per prompt
    /// (defaults to [`MAX_TOOL_ROUNDS`]).
    pub max_tool_rounds: usize,
    /// Optional security provider for input taint tracking and output sanitization
    pub security_provider: Option<Arc<dyn crate::security::SecurityProvider>>,
    /// Optional permission checker for tool execution control
    pub permission_checker: Option<Arc<dyn PermissionChecker>>,
    /// Optional confirmation manager for HITL (Human-in-the-Loop)
    pub confirmation_manager: Option<Arc<dyn ConfirmationProvider>>,
    /// Context providers for augmenting prompts with external context
    pub context_providers: Vec<Arc<dyn ContextProvider>>,
    /// Planning mode — Auto (detect from message), Enabled, or Disabled.
    pub planning_mode: PlanningMode,
    /// Enable goal tracking
    pub goal_tracking: bool,
    /// Optional hook engine for firing lifecycle events (PreToolUse, PostToolUse, etc.)
    pub hook_engine: Option<Arc<dyn HookExecutor>>,
    /// Optional skill registry for tool permission enforcement
    pub skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    /// Max consecutive malformed-tool-args errors before aborting (default: 2).
    ///
    /// When the LLM returns tool arguments with `__parse_error`, the error is
    /// fed back as a tool result. After this many consecutive parse errors the
    /// loop bails instead of retrying indefinitely.
    pub max_parse_retries: u32,
    /// Per-tool execution timeout in milliseconds (`None` = no timeout).
    ///
    /// When set, each tool execution is wrapped in `tokio::time::timeout`.
    /// A timeout produces an error result sent back to the LLM rather than
    /// crashing the session.
    pub tool_timeout_ms: Option<u64>,
    /// Circuit-breaker threshold: max consecutive LLM API failures before
    /// aborting (default: 3).
    ///
    /// In non-streaming mode, transient LLM failures are retried up to this
    /// many times (with short exponential backoff) before the loop bails.
    /// In streaming mode, any failure is fatal (events cannot be replayed).
    pub circuit_breaker_threshold: u32,
    /// Max consecutive identical tool signatures before aborting (default: 3).
    ///
    /// A tool signature is the exact combination of tool name + compact JSON
    /// arguments. This prevents the agent from getting stuck repeating the same
    /// tool call in a loop, for example repeatedly fetching the same URL.
    pub duplicate_tool_call_threshold: u32,
    /// Enable auto-compaction when context usage exceeds threshold.
    pub auto_compact: bool,
    /// Context usage percentage threshold to trigger auto-compaction (0.0 - 1.0).
    /// Default: 0.80 (80%).
    pub auto_compact_threshold: f32,
    /// Maximum context window size in tokens (used for auto-compact calculation).
    /// Default: 200_000.
    pub max_context_tokens: usize,
    /// LLM client reference for auto-compaction (needs to call LLM for summarization).
    pub llm_client: Option<Arc<dyn LlmClient>>,
    /// Optional agent memory for auto-remember after tool execution and recall before prompts.
    pub memory: Option<Arc<crate::memory::AgentMemory>>,
    /// Inject a continuation message when the LLM stops calling tools before the
    /// task is complete. Enabled by default. Set to `false` to disable.
    ///
    /// When enabled, if the LLM produces a response with no tool calls but the
    /// response text looks like an intermediate step (not a final answer), the
    /// loop injects [`crate::prompts::CONTINUATION`] as a user message and
    /// continues for up to `max_continuation_turns` additional turns.
    pub continuation_enabled: bool,
    /// Maximum number of continuation injections per execution (default: 3).
    ///
    /// Prevents infinite loops when the LLM repeatedly stops without completing.
    pub max_continuation_turns: u32,
    /// Optional tool search index for filtering tools per-turn.
    ///
    /// When set, only tools matching the user prompt are sent to the LLM,
    /// reducing context usage when many MCP tools are registered.
    pub tool_index: Option<ToolIndex>,
    /// Optional subagent registry for auto-delegation.
    ///
    /// When set, the agent loop can auto-detect when to launch subagents
    /// based on prompt patterns or agent style.
    pub subagent_registry: Option<Arc<crate::subagent::AgentRegistry>>,
    /// Callback for when a subagent should be launched.
    ///
    /// When `should_launch_subagent` returns Some, this callback is invoked
    /// with the agent definition and prompt. The callback should return
    /// `Some(result)` if it handled the subagent launch, or `None` to
    /// fall back to normal execution.
    #[allow(clippy::type_complexity)]
    pub on_subagent_launch: Option<
        Arc<
            dyn Fn(&crate::subagent::AgentDefinition, &str) -> Option<Result<AgentResult>>
                + Send
                + Sync,
        >,
    >,
}
146
147impl std::fmt::Debug for AgentConfig {
148    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149        f.debug_struct("AgentConfig")
150            .field("prompt_slots", &self.prompt_slots)
151            .field("tools", &self.tools)
152            .field("max_tool_rounds", &self.max_tool_rounds)
153            .field("security_provider", &self.security_provider.is_some())
154            .field("permission_checker", &self.permission_checker.is_some())
155            .field("confirmation_manager", &self.confirmation_manager.is_some())
156            .field("context_providers", &self.context_providers.len())
157            .field("planning_mode", &self.planning_mode)
158            .field("goal_tracking", &self.goal_tracking)
159            .field("hook_engine", &self.hook_engine.is_some())
160            .field(
161                "skill_registry",
162                &self.skill_registry.as_ref().map(|r| r.len()),
163            )
164            .field("max_parse_retries", &self.max_parse_retries)
165            .field("tool_timeout_ms", &self.tool_timeout_ms)
166            .field("circuit_breaker_threshold", &self.circuit_breaker_threshold)
167            .field(
168                "duplicate_tool_call_threshold",
169                &self.duplicate_tool_call_threshold,
170            )
171            .field("auto_compact", &self.auto_compact)
172            .field("auto_compact_threshold", &self.auto_compact_threshold)
173            .field("max_context_tokens", &self.max_context_tokens)
174            .field("continuation_enabled", &self.continuation_enabled)
175            .field("max_continuation_turns", &self.max_continuation_turns)
176            .field("memory", &self.memory.is_some())
177            .field("tool_index", &self.tool_index.as_ref().map(|i| i.len()))
178            .field(
179                "subagent_registry",
180                &self.subagent_registry.as_ref().map(|r| r.len()),
181            )
182            .field("on_subagent_launch", &self.on_subagent_launch.is_some())
183            .finish()
184    }
185}
186
187impl Default for AgentConfig {
188    fn default() -> Self {
189        Self {
190            prompt_slots: SystemPromptSlots::default(),
191            tools: Vec::new(), // Tools are provided by ToolExecutor
192            max_tool_rounds: MAX_TOOL_ROUNDS,
193            security_provider: None,
194            permission_checker: None,
195            confirmation_manager: None,
196            context_providers: Vec::new(),
197            planning_mode: PlanningMode::default(),
198            goal_tracking: false,
199            hook_engine: None,
200            skill_registry: Some(Arc::new(crate::skills::SkillRegistry::with_builtins())),
201            max_parse_retries: 2,
202            tool_timeout_ms: None,
203            circuit_breaker_threshold: 3,
204            duplicate_tool_call_threshold: 3,
205            auto_compact: false,
206            auto_compact_threshold: 0.80,
207            max_context_tokens: 200_000,
208            llm_client: None,
209            memory: None,
210            continuation_enabled: true,
211            max_continuation_turns: 3,
212            tool_index: None,
213            subagent_registry: None,
214            on_subagent_launch: None,
215        }
216    }
217}
218
/// Events emitted during agent execution
///
/// Subscribe via [`Session::subscribe_events()`](crate::session::Session::subscribe_events).
/// New variants may be added in minor releases — always include a wildcard arm
/// (`_ => {}`) when matching.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum AgentEvent {
    /// Agent started processing
    #[serde(rename = "agent_start")]
    Start {
        /// The user prompt being processed.
        prompt: String,
    },

    /// Runtime agent style/mode selected for the current execution.
    #[serde(rename = "agent_mode_changed")]
    AgentModeChanged {
        /// Stable UI/runtime mode label, e.g. "general", "planning", "explore".
        mode: String,
        /// Canonical built-in agent name associated with this mode.
        agent: String,
        /// Human-readable explanation of the selected style.
        description: String,
    },

    /// LLM turn started
    #[serde(rename = "turn_start")]
    TurnStart {
        /// Turn index within the current execution.
        turn: usize,
    },

    /// Text delta from streaming
    #[serde(rename = "text_delta")]
    TextDelta {
        /// Incremental chunk of assistant text.
        text: String,
    },

    /// Tool execution started
    #[serde(rename = "tool_start")]
    ToolStart {
        /// Tool-call id (correlates with the matching `ToolEnd`).
        id: String,
        /// Name of the tool being executed.
        name: String,
    },

    /// Tool input delta from streaming (partial JSON arguments)
    #[serde(rename = "tool_input_delta")]
    ToolInputDelta { delta: String },

    /// Tool execution completed
    #[serde(rename = "tool_end")]
    ToolEnd {
        /// Tool-call id (correlates with the matching `ToolStart`).
        id: String,
        /// Name of the tool that ran.
        name: String,
        /// Tool output text.
        output: String,
        /// Exit code reported by the tool executor.
        exit_code: i32,
        /// Optional tool-specific metadata; omitted from JSON when absent.
        #[serde(skip_serializing_if = "Option::is_none")]
        metadata: Option<serde_json::Value>,
    },

    /// Intermediate tool output (streaming delta)
    #[serde(rename = "tool_output_delta")]
    ToolOutputDelta {
        id: String,
        name: String,
        delta: String,
    },

    /// LLM turn completed
    #[serde(rename = "turn_end")]
    TurnEnd {
        /// Turn index within the current execution.
        turn: usize,
        /// Token usage for this turn.
        usage: TokenUsage,
    },

    /// Agent completed
    #[serde(rename = "agent_end")]
    End {
        /// Final assistant text.
        text: String,
        /// Aggregate token usage for the execution.
        usage: TokenUsage,
        /// Optional response metadata; omitted from JSON when absent.
        #[serde(skip_serializing_if = "Option::is_none")]
        meta: Option<crate::llm::LlmResponseMeta>,
    },

    /// Error occurred
    #[serde(rename = "error")]
    Error { message: String },

    /// Tool execution requires confirmation (HITL)
    #[serde(rename = "confirmation_required")]
    ConfirmationRequired {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        /// How long the confirmation waits before timing out.
        timeout_ms: u64,
    },

    /// Confirmation received from user (HITL)
    #[serde(rename = "confirmation_received")]
    ConfirmationReceived {
        tool_id: String,
        approved: bool,
        /// Optional user-supplied reason for the decision.
        reason: Option<String>,
    },

    /// Confirmation timed out (HITL)
    #[serde(rename = "confirmation_timeout")]
    ConfirmationTimeout {
        tool_id: String,
        action_taken: String, // "rejected" or "auto_approved"
    },

    /// External task pending (needs SDK processing)
    #[serde(rename = "external_task_pending")]
    ExternalTaskPending {
        task_id: String,
        session_id: String,
        lane: crate::hitl::SessionLane,
        command_type: String,
        payload: serde_json::Value,
        timeout_ms: u64,
    },

    /// External task completed
    #[serde(rename = "external_task_completed")]
    ExternalTaskCompleted {
        task_id: String,
        session_id: String,
        success: bool,
    },

    /// Tool execution denied by permission policy
    #[serde(rename = "permission_denied")]
    PermissionDenied {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        /// Why the policy rejected the call.
        reason: String,
    },

    /// Context resolution started
    #[serde(rename = "context_resolving")]
    ContextResolving {
        /// Names of the context providers being queried.
        providers: Vec<String>,
    },

    /// Context resolution completed
    #[serde(rename = "context_resolved")]
    ContextResolved {
        /// Number of context items gathered.
        total_items: usize,
        /// Estimated token cost of the gathered context.
        total_tokens: usize,
    },

    // ========================================================================
    // a3s-lane integration events
    // ========================================================================
    /// Command moved to dead letter queue after exhausting retries
    #[serde(rename = "command_dead_lettered")]
    CommandDeadLettered {
        command_id: String,
        command_type: String,
        lane: String,
        /// Last error before the command was dead-lettered.
        error: String,
        /// Total attempts made before giving up.
        attempts: u32,
    },

    /// Command retry attempt
    #[serde(rename = "command_retry")]
    CommandRetry {
        command_id: String,
        command_type: String,
        lane: String,
        /// 1-based retry attempt number — NOTE(review): base not visible here, confirm.
        attempt: u32,
        /// Backoff delay before this retry.
        delay_ms: u64,
    },

    /// Queue alert (depth warning, latency alert, etc.)
    #[serde(rename = "queue_alert")]
    QueueAlert {
        level: String,
        alert_type: String,
        message: String,
    },

    // ========================================================================
    // Task tracking events
    // ========================================================================
    /// Task list updated
    #[serde(rename = "task_updated")]
    TaskUpdated {
        session_id: String,
        /// Full task list snapshot after the update.
        tasks: Vec<crate::planning::Task>,
    },

    // ========================================================================
    // Memory System events (Phase 3)
    // ========================================================================
    /// Memory stored
    #[serde(rename = "memory_stored")]
    MemoryStored {
        memory_id: String,
        memory_type: String,
        importance: f32,
        tags: Vec<String>,
    },

    /// Memory recalled
    #[serde(rename = "memory_recalled")]
    MemoryRecalled {
        memory_id: String,
        content: String,
        /// Relevance score of the recalled memory.
        relevance: f32,
    },

    /// Memories searched
    #[serde(rename = "memories_searched")]
    MemoriesSearched {
        query: Option<String>,
        tags: Vec<String>,
        result_count: usize,
    },

    /// Memory cleared
    #[serde(rename = "memory_cleared")]
    MemoryCleared {
        tier: String, // "long_term", "short_term", "working"
        /// Number of memories removed.
        count: u64,
    },

    // ========================================================================
    // Subagent events
    // ========================================================================
    /// Subagent task started
    #[serde(rename = "subagent_start")]
    SubagentStart {
        /// Unique task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Parent session ID
        parent_session_id: String,
        /// Agent type (e.g., "explore", "general")
        agent: String,
        /// Short description of the task
        description: String,
    },

    /// Subagent task progress update
    #[serde(rename = "subagent_progress")]
    SubagentProgress {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Progress status message
        status: String,
        /// Additional metadata
        metadata: serde_json::Value,
    },

    /// Subagent task completed
    #[serde(rename = "subagent_end")]
    SubagentEnd {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Agent type
        agent: String,
        /// Task output/result
        output: String,
        /// Whether the task succeeded
        success: bool,
    },

    // ========================================================================
    // Planning and Goal Tracking Events (Phase 1)
    // ========================================================================
    /// Planning phase started
    #[serde(rename = "planning_start")]
    PlanningStart { prompt: String },

    /// Planning phase completed
    #[serde(rename = "planning_end")]
    PlanningEnd {
        plan: ExecutionPlan,
        estimated_steps: usize,
    },

    /// Step execution started
    #[serde(rename = "step_start")]
    StepStart {
        step_id: String,
        description: String,
        step_number: usize,
        total_steps: usize,
    },

    /// Step execution completed
    #[serde(rename = "step_end")]
    StepEnd {
        step_id: String,
        status: TaskStatus,
        step_number: usize,
        total_steps: usize,
    },

    /// Goal extracted from prompt
    #[serde(rename = "goal_extracted")]
    GoalExtracted { goal: AgentGoal },

    /// Goal progress update
    #[serde(rename = "goal_progress")]
    GoalProgress {
        goal: String,
        /// Fractional progress — NOTE(review): presumably 0.0–1.0, confirm at emit site.
        progress: f32,
        completed_steps: usize,
        total_steps: usize,
    },

    /// Goal achieved
    #[serde(rename = "goal_achieved")]
    GoalAchieved {
        goal: String,
        total_steps: usize,
        duration_ms: i64,
    },

    // ========================================================================
    // Context Compaction events
    // ========================================================================
    /// Context automatically compacted due to high usage
    #[serde(rename = "context_compacted")]
    ContextCompacted {
        session_id: String,
        /// Message count before compaction.
        before_messages: usize,
        /// Message count after compaction.
        after_messages: usize,
        /// Context usage percentage that triggered compaction.
        percent_before: f32,
    },

    // ========================================================================
    // Persistence events
    // ========================================================================
    /// Session persistence failed — SDK clients should handle this
    #[serde(rename = "persistence_failed")]
    PersistenceFailed {
        session_id: String,
        /// Which persistence operation failed.
        operation: String,
        error: String,
    },

    // ========================================================================
    // Side question (btw)
    // ========================================================================
    /// Ephemeral side question answered.
    ///
    /// Emitted by [`crate::agent_api::AgentSession::btw()`] in streaming mode.
    /// The answer is never added to conversation history.
    #[serde(rename = "btw_answer")]
    BtwAnswer {
        question: String,
        answer: String,
        usage: TokenUsage,
    },
}
570
/// Result of agent execution
#[derive(Debug, Clone)]
pub struct AgentResult {
    /// Final assistant text.
    pub text: String,
    /// Conversation messages from the execution.
    pub messages: Vec<Message>,
    /// Aggregate token usage.
    pub usage: TokenUsage,
    /// Number of tool calls made during the execution.
    pub tool_calls_count: usize,
}
579
580// ============================================================================
581// ToolCommand — bridges ToolExecutor to SessionCommand for queue submission
582// ============================================================================
583
/// Adapter that implements `SessionCommand` for tool execution via the queue.
///
/// Wraps a `ToolExecutor` call so it can be submitted to `SessionLaneQueue`.
pub struct ToolCommand {
    /// Executor that actually runs the tool.
    tool_executor: Arc<ToolExecutor>,
    /// Name of the tool to invoke.
    tool_name: String,
    /// JSON arguments passed to the tool.
    tool_args: Value,
    /// Context handed to the tool executor.
    tool_context: ToolContext,
    /// Optional registry used to enforce per-skill tool allow-lists.
    skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
}
594
595impl ToolCommand {
596    /// Create a new ToolCommand
597    pub fn new(
598        tool_executor: Arc<ToolExecutor>,
599        tool_name: String,
600        tool_args: Value,
601        tool_context: ToolContext,
602        skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
603    ) -> Self {
604        Self {
605            tool_executor,
606            tool_name,
607            tool_args,
608            tool_context,
609            skill_registry,
610        }
611    }
612}
613
614#[async_trait]
615impl SessionCommand for ToolCommand {
616    async fn execute(&self) -> Result<Value> {
617        // Check skill-based tool permissions
618        if let Some(registry) = &self.skill_registry {
619            let instruction_skills = registry.by_kind(crate::skills::SkillKind::Instruction);
620
621            // If there are instruction skills with tool restrictions, check permissions
622            let has_restrictions = instruction_skills.iter().any(|s| s.allowed_tools.is_some());
623
624            if has_restrictions {
625                let mut allowed = false;
626
627                for skill in &instruction_skills {
628                    if skill.is_tool_allowed(&self.tool_name) {
629                        allowed = true;
630                        break;
631                    }
632                }
633
634                if !allowed {
635                    return Err(anyhow::anyhow!(
636                        "Tool '{}' is not allowed by any active skill. Active skills restrict tools to their allowed-tools lists.",
637                        self.tool_name
638                    ));
639                }
640            }
641        }
642
643        // Execute the tool
644        let result = self
645            .tool_executor
646            .execute_with_context(&self.tool_name, &self.tool_args, &self.tool_context)
647            .await?;
648        Ok(serde_json::json!({
649            "output": result.output,
650            "exit_code": result.exit_code,
651            "metadata": result.metadata,
652        }))
653    }
654
655    fn command_type(&self) -> &str {
656        &self.tool_name
657    }
658
659    fn payload(&self) -> Value {
660        self.tool_args.clone()
661    }
662}
663
664// ============================================================================
665// AgentLoop
666// ============================================================================
667
/// Agent loop executor
#[derive(Clone)]
pub struct AgentLoop {
    /// LLM backend used to generate each turn.
    llm_client: Arc<dyn LlmClient>,
    /// Executes tool calls requested by the LLM.
    tool_executor: Arc<ToolExecutor>,
    /// Context handed to every tool execution.
    tool_context: ToolContext,
    /// Loop configuration (limits, providers, hooks, callbacks).
    config: AgentConfig,
    /// Optional per-session tool metrics collector
    tool_metrics: Option<Arc<RwLock<crate::telemetry::ToolMetrics>>>,
    /// Optional lane queue for priority-based tool execution
    command_queue: Option<Arc<SessionLaneQueue>>,
    /// Optional progress tracker for real-time tool/token usage tracking
    progress_tracker: Option<Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>>,
    /// Optional task manager for centralized task lifecycle tracking
    task_manager: Option<Arc<crate::task::TaskManager>>,
}
684
685// ============================================================================
686// Intent Detection Helpers (for AHP Context Perception)
687// ============================================================================
688
/// Extract a target name from the prompt (e.g., function name, file path).
///
/// A token wrapped in double quotes, single quotes, or backticks (tried in
/// that order) is the strongest signal and wins. Otherwise falls back to the
/// first word longer than three characters that is not a common question/stop
/// word, and returns an empty string when nothing plausible is found.
/// `_patterns` is currently unused.
fn extract_target_name_from_prompt(prompt: &str, _patterns: &[&str]) -> String {
    // Try each delimiter in priority order; return the first complete pair.
    // (An opening delimiter without a closing one falls through to the next.)
    for delim in ['"', '\'', '`'] {
        if let Some(start) = prompt.find(delim) {
            if let Some(end) = prompt[start + 1..].find(delim) {
                return prompt[start + 1..start + 1 + end].to_string();
            }
        }
    }

    // Fall back to extracting a reasonable word boundary: the first word
    // that is long enough and not a question/stop word.
    let words: Vec<&str> = prompt.split_whitespace().collect();
    if words.len() > 2 {
        const STOP_WORDS: [&str; 7] = ["where", "what", "find", "the", "how", "is", "are"];
        for word in words.iter() {
            if word.len() > 3 && !STOP_WORDS.contains(word) {
                return word.to_string();
            }
        }
    }

    String::new()
}
728
/// Detect the domain from prompt keywords.
///
/// Checks are ordered roughly by specificity; the first matching branch wins
/// and unmatched prompts fall back to "general". Most keywords are safe as
/// substring checks, but "go" is matched on word boundaries: a plain
/// substring test fires on unrelated words such as "algorithm" or "category".
fn detect_domain_from_prompt(prompt: &str) -> String {
    // True when `word` appears as a standalone alphanumeric token in `text`.
    fn has_word(text: &str, word: &str) -> bool {
        text.split(|c: char| !c.is_alphanumeric()).any(|w| w == word)
    }

    let lower = prompt.to_lowercase();

    if lower.contains("rust") || lower.contains("cargo") || lower.contains(".rs") {
        "rust".to_string()
    } else if lower.contains("javascript")
        || lower.contains("typescript")
        || lower.contains("node")
        || lower.contains(".js")
        || lower.contains(".ts")
    {
        "javascript".to_string()
    } else if lower.contains("python") || lower.contains(".py") {
        "python".to_string()
    } else if has_word(&lower, "go") || lower.contains("golang") || lower.contains(".go") {
        "go".to_string()
    } else if lower.contains("java") || lower.contains(".java") {
        "java".to_string()
    } else if lower.contains("docker") || lower.contains("container") {
        "docker".to_string()
    } else if lower.contains("kubernetes") || lower.contains("k8s") {
        "kubernetes".to_string()
    } else if lower.contains("sql")
        || lower.contains("database")
        || lower.contains("postgres")
        || lower.contains("mysql")
    {
        "database".to_string()
    } else if lower.contains("api") || lower.contains("rest") || lower.contains("grpc") {
        "api".to_string()
    } else if lower.contains("auth")
        || lower.contains("login")
        || lower.contains("password")
        || lower.contains("token")
    {
        "security".to_string()
    } else if lower.contains("test") || lower.contains("spec") || lower.contains("mock") {
        "testing".to_string()
    } else {
        "general".to_string()
    }
}
772
/// Result from IntentDetection harness
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntentDetectionResult {
    /// Detected intent: "locate" | "understand" | "retrieve" | "explore" | "reason" | "validate" | "compare" | "track"
    pub detected_intent: String,
    /// Confidence score 0.0 - 1.0
    pub confidence: f32,
    /// Optional target hints from the harness.
    /// Omitted from serialized output when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_hints: Option<TargetHints>,
}
784
/// Target hints from IntentDetection harness
///
/// All fields are optional; consumers fall back to prompt heuristics when a
/// hint is absent (see `build_pre_context_perception_from_intent`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TargetHints {
    /// Kind of the target, if the harness provided one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_type: Option<String>,
    /// Name of the target, if the harness provided one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_name: Option<String>,
    /// Domain of the target, if the harness provided one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub domain: Option<String>,
}
795
/// Detect language hint from prompt characters.
///
/// Scans the prompt for script-specific Unicode ranges and returns a short
/// language code for the first script found, or `None` for plain ASCII/Latin
/// text. Scripts are checked in a fixed priority order, so CJK ideographs
/// map to "zh" even when the text might be kanji-only Japanese.
fn detect_language_hint(prompt: &str) -> Option<String> {
    // (language code, predicate for a character of that script), in priority
    // order: Chinese, Japanese (Hiragana/Katakana), Korean, Arabic, Cyrillic.
    let scripts: [(&str, fn(char) -> bool); 5] = [
        ("zh", |c| ('\u{4e00}'..='\u{9fff}').contains(&c)),
        ("ja", |c| {
            ('\u{3040}'..='\u{309f}').contains(&c) || ('\u{30a0}'..='\u{30ff}').contains(&c)
        }),
        ("ko", |c| ('\u{ac00}'..='\u{d7af}').contains(&c)),
        ("ar", |c| ('\u{0600}'..='\u{06ff}').contains(&c)),
        ("ru", |c| ('\u{0400}'..='\u{04ff}').contains(&c)),
    ];

    for (code, is_script_char) in scripts.iter() {
        if prompt.chars().any(|c| is_script_char(c)) {
            return Some(code.to_string());
        }
    }
    None
}
835
836/// Build PreContextPerceptionEvent from IntentDetection result.
837fn build_pre_context_perception_from_intent(
838    result: IntentDetectionResult,
839    prompt: &str,
840    session_id: &str,
841    workspace: &str,
842) -> PreContextPerceptionEvent {
843    let target_hints = result.target_hints;
844    PreContextPerceptionEvent {
845        session_id: session_id.to_string(),
846        intent: result.detected_intent,
847        target_type: target_hints
848            .as_ref()
849            .and_then(|h| h.target_type.clone())
850            .unwrap_or_else(|| "unknown".to_string()),
851        target_name: target_hints
852            .as_ref()
853            .and_then(|h| h.target_name.clone())
854            .unwrap_or_else(|| extract_target_name_from_prompt(prompt, &[])),
855        domain: target_hints
856            .as_ref()
857            .and_then(|h| h.domain.clone())
858            .unwrap_or_else(|| detect_domain_from_prompt(prompt)),
859        query: Some(prompt.to_string()),
860        working_directory: workspace.to_string(),
861        urgency: "normal".to_string(),
862    }
863}
864
/// Rough token estimate: ~4 bytes per token, tuned for English text and code.
///
/// Note this divides the *byte* length, which tends to overestimate tokens
/// for multi-byte scripts — acceptable for a coarse budget.
#[cfg(feature = "ahp")]
fn estimate_tokens(text: &str) -> usize {
    let byte_len = text.len();
    byte_len / 4
}
870
871impl AgentLoop {
872    pub fn new(
873        llm_client: Arc<dyn LlmClient>,
874        tool_executor: Arc<ToolExecutor>,
875        tool_context: ToolContext,
876        config: AgentConfig,
877    ) -> Self {
878        Self {
879            llm_client,
880            tool_executor,
881            tool_context,
882            config,
883            tool_metrics: None,
884            command_queue: None,
885            progress_tracker: None,
886            task_manager: None,
887        }
888    }
889
890    /// Set the progress tracker for real-time tool/token usage tracking.
891    pub fn with_progress_tracker(
892        mut self,
893        tracker: Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>,
894    ) -> Self {
895        self.progress_tracker = Some(tracker);
896        self
897    }
898
899    /// Set the task manager for centralized task lifecycle tracking.
900    pub fn with_task_manager(mut self, manager: Arc<crate::task::TaskManager>) -> Self {
901        self.task_manager = Some(manager);
902        self
903    }
904
905    /// Set the tool metrics collector for this agent loop
906    pub fn with_tool_metrics(
907        mut self,
908        metrics: Arc<RwLock<crate::telemetry::ToolMetrics>>,
909    ) -> Self {
910        self.tool_metrics = Some(metrics);
911        self
912    }
913
914    /// Set the lane queue for priority-based tool execution.
915    ///
916    /// When set, tools are routed through the lane queue which supports
917    /// External task handling for multi-machine parallel processing.
918    pub fn with_queue(mut self, queue: Arc<SessionLaneQueue>) -> Self {
919        self.command_queue = Some(queue);
920        self
921    }
922
923    /// Track a tool call result in the progress tracker.
924    fn track_tool_result(&self, tool_name: &str, args: &serde_json::Value, exit_code: i32) {
925        if let Some(ref tracker) = self.progress_tracker {
926            let args_summary = Self::compact_json_args(args);
927            let success = exit_code == 0;
928            if let Ok(mut guard) = tracker.try_write() {
929                guard.track_tool_call(tool_name, args_summary, success);
930            }
931        }
932    }
933
934    /// Compact JSON arguments to a short summary string for tracking.
935    fn compact_json_args(args: &serde_json::Value) -> String {
936        let raw = match args {
937            serde_json::Value::Null => String::new(),
938            serde_json::Value::String(s) => s.clone(),
939            _ => serde_json::to_string(args).unwrap_or_default(),
940        };
941        let compact = raw.split_whitespace().collect::<Vec<_>>().join(" ");
942        if compact.len() > 180 {
943            format!("{}...", truncate_utf8(&compact, 180))
944        } else {
945            compact
946        }
947    }
948
949    /// Execute a tool, applying the configured timeout if set.
950    ///
951    /// On timeout, returns an error describing which tool timed out and after
952    /// how many milliseconds. The caller converts this to a tool-result error
953    /// message that is fed back to the LLM.
954    async fn execute_tool_timed(
955        &self,
956        name: &str,
957        args: &serde_json::Value,
958        ctx: &ToolContext,
959    ) -> anyhow::Result<crate::tools::ToolResult> {
960        let fut = self.tool_executor.execute_with_context(name, args, ctx);
961        if let Some(timeout_ms) = self.config.tool_timeout_ms {
962            match tokio::time::timeout(Duration::from_millis(timeout_ms), fut).await {
963                Ok(result) => result,
964                Err(_) => Err(anyhow::anyhow!(
965                    "Tool '{}' timed out after {}ms",
966                    name,
967                    timeout_ms
968                )),
969            }
970        } else {
971            fut.await
972        }
973    }
974
975    /// Convert a tool execution result into the (output, exit_code, is_error, metadata, images) tuple.
976    fn tool_result_to_tuple(
977        result: anyhow::Result<crate::tools::ToolResult>,
978    ) -> (
979        String,
980        i32,
981        bool,
982        Option<serde_json::Value>,
983        Vec<crate::llm::Attachment>,
984    ) {
985        match result {
986            Ok(r) => (
987                r.output,
988                r.exit_code,
989                r.exit_code != 0,
990                r.metadata,
991                r.images,
992            ),
993            Err(e) => {
994                let msg = e.to_string();
995                // Classify the error so the LLM knows whether retrying makes sense.
996                let hint = if Self::is_transient_error(&msg) {
997                    " [transient — you may retry this tool call]"
998                } else {
999                    " [permanent — do not retry without changing the arguments]"
1000                };
1001                (
1002                    format!("Tool execution error: {}{}", msg, hint),
1003                    1,
1004                    true,
1005                    None,
1006                    Vec::new(),
1007                )
1008            }
1009        }
1010    }
1011
1012    /// Inspect the workspace for well-known project marker files and return a short
1013    /// `## Project Context` section that the agent can use without any manual configuration.
1014    /// Returns an empty string when the workspace type cannot be determined.
1015    fn detect_project_hint(workspace: &std::path::Path) -> String {
1016        struct Marker {
1017            file: &'static str,
1018            lang: &'static str,
1019            tip: &'static str,
1020        }
1021
1022        let markers = [
1023            Marker {
1024                file: "Cargo.toml",
1025                lang: "Rust",
1026                tip: "Use `cargo build`, `cargo test`, `cargo clippy`, and `cargo fmt`. \
1027                  Prefer `anyhow` / `thiserror` for error handling. \
1028                  Follow the Microsoft Rust Guidelines (no panics in library code, \
1029                  async-first with Tokio).",
1030            },
1031            Marker {
1032                file: "package.json",
1033                lang: "Node.js / TypeScript",
1034                tip: "Check `package.json` for the package manager (npm/yarn/pnpm/bun) \
1035                  and available scripts. Prefer TypeScript with strict mode. \
1036                  Use ESM imports unless the project is CommonJS.",
1037            },
1038            Marker {
1039                file: "pyproject.toml",
1040                lang: "Python",
1041                tip: "Use the package manager declared in `pyproject.toml` \
1042                  (uv, poetry, hatch, etc.). Prefer type hints and async/await for I/O.",
1043            },
1044            Marker {
1045                file: "setup.py",
1046                lang: "Python",
1047                tip: "Legacy Python project. Prefer type hints and async/await for I/O.",
1048            },
1049            Marker {
1050                file: "requirements.txt",
1051                lang: "Python",
1052                tip: "Python project with pip-style dependencies. \
1053                  Prefer type hints and async/await for I/O.",
1054            },
1055            Marker {
1056                file: "go.mod",
1057                lang: "Go",
1058                tip: "Use `go build ./...` and `go test ./...`. \
1059                  Follow standard Go project layout. Use `gofmt` for formatting.",
1060            },
1061            Marker {
1062                file: "pom.xml",
1063                lang: "Java / Maven",
1064                tip: "Use `mvn compile`, `mvn test`, `mvn package`. \
1065                  Follow standard Maven project structure.",
1066            },
1067            Marker {
1068                file: "build.gradle",
1069                lang: "Java / Gradle",
1070                tip: "Use `./gradlew build` and `./gradlew test`. \
1071                  Follow standard Gradle project structure.",
1072            },
1073            Marker {
1074                file: "build.gradle.kts",
1075                lang: "Kotlin / Gradle",
1076                tip: "Use `./gradlew build` and `./gradlew test`. \
1077                  Prefer Kotlin coroutines for async work.",
1078            },
1079            Marker {
1080                file: "CMakeLists.txt",
1081                lang: "C / C++",
1082                tip: "Use `cmake -B build && cmake --build build`. \
1083                  Check for `compile_commands.json` for IDE tooling.",
1084            },
1085            Marker {
1086                file: "Makefile",
1087                lang: "C / C++ (or generic)",
1088                tip: "Use `make` or `make <target>`. \
1089                  Check available targets with `make help` or by reading the Makefile.",
1090            },
1091        ];
1092
1093        // Check for C# / .NET — no single fixed filename, so glob for *.csproj / *.sln
1094        let is_dotnet = workspace.join("*.csproj").exists() || {
1095            // Fast check: look for any .csproj or .sln in the workspace root
1096            std::fs::read_dir(workspace)
1097                .map(|entries| {
1098                    entries.flatten().any(|e| {
1099                        let name = e.file_name();
1100                        let s = name.to_string_lossy();
1101                        s.ends_with(".csproj") || s.ends_with(".sln")
1102                    })
1103                })
1104                .unwrap_or(false)
1105        };
1106
1107        if is_dotnet {
1108            return "## Project Context\n\nThis is a **C# / .NET** project. \
1109             Use `dotnet build`, `dotnet test`, and `dotnet run`. \
1110             Follow C# coding conventions and async/await patterns."
1111                .to_string();
1112        }
1113
1114        for marker in &markers {
1115            if workspace.join(marker.file).exists() {
1116                return format!(
1117                    "## Project Context\n\nThis is a **{}** project. {}",
1118                    marker.lang, marker.tip
1119                );
1120            }
1121        }
1122
1123        String::new()
1124    }
1125
1126    /// Returns `true` for errors that are likely transient (network, timeout, I/O contention).
1127    /// Used to annotate tool error messages so the LLM knows whether retrying is safe.
1128    fn is_transient_error(msg: &str) -> bool {
1129        let lower = msg.to_lowercase();
1130        lower.contains("timeout")
1131        || lower.contains("timed out")
1132        || lower.contains("connection refused")
1133        || lower.contains("connection reset")
1134        || lower.contains("broken pipe")
1135        || lower.contains("temporarily unavailable")
1136        || lower.contains("resource temporarily unavailable")
1137        || lower.contains("os error 11")  // EAGAIN
1138        || lower.contains("os error 35")  // EAGAIN on macOS
1139        || lower.contains("rate limit")
1140        || lower.contains("too many requests")
1141        || lower.contains("service unavailable")
1142        || lower.contains("network unreachable")
1143    }
1144
1145    /// Returns `true` when a tool writes a file and is safe to run concurrently with other
1146    /// independent writes (no ordering dependencies, no side-channel output).
1147    fn is_parallel_safe_write(name: &str, _args: &serde_json::Value) -> bool {
1148        matches!(
1149            name,
1150            "write_file" | "edit_file" | "create_file" | "append_to_file" | "replace_in_file"
1151        )
1152    }
1153
1154    /// Extract the target file path from write-tool arguments so we can check for conflicts.
1155    fn extract_write_path(args: &serde_json::Value) -> Option<String> {
1156        // write_file / create_file / append_to_file / replace_in_file use "path"
1157        // edit_file uses "path" as well
1158        args.get("path")
1159            .and_then(|v| v.as_str())
1160            .map(|s| s.to_string())
1161    }
1162
1163    /// Execute a tool through the lane queue (if configured) or directly.
1164    /// Wraps execution in task lifecycle if task_manager is configured.
1165    async fn execute_tool_queued_or_direct(
1166        &self,
1167        name: &str,
1168        args: &serde_json::Value,
1169        ctx: &ToolContext,
1170    ) -> anyhow::Result<crate::tools::ToolResult> {
1171        // Create task for this tool execution if task_manager is available
1172        let task_id = if let Some(ref tm) = self.task_manager {
1173            let task = crate::task::Task::tool(name, args.clone());
1174            let id = task.id;
1175            tm.spawn(task);
1176            // Start the task immediately
1177            let _ = tm.start(id);
1178            Some(id)
1179        } else {
1180            None
1181        };
1182
1183        let result = self
1184            .execute_tool_queued_or_direct_inner(name, args, ctx)
1185            .await;
1186
1187        // Complete or fail the task based on result
1188        if let Some(ref tm) = self.task_manager {
1189            if let Some(tid) = task_id {
1190                match &result {
1191                    Ok(r) => {
1192                        let output = serde_json::json!({
1193                            "output": r.output.clone(),
1194                            "exit_code": r.exit_code,
1195                        });
1196                        let _ = tm.complete(tid, Some(output));
1197                    }
1198                    Err(e) => {
1199                        let _ = tm.fail(tid, e.to_string());
1200                    }
1201                }
1202            }
1203        }
1204
1205        result
1206    }
1207
    /// Inner execution without task lifecycle wrapping.
    ///
    /// Prefers the session lane queue when one is configured. Any queue-side
    /// failure — a tool error reported through the queue or a closed result
    /// channel — falls back to direct, timeout-bounded execution instead of
    /// failing the call outright.
    async fn execute_tool_queued_or_direct_inner(
        &self,
        name: &str,
        args: &serde_json::Value,
        ctx: &ToolContext,
    ) -> anyhow::Result<crate::tools::ToolResult> {
        if let Some(ref queue) = self.command_queue {
            // Package the call so the queue can run it on the appropriate lane.
            let command = ToolCommand::new(
                Arc::clone(&self.tool_executor),
                name.to_string(),
                args.clone(),
                ctx.clone(),
                self.config.skill_registry.clone(),
            );
            let rx = queue.submit_by_tool(name, Box::new(command)).await;
            // Outer Result: channel delivery; inner Result: the tool run itself.
            match rx.await {
                Ok(Ok(value)) => {
                    // Queue results come back as JSON: "output" is required,
                    // "exit_code" is optional and defaults to 0 (success).
                    let output = value["output"]
                        .as_str()
                        .ok_or_else(|| {
                            anyhow::anyhow!(
                                "Queue result missing 'output' field for tool '{}'",
                                name
                            )
                        })?
                        .to_string();
                    let exit_code = value["exit_code"].as_i64().unwrap_or(0) as i32;
                    return Ok(crate::tools::ToolResult {
                        name: name.to_string(),
                        output,
                        exit_code,
                        metadata: None,
                        images: Vec::new(),
                    });
                }
                // Both failure arms fall through to the direct path below.
                Ok(Err(e)) => {
                    tracing::warn!(
                        "Queue execution failed for tool '{}', falling back to direct: {}",
                        name,
                        e
                    );
                }
                Err(_) => {
                    tracing::warn!(
                        "Queue channel closed for tool '{}', falling back to direct",
                        name
                    );
                }
            }
        }
        // No queue configured, or the queue path failed: run directly with the
        // configured timeout applied.
        self.execute_tool_timed(name, args, ctx).await
    }
1261
    /// Call the LLM, handling streaming vs non-streaming internally.
    ///
    /// Streaming events (`TextDelta`, `ToolStart`) are forwarded to `event_tx`
    /// as they arrive. Non-streaming mode simply awaits the complete response.
    ///
    /// When a `ToolIndex` is configured, tools are filtered per-turn based on
    /// the last user message, reducing context usage with large tool sets.
    ///
    /// Returns `Err` on any LLM API failure. The circuit breaker in
    /// `execute_loop` wraps this call with retry logic for non-streaming mode.
    async fn call_llm(
        &self,
        messages: &[Message],
        system: Option<&str>,
        event_tx: &Option<mpsc::Sender<AgentEvent>>,
        cancel_token: &tokio_util::sync::CancellationToken,
    ) -> anyhow::Result<LlmResponse> {
        // Filter tools through ToolIndex if configured
        let tools = if let Some(ref index) = self.config.tool_index {
            // Use the text of the most recent user message as the search query;
            // empty string when there is no user text (matches fall to the index).
            let query = messages
                .iter()
                .rev()
                .find(|m| m.role == "user")
                .and_then(|m| {
                    m.content.iter().find_map(|b| match b {
                        crate::llm::ContentBlock::Text { text } => Some(text.as_str()),
                        _ => None,
                    })
                })
                .unwrap_or("");
            // NOTE(review): `index.len()` is used as the result limit, so this
            // ranks rather than caps the candidate set — confirm that's intended.
            let matches = index.search(query, index.len());
            let matched_names: std::collections::HashSet<&str> =
                matches.iter().map(|m| m.name.as_str()).collect();
            // Keep only configured tools whose names the index matched.
            self.config
                .tools
                .iter()
                .filter(|t| matched_names.contains(t.name.as_str()))
                .cloned()
                .collect::<Vec<_>>()
        } else {
            self.config.tools.clone()
        };

        // An event channel means the caller wants incremental output → stream.
        if event_tx.is_some() {
            let mut stream_rx = match self
                .llm_client
                .complete_streaming(messages, system, &tools, cancel_token.clone())
                .await
            {
                Ok(rx) => rx,
                Err(stream_error) => {
                    // Do not fall back to non-streaming if cancelled — propagate cancellation
                    if cancel_token.is_cancelled() {
                        anyhow::bail!("Operation cancelled by user");
                    }
                    tracing::warn!(
                        error = %stream_error,
                        "LLM streaming setup failed; falling back to non-streaming completion"
                    );
                    // One-shot fallback; its error message carries both failures.
                    return self
                        .llm_client
                        .complete(messages, system, &tools)
                        .await
                        .with_context(|| {
                            format!(
                                "LLM streaming call failed ({stream_error}); non-streaming fallback also failed"
                            )
                        });
                }
            };

            // Drain the stream, forwarding deltas, until Done, channel close,
            // or cancellation.
            let mut final_response: Option<LlmResponse> = None;
            loop {
                tokio::select! {
                    _ = cancel_token.cancelled() => {
                        tracing::info!("🛑 LLM streaming cancelled by CancellationToken");
                        anyhow::bail!("Operation cancelled by user");
                    }
                    event = stream_rx.recv() => {
                        match event {
                            Some(crate::llm::StreamEvent::TextDelta(text)) => {
                                if let Some(tx) = event_tx {
                                    // Send failures are ignored: the receiver may be gone.
                                    tx.send(AgentEvent::TextDelta { text }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::ToolUseStart { id, name }) => {
                                if let Some(tx) = event_tx {
                                    tx.send(AgentEvent::ToolStart { id, name }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::ToolUseInputDelta(delta)) => {
                                if let Some(tx) = event_tx {
                                    tx.send(AgentEvent::ToolInputDelta { delta }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::Done(resp)) => {
                                final_response = Some(resp);
                                break;
                            }
                            // Channel closed without Done — becomes an error below.
                            None => break,
                        }
                    }
                }
            }
            final_response.context("Stream ended without final response")
        } else {
            self.llm_client
                .complete(messages, system, &tools)
                .await
                .context("LLM call failed")
        }
    }
1374
1375    /// Create a tool context with streaming support.
1376    ///
1377    /// When `event_tx` is Some, spawns a forwarder task that converts
1378    /// `ToolStreamEvent::OutputDelta` into `AgentEvent::ToolOutputDelta`
1379    /// and sends them to the agent event channel.
1380    ///
1381    /// Returns the augmented `ToolContext`. The forwarder task runs until
1382    /// the tool-side sender is dropped (i.e., tool execution finishes).
1383    fn streaming_tool_context(
1384        &self,
1385        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1386        tool_id: &str,
1387        tool_name: &str,
1388    ) -> ToolContext {
1389        let mut ctx = self.tool_context.clone();
1390        if let Some(agent_tx) = event_tx {
1391            let (tool_tx, mut tool_rx) = mpsc::channel::<ToolStreamEvent>(64);
1392            ctx.event_tx = Some(tool_tx);
1393
1394            let agent_tx = agent_tx.clone();
1395            let tool_id = tool_id.to_string();
1396            let tool_name = tool_name.to_string();
1397            tokio::spawn(async move {
1398                while let Some(event) = tool_rx.recv().await {
1399                    match event {
1400                        ToolStreamEvent::OutputDelta(delta) => {
1401                            agent_tx
1402                                .send(AgentEvent::ToolOutputDelta {
1403                                    id: tool_id.clone(),
1404                                    name: tool_name.clone(),
1405                                    delta,
1406                                })
1407                                .await
1408                                .ok();
1409                        }
1410                    }
1411                }
1412            });
1413        }
1414        ctx
1415    }
1416
1417    /// Resolve context from all providers for a given prompt
1418    ///
1419    /// Returns aggregated context results from all configured providers.
1420    async fn resolve_context(&self, prompt: &str, session_id: Option<&str>) -> Vec<ContextResult> {
1421        if self.config.context_providers.is_empty() {
1422            return Vec::new();
1423        }
1424
1425        let query = ContextQuery::new(prompt).with_session_id(session_id.unwrap_or(""));
1426
1427        let futures = self
1428            .config
1429            .context_providers
1430            .iter()
1431            .map(|p| p.query(&query));
1432        let outcomes = join_all(futures).await;
1433
1434        outcomes
1435            .into_iter()
1436            .enumerate()
1437            .filter_map(|(i, r)| match r {
1438                Ok(result) if !result.is_empty() => Some(result),
1439                Ok(_) => None,
1440                Err(e) => {
1441                    tracing::warn!(
1442                        "Context provider '{}' failed: {}",
1443                        self.config.context_providers[i].name(),
1444                        e
1445                    );
1446                    None
1447                }
1448            })
1449            .collect()
1450    }
1451
1452    /// Detect whether the LLM's no-tool-call response looks like an intermediate
1453    /// step rather than a genuine final answer.
1454    ///
1455    /// Returns `true` when continuation should be injected. Heuristics:
1456    /// - Response ends with a colon or ellipsis (mid-thought)
1457    /// - Response contains phrases that signal incomplete work
1458    /// - Response is very short (< 80 chars) and doesn't look like a summary
1459    fn looks_incomplete(text: &str) -> bool {
1460        let t = text.trim();
1461        if t.is_empty() {
1462            return true;
1463        }
1464        // Very short responses that aren't clearly a final answer
1465        if t.len() < 80 && !t.contains('\n') {
1466            // Short single-line — could be a genuine one-liner answer, keep going
1467            // only if it ends with punctuation that signals continuation
1468            let ends_continuation =
1469                t.ends_with(':') || t.ends_with("...") || t.ends_with('…') || t.ends_with(',');
1470            if ends_continuation {
1471                return true;
1472            }
1473        }
1474        // Phrases that signal the LLM is describing what it will do rather than doing it
1475        let incomplete_phrases = [
1476            "i'll ",
1477            "i will ",
1478            "let me ",
1479            "i need to ",
1480            "i should ",
1481            "next, i",
1482            "first, i",
1483            "now i",
1484            "i'll start",
1485            "i'll begin",
1486            "i'll now",
1487            "let's start",
1488            "let's begin",
1489            "to do this",
1490            "i'm going to",
1491        ];
1492        let lower = t.to_lowercase();
1493        for phrase in &incomplete_phrases {
1494            if lower.contains(phrase) {
1495                return true;
1496            }
1497        }
1498        false
1499    }
1500
1501    /// Get the assembled system prompt from slots.
1502    #[allow(dead_code)]
1503    fn system_prompt(&self) -> String {
1504        self.config.prompt_slots.build()
1505    }
1506
1507    /// Get the assembled system prompt from slots with an explicit style.
1508    fn system_prompt_for_style(&self, style: AgentStyle) -> String {
1509        let mut slots = self.config.prompt_slots.clone();
1510        slots.style = Some(style);
1511        slots.build()
1512    }
1513
1514    async fn resolve_effective_style(&self, prompt: &str) -> AgentStyle {
1515        if let Some(style) = self.config.prompt_slots.style {
1516            return style;
1517        }
1518
1519        let (style, confidence) = AgentStyle::detect_with_confidence(prompt);
1520        if confidence != DetectionConfidence::Low {
1521            return style;
1522        }
1523
1524        match AgentStyle::detect_with_llm(self.llm_client.as_ref(), prompt).await {
1525            Ok(classified_style) => {
1526                tracing::debug!(
1527                    intent.classification = ?classified_style,
1528                    intent.source = "llm",
1529                    "Intent classified via LLM"
1530                );
1531                classified_style
1532            }
1533            Err(e) => {
1534                tracing::warn!(error = %e, "LLM intent classification failed, using keyword detection");
1535                style
1536            }
1537        }
1538    }
1539
1540    /// Detect if a subagent should be launched for this task.
1541    ///
1542    /// Checks for explicit `[subagent:name]` syntax first, then falls back
1543    /// to style-based auto-detection if a registry is configured.
1544    ///
1545    /// Returns `Some((AgentDefinition, cleaned_prompt))` if subagent should be launched,
1546    /// or `None` if normal execution should continue.
1547    fn should_launch_subagent(
1548        &self,
1549        prompt: &str,
1550        style: AgentStyle,
1551    ) -> Option<(Arc<crate::subagent::AgentDefinition>, String)> {
1552        let registry = self.config.subagent_registry.as_ref()?;
1553
1554        // Step 1: Check for explicit [subagent:name] syntax
1555        if let Some(caps) = prompt.find("[subagent:") {
1556            // `end_offset` is offset from `caps` to ']'
1557            if let Some(end_offset) = prompt[caps..].find(']') {
1558                // `end_abs` is the absolute index of ']'
1559                let end_abs = caps + end_offset;
1560                // name is from after "[subagent:" to ']'
1561                let name = &prompt[caps + 10..end_abs];
1562                if let Some(agent_def) = registry.get(name) {
1563                    // Remove the [subagent:name] tag from the prompt
1564                    let after_tag = prompt[end_abs + 1..].trim();
1565                    let cleaned = if caps > 0 {
1566                        format!("{} {}", &prompt[..caps].trim(), after_tag)
1567                    } else {
1568                        after_tag.to_string()
1569                    };
1570                    tracing::info!(subagent = %name, "Explicit subagent request detected");
1571                    return Some((Arc::new(agent_def), cleaned.trim().to_string()));
1572                }
1573            }
1574        }
1575
1576        // Step 2: Style-based auto-detection
1577        let agent_name = match style {
1578            AgentStyle::Explore => "explore",
1579            AgentStyle::Plan => "plan",
1580            AgentStyle::Verification => "verification",
1581            AgentStyle::CodeReview => "review",
1582            AgentStyle::GeneralPurpose => return None,
1583        };
1584
1585        if let Some(agent_def) = registry.get(agent_name) {
1586            tracing::info!(
1587                subagent = %agent_name,
1588                style = ?style,
1589                "Auto-detected subagent launch based on style"
1590            );
1591            return Some((Arc::new(agent_def), prompt.to_string()));
1592        }
1593
1594        None
1595    }
1596
1597    /// Detect if context perception is needed based on user prompt.
1598    ///
1599    /// Returns `Some(PreContextPerceptionEvent)` if the prompt suggests the model
1600    /// needs workspace knowledge (finding files, understanding code, etc.).
1601    pub fn detect_context_perception_intent(
1602        &self,
1603        prompt: &str,
1604        session_id: &str,
1605        workspace: &str,
1606    ) -> Option<PreContextPerceptionEvent> {
1607        let lower = prompt.to_lowercase();
1608
1609        // Pattern matching for different intents that suggest context perception is needed
1610        let intents: &[(&[&str], &str)] = &[
1611            // Locate: finding files, functions, resources
1612            (
1613                &[
1614                    "where is",
1615                    "where are",
1616                    "find the file",
1617                    "find all",
1618                    "find files",
1619                    "who wrote",
1620                    "locate",
1621                    "search for",
1622                    "look for",
1623                    "search",
1624                ],
1625                "locate",
1626            ),
1627            // Understand: explaining how something works
1628            (
1629                &[
1630                    "how does",
1631                    "what does",
1632                    "explain",
1633                    "understand",
1634                    "what is this",
1635                    "how does this work",
1636                ],
1637                "understand",
1638            ),
1639            // Retrieve: recalling from memory/past
1640            (
1641                &[
1642                    "remember",
1643                    "earlier",
1644                    "before",
1645                    "previously",
1646                    "last time",
1647                    "past",
1648                    "previous",
1649                ],
1650                "retrieve",
1651            ),
1652            // Explore: understanding structure
1653            (
1654                &[
1655                    "how is organized",
1656                    "project structure",
1657                    "what files",
1658                    "show me the structure",
1659                    "explore",
1660                ],
1661                "explore",
1662            ),
1663            // Reason: asking why/causality
1664            (
1665                &[
1666                    "why did",
1667                    "why is",
1668                    "cause",
1669                    "reason",
1670                    "what happened",
1671                    "why does",
1672                ],
1673                "reason",
1674            ),
1675            // Validate: checking correctness
1676            (
1677                &["is this correct", "verify", "validate", "check if", "debug"],
1678                "validate",
1679            ),
1680            // Compare: comparing things
1681            (
1682                &[
1683                    "difference between",
1684                    "compare",
1685                    "versus",
1686                    " vs ",
1687                    "different from",
1688                ],
1689                "compare",
1690            ),
1691            // Track: status/history
1692            (
1693                &[
1694                    "status",
1695                    "progress",
1696                    "how far",
1697                    "history",
1698                    "what's the current",
1699                ],
1700                "track",
1701            ),
1702        ];
1703
1704        // Detect target type from keywords
1705        let target_type = if lower.contains("function") || lower.contains("method") {
1706            "function"
1707        } else if lower.contains("file") || lower.contains("config") {
1708            "file"
1709        } else if lower.contains("class") {
1710            "entity"
1711        } else if lower.contains("module") || lower.contains("package") {
1712            "module"
1713        } else if lower.contains("test") {
1714            "test"
1715        } else {
1716            "unknown"
1717        };
1718
1719        // Find matching intent
1720        let matched_intent = intents
1721            .iter()
1722            .find(|(patterns, _)| patterns.iter().any(|p| lower.contains(p)));
1723
1724        matched_intent.map(|(patterns, intent)| {
1725            // Extract target name if possible (simplified extraction)
1726            let target_name = extract_target_name_from_prompt(prompt, patterns);
1727
1728            PreContextPerceptionEvent {
1729                session_id: session_id.to_string(),
1730                intent: intent.to_string(),
1731                target_type: target_type.to_string(),
1732                target_name,
1733                domain: detect_domain_from_prompt(prompt),
1734                query: Some(prompt.to_string()),
1735                working_directory: workspace.to_string(),
1736                urgency: "normal".to_string(),
1737            }
1738        })
1739    }
1740
1741    /// Fire PreContextPerception hook and wait for harness decision.
1742    async fn fire_pre_context_perception(&self, event: &PreContextPerceptionEvent) -> HookResult {
1743        if let Some(he) = &self.config.hook_engine {
1744            let hook_event = HookEvent::PreContextPerception(event.clone());
1745            he.fire(&hook_event).await
1746        } else {
1747            HookResult::continue_()
1748        }
1749    }
1750
1751    /// Fire IntentDetection hook and wait for harness decision.
1752    ///
1753    /// This is called on every prompt to detect user intent via the AHP harness.
1754    /// Returns the detected intent if the harness provides one, or None if blocked/failed.
1755    async fn fire_intent_detection(
1756        &self,
1757        prompt: &str,
1758        session_id: &str,
1759        workspace: &str,
1760    ) -> Option<IntentDetectionResult> {
1761        let event = IntentDetectionEvent {
1762            session_id: session_id.to_string(),
1763            prompt: prompt.to_string(),
1764            workspace: workspace.to_string(),
1765            language_hint: detect_language_hint(prompt),
1766        };
1767
1768        let hook_result = if let Some(he) = &self.config.hook_engine {
1769            let hook_event = HookEvent::IntentDetection(event);
1770            he.fire(&hook_event).await
1771        } else {
1772            return None;
1773        };
1774
1775        match hook_result {
1776            HookResult::Continue(Some(modified)) => {
1777                // Parse the intent detection result
1778                serde_json::from_value::<IntentDetectionResult>(modified).ok()
1779            }
1780            HookResult::Block(_) => {
1781                // Harness blocked intent detection - use fallback
1782                tracing::info!("AHP harness blocked intent detection");
1783                None
1784            }
1785            _ => None,
1786        }
1787    }
1788
1789    /// Apply injected context from AHP harness decision.
1790    #[cfg(feature = "ahp")]
1791    fn apply_injected_context(&self, injected: InjectedContext) -> Vec<ContextResult> {
1792        let mut results = Vec::new();
1793
1794        // Convert facts to ContextResult
1795        if !injected.facts.is_empty() {
1796            let items: Vec<ContextItem> = injected
1797                .facts
1798                .into_iter()
1799                .map(|f| {
1800                    let token_count = estimate_tokens(&f.content);
1801                    ContextItem {
1802                        id: uuid::Uuid::new_v4().to_string(),
1803                        context_type: ContextType::Resource,
1804                        content: f.content,
1805                        token_count,
1806                        relevance: f.confidence,
1807                        source: Some(f.source),
1808                        metadata: std::collections::HashMap::new(),
1809                    }
1810                })
1811                .collect();
1812
1813            let total_tokens: usize = items.iter().map(|i| i.token_count).sum();
1814
1815            results.push(ContextResult {
1816                items,
1817                total_tokens,
1818                provider: "ahp_harness".to_string(),
1819                truncated: false,
1820            });
1821        }
1822
1823        // Handle file_contents
1824        if let Some(file_contents) = injected.file_contents {
1825            let items: Vec<ContextItem> = file_contents
1826                .into_iter()
1827                .map(|f| {
1828                    let token_count = estimate_tokens(&f.snippet);
1829                    ContextItem {
1830                        id: uuid::Uuid::new_v4().to_string(),
1831                        context_type: ContextType::Resource,
1832                        content: f.snippet,
1833                        token_count,
1834                        relevance: f.relevance_score,
1835                        source: Some(f.path),
1836                        metadata: std::collections::HashMap::new(),
1837                    }
1838                })
1839                .collect();
1840
1841            let total_tokens: usize = items.iter().map(|i| i.token_count).sum();
1842
1843            results.push(ContextResult {
1844                items,
1845                total_tokens,
1846                provider: "ahp_harness".to_string(),
1847                truncated: false,
1848            });
1849        }
1850
1851        // Handle project_summary
1852        if let Some(summary) = injected.project_summary {
1853            let content = format!(
1854                "Project: {}\n{}",
1855                summary.project_name, summary.structure_description
1856            );
1857            let token_count = estimate_tokens(&content);
1858
1859            results.push(ContextResult {
1860                items: vec![ContextItem {
1861                    id: uuid::Uuid::new_v4().to_string(),
1862                    context_type: ContextType::Resource,
1863                    content,
1864                    token_count,
1865                    relevance: 0.9,
1866                    source: Some("ahp://project-summary".to_string()),
1867                    metadata: std::collections::HashMap::new(),
1868                }],
1869                total_tokens: token_count,
1870                provider: "ahp_harness".to_string(),
1871                truncated: false,
1872            });
1873        }
1874
1875        // Handle knowledge
1876        if let Some(knowledge) = injected.knowledge {
1877            let items: Vec<ContextItem> = knowledge
1878                .into_iter()
1879                .map(|k| {
1880                    let token_count = estimate_tokens(&k);
1881                    ContextItem {
1882                        id: uuid::Uuid::new_v4().to_string(),
1883                        context_type: ContextType::Resource,
1884                        content: k,
1885                        token_count,
1886                        relevance: 0.8,
1887                        source: Some("ahp://knowledge".to_string()),
1888                        metadata: std::collections::HashMap::new(),
1889                    }
1890                })
1891                .collect();
1892
1893            let total_tokens: usize = items.iter().map(|i| i.token_count).sum();
1894
1895            results.push(ContextResult {
1896                items,
1897                total_tokens,
1898                provider: "ahp_harness".to_string(),
1899                truncated: false,
1900            });
1901        }
1902
1903        results
1904    }
1905
1906    /// Build augmented system prompt with context
1907    #[allow(dead_code)]
1908    fn build_augmented_system_prompt(&self, context_results: &[ContextResult]) -> Option<String> {
1909        let base = self.system_prompt();
1910        self.build_augmented_system_prompt_with_base(&base, context_results)
1911    }
1912
1913    fn build_augmented_system_prompt_with_base(
1914        &self,
1915        base: &str,
1916        context_results: &[ContextResult],
1917    ) -> Option<String> {
1918        let base = base.to_string();
1919
1920        // Use live tool executor definitions so tools added via add_mcp_server() are included
1921        let live_tools = self.tool_executor.definitions();
1922        let mcp_tools: Vec<&ToolDefinition> = live_tools
1923            .iter()
1924            .filter(|t| t.name.starts_with("mcp__"))
1925            .collect();
1926
1927        let mcp_section = if mcp_tools.is_empty() {
1928            String::new()
1929        } else {
1930            let mut lines = vec![
1931                "## MCP Tools".to_string(),
1932                String::new(),
1933                "The following MCP (Model Context Protocol) tools are available. Use them when the task requires external capabilities beyond the built-in tools:".to_string(),
1934                String::new(),
1935            ];
1936            for tool in &mcp_tools {
1937                let display = format!("- `{}` — {}", tool.name, tool.description);
1938                lines.push(display);
1939            }
1940            lines.join("\n")
1941        };
1942
1943        let parts: Vec<&str> = [base.as_str(), mcp_section.as_str()]
1944            .iter()
1945            .filter(|s| !s.is_empty())
1946            .copied()
1947            .collect();
1948
1949        // Auto-detect project type from workspace and inject language-specific guidelines,
1950        // but only when the user hasn't already set a custom `guidelines` slot.
1951        let project_hint = if self.config.prompt_slots.guidelines.is_none() {
1952            Self::detect_project_hint(&self.tool_context.workspace)
1953        } else {
1954            String::new()
1955        };
1956
1957        if context_results.is_empty() {
1958            if project_hint.is_empty() {
1959                return Some(parts.join("\n\n"));
1960            }
1961            return Some(format!("{}\n\n{}", parts.join("\n\n"), project_hint));
1962        }
1963
1964        // Build context XML block
1965        let context_xml: String = context_results
1966            .iter()
1967            .map(|r| r.to_xml())
1968            .collect::<Vec<_>>()
1969            .join("\n\n");
1970
1971        if project_hint.is_empty() {
1972            Some(format!("{}\n\n{}", parts.join("\n\n"), context_xml))
1973        } else {
1974            Some(format!(
1975                "{}\n\n{}\n\n{}",
1976                parts.join("\n\n"),
1977                project_hint,
1978                context_xml
1979            ))
1980        }
1981    }
1982
1983    /// Notify providers of turn completion for memory extraction
1984    async fn notify_turn_complete(&self, session_id: &str, prompt: &str, response: &str) {
1985        let futures = self
1986            .config
1987            .context_providers
1988            .iter()
1989            .map(|p| p.on_turn_complete(session_id, prompt, response));
1990        let outcomes = join_all(futures).await;
1991
1992        for (i, result) in outcomes.into_iter().enumerate() {
1993            if let Err(e) = result {
1994                tracing::warn!(
1995                    "Context provider '{}' on_turn_complete failed: {}",
1996                    self.config.context_providers[i].name(),
1997                    e
1998                );
1999            }
2000        }
2001    }
2002
2003    /// Fire PreToolUse hook event before tool execution.
2004    /// Returns the HookResult which may block the tool call.
2005    async fn fire_pre_tool_use(
2006        &self,
2007        session_id: &str,
2008        tool_name: &str,
2009        args: &serde_json::Value,
2010        recent_tools: Vec<String>,
2011    ) -> Option<HookResult> {
2012        if let Some(he) = &self.config.hook_engine {
2013            // Convert null args to empty object so JS callbacks don't get null.input errors
2014            let safe_args = if args.is_null() {
2015                serde_json::Value::Object(Default::default())
2016            } else {
2017                args.clone()
2018            };
2019            let event = HookEvent::PreToolUse(PreToolUseEvent {
2020                session_id: session_id.to_string(),
2021                tool: tool_name.to_string(),
2022                args: safe_args,
2023                working_directory: self.tool_context.workspace.to_string_lossy().to_string(),
2024                recent_tools,
2025            });
2026            let result = he.fire(&event).await;
2027            if result.is_block() {
2028                return Some(result);
2029            }
2030        }
2031        None
2032    }
2033
2034    /// Fire PostToolUse hook event after tool execution (fire-and-forget).
2035    async fn fire_post_tool_use(
2036        &self,
2037        session_id: &str,
2038        tool_name: &str,
2039        args: &serde_json::Value,
2040        output: &str,
2041        success: bool,
2042        duration_ms: u64,
2043    ) {
2044        if let Some(he) = &self.config.hook_engine {
2045            // Convert null args to empty object so JS callbacks don't get null.input errors
2046            let safe_args = if args.is_null() {
2047                serde_json::Value::Object(Default::default())
2048            } else {
2049                args.clone()
2050            };
2051            let event = HookEvent::PostToolUse(PostToolUseEvent {
2052                session_id: session_id.to_string(),
2053                tool: tool_name.to_string(),
2054                args: safe_args,
2055                result: ToolResultData {
2056                    success,
2057                    output: output.to_string(),
2058                    exit_code: if success { Some(0) } else { Some(1) },
2059                    duration_ms,
2060                },
2061            });
2062            let he = Arc::clone(he);
2063            tokio::spawn(async move {
2064                let _ = he.fire(&event).await;
2065            });
2066        }
2067    }
2068
2069    /// Fire GenerateStart hook event before an LLM call.
2070    async fn fire_generate_start(
2071        &self,
2072        session_id: &str,
2073        prompt: &str,
2074        system_prompt: &Option<String>,
2075    ) {
2076        if let Some(he) = &self.config.hook_engine {
2077            let event = HookEvent::GenerateStart(GenerateStartEvent {
2078                session_id: session_id.to_string(),
2079                prompt: prompt.to_string(),
2080                system_prompt: system_prompt.clone(),
2081                model_provider: String::new(),
2082                model_name: String::new(),
2083                available_tools: self.config.tools.iter().map(|t| t.name.clone()).collect(),
2084            });
2085            let _ = he.fire(&event).await;
2086        }
2087    }
2088
2089    /// Fire GenerateEnd hook event after an LLM call.
2090    async fn fire_generate_end(
2091        &self,
2092        session_id: &str,
2093        prompt: &str,
2094        response: &LlmResponse,
2095        duration_ms: u64,
2096    ) {
2097        if let Some(he) = &self.config.hook_engine {
2098            let tool_calls: Vec<ToolCallInfo> = response
2099                .tool_calls()
2100                .iter()
2101                .map(|tc| {
2102                    let args = if tc.args.is_null() {
2103                        serde_json::Value::Object(Default::default())
2104                    } else {
2105                        tc.args.clone()
2106                    };
2107                    ToolCallInfo {
2108                        name: tc.name.clone(),
2109                        args,
2110                    }
2111                })
2112                .collect();
2113
2114            let event = HookEvent::GenerateEnd(GenerateEndEvent {
2115                session_id: session_id.to_string(),
2116                prompt: prompt.to_string(),
2117                response_text: response.text().to_string(),
2118                tool_calls,
2119                usage: TokenUsageInfo {
2120                    prompt_tokens: response.usage.prompt_tokens as i32,
2121                    completion_tokens: response.usage.completion_tokens as i32,
2122                    total_tokens: response.usage.total_tokens as i32,
2123                },
2124                duration_ms,
2125            });
2126            let _ = he.fire(&event).await;
2127        }
2128    }
2129
2130    /// Fire PrePrompt hook event before prompt augmentation.
2131    /// Returns optional modified prompt text from the hook.
2132    async fn fire_pre_prompt(
2133        &self,
2134        session_id: &str,
2135        prompt: &str,
2136        system_prompt: &Option<String>,
2137        message_count: usize,
2138    ) -> Option<String> {
2139        if let Some(he) = &self.config.hook_engine {
2140            let event = HookEvent::PrePrompt(PrePromptEvent {
2141                session_id: session_id.to_string(),
2142                prompt: prompt.to_string(),
2143                system_prompt: system_prompt.clone(),
2144                message_count,
2145            });
2146            let result = he.fire(&event).await;
2147            if let HookResult::Continue(Some(modified)) = result {
2148                // Extract modified prompt from hook response
2149                if let Some(new_prompt) = modified.get("prompt").and_then(|v| v.as_str()) {
2150                    return Some(new_prompt.to_string());
2151                }
2152            }
2153        }
2154        None
2155    }
2156
2157    /// Fire PostResponse hook event after the agent loop completes.
2158    async fn fire_post_response(
2159        &self,
2160        session_id: &str,
2161        response_text: &str,
2162        tool_calls_count: usize,
2163        usage: &TokenUsage,
2164        duration_ms: u64,
2165    ) {
2166        if let Some(he) = &self.config.hook_engine {
2167            let event = HookEvent::PostResponse(PostResponseEvent {
2168                session_id: session_id.to_string(),
2169                response_text: response_text.to_string(),
2170                tool_calls_count,
2171                usage: TokenUsageInfo {
2172                    prompt_tokens: usage.prompt_tokens as i32,
2173                    completion_tokens: usage.completion_tokens as i32,
2174                    total_tokens: usage.total_tokens as i32,
2175                },
2176                duration_ms,
2177            });
2178            let he = Arc::clone(he);
2179            tokio::spawn(async move {
2180                let _ = he.fire(&event).await;
2181            });
2182        }
2183    }
2184
2185    /// Fire OnError hook event when an error occurs.
2186    async fn fire_on_error(
2187        &self,
2188        session_id: &str,
2189        error_type: ErrorType,
2190        error_message: &str,
2191        context: serde_json::Value,
2192    ) {
2193        if let Some(he) = &self.config.hook_engine {
2194            let event = HookEvent::OnError(OnErrorEvent {
2195                session_id: session_id.to_string(),
2196                error_type,
2197                error_message: error_message.to_string(),
2198                context,
2199            });
2200            let he = Arc::clone(he);
2201            tokio::spawn(async move {
2202                let _ = he.fire(&event).await;
2203            });
2204        }
2205    }
2206
    /// Execute the agent loop for a prompt
    ///
    /// Takes the conversation history and a new user prompt.
    /// Returns the agent result and updated message history.
    /// When event_tx is provided, uses streaming LLM API for real-time text output.
    ///
    /// Convenience wrapper around `execute_with_session` with no session id
    /// and no cancellation token (a local token is created internally).
    pub async fn execute(
        &self,
        history: &[Message],
        prompt: &str,
        event_tx: Option<mpsc::Sender<AgentEvent>>,
    ) -> Result<AgentResult> {
        self.execute_with_session(history, prompt, None, event_tx, None)
            .await
    }
2221
2222    /// Execute the agent loop with pre-built messages (user message already included).
2223    ///
2224    /// Used by `send_with_attachments` / `stream_with_attachments` where the
2225    /// user message contains multi-modal content and is already appended to
2226    /// the messages vec.
2227    pub async fn execute_from_messages(
2228        &self,
2229        messages: Vec<Message>,
2230        session_id: Option<&str>,
2231        event_tx: Option<mpsc::Sender<AgentEvent>>,
2232        cancel_token: Option<&tokio_util::sync::CancellationToken>,
2233    ) -> Result<AgentResult> {
2234        let default_token = tokio_util::sync::CancellationToken::new();
2235        let token = cancel_token.unwrap_or(&default_token);
2236        tracing::info!(
2237            a3s.session.id = session_id.unwrap_or("none"),
2238            a3s.agent.max_turns = self.config.max_tool_rounds,
2239            "a3s.agent.execute_from_messages started"
2240        );
2241
2242        // Extract the last user message text for hooks, memory recall, and events.
2243        // Pass empty prompt so execute_loop skips adding a duplicate user message,
2244        // but provide effective_prompt for hook/memory/event purposes.
2245        let effective_prompt = messages
2246            .iter()
2247            .rev()
2248            .find(|m| m.role == "user")
2249            .map(|m| m.text())
2250            .unwrap_or_default();
2251
2252        let result = self
2253            .execute_loop_inner(
2254                &messages,
2255                "",
2256                &effective_prompt,
2257                session_id,
2258                event_tx,
2259                token,
2260                true, // emit_end: this is a standalone execution
2261            )
2262            .await;
2263
2264        match &result {
2265            Ok(r) => tracing::info!(
2266                a3s.agent.tool_calls_count = r.tool_calls_count,
2267                a3s.llm.total_tokens = r.usage.total_tokens,
2268                "a3s.agent.execute_from_messages completed"
2269            ),
2270            Err(e) => tracing::warn!(
2271                error = %e,
2272                "a3s.agent.execute_from_messages failed"
2273            ),
2274        }
2275
2276        result
2277    }
2278
    /// Execute the agent loop for a prompt with session context
    ///
    /// Takes the conversation history, user prompt, and optional session ID.
    /// When session_id is provided, context providers can use it for session-specific context.
    ///
    /// High-level flow:
    /// 1. Resolve the effective agent style for this prompt.
    /// 2. Optionally hand off to a subagent (via the configured callback).
    /// 3. Decide between planning-mode execution and the plain loop.
    /// 4. Track the run as an agent task when a task manager is configured.
    /// 5. Fire PostResponse / OnError hooks based on the outcome.
    pub async fn execute_with_session(
        &self,
        history: &[Message],
        prompt: &str,
        session_id: Option<&str>,
        event_tx: Option<mpsc::Sender<AgentEvent>>,
        cancel_token: Option<&tokio_util::sync::CancellationToken>,
    ) -> Result<AgentResult> {
        // Callers without a token get a local one that is never cancelled.
        let default_token = tokio_util::sync::CancellationToken::new();
        let token = cancel_token.unwrap_or(&default_token);
        tracing::info!(
            a3s.session.id = session_id.unwrap_or("none"),
            a3s.agent.max_turns = self.config.max_tool_rounds,
            "a3s.agent.execute started"
        );

        let effective_style = self.resolve_effective_style(prompt).await;

        // Check if a subagent should be launched for this task
        if let Some((subagent_def, cleaned_prompt)) =
            self.should_launch_subagent(prompt, effective_style)
        {
            tracing::info!(subagent = %subagent_def.name, "Subagent launch requested");

            // If callback is configured, use it to handle subagent launch
            if let Some(ref callback) = self.config.on_subagent_launch {
                if let Some(result) = callback(&subagent_def, &cleaned_prompt) {
                    // Subagent fully handled the prompt; return its result as ours.
                    tracing::info!(subagent = %subagent_def.name, "Subagent executed successfully");
                    return result;
                }
            }
            // If callback not configured or returned None, fall through to normal execution
            tracing::debug!(subagent = %subagent_def.name, "No callback or callback returned None, continuing with normal execution");
        }

        // Determine whether to use planning mode
        let use_planning = if self.config.planning_mode == PlanningMode::Auto {
            // Auto: the resolved style decides whether planning is needed.
            effective_style.requires_planning()
        } else {
            // Explicit mode: Enabled or Disabled
            self.config.planning_mode.should_plan(prompt)
        };

        // Create agent task if task_manager is available
        let task_id = if let Some(ref tm) = self.task_manager {
            let workspace = self.tool_context.workspace.display().to_string();
            let task = crate::task::Task::agent("agent", &workspace, prompt);
            let id = task.id;
            tm.spawn(task);
            // Start failures are intentionally ignored; tracking is best-effort.
            let _ = tm.start(id);
            Some(id)
        } else {
            None
        };

        let result = if use_planning {
            self.execute_with_planning(history, prompt, event_tx).await
        } else {
            self.execute_loop(history, prompt, session_id, event_tx, token, true)
                .await
        };

        // Complete or fail agent task based on result
        if let Some(ref tm) = self.task_manager {
            if let Some(tid) = task_id {
                match &result {
                    Ok(r) => {
                        let output = serde_json::json!({
                            "text": r.text,
                            "tool_calls_count": r.tool_calls_count,
                            "usage": r.usage,
                        });
                        let _ = tm.complete(tid, Some(output));
                    }
                    Err(e) => {
                        let _ = tm.fail(tid, e.to_string());
                    }
                }
            }
        }

        match &result {
            Ok(r) => {
                tracing::info!(
                    a3s.agent.tool_calls_count = r.tool_calls_count,
                    a3s.llm.total_tokens = r.usage.total_tokens,
                    "a3s.agent.execute completed"
                );
                // Fire PostResponse hook
                self.fire_post_response(
                    session_id.unwrap_or(""),
                    &r.text,
                    r.tool_calls_count,
                    &r.usage,
                    0, // duration tracked externally
                )
                .await;
            }
            Err(e) => {
                tracing::warn!(
                    error = %e,
                    "a3s.agent.execute failed"
                );
                // Fire OnError hook
                self.fire_on_error(
                    session_id.unwrap_or(""),
                    ErrorType::Other,
                    &e.to_string(),
                    serde_json::json!({"phase": "execute"}),
                )
                .await;
            }
        }

        result
    }
2399
    /// Core execution loop (without planning routing).
    ///
    /// This is the inner loop that runs LLM calls and tool executions.
    /// Called directly by `execute_with_session` (after planning check)
    /// and by `execute_plan` (for individual steps, bypassing planning).
    ///
    /// Thin forwarder to `execute_loop_inner`: it passes `prompt` twice, as
    /// both the message to append and the effective prompt used for
    /// hooks/memory/events.
    async fn execute_loop(
        &self,
        history: &[Message],
        prompt: &str,
        session_id: Option<&str>,
        event_tx: Option<mpsc::Sender<AgentEvent>>,
        cancel_token: &tokio_util::sync::CancellationToken,
        emit_end: bool,
    ) -> Result<AgentResult> {
        // When called via execute_loop, the prompt is used for both
        // message-adding and hook/memory/event purposes.
        self.execute_loop_inner(
            history,
            prompt,
            prompt,
            session_id,
            event_tx,
            cancel_token,
            emit_end,
        )
        .await
    }
2427
2428    /// Inner execution loop.
2429    ///
2430    /// `msg_prompt` controls whether a user message is appended (empty = skip).
2431    /// `effective_prompt` is used for hooks, memory recall, taint tracking, and events.
2432    /// `emit_end` controls whether to send `AgentEvent::End` when the loop completes
2433    /// (should be false when called from `execute_plan` to avoid duplicate End events).
2434    #[allow(clippy::too_many_arguments)]
2435    async fn execute_loop_inner(
2436        &self,
2437        history: &[Message],
2438        msg_prompt: &str,
2439        effective_prompt: &str,
2440        session_id: Option<&str>,
2441        event_tx: Option<mpsc::Sender<AgentEvent>>,
2442        cancel_token: &tokio_util::sync::CancellationToken,
2443        emit_end: bool,
2444    ) -> Result<AgentResult> {
2445        let mut messages = history.to_vec();
2446        let mut total_usage = TokenUsage::default();
2447        let mut tool_calls_count = 0;
2448        let mut turn = 0;
2449        // Consecutive malformed-tool-args errors (4.1 parse error recovery)
2450        let mut parse_error_count: u32 = 0;
2451        // Continuation injection counter
2452        let mut continuation_count: u32 = 0;
2453        let mut recent_tool_signatures: Vec<String> = Vec::new();
2454        let style_prompt = if effective_prompt.is_empty() {
2455            msg_prompt
2456        } else {
2457            effective_prompt
2458        };
2459        let effective_style = self.resolve_effective_style(style_prompt).await;
2460        let effective_system_prompt = self.system_prompt_for_style(effective_style);
2461        if let Some(tx) = &event_tx {
2462            tx.send(AgentEvent::AgentModeChanged {
2463                mode: effective_style.runtime_mode().to_string(),
2464                agent: effective_style.builtin_agent_name().to_string(),
2465                description: effective_style.description().to_string(),
2466            })
2467            .await
2468            .ok();
2469        }
2470
2471        // Send start event
2472        if let Some(tx) = &event_tx {
2473            tx.send(AgentEvent::Start {
2474                prompt: effective_prompt.to_string(),
2475            })
2476            .await
2477            .ok();
2478        }
2479
2480        // Forward queue events (CommandDeadLettered, CommandRetry, QueueAlert) to event stream
2481        let _queue_forward_handle =
2482            if let (Some(ref queue), Some(ref tx)) = (&self.command_queue, &event_tx) {
2483                let mut rx = queue.subscribe();
2484                let tx = tx.clone();
2485                Some(tokio::spawn(async move {
2486                    while let Ok(event) = rx.recv().await {
2487                        if tx.send(event).await.is_err() {
2488                            break;
2489                        }
2490                    }
2491                }))
2492            } else {
2493                None
2494            };
2495
2496        // Fire PrePrompt hook (may modify the prompt)
2497        let built_system_prompt = Some(effective_system_prompt.clone());
2498        let hooked_prompt = if let Some(modified) = self
2499            .fire_pre_prompt(
2500                session_id.unwrap_or(""),
2501                effective_prompt,
2502                &built_system_prompt,
2503                messages.len(),
2504            )
2505            .await
2506        {
2507            modified
2508        } else {
2509            effective_prompt.to_string()
2510        };
2511        let effective_prompt = hooked_prompt.as_str();
2512
2513        // Taint-track the incoming prompt for sensitive data detection
2514        if let Some(ref sp) = self.config.security_provider {
2515            sp.taint_input(effective_prompt);
2516        }
2517
2518        // Recall relevant memories and inject into system prompt
2519        let system_with_memory = if let Some(ref memory) = self.config.memory {
2520            match memory.recall_similar(effective_prompt, 5).await {
2521                Ok(items) if !items.is_empty() => {
2522                    if let Some(tx) = &event_tx {
2523                        for item in &items {
2524                            tx.send(AgentEvent::MemoryRecalled {
2525                                memory_id: item.id.clone(),
2526                                content: item.content.clone(),
2527                                relevance: item.relevance_score(),
2528                            })
2529                            .await
2530                            .ok();
2531                        }
2532                        tx.send(AgentEvent::MemoriesSearched {
2533                            query: Some(effective_prompt.to_string()),
2534                            tags: Vec::new(),
2535                            result_count: items.len(),
2536                        })
2537                        .await
2538                        .ok();
2539                    }
2540                    let memory_context = items
2541                        .iter()
2542                        .map(|i| format!("- {}", i.content))
2543                        .collect::<Vec<_>>()
2544                        .join(
2545                            "
2546",
2547                        );
2548                    let base = effective_system_prompt.clone();
2549                    Some(format!(
2550                        "{}
2551
2552## Relevant past experience
2553{}",
2554                        base, memory_context
2555                    ))
2556                }
2557                _ => Some(effective_system_prompt.clone()),
2558            }
2559        } else {
2560            Some(effective_system_prompt.clone())
2561        };
2562
2563        // Resolve context from providers on first turn (before adding user message)
2564        // Intent-driven: detect context perception need first via AHP harness, then fire hook
2565        let workspace = self.tool_context.workspace.display().to_string();
2566        let session_id_str = session_id.unwrap_or("");
2567        let context_results = if !self.config.context_providers.is_empty() {
2568            // Step 1: Fire IntentDetection harness point on EVERY prompt
2569            #[allow(clippy::needless_borrow)]
2570            let harness_intent = self
2571                .fire_intent_detection(effective_prompt, &session_id_str, &workspace)
2572                .await;
2573
2574            // Step 2: Build perception event from harness result, or fallback to local detection
2575            #[allow(clippy::needless_borrow)]
2576            let perception_event = if let Some(detected) = harness_intent {
2577                tracing::info!(
2578                    intent = %detected.detected_intent,
2579                    confidence = %detected.confidence,
2580                    "Intent detected from AHP harness"
2581                );
2582                Some(build_pre_context_perception_from_intent(
2583                    detected,
2584                    effective_prompt,
2585                    &session_id_str,
2586                    &workspace,
2587                ))
2588            } else {
2589                // Fallback to local keyword detection
2590                tracing::debug!("No intent from harness, using local keyword detection");
2591                self.detect_context_perception_intent(effective_prompt, &session_id_str, &workspace)
2592            };
2593
2594            if let Some(perception_event) = perception_event {
2595                // Step 3: Fire PreContextPerception hook to get harness decision
2596                tracing::info!(
2597                    intent = %perception_event.intent,
2598                    target_type = %perception_event.target_type,
2599                    "Context perception intent detected, firing AHP hook"
2600                );
2601
2602                let hook_result = self.fire_pre_context_perception(&perception_event).await;
2603
2604                match hook_result {
2605                    HookResult::Continue(Some(modified_context)) => {
2606                        // AHP harness returned injected context - parse and use it
2607                        #[cfg(feature = "ahp")]
2608                        {
2609                            if let Ok(injected) =
2610                                serde_json::from_value::<InjectedContext>(modified_context)
2611                            {
2612                                tracing::info!(
2613                                    facts = injected.facts.len(),
2614                                    "Using injected context from AHP harness"
2615                                );
2616                                self.apply_injected_context(injected)
2617                            } else {
2618                                // Fall back to normal providers if parsing fails
2619                                tracing::warn!(
2620                                    "Failed to parse injected context, falling back to providers"
2621                                );
2622                                self.resolve_context(effective_prompt, session_id).await
2623                            }
2624                        }
2625                        #[cfg(not(feature = "ahp"))]
2626                        {
2627                            // Without AHP, fall back to normal providers
2628                            let _ = modified_context; // suppress unused warning
2629                            self.resolve_context(effective_prompt, session_id).await
2630                        }
2631                    }
2632                    HookResult::Block(_) => {
2633                        // Harness blocked context injection - skip
2634                        tracing::info!("AHP harness blocked context injection");
2635                        Vec::new()
2636                    }
2637                    _ => {
2638                        // No modification or unknown result, proceed with normal providers
2639                        self.resolve_context(effective_prompt, session_id).await
2640                    }
2641                }
2642            } else {
2643                // No intent detected, proceed with normal providers
2644                self.resolve_context(effective_prompt, session_id).await
2645            }
2646        } else {
2647            Vec::new()
2648        };
2649
2650        // Send context resolved event
2651        if let Some(tx) = &event_tx {
2652            let total_items: usize = context_results.iter().map(|r| r.items.len()).sum();
2653            let total_tokens: usize = context_results.iter().map(|r| r.total_tokens).sum();
2654
2655            tracing::info!(
2656                context_items = total_items,
2657                context_tokens = total_tokens,
2658                "Context resolution completed"
2659            );
2660
2661            tx.send(AgentEvent::ContextResolved {
2662                total_items,
2663                total_tokens,
2664            })
2665            .await
2666            .ok();
2667        }
2668
2669        let augmented_system = self
2670            .build_augmented_system_prompt_with_base(&effective_system_prompt, &context_results);
2671
2672        // Merge memory context into system prompt
2673        let base_prompt = effective_system_prompt.clone();
2674        let augmented_system = match (augmented_system, system_with_memory) {
2675            (Some(ctx), Some(mem)) if ctx != mem => Some(ctx.replacen(&base_prompt, &mem, 1)),
2676            (Some(ctx), _) => Some(ctx),
2677            (None, mem) => mem,
2678        };
2679
2680        // Add user message
2681        if !msg_prompt.is_empty() {
2682            messages.push(Message::user(msg_prompt));
2683        }
2684
2685        loop {
2686            turn += 1;
2687
2688            if turn > self.config.max_tool_rounds {
2689                let error = format!("Max tool rounds ({}) exceeded", self.config.max_tool_rounds);
2690                if let Some(tx) = &event_tx {
2691                    tx.send(AgentEvent::Error {
2692                        message: error.clone(),
2693                    })
2694                    .await
2695                    .ok();
2696                }
2697                anyhow::bail!(error);
2698            }
2699
2700            // Send turn start event
2701            if let Some(tx) = &event_tx {
2702                tx.send(AgentEvent::TurnStart { turn }).await.ok();
2703            }
2704
2705            tracing::info!(
2706                turn = turn,
2707                max_turns = self.config.max_tool_rounds,
2708                "Agent turn started"
2709            );
2710
2711            // Call LLM - use streaming if we have an event channel
2712            tracing::info!(
2713                a3s.llm.streaming = event_tx.is_some(),
2714                "LLM completion started"
2715            );
2716
2717            // Fire GenerateStart hook
2718            self.fire_generate_start(
2719                session_id.unwrap_or(""),
2720                effective_prompt,
2721                &augmented_system,
2722            )
2723            .await;
2724
2725            let llm_start = std::time::Instant::now();
2726            // Circuit breaker (4.3): retry non-streaming LLM calls on transient failures.
2727            // Each failure increments `consecutive_llm_errors`; on success it resets to 0.
2728            // Streaming mode bails immediately on failure (events can't be replayed).
2729            let response = {
2730                let threshold = self.config.circuit_breaker_threshold.max(1);
2731                let mut attempt = 0u32;
2732                loop {
2733                    attempt += 1;
2734                    let result = self
2735                        .call_llm(
2736                            &messages,
2737                            augmented_system.as_deref(),
2738                            &event_tx,
2739                            cancel_token,
2740                        )
2741                        .await;
2742                    match result {
2743                        Ok(r) => {
2744                            break r;
2745                        }
2746                        // Never retry if cancelled
2747                        Err(e) if cancel_token.is_cancelled() => {
2748                            anyhow::bail!(e);
2749                        }
2750                        // Retry when: non-streaming under threshold, OR first streaming attempt
2751                        Err(e) if attempt < threshold && (event_tx.is_none() || attempt == 1) => {
2752                            tracing::warn!(
2753                                turn = turn,
2754                                attempt = attempt,
2755                                threshold = threshold,
2756                                error = %e,
2757                                "LLM call failed, will retry"
2758                            );
2759                            tokio::time::sleep(Duration::from_millis(100 * attempt as u64)).await;
2760                        }
2761                        // Threshold exceeded or streaming mid-stream: bail
2762                        Err(e) => {
2763                            let msg = if attempt > 1 {
2764                                format!(
2765                                    "LLM circuit breaker triggered: failed after {} attempt(s): {}",
2766                                    attempt, e
2767                                )
2768                            } else {
2769                                format!("LLM call failed: {}", e)
2770                            };
2771                            tracing::error!(turn = turn, attempt = attempt, "{}", msg);
2772                            // Fire OnError hook for LLM failure
2773                            self.fire_on_error(
2774                                session_id.unwrap_or(""),
2775                                ErrorType::LlmFailure,
2776                                &msg,
2777                                serde_json::json!({"turn": turn, "attempt": attempt}),
2778                            )
2779                            .await;
2780                            if let Some(tx) = &event_tx {
2781                                tx.send(AgentEvent::Error {
2782                                    message: msg.clone(),
2783                                })
2784                                .await
2785                                .ok();
2786                            }
2787                            anyhow::bail!(msg);
2788                        }
2789                    }
2790                }
2791            };
2792
2793            // Update usage
2794            total_usage.prompt_tokens += response.usage.prompt_tokens;
2795            total_usage.completion_tokens += response.usage.completion_tokens;
2796            total_usage.total_tokens += response.usage.total_tokens;
2797
2798            // Track token usage in progress tracker
2799            if let Some(ref tracker) = self.progress_tracker {
2800                let token_usage = crate::task::TaskTokenUsage {
2801                    input_tokens: response.usage.prompt_tokens as u64,
2802                    output_tokens: response.usage.completion_tokens as u64,
2803                    cache_read_tokens: response.usage.cache_read_tokens.unwrap_or(0) as u64,
2804                    cache_write_tokens: response.usage.cache_write_tokens.unwrap_or(0) as u64,
2805                };
2806                if let Ok(mut guard) = tracker.try_write() {
2807                    guard.track_tokens(token_usage);
2808                }
2809            }
2810
2811            // Record LLM completion telemetry
2812            let llm_duration = llm_start.elapsed();
2813            tracing::info!(
2814                turn = turn,
2815                streaming = event_tx.is_some(),
2816                prompt_tokens = response.usage.prompt_tokens,
2817                completion_tokens = response.usage.completion_tokens,
2818                total_tokens = response.usage.total_tokens,
2819                stop_reason = response.stop_reason.as_deref().unwrap_or("unknown"),
2820                duration_ms = llm_duration.as_millis() as u64,
2821                "LLM completion finished"
2822            );
2823
2824            // Fire GenerateEnd hook
2825            self.fire_generate_end(
2826                session_id.unwrap_or(""),
2827                effective_prompt,
2828                &response,
2829                llm_duration.as_millis() as u64,
2830            )
2831            .await;
2832
2833            // Record LLM usage on the llm span
2834            crate::telemetry::record_llm_usage(
2835                response.usage.prompt_tokens,
2836                response.usage.completion_tokens,
2837                response.usage.total_tokens,
2838                response.stop_reason.as_deref(),
2839            );
2840            // Log turn token usage
2841            tracing::info!(
2842                turn = turn,
2843                a3s.llm.total_tokens = response.usage.total_tokens,
2844                "Turn token usage"
2845            );
2846
2847            // Add assistant message to history
2848            messages.push(response.message.clone());
2849
2850            // Check for tool calls
2851            let tool_calls = response.tool_calls();
2852
2853            // Send turn end event
2854            if let Some(tx) = &event_tx {
2855                tx.send(AgentEvent::TurnEnd {
2856                    turn,
2857                    usage: response.usage.clone(),
2858                })
2859                .await
2860                .ok();
2861            }
2862
2863            // Auto-compact: check if context usage exceeds threshold
2864            if self.config.auto_compact {
2865                let used = response.usage.prompt_tokens;
2866                let max = self.config.max_context_tokens;
2867                let threshold = self.config.auto_compact_threshold;
2868
2869                if crate::session::compaction::should_auto_compact(used, max, threshold) {
2870                    let before_len = messages.len();
2871                    let percent_before = used as f32 / max as f32;
2872
2873                    tracing::info!(
2874                        used_tokens = used,
2875                        max_tokens = max,
2876                        percent = percent_before,
2877                        threshold = threshold,
2878                        "Auto-compact triggered"
2879                    );
2880
2881                    // Step 1: Prune large tool outputs first (cheap, no LLM call)
2882                    if let Some(pruned) = crate::session::compaction::prune_tool_outputs(&messages)
2883                    {
2884                        messages = pruned;
2885                        tracing::info!("Tool output pruning applied");
2886                    }
2887
2888                    // Step 2: Full summarization using the agent's LLM client
2889                    if let Ok(Some(compacted)) = crate::session::compaction::compact_messages(
2890                        session_id.unwrap_or(""),
2891                        &messages,
2892                        &self.llm_client,
2893                    )
2894                    .await
2895                    {
2896                        messages = compacted;
2897                    }
2898
2899                    // Emit compaction event
2900                    if let Some(tx) = &event_tx {
2901                        tx.send(AgentEvent::ContextCompacted {
2902                            session_id: session_id.unwrap_or("").to_string(),
2903                            before_messages: before_len,
2904                            after_messages: messages.len(),
2905                            percent_before,
2906                        })
2907                        .await
2908                        .ok();
2909                    }
2910                }
2911            }
2912
2913            if tool_calls.is_empty() {
2914                // No tool calls — check if we should inject a continuation message
2915                // before treating this as a final answer.
2916                let final_text = response.text();
2917
2918                if self.config.continuation_enabled
2919                    && continuation_count < self.config.max_continuation_turns
2920                    && turn < self.config.max_tool_rounds  // never inject past the turn limit
2921                    && Self::looks_incomplete(&final_text)
2922                {
2923                    continuation_count += 1;
2924                    tracing::info!(
2925                        turn = turn,
2926                        continuation = continuation_count,
2927                        max_continuation = self.config.max_continuation_turns,
2928                        "Injecting continuation message — response looks incomplete"
2929                    );
2930                    // Inject continuation as a user message and keep looping
2931                    messages.push(Message::user(crate::prompts::CONTINUATION));
2932                    continue;
2933                }
2934
2935                // Sanitize output to redact any sensitive data before returning
2936                let final_text = if let Some(ref sp) = self.config.security_provider {
2937                    sp.sanitize_output(&final_text)
2938                } else {
2939                    final_text
2940                };
2941
2942                // Record final totals
2943                tracing::info!(
2944                    tool_calls_count = tool_calls_count,
2945                    total_prompt_tokens = total_usage.prompt_tokens,
2946                    total_completion_tokens = total_usage.completion_tokens,
2947                    total_tokens = total_usage.total_tokens,
2948                    turns = turn,
2949                    "Agent execution completed"
2950                );
2951
2952                if emit_end {
2953                    if let Some(tx) = &event_tx {
2954                        tx.send(AgentEvent::End {
2955                            text: final_text.clone(),
2956                            usage: total_usage.clone(),
2957                            meta: response.meta.clone(),
2958                        })
2959                        .await
2960                        .ok();
2961                    }
2962                }
2963
2964                // Notify context providers of turn completion for memory extraction
2965                if let Some(sid) = session_id {
2966                    self.notify_turn_complete(sid, effective_prompt, &final_text)
2967                        .await;
2968                }
2969
2970                return Ok(AgentResult {
2971                    text: final_text,
2972                    messages,
2973                    usage: total_usage,
2974                    tool_calls_count,
2975                });
2976            }
2977
2978            // Execute tools sequentially
2979            // Fast path: when all tool calls are independent file writes and no hooks/HITL
2980            // are configured, execute them concurrently to avoid serial I/O bottleneck.
2981            let tool_calls = if self.config.hook_engine.is_none()
2982                && self.config.confirmation_manager.is_none()
2983                && tool_calls.len() > 1
2984                && tool_calls
2985                    .iter()
2986                    .all(|tc| Self::is_parallel_safe_write(&tc.name, &tc.args))
2987                && {
2988                    // All target paths must be distinct (no write-write conflicts)
2989                    let paths: Vec<_> = tool_calls
2990                        .iter()
2991                        .filter_map(|tc| Self::extract_write_path(&tc.args))
2992                        .collect();
2993                    paths.len() == tool_calls.len()
2994                        && paths.iter().collect::<std::collections::HashSet<_>>().len()
2995                            == paths.len()
2996                } {
2997                tracing::info!(
2998                    count = tool_calls.len(),
2999                    "Parallel write batch: executing {} independent file writes concurrently",
3000                    tool_calls.len()
3001                );
3002
3003                let futures: Vec<_> = tool_calls
3004                    .iter()
3005                    .map(|tc| {
3006                        let ctx = self.tool_context.clone();
3007                        let executor = Arc::clone(&self.tool_executor);
3008                        let name = tc.name.clone();
3009                        let args = tc.args.clone();
3010                        async move { executor.execute_with_context(&name, &args, &ctx).await }
3011                    })
3012                    .collect();
3013
3014                let results = join_all(futures).await;
3015
3016                // Post-process results in original order (sequential, preserves message ordering)
3017                for (tc, result) in tool_calls.iter().zip(results) {
3018                    tool_calls_count += 1;
3019                    let (output, exit_code, is_error, metadata, images) =
3020                        Self::tool_result_to_tuple(result);
3021
3022                    // Track tool call in progress tracker
3023                    self.track_tool_result(&tc.name, &tc.args, exit_code);
3024
3025                    let output = if let Some(ref sp) = self.config.security_provider {
3026                        sp.sanitize_output(&output)
3027                    } else {
3028                        output
3029                    };
3030
3031                    if let Some(tx) = &event_tx {
3032                        tx.send(AgentEvent::ToolEnd {
3033                            id: tc.id.clone(),
3034                            name: tc.name.clone(),
3035                            output: output.clone(),
3036                            exit_code,
3037                            metadata,
3038                        })
3039                        .await
3040                        .ok();
3041                    }
3042
3043                    if images.is_empty() {
3044                        messages.push(Message::tool_result(&tc.id, &output, is_error));
3045                    } else {
3046                        messages.push(Message::tool_result_with_images(
3047                            &tc.id, &output, &images, is_error,
3048                        ));
3049                    }
3050                }
3051
3052                // Skip the sequential loop below
3053                continue;
3054            } else {
3055                tool_calls
3056            };
3057
3058            for tool_call in tool_calls {
3059                tool_calls_count += 1;
3060
3061                let tool_start = std::time::Instant::now();
3062
3063                tracing::info!(
3064                    tool_name = tool_call.name.as_str(),
3065                    tool_id = tool_call.id.as_str(),
3066                    "Tool execution started"
3067                );
3068
3069                // Send tool start event (only if not already sent during streaming)
3070                // In streaming mode, ToolStart is sent when we receive ToolUseStart from LLM
3071                // But we still need to send ToolEnd after execution
3072
3073                // Check for malformed tool arguments from LLM (4.1 parse error recovery)
3074                if let Some(parse_error) =
3075                    tool_call.args.get("__parse_error").and_then(|v| v.as_str())
3076                {
3077                    parse_error_count += 1;
3078                    let error_msg = format!("Error: {}", parse_error);
3079                    tracing::warn!(
3080                        tool = tool_call.name.as_str(),
3081                        parse_error_count = parse_error_count,
3082                        max_parse_retries = self.config.max_parse_retries,
3083                        "Malformed tool arguments from LLM"
3084                    );
3085
3086                    if let Some(tx) = &event_tx {
3087                        tx.send(AgentEvent::ToolEnd {
3088                            id: tool_call.id.clone(),
3089                            name: tool_call.name.clone(),
3090                            output: error_msg.clone(),
3091                            exit_code: 1,
3092                            metadata: None,
3093                        })
3094                        .await
3095                        .ok();
3096                    }
3097
3098                    messages.push(Message::tool_result(&tool_call.id, &error_msg, true));
3099
3100                    if parse_error_count > self.config.max_parse_retries {
3101                        let msg = format!(
3102                            "LLM produced malformed tool arguments {} time(s) in a row \
3103                             (max_parse_retries={}); giving up",
3104                            parse_error_count, self.config.max_parse_retries
3105                        );
3106                        tracing::error!("{}", msg);
3107                        if let Some(tx) = &event_tx {
3108                            tx.send(AgentEvent::Error {
3109                                message: msg.clone(),
3110                            })
3111                            .await
3112                            .ok();
3113                        }
3114                        anyhow::bail!(msg);
3115                    }
3116                    continue;
3117                }
3118
3119                // Tool args are valid — reset parse error counter
3120                parse_error_count = 0;
3121
3122                // Check skill-based tool permissions
3123                if let Some(ref registry) = self.config.skill_registry {
3124                    let instruction_skills =
3125                        registry.by_kind(crate::skills::SkillKind::Instruction);
3126                    let has_restrictions =
3127                        instruction_skills.iter().any(|s| s.allowed_tools.is_some());
3128                    if has_restrictions {
3129                        let allowed = instruction_skills
3130                            .iter()
3131                            .any(|s| s.is_tool_allowed(&tool_call.name));
3132                        if !allowed {
3133                            let msg = format!(
3134                                "Tool '{}' is not allowed by any active skill.",
3135                                tool_call.name
3136                            );
3137                            tracing::info!(
3138                                tool_name = tool_call.name.as_str(),
3139                                "Tool blocked by skill registry"
3140                            );
3141                            if let Some(tx) = &event_tx {
3142                                tx.send(AgentEvent::PermissionDenied {
3143                                    tool_id: tool_call.id.clone(),
3144                                    tool_name: tool_call.name.clone(),
3145                                    args: tool_call.args.clone(),
3146                                    reason: msg.clone(),
3147                                })
3148                                .await
3149                                .ok();
3150                            }
3151                            messages.push(Message::tool_result(&tool_call.id, &msg, true));
3152                            continue;
3153                        }
3154                    }
3155                }
3156
3157                // Fire PreToolUse hook (may block the tool call)
3158                if let Some(HookResult::Block(reason)) = self
3159                    .fire_pre_tool_use(
3160                        session_id.unwrap_or(""),
3161                        &tool_call.name,
3162                        &tool_call.args,
3163                        recent_tool_signatures.clone(),
3164                    )
3165                    .await
3166                {
3167                    let msg = format!("Tool '{}' blocked by hook: {}", tool_call.name, reason);
3168                    tracing::info!(
3169                        tool_name = tool_call.name.as_str(),
3170                        "Tool blocked by PreToolUse hook"
3171                    );
3172
3173                    if let Some(tx) = &event_tx {
3174                        tx.send(AgentEvent::PermissionDenied {
3175                            tool_id: tool_call.id.clone(),
3176                            tool_name: tool_call.name.clone(),
3177                            args: tool_call.args.clone(),
3178                            reason: reason.clone(),
3179                        })
3180                        .await
3181                        .ok();
3182                    }
3183
3184                    messages.push(Message::tool_result(&tool_call.id, &msg, true));
3185                    continue;
3186                }
3187
3188                // Check permission before executing tool
3189                let permission_decision = if let Some(checker) = &self.config.permission_checker {
3190                    checker.check(&tool_call.name, &tool_call.args)
3191                } else {
3192                    // No policy configured — default to Ask so HITL can still intervene
3193                    PermissionDecision::Ask
3194                };
3195
3196                let (output, exit_code, is_error, metadata, images) = match permission_decision {
3197                    PermissionDecision::Deny => {
3198                        tracing::info!(
3199                            tool_name = tool_call.name.as_str(),
3200                            permission = "deny",
3201                            "Tool permission denied"
3202                        );
3203                        // Tool execution denied by permission policy
3204                        let denial_msg = format!(
3205                            "Permission denied: Tool '{}' is blocked by permission policy.",
3206                            tool_call.name
3207                        );
3208
3209                        // Send permission denied event
3210                        if let Some(tx) = &event_tx {
3211                            tx.send(AgentEvent::PermissionDenied {
3212                                tool_id: tool_call.id.clone(),
3213                                tool_name: tool_call.name.clone(),
3214                                args: tool_call.args.clone(),
3215                                reason: "Blocked by deny rule in permission policy".to_string(),
3216                            })
3217                            .await
3218                            .ok();
3219                        }
3220
3221                        (denial_msg, 1, true, None, Vec::new())
3222                    }
3223                    PermissionDecision::Allow => {
3224                        tracing::info!(
3225                            tool_name = tool_call.name.as_str(),
3226                            permission = "allow",
3227                            "Tool permission: allow"
3228                        );
3229                        // Permission explicitly allows — execute directly, no HITL
3230                        let stream_ctx =
3231                            self.streaming_tool_context(&event_tx, &tool_call.id, &tool_call.name);
3232                        let result = self
3233                            .execute_tool_queued_or_direct(
3234                                &tool_call.name,
3235                                &tool_call.args,
3236                                &stream_ctx,
3237                            )
3238                            .await;
3239
3240                        let tuple = Self::tool_result_to_tuple(result);
3241                        // Track tool call in progress tracker
3242                        let (_, exit_code, _, _, _) = tuple;
3243                        self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
3244                        tuple
3245                    }
3246                    PermissionDecision::Ask => {
3247                        tracing::info!(
3248                            tool_name = tool_call.name.as_str(),
3249                            permission = "ask",
3250                            "Tool permission: ask"
3251                        );
3252                        // Permission says Ask — delegate to HITL confirmation manager
3253                        if let Some(cm) = &self.config.confirmation_manager {
3254                            // Check YOLO lanes: if the tool's lane is in YOLO mode, skip confirmation
3255                            if !cm.requires_confirmation(&tool_call.name).await {
3256                                let stream_ctx = self.streaming_tool_context(
3257                                    &event_tx,
3258                                    &tool_call.id,
3259                                    &tool_call.name,
3260                                );
3261                                let result = self
3262                                    .execute_tool_queued_or_direct(
3263                                        &tool_call.name,
3264                                        &tool_call.args,
3265                                        &stream_ctx,
3266                                    )
3267                                    .await;
3268
3269                                let (output, exit_code, is_error, metadata, images) =
3270                                    Self::tool_result_to_tuple(result);
3271
3272                                // Track tool call in progress tracker
3273                                self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
3274
3275                                // Add tool result to messages
3276                                if images.is_empty() {
3277                                    messages.push(Message::tool_result(
3278                                        &tool_call.id,
3279                                        &output,
3280                                        is_error,
3281                                    ));
3282                                } else {
3283                                    messages.push(Message::tool_result_with_images(
3284                                        &tool_call.id,
3285                                        &output,
3286                                        &images,
3287                                        is_error,
3288                                    ));
3289                                }
3290
3291                                // Record tool result on the tool span for early exit
3292                                let tool_duration = tool_start.elapsed();
3293                                crate::telemetry::record_tool_result(exit_code, tool_duration);
3294
3295                                // Send ToolEnd event
3296                                if let Some(tx) = &event_tx {
3297                                    tx.send(AgentEvent::ToolEnd {
3298                                        id: tool_call.id.clone(),
3299                                        name: tool_call.name.clone(),
3300                                        output: output.clone(),
3301                                        exit_code,
3302                                        metadata,
3303                                    })
3304                                    .await
3305                                    .ok();
3306                                }
3307
3308                                // Fire PostToolUse hook (fire-and-forget)
3309                                self.fire_post_tool_use(
3310                                    session_id.unwrap_or(""),
3311                                    &tool_call.name,
3312                                    &tool_call.args,
3313                                    &output,
3314                                    exit_code == 0,
3315                                    tool_duration.as_millis() as u64,
3316                                )
3317                                .await;
3318
3319                                continue; // Skip the rest, move to next tool call
3320                            }
3321
3322                            // Get timeout from policy
3323                            let policy = cm.policy().await;
3324                            let timeout_ms = policy.default_timeout_ms;
3325                            let timeout_action = policy.timeout_action;
3326
3327                            // Request confirmation (this emits ConfirmationRequired event)
3328                            let rx = cm
3329                                .request_confirmation(
3330                                    &tool_call.id,
3331                                    &tool_call.name,
3332                                    &tool_call.args,
3333                                )
3334                                .await;
3335
3336                            // Forward ConfirmationRequired to the streaming event channel
3337                            // so external consumers (e.g. SafeClaw engine) can relay it
3338                            // to the browser UI.
3339                            if let Some(tx) = &event_tx {
3340                                tx.send(AgentEvent::ConfirmationRequired {
3341                                    tool_id: tool_call.id.clone(),
3342                                    tool_name: tool_call.name.clone(),
3343                                    args: tool_call.args.clone(),
3344                                    timeout_ms,
3345                                })
3346                                .await
3347                                .ok();
3348                            }
3349
3350                            // Wait for confirmation with timeout
3351                            let confirmation_result =
3352                                tokio::time::timeout(Duration::from_millis(timeout_ms), rx).await;
3353
3354                            match confirmation_result {
3355                                Ok(Ok(response)) => {
3356                                    // Forward ConfirmationReceived
3357                                    if let Some(tx) = &event_tx {
3358                                        tx.send(AgentEvent::ConfirmationReceived {
3359                                            tool_id: tool_call.id.clone(),
3360                                            approved: response.approved,
3361                                            reason: response.reason.clone(),
3362                                        })
3363                                        .await
3364                                        .ok();
3365                                    }
3366                                    if response.approved {
3367                                        let stream_ctx = self.streaming_tool_context(
3368                                            &event_tx,
3369                                            &tool_call.id,
3370                                            &tool_call.name,
3371                                        );
3372                                        let result = self
3373                                            .execute_tool_queued_or_direct(
3374                                                &tool_call.name,
3375                                                &tool_call.args,
3376                                                &stream_ctx,
3377                                            )
3378                                            .await;
3379
3380                                        let tuple = Self::tool_result_to_tuple(result);
3381                                        // Track tool call in progress tracker
3382                                        let (_, exit_code, _, _, _) = tuple;
3383                                        self.track_tool_result(
3384                                            &tool_call.name,
3385                                            &tool_call.args,
3386                                            exit_code,
3387                                        );
3388                                        tuple
3389                                    } else {
3390                                        let rejection_msg = format!(
3391                                            "Tool '{}' execution was REJECTED by the user. Reason: {}. \
3392                                             DO NOT retry this tool call unless the user explicitly asks you to.",
3393                                            tool_call.name,
3394                                            response.reason.unwrap_or_else(|| "No reason provided".to_string())
3395                                        );
3396                                        (rejection_msg, 1, true, None, Vec::new())
3397                                    }
3398                                }
3399                                Ok(Err(_)) => {
3400                                    // Forward ConfirmationTimeout (channel closed = effectively timed out)
3401                                    if let Some(tx) = &event_tx {
3402                                        tx.send(AgentEvent::ConfirmationTimeout {
3403                                            tool_id: tool_call.id.clone(),
3404                                            action_taken: "rejected".to_string(),
3405                                        })
3406                                        .await
3407                                        .ok();
3408                                    }
3409                                    let msg = format!(
3410                                        "Tool '{}' confirmation failed: confirmation channel closed",
3411                                        tool_call.name
3412                                    );
3413                                    (msg, 1, true, None, Vec::new())
3414                                }
3415                                Err(_) => {
3416                                    cm.check_timeouts().await;
3417
3418                                    // Forward ConfirmationTimeout
3419                                    if let Some(tx) = &event_tx {
3420                                        tx.send(AgentEvent::ConfirmationTimeout {
3421                                            tool_id: tool_call.id.clone(),
3422                                            action_taken: match timeout_action {
3423                                                crate::hitl::TimeoutAction::Reject => {
3424                                                    "rejected".to_string()
3425                                                }
3426                                                crate::hitl::TimeoutAction::AutoApprove => {
3427                                                    "auto_approved".to_string()
3428                                                }
3429                                            },
3430                                        })
3431                                        .await
3432                                        .ok();
3433                                    }
3434
3435                                    match timeout_action {
3436                                        crate::hitl::TimeoutAction::Reject => {
3437                                            let msg = format!(
3438                                                "Tool '{}' execution was REJECTED: user confirmation timed out after {}ms. \
3439                                                 DO NOT retry this tool call — the user did not approve it. \
3440                                                 Inform the user that the operation requires their approval and ask them to try again.",
3441                                                tool_call.name, timeout_ms
3442                                            );
3443                                            (msg, 1, true, None, Vec::new())
3444                                        }
3445                                        crate::hitl::TimeoutAction::AutoApprove => {
3446                                            let stream_ctx = self.streaming_tool_context(
3447                                                &event_tx,
3448                                                &tool_call.id,
3449                                                &tool_call.name,
3450                                            );
3451                                            let result = self
3452                                                .execute_tool_queued_or_direct(
3453                                                    &tool_call.name,
3454                                                    &tool_call.args,
3455                                                    &stream_ctx,
3456                                                )
3457                                                .await;
3458
3459                                            let tuple = Self::tool_result_to_tuple(result);
3460                                            // Track tool call in progress tracker
3461                                            let (_, exit_code, _, _, _) = tuple;
3462                                            self.track_tool_result(
3463                                                &tool_call.name,
3464                                                &tool_call.args,
3465                                                exit_code,
3466                                            );
3467                                            tuple
3468                                        }
3469                                    }
3470                                }
3471                            }
3472                        } else {
3473                            // Ask without confirmation manager — safe deny
3474                            let msg = format!(
3475                                "Tool '{}' requires confirmation but no HITL confirmation manager is configured. \
3476                                 Configure a confirmation policy to enable tool execution.",
3477                                tool_call.name
3478                            );
3479                            tracing::warn!(
3480                                tool_name = tool_call.name.as_str(),
3481                                "Tool requires confirmation but no HITL manager configured"
3482                            );
3483                            (msg, 1, true, None, Vec::new())
3484                        }
3485                    }
3486                };
3487
3488                let tool_duration = tool_start.elapsed();
3489                crate::telemetry::record_tool_result(exit_code, tool_duration);
3490
3491                // Sanitize tool output for sensitive data before it enters the message history
3492                let output = if let Some(ref sp) = self.config.security_provider {
3493                    sp.sanitize_output(&output)
3494                } else {
3495                    output
3496                };
3497
3498                recent_tool_signatures.push(format!(
3499                    "{}:{} => {}",
3500                    tool_call.name,
3501                    serde_json::to_string(&tool_call.args).unwrap_or_default(),
3502                    if is_error { "error" } else { "ok" }
3503                ));
3504                if recent_tool_signatures.len() > 8 {
3505                    let overflow = recent_tool_signatures.len() - 8;
3506                    recent_tool_signatures.drain(0..overflow);
3507                }
3508
3509                // Fire PostToolUse hook (fire-and-forget)
3510                self.fire_post_tool_use(
3511                    session_id.unwrap_or(""),
3512                    &tool_call.name,
3513                    &tool_call.args,
3514                    &output,
3515                    exit_code == 0,
3516                    tool_duration.as_millis() as u64,
3517                )
3518                .await;
3519
3520                // Auto-remember tool result in long-term memory
3521                if let Some(ref memory) = self.config.memory {
3522                    let tools_used = [tool_call.name.clone()];
3523                    let remember_result = if exit_code == 0 {
3524                        memory
3525                            .remember_success(effective_prompt, &tools_used, &output)
3526                            .await
3527                    } else {
3528                        memory
3529                            .remember_failure(effective_prompt, &output, &tools_used)
3530                            .await
3531                    };
3532                    match remember_result {
3533                        Ok(()) => {
3534                            if let Some(tx) = &event_tx {
3535                                let item_type = if exit_code == 0 { "success" } else { "failure" };
3536                                tx.send(AgentEvent::MemoryStored {
3537                                    memory_id: uuid::Uuid::new_v4().to_string(),
3538                                    memory_type: item_type.to_string(),
3539                                    importance: if exit_code == 0 { 0.8 } else { 0.9 },
3540                                    tags: vec![item_type.to_string(), tool_call.name.clone()],
3541                                })
3542                                .await
3543                                .ok();
3544                            }
3545                        }
3546                        Err(e) => {
3547                            tracing::warn!("Failed to store memory after tool execution: {}", e);
3548                        }
3549                    }
3550                }
3551
3552                // Send tool end event
3553                if let Some(tx) = &event_tx {
3554                    tx.send(AgentEvent::ToolEnd {
3555                        id: tool_call.id.clone(),
3556                        name: tool_call.name.clone(),
3557                        output: output.clone(),
3558                        exit_code,
3559                        metadata,
3560                    })
3561                    .await
3562                    .ok();
3563                }
3564
3565                // Add tool result to messages
3566                if images.is_empty() {
3567                    messages.push(Message::tool_result(&tool_call.id, &output, is_error));
3568                } else {
3569                    messages.push(Message::tool_result_with_images(
3570                        &tool_call.id,
3571                        &output,
3572                        &images,
3573                        is_error,
3574                    ));
3575                }
3576            }
3577        }
3578    }
3579
3580    /// Execute with streaming events
3581    pub async fn execute_streaming(
3582        &self,
3583        history: &[Message],
3584        prompt: &str,
3585    ) -> Result<(
3586        mpsc::Receiver<AgentEvent>,
3587        tokio::task::JoinHandle<Result<AgentResult>>,
3588        tokio_util::sync::CancellationToken,
3589    )> {
3590        let (tx, rx) = mpsc::channel(100);
3591        let cancel_token = tokio_util::sync::CancellationToken::new();
3592
3593        let llm_client = self.llm_client.clone();
3594        let tool_executor = self.tool_executor.clone();
3595        let tool_context = self.tool_context.clone();
3596        let config = self.config.clone();
3597        let tool_metrics = self.tool_metrics.clone();
3598        let command_queue = self.command_queue.clone();
3599        let history = history.to_vec();
3600        let prompt = prompt.to_string();
3601        let token_clone = cancel_token.clone();
3602
3603        let handle = tokio::spawn(async move {
3604            let mut agent = AgentLoop::new(llm_client, tool_executor, tool_context, config);
3605            if let Some(metrics) = tool_metrics {
3606                agent = agent.with_tool_metrics(metrics);
3607            }
3608            if let Some(queue) = command_queue {
3609                agent = agent.with_queue(queue);
3610            }
3611            agent
3612                .execute_with_session(&history, &prompt, None, Some(tx), Some(&token_clone))
3613                .await
3614        });
3615
3616        Ok((rx, handle, cancel_token))
3617    }
3618
3619    /// Create an execution plan for a prompt
3620    ///
3621    /// Delegates to [`LlmPlanner`] for structured JSON plan generation,
3622    /// falling back to heuristic planning if the LLM call fails.
3623    pub async fn plan(&self, prompt: &str, _context: Option<&str>) -> Result<ExecutionPlan> {
3624        use crate::planning::LlmPlanner;
3625
3626        match LlmPlanner::create_plan(&self.llm_client, prompt).await {
3627            Ok(plan) => Ok(plan),
3628            Err(e) => {
3629                tracing::warn!("LLM plan creation failed, using fallback: {}", e);
3630                Ok(LlmPlanner::fallback_plan(prompt))
3631            }
3632        }
3633    }
3634
3635    /// Execute with planning phase
3636    pub async fn execute_with_planning(
3637        &self,
3638        history: &[Message],
3639        prompt: &str,
3640        event_tx: Option<mpsc::Sender<AgentEvent>>,
3641    ) -> Result<AgentResult> {
3642        // Send planning start event
3643        if let Some(tx) = &event_tx {
3644            tx.send(AgentEvent::PlanningStart {
3645                prompt: prompt.to_string(),
3646            })
3647            .await
3648            .ok();
3649        }
3650
3651        // Extract goal when goal_tracking is enabled
3652        let goal = if self.config.goal_tracking {
3653            let g = self.extract_goal(prompt).await?;
3654            if let Some(tx) = &event_tx {
3655                tx.send(AgentEvent::GoalExtracted { goal: g.clone() })
3656                    .await
3657                    .ok();
3658            }
3659            Some(g)
3660        } else {
3661            None
3662        };
3663
3664        // Create execution plan
3665        let plan = self.plan(prompt, None).await?;
3666
3667        // Send planning end event
3668        if let Some(tx) = &event_tx {
3669            tx.send(AgentEvent::PlanningEnd {
3670                estimated_steps: plan.steps.len(),
3671                plan: plan.clone(),
3672            })
3673            .await
3674            .ok();
3675        }
3676
3677        let plan_start = std::time::Instant::now();
3678
3679        // Execute the plan step by step
3680        let result = self.execute_plan(history, &plan, event_tx.clone()).await?;
3681
3682        // Emit the final End event (execute_loop_inner does not emit End in planning mode)
3683        if let Some(tx) = &event_tx {
3684            tx.send(AgentEvent::End {
3685                text: result.text.clone(),
3686                usage: result.usage.clone(),
3687                meta: None,
3688            })
3689            .await
3690            .ok();
3691        }
3692
3693        // Check goal achievement when goal_tracking is enabled
3694        if self.config.goal_tracking {
3695            if let Some(ref g) = goal {
3696                let achieved = self.check_goal_achievement(g, &result.text).await?;
3697                if achieved {
3698                    if let Some(tx) = &event_tx {
3699                        tx.send(AgentEvent::GoalAchieved {
3700                            goal: g.description.clone(),
3701                            total_steps: result.messages.len(),
3702                            duration_ms: plan_start.elapsed().as_millis() as i64,
3703                        })
3704                        .await
3705                        .ok();
3706                    }
3707                }
3708            }
3709        }
3710
3711        Ok(result)
3712    }
3713
3714    /// Execute an execution plan using wave-based dependency-aware scheduling.
3715    ///
3716    /// Steps with no unmet dependencies are grouped into "waves". A wave with
3717    /// a single step executes sequentially (preserving the history chain). A
3718    /// wave with multiple independent steps executes them in parallel via
3719    /// `JoinSet`, then merges their results back into the shared history.
3720    async fn execute_plan(
3721        &self,
3722        history: &[Message],
3723        plan: &ExecutionPlan,
3724        event_tx: Option<mpsc::Sender<AgentEvent>>,
3725    ) -> Result<AgentResult> {
3726        let mut plan = plan.clone();
3727        let mut current_history = history.to_vec();
3728        let mut total_usage = TokenUsage::default();
3729        let mut tool_calls_count = 0;
3730        let total_steps = plan.steps.len();
3731
3732        // Add initial user message with the goal
3733        let steps_text = plan
3734            .steps
3735            .iter()
3736            .enumerate()
3737            .map(|(i, step)| format!("{}. {}", i + 1, step.content))
3738            .collect::<Vec<_>>()
3739            .join("\n");
3740        current_history.push(Message::user(&crate::prompts::render(
3741            crate::prompts::PLAN_EXECUTE_GOAL,
3742            &[("goal", &plan.goal), ("steps", &steps_text)],
3743        )));
3744
3745        loop {
3746            let ready: Vec<String> = plan
3747                .get_ready_steps()
3748                .iter()
3749                .map(|s| s.id.clone())
3750                .collect();
3751
3752            if ready.is_empty() {
3753                // All done or deadlock
3754                if plan.has_deadlock() {
3755                    tracing::warn!(
3756                        "Plan deadlock detected: {} pending steps with unresolvable dependencies",
3757                        plan.pending_count()
3758                    );
3759                }
3760                break;
3761            }
3762
3763            if ready.len() == 1 {
3764                // === Single step: sequential execution (preserves history chain) ===
3765                let step_id = &ready[0];
3766                let step = plan
3767                    .steps
3768                    .iter()
3769                    .find(|s| s.id == *step_id)
3770                    .ok_or_else(|| anyhow::anyhow!("step '{}' not found in plan", step_id))?
3771                    .clone();
3772                let step_number = plan
3773                    .steps
3774                    .iter()
3775                    .position(|s| s.id == *step_id)
3776                    .unwrap_or(0)
3777                    + 1;
3778
3779                // Send step start event
3780                if let Some(tx) = &event_tx {
3781                    tx.send(AgentEvent::StepStart {
3782                        step_id: step.id.clone(),
3783                        description: step.content.clone(),
3784                        step_number,
3785                        total_steps,
3786                    })
3787                    .await
3788                    .ok();
3789                }
3790
3791                plan.mark_status(&step.id, TaskStatus::InProgress);
3792
3793                let step_prompt = crate::prompts::render(
3794                    crate::prompts::PLAN_EXECUTE_STEP,
3795                    &[
3796                        ("step_num", &step_number.to_string()),
3797                        ("description", &step.content),
3798                    ],
3799                );
3800
3801                match self
3802                    .execute_loop(
3803                        &current_history,
3804                        &step_prompt,
3805                        None,
3806                        event_tx.clone(),
3807                        &tokio_util::sync::CancellationToken::new(),
3808                        false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
3809                    )
3810                    .await
3811                {
3812                    Ok(result) => {
3813                        current_history = result.messages.clone();
3814                        total_usage.prompt_tokens += result.usage.prompt_tokens;
3815                        total_usage.completion_tokens += result.usage.completion_tokens;
3816                        total_usage.total_tokens += result.usage.total_tokens;
3817                        tool_calls_count += result.tool_calls_count;
3818                        plan.mark_status(&step.id, TaskStatus::Completed);
3819
3820                        if let Some(tx) = &event_tx {
3821                            tx.send(AgentEvent::StepEnd {
3822                                step_id: step.id.clone(),
3823                                status: TaskStatus::Completed,
3824                                step_number,
3825                                total_steps,
3826                            })
3827                            .await
3828                            .ok();
3829                        }
3830                    }
3831                    Err(e) => {
3832                        tracing::error!("Plan step '{}' failed: {}", step.id, e);
3833                        plan.mark_status(&step.id, TaskStatus::Failed);
3834
3835                        if let Some(tx) = &event_tx {
3836                            tx.send(AgentEvent::StepEnd {
3837                                step_id: step.id.clone(),
3838                                status: TaskStatus::Failed,
3839                                step_number,
3840                                total_steps,
3841                            })
3842                            .await
3843                            .ok();
3844                        }
3845                    }
3846                }
3847            } else {
3848                // === Multiple steps: parallel execution via JoinSet ===
3849                // NOTE: Each parallel branch gets a clone of the base history.
3850                // Individual branch histories (tool calls, LLM turns) are NOT merged
3851                // back — only a summary message is appended. This is a deliberate
3852                // trade-off: merging divergent histories in a deterministic order is
3853                // complex and the summary approach keeps the context window manageable.
3854                let ready_steps: Vec<_> = ready
3855                    .iter()
3856                    .filter_map(|id| {
3857                        let step = plan.steps.iter().find(|s| s.id == *id)?.clone();
3858                        let step_number =
3859                            plan.steps.iter().position(|s| s.id == *id).unwrap_or(0) + 1;
3860                        Some((step, step_number))
3861                    })
3862                    .collect();
3863
3864                // Mark all as InProgress and emit StepStart events
3865                for (step, step_number) in &ready_steps {
3866                    plan.mark_status(&step.id, TaskStatus::InProgress);
3867                    if let Some(tx) = &event_tx {
3868                        tx.send(AgentEvent::StepStart {
3869                            step_id: step.id.clone(),
3870                            description: step.content.clone(),
3871                            step_number: *step_number,
3872                            total_steps,
3873                        })
3874                        .await
3875                        .ok();
3876                    }
3877                }
3878
3879                // Spawn all into JoinSet, each with a clone of the base history
3880                let mut join_set = tokio::task::JoinSet::new();
3881                for (step, step_number) in &ready_steps {
3882                    let base_history = current_history.clone();
3883                    let agent_clone = self.clone();
3884                    let tx = event_tx.clone();
3885                    let step_clone = step.clone();
3886                    let sn = *step_number;
3887
3888                    join_set.spawn(async move {
3889                        let prompt = crate::prompts::render(
3890                            crate::prompts::PLAN_EXECUTE_STEP,
3891                            &[
3892                                ("step_num", &sn.to_string()),
3893                                ("description", &step_clone.content),
3894                            ],
3895                        );
3896                        let result = agent_clone
3897                            .execute_loop(
3898                                &base_history,
3899                                &prompt,
3900                                None,
3901                                tx,
3902                                &tokio_util::sync::CancellationToken::new(),
3903                                false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
3904                            )
3905                            .await;
3906                        (step_clone.id, sn, result)
3907                    });
3908                }
3909
3910                // Collect results
3911                let mut parallel_summaries = Vec::new();
3912                while let Some(join_result) = join_set.join_next().await {
3913                    match join_result {
3914                        Ok((step_id, step_number, step_result)) => match step_result {
3915                            Ok(result) => {
3916                                total_usage.prompt_tokens += result.usage.prompt_tokens;
3917                                total_usage.completion_tokens += result.usage.completion_tokens;
3918                                total_usage.total_tokens += result.usage.total_tokens;
3919                                tool_calls_count += result.tool_calls_count;
3920                                plan.mark_status(&step_id, TaskStatus::Completed);
3921
3922                                // Collect the final assistant text for context merging
3923                                parallel_summaries.push(format!(
3924                                    "- Step {} ({}): {}",
3925                                    step_number, step_id, result.text
3926                                ));
3927
3928                                if let Some(tx) = &event_tx {
3929                                    tx.send(AgentEvent::StepEnd {
3930                                        step_id,
3931                                        status: TaskStatus::Completed,
3932                                        step_number,
3933                                        total_steps,
3934                                    })
3935                                    .await
3936                                    .ok();
3937                                }
3938                            }
3939                            Err(e) => {
3940                                tracing::error!("Plan step '{}' failed: {}", step_id, e);
3941                                plan.mark_status(&step_id, TaskStatus::Failed);
3942
3943                                if let Some(tx) = &event_tx {
3944                                    tx.send(AgentEvent::StepEnd {
3945                                        step_id,
3946                                        status: TaskStatus::Failed,
3947                                        step_number,
3948                                        total_steps,
3949                                    })
3950                                    .await
3951                                    .ok();
3952                                }
3953                            }
3954                        },
3955                        Err(e) => {
3956                            tracing::error!("JoinSet task panicked: {}", e);
3957                        }
3958                    }
3959                }
3960
3961                // Merge parallel results into history for subsequent steps
3962                if !parallel_summaries.is_empty() {
3963                    parallel_summaries.sort(); // Deterministic ordering
3964                    let results_text = parallel_summaries.join("\n");
3965                    current_history.push(Message::user(&crate::prompts::render(
3966                        crate::prompts::PLAN_PARALLEL_RESULTS,
3967                        &[("results", &results_text)],
3968                    )));
3969                }
3970            }
3971
3972            // Emit GoalProgress after each wave
3973            if self.config.goal_tracking {
3974                let completed = plan
3975                    .steps
3976                    .iter()
3977                    .filter(|s| s.status == TaskStatus::Completed)
3978                    .count();
3979                if let Some(tx) = &event_tx {
3980                    tx.send(AgentEvent::GoalProgress {
3981                        goal: plan.goal.clone(),
3982                        progress: plan.progress(),
3983                        completed_steps: completed,
3984                        total_steps,
3985                    })
3986                    .await
3987                    .ok();
3988                }
3989            }
3990        }
3991
3992        // Get final response
3993        let final_text = current_history
3994            .last()
3995            .map(|m| {
3996                m.content
3997                    .iter()
3998                    .filter_map(|block| {
3999                        if let crate::llm::ContentBlock::Text { text } = block {
4000                            Some(text.as_str())
4001                        } else {
4002                            None
4003                        }
4004                    })
4005                    .collect::<Vec<_>>()
4006                    .join("\n")
4007            })
4008            .unwrap_or_default();
4009
4010        Ok(AgentResult {
4011            text: final_text,
4012            messages: current_history,
4013            usage: total_usage,
4014            tool_calls_count,
4015        })
4016    }
4017
4018    /// Extract goal from prompt
4019    ///
4020    /// Delegates to [`LlmPlanner`] for structured JSON goal extraction,
4021    /// falling back to heuristic logic if the LLM call fails.
4022    pub async fn extract_goal(&self, prompt: &str) -> Result<AgentGoal> {
4023        use crate::planning::LlmPlanner;
4024
4025        match LlmPlanner::extract_goal(&self.llm_client, prompt).await {
4026            Ok(goal) => Ok(goal),
4027            Err(e) => {
4028                tracing::warn!("LLM goal extraction failed, using fallback: {}", e);
4029                Ok(LlmPlanner::fallback_goal(prompt))
4030            }
4031        }
4032    }
4033
4034    /// Check if goal is achieved
4035    ///
4036    /// Delegates to [`LlmPlanner`] for structured JSON achievement check,
4037    /// falling back to heuristic logic if the LLM call fails.
4038    pub async fn check_goal_achievement(
4039        &self,
4040        goal: &AgentGoal,
4041        current_state: &str,
4042    ) -> Result<bool> {
4043        use crate::planning::LlmPlanner;
4044
4045        match LlmPlanner::check_achievement(&self.llm_client, goal, current_state).await {
4046            Ok(result) => Ok(result.achieved),
4047            Err(e) => {
4048                tracing::warn!("LLM achievement check failed, using fallback: {}", e);
4049                let result = LlmPlanner::fallback_check_achievement(goal, current_state);
4050                Ok(result.achieved)
4051            }
4052        }
4053    }
4054}
4055
4056#[cfg(test)]
4057mod tests {
4058    use super::*;
4059    use crate::llm::{ContentBlock, StreamEvent};
4060    use crate::permissions::PermissionPolicy;
4061    use crate::tools::ToolExecutor;
4062    use std::path::PathBuf;
4063    use std::sync::atomic::{AtomicUsize, Ordering};
4064
4065    /// Create a default ToolContext for tests
4066    fn test_tool_context() -> ToolContext {
4067        ToolContext::new(PathBuf::from("/tmp"))
4068    }
4069
4070    #[test]
4071    fn test_agent_config_default() {
4072        let config = AgentConfig::default();
4073        assert!(config.prompt_slots.is_empty());
4074        assert!(config.tools.is_empty()); // Tools are provided externally
4075        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
4076        assert!(config.permission_checker.is_none());
4077        assert!(config.context_providers.is_empty());
4078        // Built-in skills are always present by default
4079        let registry = config
4080            .skill_registry
4081            .expect("skill_registry must be Some by default");
4082        assert!(registry.len() >= 7, "expected at least 7 built-in skills");
4083        assert!(registry.get("code-search").is_some());
4084        assert!(registry.get("find-bugs").is_some());
4085    }
4086
4087    // ========================================================================
4088    // Mock LLM Client for Testing
4089    // ========================================================================
4090
4091    /// Mock LLM client that returns predefined responses
4092    pub(crate) struct MockLlmClient {
4093        /// Responses to return (consumed in order)
4094        responses: std::sync::Mutex<Vec<LlmResponse>>,
4095        /// Number of calls made
4096        pub(crate) call_count: AtomicUsize,
4097    }
4098
4099    impl MockLlmClient {
4100        pub(crate) fn new(responses: Vec<LlmResponse>) -> Self {
4101            Self {
4102                responses: std::sync::Mutex::new(responses),
4103                call_count: AtomicUsize::new(0),
4104            }
4105        }
4106
4107        /// Create a response with text only (no tool calls)
4108        pub(crate) fn text_response(text: &str) -> LlmResponse {
4109            LlmResponse {
4110                message: Message {
4111                    role: "assistant".to_string(),
4112                    content: vec![ContentBlock::Text {
4113                        text: text.to_string(),
4114                    }],
4115                    reasoning_content: None,
4116                },
4117                usage: TokenUsage {
4118                    prompt_tokens: 10,
4119                    completion_tokens: 5,
4120                    total_tokens: 15,
4121                    cache_read_tokens: None,
4122                    cache_write_tokens: None,
4123                },
4124                stop_reason: Some("end_turn".to_string()),
4125                meta: None,
4126            }
4127        }
4128
4129        /// Create a response with a tool call
4130        pub(crate) fn tool_call_response(
4131            tool_id: &str,
4132            tool_name: &str,
4133            args: serde_json::Value,
4134        ) -> LlmResponse {
4135            LlmResponse {
4136                message: Message {
4137                    role: "assistant".to_string(),
4138                    content: vec![ContentBlock::ToolUse {
4139                        id: tool_id.to_string(),
4140                        name: tool_name.to_string(),
4141                        input: args,
4142                    }],
4143                    reasoning_content: None,
4144                },
4145                usage: TokenUsage {
4146                    prompt_tokens: 10,
4147                    completion_tokens: 5,
4148                    total_tokens: 15,
4149                    cache_read_tokens: None,
4150                    cache_write_tokens: None,
4151                },
4152                stop_reason: Some("tool_use".to_string()),
4153                meta: None,
4154            }
4155        }
4156    }
4157
4158    #[async_trait::async_trait]
4159    impl LlmClient for MockLlmClient {
4160        async fn complete(
4161            &self,
4162            _messages: &[Message],
4163            _system: Option<&str>,
4164            _tools: &[ToolDefinition],
4165        ) -> Result<LlmResponse> {
4166            self.call_count.fetch_add(1, Ordering::SeqCst);
4167            let mut responses = self.responses.lock().unwrap();
4168            if responses.is_empty() {
4169                anyhow::bail!("No more mock responses available");
4170            }
4171            Ok(responses.remove(0))
4172        }
4173
4174        async fn complete_streaming(
4175            &self,
4176            _messages: &[Message],
4177            _system: Option<&str>,
4178            _tools: &[ToolDefinition],
4179        ) -> Result<mpsc::Receiver<StreamEvent>> {
4180            self.call_count.fetch_add(1, Ordering::SeqCst);
4181            let mut responses = self.responses.lock().unwrap();
4182            if responses.is_empty() {
4183                anyhow::bail!("No more mock responses available");
4184            }
4185            let response = responses.remove(0);
4186
4187            let (tx, rx) = mpsc::channel(10);
4188            tokio::spawn(async move {
4189                // Send text deltas if any
4190                for block in &response.message.content {
4191                    if let ContentBlock::Text { text } = block {
4192                        tx.send(StreamEvent::TextDelta(text.clone())).await.ok();
4193                    }
4194                }
4195                tx.send(StreamEvent::Done(response)).await.ok();
4196            });
4197
4198            Ok(rx)
4199        }
4200    }
4201
4202    // ========================================================================
4203    // Agent Loop Tests
4204    // ========================================================================
4205
4206    #[tokio::test]
4207    async fn test_agent_simple_response() {
4208        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4209            "Hello, I'm an AI assistant.",
4210        )]));
4211
4212        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4213        let config = AgentConfig::default();
4214
4215        let agent = AgentLoop::new(
4216            mock_client.clone(),
4217            tool_executor,
4218            test_tool_context(),
4219            config,
4220        );
4221        let result = agent.execute(&[], "Hello", None).await.unwrap();
4222
4223        assert_eq!(result.text, "Hello, I'm an AI assistant.");
4224        assert_eq!(result.tool_calls_count, 0);
4225        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
4226    }
4227
4228    #[tokio::test]
4229    async fn test_agent_with_tool_call() {
4230        let mock_client = Arc::new(MockLlmClient::new(vec![
4231            // First response: tool call
4232            MockLlmClient::tool_call_response(
4233                "tool-1",
4234                "bash",
4235                serde_json::json!({"command": "echo hello"}),
4236            ),
4237            // Second response: final text
4238            MockLlmClient::text_response("The command output was: hello"),
4239        ]));
4240
4241        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4242        let config = AgentConfig::default();
4243
4244        let agent = AgentLoop::new(
4245            mock_client.clone(),
4246            tool_executor,
4247            test_tool_context(),
4248            config,
4249        );
4250        let result = agent.execute(&[], "Run echo hello", None).await.unwrap();
4251
4252        assert_eq!(result.text, "The command output was: hello");
4253        assert_eq!(result.tool_calls_count, 1);
4254        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
4255    }
4256
4257    #[tokio::test]
4258    async fn test_agent_permission_deny() {
4259        let mock_client = Arc::new(MockLlmClient::new(vec![
4260            // First response: tool call that will be denied
4261            MockLlmClient::tool_call_response(
4262                "tool-1",
4263                "bash",
4264                serde_json::json!({"command": "rm -rf /tmp/test"}),
4265            ),
4266            // Second response: LLM responds to the denial
4267            MockLlmClient::text_response(
4268                "I cannot execute that command due to permission restrictions.",
4269            ),
4270        ]));
4271
4272        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4273
4274        // Create permission policy that denies rm commands
4275        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4276
4277        let config = AgentConfig {
4278            permission_checker: Some(Arc::new(permission_policy)),
4279            ..Default::default()
4280        };
4281
4282        let (tx, mut rx) = mpsc::channel(100);
4283        let agent = AgentLoop::new(
4284            mock_client.clone(),
4285            tool_executor,
4286            test_tool_context(),
4287            config,
4288        );
4289        let result = agent.execute(&[], "Delete files", Some(tx)).await.unwrap();
4290
4291        // Check that we received a PermissionDenied event
4292        let mut found_permission_denied = false;
4293        while let Ok(event) = rx.try_recv() {
4294            if let AgentEvent::PermissionDenied { tool_name, .. } = event {
4295                assert_eq!(tool_name, "bash");
4296                found_permission_denied = true;
4297            }
4298        }
4299        assert!(
4300            found_permission_denied,
4301            "Should have received PermissionDenied event"
4302        );
4303
4304        assert_eq!(result.tool_calls_count, 1);
4305    }
4306
4307    #[tokio::test]
4308    async fn test_agent_permission_allow() {
4309        let mock_client = Arc::new(MockLlmClient::new(vec![
4310            // First response: tool call that will be allowed
4311            MockLlmClient::tool_call_response(
4312                "tool-1",
4313                "bash",
4314                serde_json::json!({"command": "echo hello"}),
4315            ),
4316            // Second response: final text
4317            MockLlmClient::text_response("Done!"),
4318        ]));
4319
4320        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4321
4322        // Create permission policy that allows echo commands
4323        let permission_policy = PermissionPolicy::new()
4324            .allow("bash(echo:*)")
4325            .deny("bash(rm:*)");
4326
4327        let config = AgentConfig {
4328            permission_checker: Some(Arc::new(permission_policy)),
4329            ..Default::default()
4330        };
4331
4332        let agent = AgentLoop::new(
4333            mock_client.clone(),
4334            tool_executor,
4335            test_tool_context(),
4336            config,
4337        );
4338        let result = agent.execute(&[], "Echo hello", None).await.unwrap();
4339
4340        assert_eq!(result.text, "Done!");
4341        assert_eq!(result.tool_calls_count, 1);
4342    }
4343
4344    #[tokio::test]
4345    async fn test_agent_streaming_events() {
4346        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4347            "Hello!",
4348        )]));
4349
4350        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4351        let config = AgentConfig::default();
4352
4353        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4354        let (mut rx, handle, _cancel_token) = agent.execute_streaming(&[], "Hi").await.unwrap();
4355
4356        // Collect events
4357        let mut events = Vec::new();
4358        while let Some(event) = rx.recv().await {
4359            events.push(event);
4360        }
4361
4362        let result = handle.await.unwrap().unwrap();
4363        assert_eq!(result.text, "Hello!");
4364
4365        // Check we received Start and End events
4366        assert!(events.iter().any(|e| matches!(e, AgentEvent::Start { .. })));
4367        assert!(events.iter().any(|e| matches!(e, AgentEvent::End { .. })));
4368    }
4369
4370    #[tokio::test]
4371    async fn test_agent_max_tool_rounds() {
4372        // Create a mock that always returns tool calls (infinite loop)
4373        let responses: Vec<LlmResponse> = (0..100)
4374            .map(|i| {
4375                MockLlmClient::tool_call_response(
4376                    &format!("tool-{}", i),
4377                    "bash",
4378                    serde_json::json!({"command": "echo loop"}),
4379                )
4380            })
4381            .collect();
4382
4383        let mock_client = Arc::new(MockLlmClient::new(responses));
4384        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4385
4386        let config = AgentConfig {
4387            max_tool_rounds: 3,
4388            ..Default::default()
4389        };
4390
4391        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4392        let result = agent.execute(&[], "Loop forever", None).await;
4393
4394        // Should fail due to max tool rounds exceeded
4395        assert!(result.is_err());
4396        assert!(result.unwrap_err().to_string().contains("Max tool rounds"));
4397    }
4398
4399    #[tokio::test]
4400    async fn test_agent_no_permission_policy_defaults_to_ask() {
4401        // When no permission policy is set, tools default to Ask.
4402        // Without a confirmation manager, Ask = safe deny.
4403        let mock_client = Arc::new(MockLlmClient::new(vec![
4404            MockLlmClient::tool_call_response(
4405                "tool-1",
4406                "bash",
4407                serde_json::json!({"command": "rm -rf /tmp/test"}),
4408            ),
4409            MockLlmClient::text_response("Denied!"),
4410        ]));
4411
4412        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4413        let config = AgentConfig {
4414            permission_checker: None, // No policy → defaults to Ask
4415            // No confirmation_manager → safe deny
4416            ..Default::default()
4417        };
4418
4419        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4420        let result = agent.execute(&[], "Delete", None).await.unwrap();
4421
4422        // Should be denied (no policy + no CM = safe deny)
4423        assert_eq!(result.text, "Denied!");
4424        assert_eq!(result.tool_calls_count, 1);
4425    }
4426
4427    #[tokio::test]
4428    async fn test_agent_permission_ask_without_cm_denies() {
4429        // When permission is Ask and no confirmation manager configured,
4430        // tool execution should be denied (safe default).
4431        let mock_client = Arc::new(MockLlmClient::new(vec![
4432            MockLlmClient::tool_call_response(
4433                "tool-1",
4434                "bash",
4435                serde_json::json!({"command": "echo test"}),
4436            ),
4437            MockLlmClient::text_response("Denied!"),
4438        ]));
4439
4440        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4441
4442        // Create policy where bash falls through to Ask (default)
4443        let permission_policy = PermissionPolicy::new(); // Default decision is Ask
4444
4445        let config = AgentConfig {
4446            permission_checker: Some(Arc::new(permission_policy)),
4447            // No confirmation_manager — safe deny
4448            ..Default::default()
4449        };
4450
4451        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4452        let result = agent.execute(&[], "Echo", None).await.unwrap();
4453
4454        // Should deny (Ask without CM = safe deny)
4455        assert_eq!(result.text, "Denied!");
4456        // The tool result should contain the denial message
4457        assert!(result.tool_calls_count >= 1);
4458    }
4459
4460    // ========================================================================
4461    // HITL (Human-in-the-Loop) Tests
4462    // ========================================================================
4463
4464    #[tokio::test]
4465    async fn test_agent_hitl_approved() {
4466        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4467        use tokio::sync::broadcast;
4468
4469        let mock_client = Arc::new(MockLlmClient::new(vec![
4470            MockLlmClient::tool_call_response(
4471                "tool-1",
4472                "bash",
4473                serde_json::json!({"command": "echo hello"}),
4474            ),
4475            MockLlmClient::text_response("Command executed!"),
4476        ]));
4477
4478        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4479
4480        // Create HITL confirmation manager with policy enabled
4481        let (event_tx, _event_rx) = broadcast::channel(100);
4482        let hitl_policy = ConfirmationPolicy {
4483            enabled: true,
4484            ..Default::default()
4485        };
4486        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4487
4488        // Create permission policy that returns Ask for bash
4489        let permission_policy = PermissionPolicy::new(); // Default is Ask
4490
4491        let config = AgentConfig {
4492            permission_checker: Some(Arc::new(permission_policy)),
4493            confirmation_manager: Some(confirmation_manager.clone()),
4494            ..Default::default()
4495        };
4496
4497        // Spawn a task to approve the confirmation
4498        let cm_clone = confirmation_manager.clone();
4499        tokio::spawn(async move {
4500            // Wait a bit for the confirmation request to be created
4501            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
4502            // Approve it
4503            cm_clone.confirm("tool-1", true, None).await.ok();
4504        });
4505
4506        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4507        let result = agent.execute(&[], "Run echo", None).await.unwrap();
4508
4509        assert_eq!(result.text, "Command executed!");
4510        assert_eq!(result.tool_calls_count, 1);
4511    }
4512
4513    #[tokio::test]
4514    async fn test_agent_hitl_rejected() {
4515        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4516        use tokio::sync::broadcast;
4517
4518        let mock_client = Arc::new(MockLlmClient::new(vec![
4519            MockLlmClient::tool_call_response(
4520                "tool-1",
4521                "bash",
4522                serde_json::json!({"command": "rm -rf /"}),
4523            ),
4524            MockLlmClient::text_response("Understood, I won't do that."),
4525        ]));
4526
4527        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4528
4529        // Create HITL confirmation manager
4530        let (event_tx, _event_rx) = broadcast::channel(100);
4531        let hitl_policy = ConfirmationPolicy {
4532            enabled: true,
4533            ..Default::default()
4534        };
4535        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4536
4537        // Permission policy returns Ask
4538        let permission_policy = PermissionPolicy::new();
4539
4540        let config = AgentConfig {
4541            permission_checker: Some(Arc::new(permission_policy)),
4542            confirmation_manager: Some(confirmation_manager.clone()),
4543            ..Default::default()
4544        };
4545
4546        // Spawn a task to reject the confirmation
4547        let cm_clone = confirmation_manager.clone();
4548        tokio::spawn(async move {
4549            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
4550            cm_clone
4551                .confirm("tool-1", false, Some("Too dangerous".to_string()))
4552                .await
4553                .ok();
4554        });
4555
4556        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4557        let result = agent.execute(&[], "Delete everything", None).await.unwrap();
4558
4559        // LLM should respond to the rejection
4560        assert_eq!(result.text, "Understood, I won't do that.");
4561    }
4562
4563    #[tokio::test]
4564    async fn test_agent_hitl_timeout_reject() {
4565        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
4566        use tokio::sync::broadcast;
4567
4568        let mock_client = Arc::new(MockLlmClient::new(vec![
4569            MockLlmClient::tool_call_response(
4570                "tool-1",
4571                "bash",
4572                serde_json::json!({"command": "echo test"}),
4573            ),
4574            MockLlmClient::text_response("Timed out, I understand."),
4575        ]));
4576
4577        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4578
4579        // Create HITL with very short timeout and Reject action
4580        let (event_tx, _event_rx) = broadcast::channel(100);
4581        let hitl_policy = ConfirmationPolicy {
4582            enabled: true,
4583            default_timeout_ms: 50, // Very short timeout
4584            timeout_action: TimeoutAction::Reject,
4585            ..Default::default()
4586        };
4587        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4588
4589        let permission_policy = PermissionPolicy::new();
4590
4591        let config = AgentConfig {
4592            permission_checker: Some(Arc::new(permission_policy)),
4593            confirmation_manager: Some(confirmation_manager),
4594            ..Default::default()
4595        };
4596
4597        // Don't approve - let it timeout
4598        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4599        let result = agent.execute(&[], "Echo", None).await.unwrap();
4600
4601        // Should get timeout rejection response from LLM
4602        assert_eq!(result.text, "Timed out, I understand.");
4603    }
4604
4605    #[tokio::test]
4606    async fn test_agent_hitl_timeout_auto_approve() {
4607        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
4608        use tokio::sync::broadcast;
4609
4610        let mock_client = Arc::new(MockLlmClient::new(vec![
4611            MockLlmClient::tool_call_response(
4612                "tool-1",
4613                "bash",
4614                serde_json::json!({"command": "echo hello"}),
4615            ),
4616            MockLlmClient::text_response("Auto-approved and executed!"),
4617        ]));
4618
4619        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4620
4621        // Create HITL with very short timeout and AutoApprove action
4622        let (event_tx, _event_rx) = broadcast::channel(100);
4623        let hitl_policy = ConfirmationPolicy {
4624            enabled: true,
4625            default_timeout_ms: 50, // Very short timeout
4626            timeout_action: TimeoutAction::AutoApprove,
4627            ..Default::default()
4628        };
4629        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4630
4631        let permission_policy = PermissionPolicy::new();
4632
4633        let config = AgentConfig {
4634            permission_checker: Some(Arc::new(permission_policy)),
4635            confirmation_manager: Some(confirmation_manager),
4636            ..Default::default()
4637        };
4638
4639        // Don't approve - let it timeout and auto-approve
4640        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4641        let result = agent.execute(&[], "Echo", None).await.unwrap();
4642
4643        // Should auto-approve on timeout and execute
4644        assert_eq!(result.text, "Auto-approved and executed!");
4645        assert_eq!(result.tool_calls_count, 1);
4646    }
4647
    #[tokio::test]
    async fn test_agent_hitl_confirmation_events() {
        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
        use tokio::sync::broadcast;

        // One bash tool call, then a final text answer once approved.
        let mock_client = Arc::new(MockLlmClient::new(vec![
            MockLlmClient::tool_call_response(
                "tool-1",
                "bash",
                serde_json::json!({"command": "echo test"}),
            ),
            MockLlmClient::text_response("Done!"),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));

        // Create HITL confirmation manager; keep the receiving half so the
        // test can observe the broadcast events as they are emitted.
        let (event_tx, mut event_rx) = broadcast::channel(100);
        let hitl_policy = ConfirmationPolicy {
            enabled: true,
            default_timeout_ms: 5000, // Long enough that the approval below always wins
            ..Default::default()
        };
        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));

        // Default permission decision is Ask, which routes through HITL.
        let permission_policy = PermissionPolicy::new();

        let config = AgentConfig {
            permission_checker: Some(Arc::new(permission_policy)),
            confirmation_manager: Some(confirmation_manager.clone()),
            ..Default::default()
        };

        // Background task: watch the event stream, approve the first
        // ConfirmationRequired, capture the event that follows it, and
        // return everything seen for the assertions below.
        let cm_clone = confirmation_manager.clone();
        let event_handle = tokio::spawn(async move {
            let mut events = Vec::new();
            // Wait for ConfirmationRequired event
            while let Ok(event) = event_rx.recv().await {
                events.push(event.clone());
                if let AgentEvent::ConfirmationRequired { tool_id, .. } = event {
                    // Approve it
                    cm_clone.confirm(&tool_id, true, None).await.ok();
                    // Wait for ConfirmationReceived
                    if let Ok(recv_event) = event_rx.recv().await {
                        events.push(recv_event);
                    }
                    break;
                }
            }
            events
        });

        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
        let _result = agent.execute(&[], "Echo", None).await.unwrap();

        // Both halves of the confirmation handshake must have been emitted.
        let events = event_handle.await.unwrap();
        assert!(
            events
                .iter()
                .any(|e| matches!(e, AgentEvent::ConfirmationRequired { .. })),
            "Should have ConfirmationRequired event"
        );
        assert!(
            events
                .iter()
                .any(|e| matches!(e, AgentEvent::ConfirmationReceived { approved: true, .. })),
            "Should have ConfirmationReceived event with approved=true"
        );
    }
4719
4720    #[tokio::test]
4721    async fn test_agent_hitl_disabled_auto_executes() {
4722        // When HITL is disabled, tools should execute automatically even with Ask permission
4723        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4724        use tokio::sync::broadcast;
4725
4726        let mock_client = Arc::new(MockLlmClient::new(vec![
4727            MockLlmClient::tool_call_response(
4728                "tool-1",
4729                "bash",
4730                serde_json::json!({"command": "echo auto"}),
4731            ),
4732            MockLlmClient::text_response("Auto executed!"),
4733        ]));
4734
4735        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4736
4737        // Create HITL with enabled=false
4738        let (event_tx, _event_rx) = broadcast::channel(100);
4739        let hitl_policy = ConfirmationPolicy {
4740            enabled: false, // HITL disabled
4741            ..Default::default()
4742        };
4743        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4744
4745        let permission_policy = PermissionPolicy::new(); // Default is Ask
4746
4747        let config = AgentConfig {
4748            permission_checker: Some(Arc::new(permission_policy)),
4749            confirmation_manager: Some(confirmation_manager),
4750            ..Default::default()
4751        };
4752
4753        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4754        let result = agent.execute(&[], "Echo", None).await.unwrap();
4755
4756        // Should execute without waiting for confirmation
4757        assert_eq!(result.text, "Auto executed!");
4758        assert_eq!(result.tool_calls_count, 1);
4759    }
4760
4761    #[tokio::test]
4762    async fn test_agent_hitl_with_permission_deny_skips_hitl() {
4763        // When permission is Deny, HITL should not be triggered
4764        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4765        use tokio::sync::broadcast;
4766
4767        let mock_client = Arc::new(MockLlmClient::new(vec![
4768            MockLlmClient::tool_call_response(
4769                "tool-1",
4770                "bash",
4771                serde_json::json!({"command": "rm -rf /"}),
4772            ),
4773            MockLlmClient::text_response("Blocked by permission."),
4774        ]));
4775
4776        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4777
4778        // Create HITL enabled
4779        let (event_tx, mut event_rx) = broadcast::channel(100);
4780        let hitl_policy = ConfirmationPolicy {
4781            enabled: true,
4782            ..Default::default()
4783        };
4784        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4785
4786        // Permission policy denies rm commands
4787        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4788
4789        let config = AgentConfig {
4790            permission_checker: Some(Arc::new(permission_policy)),
4791            confirmation_manager: Some(confirmation_manager),
4792            ..Default::default()
4793        };
4794
4795        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4796        let result = agent.execute(&[], "Delete", None).await.unwrap();
4797
4798        // Should be denied without HITL
4799        assert_eq!(result.text, "Blocked by permission.");
4800
4801        // Should NOT have any ConfirmationRequired events
4802        let mut found_confirmation = false;
4803        while let Ok(event) = event_rx.try_recv() {
4804            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4805                found_confirmation = true;
4806            }
4807        }
4808        assert!(
4809            !found_confirmation,
4810            "HITL should not be triggered when permission is Deny"
4811        );
4812    }
4813
4814    #[tokio::test]
4815    async fn test_agent_hitl_with_permission_allow_skips_hitl() {
4816        // When permission is Allow, HITL confirmation is skipped entirely.
4817        // PermissionPolicy is the declarative rule engine; Allow = execute directly.
4818        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4819        use tokio::sync::broadcast;
4820
4821        let mock_client = Arc::new(MockLlmClient::new(vec![
4822            MockLlmClient::tool_call_response(
4823                "tool-1",
4824                "bash",
4825                serde_json::json!({"command": "echo hello"}),
4826            ),
4827            MockLlmClient::text_response("Allowed!"),
4828        ]));
4829
4830        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4831
4832        // Create HITL enabled
4833        let (event_tx, mut event_rx) = broadcast::channel(100);
4834        let hitl_policy = ConfirmationPolicy {
4835            enabled: true,
4836            ..Default::default()
4837        };
4838        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4839
4840        // Permission policy allows echo commands
4841        let permission_policy = PermissionPolicy::new().allow("bash(echo:*)");
4842
4843        let config = AgentConfig {
4844            permission_checker: Some(Arc::new(permission_policy)),
4845            confirmation_manager: Some(confirmation_manager.clone()),
4846            ..Default::default()
4847        };
4848
4849        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4850        let result = agent.execute(&[], "Echo", None).await.unwrap();
4851
4852        // Should execute directly without HITL (permission Allow skips confirmation)
4853        assert_eq!(result.text, "Allowed!");
4854
4855        // Should NOT have ConfirmationRequired event (Allow bypasses HITL)
4856        let mut found_confirmation = false;
4857        while let Ok(event) = event_rx.try_recv() {
4858            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4859                found_confirmation = true;
4860            }
4861        }
4862        assert!(
4863            !found_confirmation,
4864            "Permission Allow should skip HITL confirmation"
4865        );
4866    }
4867
    #[tokio::test]
    async fn test_agent_hitl_multiple_tool_calls() {
        // Test multiple tool calls in sequence with HITL: both calls arrive
        // in one assistant message and each must be approved individually.
        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
        use tokio::sync::broadcast;

        let mock_client = Arc::new(MockLlmClient::new(vec![
            // First response: two tool calls, built by hand because the
            // tool_call_response helper produces only a single call.
            LlmResponse {
                message: Message {
                    role: "assistant".to_string(),
                    content: vec![
                        ContentBlock::ToolUse {
                            id: "tool-1".to_string(),
                            name: "bash".to_string(),
                            input: serde_json::json!({"command": "echo first"}),
                        },
                        ContentBlock::ToolUse {
                            id: "tool-2".to_string(),
                            name: "bash".to_string(),
                            input: serde_json::json!({"command": "echo second"}),
                        },
                    ],
                    reasoning_content: None,
                },
                usage: TokenUsage {
                    prompt_tokens: 10,
                    completion_tokens: 5,
                    total_tokens: 15,
                    cache_read_tokens: None,
                    cache_write_tokens: None,
                },
                stop_reason: Some("tool_use".to_string()),
                meta: None,
            },
            MockLlmClient::text_response("Both executed!"),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));

        // Create HITL with a generous timeout so the approvals below win.
        let (event_tx, _event_rx) = broadcast::channel(100);
        let hitl_policy = ConfirmationPolicy {
            enabled: true,
            default_timeout_ms: 5000,
            ..Default::default()
        };
        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));

        let permission_policy = PermissionPolicy::new(); // Default Ask

        let config = AgentConfig {
            permission_checker: Some(Arc::new(permission_policy)),
            confirmation_manager: Some(confirmation_manager.clone()),
            ..Default::default()
        };

        // Spawn task to approve both tools, one after the other.
        // NOTE(review): the fixed 30ms sleeps assume each confirmation
        // request exists by then — could race on a slow machine.
        let cm_clone = confirmation_manager.clone();
        tokio::spawn(async move {
            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
            cm_clone.confirm("tool-1", true, None).await.ok();
            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
            cm_clone.confirm("tool-2", true, None).await.ok();
        });

        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
        let result = agent.execute(&[], "Run both", None).await.unwrap();

        // Both approvals let both tools run; the LLM then answers with text.
        assert_eq!(result.text, "Both executed!");
        assert_eq!(result.tool_calls_count, 2);
    }
4940
    #[tokio::test]
    async fn test_agent_hitl_partial_approval() {
        // Test: first tool approved, second rejected — the loop must carry
        // on and feed both outcomes back to the LLM in the same round.
        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
        use tokio::sync::broadcast;

        let mock_client = Arc::new(MockLlmClient::new(vec![
            // First response: two tool calls in a single assistant message.
            LlmResponse {
                message: Message {
                    role: "assistant".to_string(),
                    content: vec![
                        ContentBlock::ToolUse {
                            id: "tool-1".to_string(),
                            name: "bash".to_string(),
                            input: serde_json::json!({"command": "echo safe"}),
                        },
                        ContentBlock::ToolUse {
                            id: "tool-2".to_string(),
                            name: "bash".to_string(),
                            input: serde_json::json!({"command": "rm -rf /"}),
                        },
                    ],
                    reasoning_content: None,
                },
                usage: TokenUsage {
                    prompt_tokens: 10,
                    completion_tokens: 5,
                    total_tokens: 15,
                    cache_read_tokens: None,
                    cache_write_tokens: None,
                },
                stop_reason: Some("tool_use".to_string()),
                meta: None,
            },
            MockLlmClient::text_response("First worked, second rejected."),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));

        // Generous timeout so the confirm calls below always arrive in time.
        let (event_tx, _event_rx) = broadcast::channel(100);
        let hitl_policy = ConfirmationPolicy {
            enabled: true,
            default_timeout_ms: 5000,
            ..Default::default()
        };
        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));

        let permission_policy = PermissionPolicy::new();

        let config = AgentConfig {
            permission_checker: Some(Arc::new(permission_policy)),
            confirmation_manager: Some(confirmation_manager.clone()),
            ..Default::default()
        };

        // Approve first, reject second.
        // NOTE(review): the fixed 30ms sleeps assume each confirmation
        // request exists by then — could race on a slow machine.
        let cm_clone = confirmation_manager.clone();
        tokio::spawn(async move {
            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
            cm_clone.confirm("tool-1", true, None).await.ok();
            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
            cm_clone
                .confirm("tool-2", false, Some("Dangerous".to_string()))
                .await
                .ok();
        });

        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
        let result = agent.execute(&[], "Run both", None).await.unwrap();

        assert_eq!(result.text, "First worked, second rejected.");
        // Per this assertion, a rejected call still counts in tool_calls_count.
        assert_eq!(result.tool_calls_count, 2);
    }
5015
5016    #[tokio::test]
5017    async fn test_agent_hitl_yolo_mode_auto_approves() {
5018        // YOLO mode: specific lanes auto-approve without confirmation
5019        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, SessionLane};
5020        use tokio::sync::broadcast;
5021
5022        let mock_client = Arc::new(MockLlmClient::new(vec![
5023            MockLlmClient::tool_call_response(
5024                "tool-1",
5025                "read", // Query lane tool
5026                serde_json::json!({"path": "/tmp/test.txt"}),
5027            ),
5028            MockLlmClient::text_response("File read!"),
5029        ]));
5030
5031        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5032
5033        // YOLO mode for Query lane (read, glob, ls, grep)
5034        let (event_tx, mut event_rx) = broadcast::channel(100);
5035        let mut yolo_lanes = std::collections::HashSet::new();
5036        yolo_lanes.insert(SessionLane::Query);
5037        let hitl_policy = ConfirmationPolicy {
5038            enabled: true,
5039            yolo_lanes, // Auto-approve query operations
5040            ..Default::default()
5041        };
5042        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5043
5044        let permission_policy = PermissionPolicy::new();
5045
5046        let config = AgentConfig {
5047            permission_checker: Some(Arc::new(permission_policy)),
5048            confirmation_manager: Some(confirmation_manager),
5049            ..Default::default()
5050        };
5051
5052        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5053        let result = agent.execute(&[], "Read file", None).await.unwrap();
5054
5055        // Should auto-execute without confirmation (YOLO mode)
5056        assert_eq!(result.text, "File read!");
5057
5058        // Should NOT have ConfirmationRequired for yolo lane
5059        let mut found_confirmation = false;
5060        while let Ok(event) = event_rx.try_recv() {
5061            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
5062                found_confirmation = true;
5063            }
5064        }
5065        assert!(
5066            !found_confirmation,
5067            "YOLO mode should not trigger confirmation"
5068        );
5069    }
5070
5071    #[tokio::test]
5072    async fn test_agent_config_with_all_options() {
5073        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5074        use tokio::sync::broadcast;
5075
5076        let (event_tx, _) = broadcast::channel(100);
5077        let hitl_policy = ConfirmationPolicy::default();
5078        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5079
5080        let permission_policy = PermissionPolicy::new().allow("bash(*)");
5081
5082        let config = AgentConfig {
5083            prompt_slots: SystemPromptSlots {
5084                extra: Some("Test system prompt".to_string()),
5085                ..Default::default()
5086            },
5087            tools: vec![],
5088            max_tool_rounds: 10,
5089            permission_checker: Some(Arc::new(permission_policy)),
5090            confirmation_manager: Some(confirmation_manager),
5091            context_providers: vec![],
5092            planning_mode: PlanningMode::default(),
5093            goal_tracking: false,
5094            hook_engine: None,
5095            skill_registry: None,
5096            ..AgentConfig::default()
5097        };
5098
5099        assert!(config.prompt_slots.build().contains("Test system prompt"));
5100        assert_eq!(config.max_tool_rounds, 10);
5101        assert!(config.permission_checker.is_some());
5102        assert!(config.confirmation_manager.is_some());
5103        assert!(config.context_providers.is_empty());
5104
5105        // Test Debug trait
5106        let debug_str = format!("{:?}", config);
5107        assert!(debug_str.contains("AgentConfig"));
5108        assert!(debug_str.contains("permission_checker: true"));
5109        assert!(debug_str.contains("confirmation_manager: true"));
5110        assert!(debug_str.contains("context_providers: 0"));
5111    }
5112
5113    // ========================================================================
5114    // Context Provider Tests
5115    // ========================================================================
5116
5117    use crate::context::{ContextItem, ContextType};
5118
5119    /// Mock context provider for testing
5120    struct MockContextProvider {
5121        name: String,
5122        items: Vec<ContextItem>,
5123        on_turn_calls: std::sync::Arc<tokio::sync::RwLock<Vec<(String, String, String)>>>,
5124    }
5125
5126    impl MockContextProvider {
5127        fn new(name: &str) -> Self {
5128            Self {
5129                name: name.to_string(),
5130                items: Vec::new(),
5131                on_turn_calls: std::sync::Arc::new(tokio::sync::RwLock::new(Vec::new())),
5132            }
5133        }
5134
5135        fn with_items(mut self, items: Vec<ContextItem>) -> Self {
5136            self.items = items;
5137            self
5138        }
5139    }
5140
5141    #[async_trait::async_trait]
5142    impl ContextProvider for MockContextProvider {
5143        fn name(&self) -> &str {
5144            &self.name
5145        }
5146
5147        async fn query(&self, _query: &ContextQuery) -> anyhow::Result<ContextResult> {
5148            let mut result = ContextResult::new(&self.name);
5149            for item in &self.items {
5150                result.add_item(item.clone());
5151            }
5152            Ok(result)
5153        }
5154
5155        async fn on_turn_complete(
5156            &self,
5157            session_id: &str,
5158            prompt: &str,
5159            response: &str,
5160        ) -> anyhow::Result<()> {
5161            let mut calls = self.on_turn_calls.write().await;
5162            calls.push((
5163                session_id.to_string(),
5164                prompt.to_string(),
5165                response.to_string(),
5166            ));
5167            Ok(())
5168        }
5169    }
5170
5171    #[tokio::test]
5172    async fn test_agent_with_context_provider() {
5173        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5174            "Response using context",
5175        )]));
5176
5177        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5178
5179        let provider =
5180            MockContextProvider::new("test-provider").with_items(vec![ContextItem::new(
5181                "ctx-1",
5182                ContextType::Resource,
5183                "Relevant context here",
5184            )
5185            .with_source("test://docs/example")]);
5186
5187        let config = AgentConfig {
5188            prompt_slots: SystemPromptSlots {
5189                extra: Some("You are helpful.".to_string()),
5190                ..Default::default()
5191            },
5192            context_providers: vec![Arc::new(provider)],
5193            ..Default::default()
5194        };
5195
5196        let agent = AgentLoop::new(
5197            mock_client.clone(),
5198            tool_executor,
5199            test_tool_context(),
5200            config,
5201        );
5202        let result = agent.execute(&[], "What is X?", None).await.unwrap();
5203
5204        assert_eq!(result.text, "Response using context");
5205        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
5206    }
5207
5208    #[tokio::test]
5209    async fn test_agent_context_provider_events() {
5210        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5211            "Answer",
5212        )]));
5213
5214        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5215
5216        let provider =
5217            MockContextProvider::new("event-provider").with_items(vec![ContextItem::new(
5218                "item-1",
5219                ContextType::Memory,
5220                "Memory content",
5221            )
5222            .with_token_count(50)]);
5223
5224        let config = AgentConfig {
5225            context_providers: vec![Arc::new(provider)],
5226            ..Default::default()
5227        };
5228
5229        let (tx, mut rx) = mpsc::channel(100);
5230        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5231        let _result = agent.execute(&[], "Test prompt", Some(tx)).await.unwrap();
5232
5233        // Collect events
5234        let mut events = Vec::new();
5235        while let Ok(event) = rx.try_recv() {
5236            events.push(event);
5237        }
5238
5239        // Should have ContextResolving and ContextResolved events
5240        assert!(
5241            events
5242                .iter()
5243                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
5244            "Should have ContextResolving event"
5245        );
5246        assert!(
5247            events
5248                .iter()
5249                .any(|e| matches!(e, AgentEvent::ContextResolved { .. })),
5250            "Should have ContextResolved event"
5251        );
5252
5253        // Check context resolved values
5254        for event in &events {
5255            if let AgentEvent::ContextResolved {
5256                total_items,
5257                total_tokens,
5258            } = event
5259            {
5260                assert_eq!(*total_items, 1);
5261                assert_eq!(*total_tokens, 50);
5262            }
5263        }
5264    }
5265
5266    #[tokio::test]
5267    async fn test_agent_multiple_context_providers() {
5268        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5269            "Combined response",
5270        )]));
5271
5272        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5273
5274        let provider1 = MockContextProvider::new("provider-1").with_items(vec![ContextItem::new(
5275            "p1-1",
5276            ContextType::Resource,
5277            "Resource from P1",
5278        )
5279        .with_token_count(100)]);
5280
5281        let provider2 = MockContextProvider::new("provider-2").with_items(vec![
5282            ContextItem::new("p2-1", ContextType::Memory, "Memory from P2").with_token_count(50),
5283            ContextItem::new("p2-2", ContextType::Skill, "Skill from P2").with_token_count(75),
5284        ]);
5285
5286        let config = AgentConfig {
5287            prompt_slots: SystemPromptSlots {
5288                extra: Some("Base system prompt.".to_string()),
5289                ..Default::default()
5290            },
5291            context_providers: vec![Arc::new(provider1), Arc::new(provider2)],
5292            ..Default::default()
5293        };
5294
5295        let (tx, mut rx) = mpsc::channel(100);
5296        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5297        let result = agent.execute(&[], "Query", Some(tx)).await.unwrap();
5298
5299        assert_eq!(result.text, "Combined response");
5300
5301        // Check context resolved event has combined totals
5302        while let Ok(event) = rx.try_recv() {
5303            if let AgentEvent::ContextResolved {
5304                total_items,
5305                total_tokens,
5306            } = event
5307            {
5308                assert_eq!(total_items, 3); // 1 + 2
5309                assert_eq!(total_tokens, 225); // 100 + 50 + 75
5310            }
5311        }
5312    }
5313
5314    #[tokio::test]
5315    async fn test_agent_no_context_providers() {
5316        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5317            "No context",
5318        )]));
5319
5320        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5321
5322        // No context providers
5323        let config = AgentConfig::default();
5324
5325        let (tx, mut rx) = mpsc::channel(100);
5326        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5327        let result = agent.execute(&[], "Simple prompt", Some(tx)).await.unwrap();
5328
5329        assert_eq!(result.text, "No context");
5330
5331        // Should NOT have context events when no providers
5332        let mut events = Vec::new();
5333        while let Ok(event) = rx.try_recv() {
5334            events.push(event);
5335        }
5336
5337        assert!(
5338            !events
5339                .iter()
5340                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
5341            "Should NOT have ContextResolving event"
5342        );
5343    }
5344
5345    #[tokio::test]
5346    async fn test_agent_context_on_turn_complete() {
5347        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5348            "Final response",
5349        )]));
5350
5351        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5352
5353        let provider = Arc::new(MockContextProvider::new("memory-provider"));
5354        let on_turn_calls = provider.on_turn_calls.clone();
5355
5356        let config = AgentConfig {
5357            context_providers: vec![provider],
5358            ..Default::default()
5359        };
5360
5361        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5362
5363        // Execute with session ID
5364        let result = agent
5365            .execute_with_session(&[], "User prompt", Some("sess-123"), None, None)
5366            .await
5367            .unwrap();
5368
5369        assert_eq!(result.text, "Final response");
5370
5371        // Check on_turn_complete was called
5372        let calls = on_turn_calls.read().await;
5373        assert_eq!(calls.len(), 1);
5374        assert_eq!(calls[0].0, "sess-123");
5375        assert_eq!(calls[0].1, "User prompt");
5376        assert_eq!(calls[0].2, "Final response");
5377    }
5378
5379    #[tokio::test]
5380    async fn test_agent_context_on_turn_complete_no_session() {
5381        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5382            "Response",
5383        )]));
5384
5385        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5386
5387        let provider = Arc::new(MockContextProvider::new("memory-provider"));
5388        let on_turn_calls = provider.on_turn_calls.clone();
5389
5390        let config = AgentConfig {
5391            context_providers: vec![provider],
5392            ..Default::default()
5393        };
5394
5395        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5396
5397        // Execute without session ID (uses execute() which passes None)
5398        let _result = agent.execute(&[], "Prompt", None).await.unwrap();
5399
5400        // on_turn_complete should NOT be called when session_id is None
5401        let calls = on_turn_calls.read().await;
5402        assert!(calls.is_empty());
5403    }
5404
5405    #[tokio::test]
5406    async fn test_agent_build_augmented_system_prompt() {
5407        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response("OK")]));
5408
5409        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5410
5411        let provider = MockContextProvider::new("test").with_items(vec![ContextItem::new(
5412            "doc-1",
5413            ContextType::Resource,
5414            "Auth uses JWT tokens.",
5415        )
5416        .with_source("viking://docs/auth")]);
5417
5418        let config = AgentConfig {
5419            prompt_slots: SystemPromptSlots {
5420                extra: Some("You are helpful.".to_string()),
5421                ..Default::default()
5422            },
5423            context_providers: vec![Arc::new(provider)],
5424            ..Default::default()
5425        };
5426
5427        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5428
5429        // Test building augmented prompt
5430        let context_results = agent.resolve_context("test", None).await;
5431        let augmented = agent.build_augmented_system_prompt(&context_results);
5432
5433        let augmented_str = augmented.unwrap();
5434        assert!(augmented_str.contains("You are helpful."));
5435        assert!(augmented_str.contains("<context source=\"viking://docs/auth\" type=\"Resource\">"));
5436        assert!(augmented_str.contains("Auth uses JWT tokens."));
5437    }
5438
5439    // ========================================================================
5440    // Agentic Loop Integration Tests
5441    // ========================================================================
5442
5443    /// Helper: collect all events from a channel
5444    async fn collect_events(mut rx: mpsc::Receiver<AgentEvent>) -> Vec<AgentEvent> {
5445        let mut events = Vec::new();
5446        while let Ok(event) = rx.try_recv() {
5447            events.push(event);
5448        }
5449        // Drain remaining
5450        while let Some(event) = rx.recv().await {
5451            events.push(event);
5452        }
5453        events
5454    }
5455
    #[tokio::test]
    async fn test_agent_multi_turn_tool_chain() {
        // LLM calls tool A → sees result → calls tool B → sees result → final answer
        let mock_client = Arc::new(MockLlmClient::new(vec![
            // Turn 1: call bash (echo step1)
            MockLlmClient::tool_call_response(
                "t1",
                "bash",
                serde_json::json!({"command": "echo step1"}),
            ),
            // Turn 2: call another tool based on first result
            MockLlmClient::tool_call_response(
                "t2",
                "bash",
                serde_json::json!({"command": "echo step2"}),
            ),
            // Turn 3: final answer
            MockLlmClient::text_response("Completed both steps: step1 then step2"),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let config = AgentConfig::default();

        let agent = AgentLoop::new(
            mock_client.clone(),
            tool_executor,
            test_tool_context(),
            config,
        );
        let result = agent.execute(&[], "Run two steps", None).await.unwrap();

        assert_eq!(result.text, "Completed both steps: step1 then step2");
        assert_eq!(result.tool_calls_count, 2);
        // Three LLM round-trips: two tool-call turns plus the final answer.
        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 3);

        // Verify message history: user → assistant(tool_use) → user(tool_result) → assistant(tool_use) → user(tool_result) → assistant(text)
        assert_eq!(result.messages[0].role, "user");
        assert_eq!(result.messages[1].role, "assistant"); // tool call 1
        assert_eq!(result.messages[2].role, "user"); // tool result 1 (Anthropic convention)
        assert_eq!(result.messages[3].role, "assistant"); // tool call 2
        assert_eq!(result.messages[4].role, "user"); // tool result 2
        assert_eq!(result.messages[5].role, "assistant"); // final text
        assert_eq!(result.messages.len(), 6);
    }
5500
5501    #[tokio::test]
5502    async fn test_agent_conversation_history_preserved() {
5503        // Pass existing history, verify it's preserved in output
5504        let existing_history = vec![
5505            Message::user("What is Rust?"),
5506            Message {
5507                role: "assistant".to_string(),
5508                content: vec![ContentBlock::Text {
5509                    text: "Rust is a systems programming language.".to_string(),
5510                }],
5511                reasoning_content: None,
5512            },
5513        ];
5514
5515        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5516            "Rust was created by Graydon Hoare at Mozilla.",
5517        )]));
5518
5519        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5520        let agent = AgentLoop::new(
5521            mock_client.clone(),
5522            tool_executor,
5523            test_tool_context(),
5524            AgentConfig::default(),
5525        );
5526
5527        let result = agent
5528            .execute(&existing_history, "Who created it?", None)
5529            .await
5530            .unwrap();
5531
5532        // History should contain: old user + old assistant + new user + new assistant
5533        assert_eq!(result.messages.len(), 4);
5534        assert_eq!(result.messages[0].text(), "What is Rust?");
5535        assert_eq!(
5536            result.messages[1].text(),
5537            "Rust is a systems programming language."
5538        );
5539        assert_eq!(result.messages[2].text(), "Who created it?");
5540        assert_eq!(
5541            result.messages[3].text(),
5542            "Rust was created by Graydon Hoare at Mozilla."
5543        );
5544    }
5545
5546    #[tokio::test]
5547    async fn test_agent_event_stream_completeness() {
5548        // Verify full event sequence for a single tool call loop
5549        let mock_client = Arc::new(MockLlmClient::new(vec![
5550            MockLlmClient::tool_call_response(
5551                "t1",
5552                "bash",
5553                serde_json::json!({"command": "echo hi"}),
5554            ),
5555            MockLlmClient::text_response("Done"),
5556        ]));
5557
5558        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5559        let agent = AgentLoop::new(
5560            mock_client,
5561            tool_executor,
5562            test_tool_context(),
5563            AgentConfig::default(),
5564        );
5565
5566        let (tx, rx) = mpsc::channel(100);
5567        let result = agent.execute(&[], "Say hi", Some(tx)).await.unwrap();
5568        assert_eq!(result.text, "Done");
5569
5570        let events = collect_events(rx).await;
5571
5572        // Verify event sequence
5573        let event_types: Vec<&str> = events
5574            .iter()
5575            .map(|e| match e {
5576                AgentEvent::Start { .. } => "Start",
5577                AgentEvent::TurnStart { .. } => "TurnStart",
5578                AgentEvent::TurnEnd { .. } => "TurnEnd",
5579                AgentEvent::ToolEnd { .. } => "ToolEnd",
5580                AgentEvent::End { .. } => "End",
5581                _ => "Other",
5582            })
5583            .collect();
5584
5585        // Must start with Start, end with End
5586        assert_eq!(event_types.first(), Some(&"Start"));
5587        assert_eq!(event_types.last(), Some(&"End"));
5588
5589        // Must have 2 TurnStarts (tool call turn + final answer turn)
5590        let turn_starts = event_types.iter().filter(|&&t| t == "TurnStart").count();
5591        assert_eq!(turn_starts, 2);
5592
5593        // Must have 1 ToolEnd
5594        let tool_ends = event_types.iter().filter(|&&t| t == "ToolEnd").count();
5595        assert_eq!(tool_ends, 1);
5596    }
5597
    #[tokio::test]
    async fn test_agent_multiple_tools_single_turn() {
        // LLM returns 2 tool calls in one response; both must be executed
        // before the conversation returns to the LLM.
        let mock_client = Arc::new(MockLlmClient::new(vec![
            LlmResponse {
                message: Message {
                    role: "assistant".to_string(),
                    // Two parallel ToolUse blocks inside one assistant message.
                    content: vec![
                        ContentBlock::ToolUse {
                            id: "t1".to_string(),
                            name: "bash".to_string(),
                            input: serde_json::json!({"command": "echo first"}),
                        },
                        ContentBlock::ToolUse {
                            id: "t2".to_string(),
                            name: "bash".to_string(),
                            input: serde_json::json!({"command": "echo second"}),
                        },
                    ],
                    reasoning_content: None,
                },
                usage: TokenUsage {
                    prompt_tokens: 10,
                    completion_tokens: 5,
                    total_tokens: 15,
                    cache_read_tokens: None,
                    cache_write_tokens: None,
                },
                stop_reason: Some("tool_use".to_string()),
                meta: None,
            },
            MockLlmClient::text_response("Both commands ran"),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let agent = AgentLoop::new(
            mock_client.clone(),
            tool_executor,
            test_tool_context(),
            AgentConfig::default(),
        );

        let result = agent.execute(&[], "Run both", None).await.unwrap();

        assert_eq!(result.text, "Both commands ran");
        assert_eq!(result.tool_calls_count, 2);
        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2); // Only 2 LLM calls

        // Messages: user → assistant(2 tools) → user(tool_result) → user(tool_result) → assistant(text)
        assert_eq!(result.messages[0].role, "user");
        assert_eq!(result.messages[1].role, "assistant");
        assert_eq!(result.messages[2].role, "user"); // tool result 1
        assert_eq!(result.messages[3].role, "user"); // tool result 2
        assert_eq!(result.messages[4].role, "assistant");
    }
5653
5654    #[tokio::test]
5655    async fn test_agent_token_usage_accumulation() {
5656        // Verify usage sums across multiple turns
5657        let mock_client = Arc::new(MockLlmClient::new(vec![
5658            MockLlmClient::tool_call_response(
5659                "t1",
5660                "bash",
5661                serde_json::json!({"command": "echo x"}),
5662            ),
5663            MockLlmClient::text_response("Done"),
5664        ]));
5665
5666        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5667        let agent = AgentLoop::new(
5668            mock_client,
5669            tool_executor,
5670            test_tool_context(),
5671            AgentConfig::default(),
5672        );
5673
5674        let result = agent.execute(&[], "test", None).await.unwrap();
5675
5676        // Each mock response has prompt=10, completion=5, total=15
5677        // 2 LLM calls → 20 prompt, 10 completion, 30 total
5678        assert_eq!(result.usage.prompt_tokens, 20);
5679        assert_eq!(result.usage.completion_tokens, 10);
5680        assert_eq!(result.usage.total_tokens, 30);
5681    }
5682
5683    #[tokio::test]
5684    async fn test_agent_system_prompt_passed() {
5685        // Verify system prompt is used (MockLlmClient captures calls)
5686        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5687            "I am a coding assistant.",
5688        )]));
5689
5690        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5691        let config = AgentConfig {
5692            prompt_slots: SystemPromptSlots {
5693                extra: Some("You are a coding assistant.".to_string()),
5694                ..Default::default()
5695            },
5696            ..Default::default()
5697        };
5698
5699        let agent = AgentLoop::new(
5700            mock_client.clone(),
5701            tool_executor,
5702            test_tool_context(),
5703            config,
5704        );
5705        let result = agent.execute(&[], "What are you?", None).await.unwrap();
5706
5707        assert_eq!(result.text, "I am a coding assistant.");
5708        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
5709    }
5710
5711    #[tokio::test]
5712    async fn test_agent_max_rounds_with_persistent_tool_calls() {
5713        // LLM keeps calling tools forever — should hit max_tool_rounds
5714        let mut responses = Vec::new();
5715        for i in 0..15 {
5716            responses.push(MockLlmClient::tool_call_response(
5717                &format!("t{}", i),
5718                "bash",
5719                serde_json::json!({"command": format!("echo round{}", i)}),
5720            ));
5721        }
5722
5723        let mock_client = Arc::new(MockLlmClient::new(responses));
5724        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5725        let config = AgentConfig {
5726            max_tool_rounds: 5,
5727            ..Default::default()
5728        };
5729
5730        let agent = AgentLoop::new(
5731            mock_client.clone(),
5732            tool_executor,
5733            test_tool_context(),
5734            config,
5735        );
5736        let result = agent.execute(&[], "Loop forever", None).await;
5737
5738        assert!(result.is_err());
5739        let err = result.unwrap_err().to_string();
5740        assert!(err.contains("Max tool rounds (5) exceeded"));
5741    }
5742
5743    #[tokio::test]
5744    async fn test_agent_end_event_contains_final_text() {
5745        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5746            "Final answer here",
5747        )]));
5748
5749        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5750        let agent = AgentLoop::new(
5751            mock_client,
5752            tool_executor,
5753            test_tool_context(),
5754            AgentConfig::default(),
5755        );
5756
5757        let (tx, rx) = mpsc::channel(100);
5758        agent.execute(&[], "test", Some(tx)).await.unwrap();
5759
5760        let events = collect_events(rx).await;
5761        let end_event = events.iter().find(|e| matches!(e, AgentEvent::End { .. }));
5762        assert!(end_event.is_some());
5763
5764        if let AgentEvent::End { text, usage, .. } = end_event.unwrap() {
5765            assert_eq!(text, "Final answer here");
5766            assert_eq!(usage.total_tokens, 15);
5767        }
5768    }
5769}
5770
5771#[cfg(test)]
5772mod extra_agent_tests {
5773    use super::*;
5774    use crate::agent::tests::MockLlmClient;
5775    use crate::queue::SessionQueueConfig;
5776    use crate::tools::ToolExecutor;
5777    use std::path::PathBuf;
5778    use std::sync::atomic::{AtomicUsize, Ordering};
5779
5780    fn test_tool_context() -> ToolContext {
5781        ToolContext::new(PathBuf::from("/tmp"))
5782    }
5783
5784    // ========================================================================
5785    // AgentConfig
5786    // ========================================================================
5787
5788    #[test]
5789    fn test_agent_config_debug() {
5790        let config = AgentConfig {
5791            prompt_slots: SystemPromptSlots {
5792                extra: Some("You are helpful".to_string()),
5793                ..Default::default()
5794            },
5795            tools: vec![],
5796            max_tool_rounds: 10,
5797            permission_checker: None,
5798            confirmation_manager: None,
5799            context_providers: vec![],
5800            planning_mode: PlanningMode::Enabled,
5801            goal_tracking: false,
5802            hook_engine: None,
5803            skill_registry: None,
5804            ..AgentConfig::default()
5805        };
5806        let debug = format!("{:?}", config);
5807        assert!(debug.contains("AgentConfig"));
5808        assert!(debug.contains("planning_mode"));
5809    }
5810
5811    #[test]
5812    fn test_agent_config_default_values() {
5813        let config = AgentConfig::default();
5814        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
5815        assert_eq!(config.planning_mode, PlanningMode::Auto);
5816        assert!(!config.goal_tracking);
5817        assert!(config.context_providers.is_empty());
5818    }
5819
5820    // ========================================================================
5821    // AgentEvent serialization
5822    // ========================================================================
5823
5824    #[test]
5825    fn test_agent_event_serialize_start() {
5826        let event = AgentEvent::Start {
5827            prompt: "Hello".to_string(),
5828        };
5829        let json = serde_json::to_string(&event).unwrap();
5830        assert!(json.contains("agent_start"));
5831        assert!(json.contains("Hello"));
5832    }
5833
5834    #[test]
5835    fn test_agent_event_serialize_text_delta() {
5836        let event = AgentEvent::TextDelta {
5837            text: "chunk".to_string(),
5838        };
5839        let json = serde_json::to_string(&event).unwrap();
5840        assert!(json.contains("text_delta"));
5841    }
5842
5843    #[test]
5844    fn test_agent_event_serialize_tool_start() {
5845        let event = AgentEvent::ToolStart {
5846            id: "t1".to_string(),
5847            name: "bash".to_string(),
5848        };
5849        let json = serde_json::to_string(&event).unwrap();
5850        assert!(json.contains("tool_start"));
5851        assert!(json.contains("bash"));
5852    }
5853
5854    #[test]
5855    fn test_agent_event_serialize_tool_end() {
5856        let event = AgentEvent::ToolEnd {
5857            id: "t1".to_string(),
5858            name: "bash".to_string(),
5859            output: "hello".to_string(),
5860            exit_code: 0,
5861            metadata: None,
5862        };
5863        let json = serde_json::to_string(&event).unwrap();
5864        assert!(json.contains("tool_end"));
5865    }
5866
5867    #[test]
5868    fn test_agent_event_tool_end_has_metadata_field() {
5869        let event = AgentEvent::ToolEnd {
5870            id: "t1".to_string(),
5871            name: "write".to_string(),
5872            output: "Wrote 5 bytes".to_string(),
5873            exit_code: 0,
5874            metadata: Some(
5875                serde_json::json!({ "before": "old", "after": "new", "file_path": "f.txt" }),
5876            ),
5877        };
5878        let json = serde_json::to_string(&event).unwrap();
5879        assert!(json.contains("\"before\""));
5880    }
5881
5882    #[test]
5883    fn test_agent_event_serialize_error() {
5884        let event = AgentEvent::Error {
5885            message: "oops".to_string(),
5886        };
5887        let json = serde_json::to_string(&event).unwrap();
5888        assert!(json.contains("error"));
5889        assert!(json.contains("oops"));
5890    }
5891
5892    #[test]
5893    fn test_agent_event_serialize_confirmation_required() {
5894        let event = AgentEvent::ConfirmationRequired {
5895            tool_id: "t1".to_string(),
5896            tool_name: "bash".to_string(),
5897            args: serde_json::json!({"cmd": "rm"}),
5898            timeout_ms: 30000,
5899        };
5900        let json = serde_json::to_string(&event).unwrap();
5901        assert!(json.contains("confirmation_required"));
5902    }
5903
5904    #[test]
5905    fn test_agent_event_serialize_confirmation_received() {
5906        let event = AgentEvent::ConfirmationReceived {
5907            tool_id: "t1".to_string(),
5908            approved: true,
5909            reason: Some("safe".to_string()),
5910        };
5911        let json = serde_json::to_string(&event).unwrap();
5912        assert!(json.contains("confirmation_received"));
5913    }
5914
5915    #[test]
5916    fn test_agent_event_serialize_confirmation_timeout() {
5917        let event = AgentEvent::ConfirmationTimeout {
5918            tool_id: "t1".to_string(),
5919            action_taken: "rejected".to_string(),
5920        };
5921        let json = serde_json::to_string(&event).unwrap();
5922        assert!(json.contains("confirmation_timeout"));
5923    }
5924
5925    #[test]
5926    fn test_agent_event_serialize_external_task_pending() {
5927        let event = AgentEvent::ExternalTaskPending {
5928            task_id: "task-1".to_string(),
5929            session_id: "sess-1".to_string(),
5930            lane: crate::hitl::SessionLane::Execute,
5931            command_type: "bash".to_string(),
5932            payload: serde_json::json!({}),
5933            timeout_ms: 60000,
5934        };
5935        let json = serde_json::to_string(&event).unwrap();
5936        assert!(json.contains("external_task_pending"));
5937    }
5938
5939    #[test]
5940    fn test_agent_event_serialize_external_task_completed() {
5941        let event = AgentEvent::ExternalTaskCompleted {
5942            task_id: "task-1".to_string(),
5943            session_id: "sess-1".to_string(),
5944            success: false,
5945        };
5946        let json = serde_json::to_string(&event).unwrap();
5947        assert!(json.contains("external_task_completed"));
5948    }
5949
5950    #[test]
5951    fn test_agent_event_serialize_permission_denied() {
5952        let event = AgentEvent::PermissionDenied {
5953            tool_id: "t1".to_string(),
5954            tool_name: "bash".to_string(),
5955            args: serde_json::json!({}),
5956            reason: "denied".to_string(),
5957        };
5958        let json = serde_json::to_string(&event).unwrap();
5959        assert!(json.contains("permission_denied"));
5960    }
5961
5962    #[test]
5963    fn test_agent_event_serialize_context_compacted() {
5964        let event = AgentEvent::ContextCompacted {
5965            session_id: "sess-1".to_string(),
5966            before_messages: 100,
5967            after_messages: 20,
5968            percent_before: 0.85,
5969        };
5970        let json = serde_json::to_string(&event).unwrap();
5971        assert!(json.contains("context_compacted"));
5972    }
5973
5974    #[test]
5975    fn test_agent_event_serialize_turn_start() {
5976        let event = AgentEvent::TurnStart { turn: 3 };
5977        let json = serde_json::to_string(&event).unwrap();
5978        assert!(json.contains("turn_start"));
5979    }
5980
5981    #[test]
5982    fn test_agent_event_serialize_turn_end() {
5983        let event = AgentEvent::TurnEnd {
5984            turn: 3,
5985            usage: TokenUsage::default(),
5986        };
5987        let json = serde_json::to_string(&event).unwrap();
5988        assert!(json.contains("turn_end"));
5989    }
5990
5991    #[test]
5992    fn test_agent_event_serialize_end() {
5993        let event = AgentEvent::End {
5994            text: "Done".to_string(),
5995            usage: TokenUsage {
5996                prompt_tokens: 100,
5997                completion_tokens: 50,
5998                total_tokens: 150,
5999                cache_read_tokens: None,
6000                cache_write_tokens: None,
6001            },
6002            meta: None,
6003        };
6004        let json = serde_json::to_string(&event).unwrap();
6005        assert!(json.contains("agent_end"));
6006    }
6007
6008    // ========================================================================
6009    // AgentResult
6010    // ========================================================================
6011
6012    #[test]
6013    fn test_agent_result_fields() {
6014        let result = AgentResult {
6015            text: "output".to_string(),
6016            messages: vec![Message::user("hello")],
6017            usage: TokenUsage::default(),
6018            tool_calls_count: 3,
6019        };
6020        assert_eq!(result.text, "output");
6021        assert_eq!(result.messages.len(), 1);
6022        assert_eq!(result.tool_calls_count, 3);
6023    }
6024
6025    // ========================================================================
6026    // Missing AgentEvent serialization tests
6027    // ========================================================================
6028
6029    #[test]
6030    fn test_agent_event_serialize_context_resolving() {
6031        let event = AgentEvent::ContextResolving {
6032            providers: vec!["provider1".to_string(), "provider2".to_string()],
6033        };
6034        let json = serde_json::to_string(&event).unwrap();
6035        assert!(json.contains("context_resolving"));
6036        assert!(json.contains("provider1"));
6037    }
6038
6039    #[test]
6040    fn test_agent_event_serialize_context_resolved() {
6041        let event = AgentEvent::ContextResolved {
6042            total_items: 5,
6043            total_tokens: 1000,
6044        };
6045        let json = serde_json::to_string(&event).unwrap();
6046        assert!(json.contains("context_resolved"));
6047        assert!(json.contains("1000"));
6048    }
6049
6050    #[test]
6051    fn test_agent_event_serialize_command_dead_lettered() {
6052        let event = AgentEvent::CommandDeadLettered {
6053            command_id: "cmd-1".to_string(),
6054            command_type: "bash".to_string(),
6055            lane: "execute".to_string(),
6056            error: "timeout".to_string(),
6057            attempts: 3,
6058        };
6059        let json = serde_json::to_string(&event).unwrap();
6060        assert!(json.contains("command_dead_lettered"));
6061        assert!(json.contains("cmd-1"));
6062    }
6063
6064    #[test]
6065    fn test_agent_event_serialize_command_retry() {
6066        let event = AgentEvent::CommandRetry {
6067            command_id: "cmd-2".to_string(),
6068            command_type: "read".to_string(),
6069            lane: "query".to_string(),
6070            attempt: 2,
6071            delay_ms: 1000,
6072        };
6073        let json = serde_json::to_string(&event).unwrap();
6074        assert!(json.contains("command_retry"));
6075        assert!(json.contains("cmd-2"));
6076    }
6077
6078    #[test]
6079    fn test_agent_event_serialize_queue_alert() {
6080        let event = AgentEvent::QueueAlert {
6081            level: "warning".to_string(),
6082            alert_type: "depth".to_string(),
6083            message: "Queue depth exceeded".to_string(),
6084        };
6085        let json = serde_json::to_string(&event).unwrap();
6086        assert!(json.contains("queue_alert"));
6087        assert!(json.contains("warning"));
6088    }
6089
6090    #[test]
6091    fn test_agent_event_serialize_task_updated() {
6092        let event = AgentEvent::TaskUpdated {
6093            session_id: "sess-1".to_string(),
6094            tasks: vec![],
6095        };
6096        let json = serde_json::to_string(&event).unwrap();
6097        assert!(json.contains("task_updated"));
6098        assert!(json.contains("sess-1"));
6099    }
6100
6101    #[test]
6102    fn test_agent_event_serialize_memory_stored() {
6103        let event = AgentEvent::MemoryStored {
6104            memory_id: "mem-1".to_string(),
6105            memory_type: "conversation".to_string(),
6106            importance: 0.8,
6107            tags: vec!["important".to_string()],
6108        };
6109        let json = serde_json::to_string(&event).unwrap();
6110        assert!(json.contains("memory_stored"));
6111        assert!(json.contains("mem-1"));
6112    }
6113
6114    #[test]
6115    fn test_agent_event_serialize_memory_recalled() {
6116        let event = AgentEvent::MemoryRecalled {
6117            memory_id: "mem-2".to_string(),
6118            content: "Previous conversation".to_string(),
6119            relevance: 0.9,
6120        };
6121        let json = serde_json::to_string(&event).unwrap();
6122        assert!(json.contains("memory_recalled"));
6123        assert!(json.contains("mem-2"));
6124    }
6125
6126    #[test]
6127    fn test_agent_event_serialize_memories_searched() {
6128        let event = AgentEvent::MemoriesSearched {
6129            query: Some("search term".to_string()),
6130            tags: vec!["tag1".to_string()],
6131            result_count: 5,
6132        };
6133        let json = serde_json::to_string(&event).unwrap();
6134        assert!(json.contains("memories_searched"));
6135        assert!(json.contains("search term"));
6136    }
6137
6138    #[test]
6139    fn test_agent_event_serialize_memory_cleared() {
6140        let event = AgentEvent::MemoryCleared {
6141            tier: "short_term".to_string(),
6142            count: 10,
6143        };
6144        let json = serde_json::to_string(&event).unwrap();
6145        assert!(json.contains("memory_cleared"));
6146        assert!(json.contains("short_term"));
6147    }
6148
6149    #[test]
6150    fn test_agent_event_serialize_subagent_start() {
6151        let event = AgentEvent::SubagentStart {
6152            task_id: "task-1".to_string(),
6153            session_id: "child-sess".to_string(),
6154            parent_session_id: "parent-sess".to_string(),
6155            agent: "explore".to_string(),
6156            description: "Explore codebase".to_string(),
6157        };
6158        let json = serde_json::to_string(&event).unwrap();
6159        assert!(json.contains("subagent_start"));
6160        assert!(json.contains("explore"));
6161    }
6162
6163    #[test]
6164    fn test_agent_event_serialize_subagent_progress() {
6165        let event = AgentEvent::SubagentProgress {
6166            task_id: "task-1".to_string(),
6167            session_id: "child-sess".to_string(),
6168            status: "processing".to_string(),
6169            metadata: serde_json::json!({"progress": 50}),
6170        };
6171        let json = serde_json::to_string(&event).unwrap();
6172        assert!(json.contains("subagent_progress"));
6173        assert!(json.contains("processing"));
6174    }
6175
6176    #[test]
6177    fn test_agent_event_serialize_subagent_end() {
6178        let event = AgentEvent::SubagentEnd {
6179            task_id: "task-1".to_string(),
6180            session_id: "child-sess".to_string(),
6181            agent: "explore".to_string(),
6182            output: "Found 10 files".to_string(),
6183            success: true,
6184        };
6185        let json = serde_json::to_string(&event).unwrap();
6186        assert!(json.contains("subagent_end"));
6187        assert!(json.contains("Found 10 files"));
6188    }
6189
6190    #[test]
6191    fn test_agent_event_serialize_planning_start() {
6192        let event = AgentEvent::PlanningStart {
6193            prompt: "Build a web app".to_string(),
6194        };
6195        let json = serde_json::to_string(&event).unwrap();
6196        assert!(json.contains("planning_start"));
6197        assert!(json.contains("Build a web app"));
6198    }
6199
6200    #[test]
6201    fn test_agent_event_serialize_planning_end() {
6202        use crate::planning::{Complexity, ExecutionPlan};
6203        let plan = ExecutionPlan::new("Test goal".to_string(), Complexity::Simple);
6204        let event = AgentEvent::PlanningEnd {
6205            plan,
6206            estimated_steps: 3,
6207        };
6208        let json = serde_json::to_string(&event).unwrap();
6209        assert!(json.contains("planning_end"));
6210        assert!(json.contains("estimated_steps"));
6211    }
6212
6213    #[test]
6214    fn test_agent_event_serialize_step_start() {
6215        let event = AgentEvent::StepStart {
6216            step_id: "step-1".to_string(),
6217            description: "Initialize project".to_string(),
6218            step_number: 1,
6219            total_steps: 5,
6220        };
6221        let json = serde_json::to_string(&event).unwrap();
6222        assert!(json.contains("step_start"));
6223        assert!(json.contains("Initialize project"));
6224    }
6225
6226    #[test]
6227    fn test_agent_event_serialize_step_end() {
6228        let event = AgentEvent::StepEnd {
6229            step_id: "step-1".to_string(),
6230            status: TaskStatus::Completed,
6231            step_number: 1,
6232            total_steps: 5,
6233        };
6234        let json = serde_json::to_string(&event).unwrap();
6235        assert!(json.contains("step_end"));
6236        assert!(json.contains("step-1"));
6237    }
6238
6239    #[test]
6240    fn test_agent_event_serialize_goal_extracted() {
6241        use crate::planning::AgentGoal;
6242        let goal = AgentGoal::new("Complete the task".to_string());
6243        let event = AgentEvent::GoalExtracted { goal };
6244        let json = serde_json::to_string(&event).unwrap();
6245        assert!(json.contains("goal_extracted"));
6246    }
6247
6248    #[test]
6249    fn test_agent_event_serialize_goal_progress() {
6250        let event = AgentEvent::GoalProgress {
6251            goal: "Build app".to_string(),
6252            progress: 0.5,
6253            completed_steps: 2,
6254            total_steps: 4,
6255        };
6256        let json = serde_json::to_string(&event).unwrap();
6257        assert!(json.contains("goal_progress"));
6258        assert!(json.contains("0.5"));
6259    }
6260
6261    #[test]
6262    fn test_agent_event_serialize_goal_achieved() {
6263        let event = AgentEvent::GoalAchieved {
6264            goal: "Build app".to_string(),
6265            total_steps: 4,
6266            duration_ms: 5000,
6267        };
6268        let json = serde_json::to_string(&event).unwrap();
6269        assert!(json.contains("goal_achieved"));
6270        assert!(json.contains("5000"));
6271    }
6272
6273    #[tokio::test]
6274    async fn test_extract_goal_with_json_response() {
6275        // LlmPlanner expects JSON with "description" and "success_criteria" fields
6276        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6277            r#"{"description": "Build web app", "success_criteria": ["App runs on port 3000", "Has login page"]}"#,
6278        )]));
6279        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6280        let agent = AgentLoop::new(
6281            mock_client,
6282            tool_executor,
6283            test_tool_context(),
6284            AgentConfig::default(),
6285        );
6286
6287        let goal = agent.extract_goal("Build a web app").await.unwrap();
6288        assert_eq!(goal.description, "Build web app");
6289        assert_eq!(goal.success_criteria.len(), 2);
6290        assert_eq!(goal.success_criteria[0], "App runs on port 3000");
6291    }
6292
6293    #[tokio::test]
6294    async fn test_extract_goal_fallback_on_non_json() {
6295        // Non-JSON response triggers fallback: returns the original prompt as goal
6296        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6297            "Some non-JSON response",
6298        )]));
6299        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6300        let agent = AgentLoop::new(
6301            mock_client,
6302            tool_executor,
6303            test_tool_context(),
6304            AgentConfig::default(),
6305        );
6306
6307        let goal = agent.extract_goal("Do something").await.unwrap();
6308        // Fallback uses the original prompt as description
6309        assert_eq!(goal.description, "Do something");
6310        // Fallback adds 2 generic criteria
6311        assert_eq!(goal.success_criteria.len(), 2);
6312    }
6313
6314    #[tokio::test]
6315    async fn test_check_goal_achievement_json_yes() {
6316        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6317            r#"{"achieved": true, "progress": 1.0, "remaining_criteria": []}"#,
6318        )]));
6319        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6320        let agent = AgentLoop::new(
6321            mock_client,
6322            tool_executor,
6323            test_tool_context(),
6324            AgentConfig::default(),
6325        );
6326
6327        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
6328        let achieved = agent
6329            .check_goal_achievement(&goal, "All done")
6330            .await
6331            .unwrap();
6332        assert!(achieved);
6333    }
6334
6335    #[tokio::test]
6336    async fn test_check_goal_achievement_fallback_not_done() {
6337        // Non-JSON response triggers heuristic fallback
6338        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6339            "invalid json",
6340        )]));
6341        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6342        let agent = AgentLoop::new(
6343            mock_client,
6344            tool_executor,
6345            test_tool_context(),
6346            AgentConfig::default(),
6347        );
6348
6349        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
6350        // "still working" doesn't contain "complete"/"done"/"finished"
6351        let achieved = agent
6352            .check_goal_achievement(&goal, "still working")
6353            .await
6354            .unwrap();
6355        assert!(!achieved);
6356    }
6357
6358    // ========================================================================
6359    // build_augmented_system_prompt Tests
6360    // ========================================================================
6361
6362    #[test]
6363    fn test_build_augmented_system_prompt_empty_context() {
6364        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6365        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6366        let config = AgentConfig {
6367            prompt_slots: SystemPromptSlots {
6368                extra: Some("Base prompt".to_string()),
6369                ..Default::default()
6370            },
6371            ..Default::default()
6372        };
6373        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6374
6375        let result = agent.build_augmented_system_prompt(&[]);
6376        assert!(result.unwrap().contains("Base prompt"));
6377    }
6378
6379    #[test]
6380    fn test_build_augmented_system_prompt_no_custom_slots() {
6381        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6382        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6383        let agent = AgentLoop::new(
6384            mock_client,
6385            tool_executor,
6386            test_tool_context(),
6387            AgentConfig::default(),
6388        );
6389
6390        let result = agent.build_augmented_system_prompt(&[]);
6391        // Default slots still produce the default agentic prompt
6392        assert!(result.is_some());
6393        assert!(result.unwrap().contains("Core Behaviour"));
6394    }
6395
6396    #[test]
6397    fn test_build_augmented_system_prompt_with_context_no_base() {
6398        use crate::context::{ContextItem, ContextResult, ContextType};
6399
6400        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6401        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6402        let agent = AgentLoop::new(
6403            mock_client,
6404            tool_executor,
6405            test_tool_context(),
6406            AgentConfig::default(),
6407        );
6408
6409        let context = vec![ContextResult {
6410            provider: "test".to_string(),
6411            items: vec![ContextItem::new("id1", ContextType::Resource, "Content")],
6412            total_tokens: 10,
6413            truncated: false,
6414        }];
6415
6416        let result = agent.build_augmented_system_prompt(&context);
6417        assert!(result.is_some());
6418        let text = result.unwrap();
6419        assert!(text.contains("<context"));
6420        assert!(text.contains("Content"));
6421    }
6422
6423    // ========================================================================
6424    // AgentResult Clone and Debug
6425    // ========================================================================
6426
6427    #[test]
6428    fn test_agent_result_clone() {
6429        let result = AgentResult {
6430            text: "output".to_string(),
6431            messages: vec![Message::user("hello")],
6432            usage: TokenUsage::default(),
6433            tool_calls_count: 3,
6434        };
6435        let cloned = result.clone();
6436        assert_eq!(cloned.text, result.text);
6437        assert_eq!(cloned.tool_calls_count, result.tool_calls_count);
6438    }
6439
6440    #[test]
6441    fn test_agent_result_debug() {
6442        let result = AgentResult {
6443            text: "output".to_string(),
6444            messages: vec![Message::user("hello")],
6445            usage: TokenUsage::default(),
6446            tool_calls_count: 3,
6447        };
6448        let debug = format!("{:?}", result);
6449        assert!(debug.contains("AgentResult"));
6450        assert!(debug.contains("output"));
6451    }
6452
    // ========================================================================
    // handle_post_execution_metadata Tests
    // (placeholder — no tests implemented for this section yet)
    // ========================================================================

6457    // ========================================================================
6458    // ToolCommand adapter tests
6459    // ========================================================================
6460
6461    #[tokio::test]
6462    async fn test_tool_command_command_type() {
6463        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6464        let cmd = ToolCommand {
6465            tool_executor: executor,
6466            tool_name: "read".to_string(),
6467            tool_args: serde_json::json!({"file": "test.rs"}),
6468            skill_registry: None,
6469            tool_context: test_tool_context(),
6470        };
6471        assert_eq!(cmd.command_type(), "read");
6472    }
6473
6474    #[tokio::test]
6475    async fn test_tool_command_payload() {
6476        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6477        let args = serde_json::json!({"file": "test.rs", "offset": 10});
6478        let cmd = ToolCommand {
6479            tool_executor: executor,
6480            tool_name: "read".to_string(),
6481            tool_args: args.clone(),
6482            skill_registry: None,
6483            tool_context: test_tool_context(),
6484        };
6485        assert_eq!(cmd.payload(), args);
6486    }
6487
6488    // ========================================================================
6489    // AgentLoop with queue builder tests
6490    // ========================================================================
6491
6492    #[tokio::test(flavor = "multi_thread")]
6493    async fn test_agent_loop_with_queue() {
6494        use tokio::sync::broadcast;
6495
6496        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6497            "Hello",
6498        )]));
6499        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6500        let config = AgentConfig::default();
6501
6502        let (event_tx, _) = broadcast::channel(100);
6503        let queue = SessionLaneQueue::new("test-session", SessionQueueConfig::default(), event_tx)
6504            .await
6505            .unwrap();
6506
6507        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config)
6508            .with_queue(Arc::new(queue));
6509
6510        assert!(agent.command_queue.is_some());
6511    }
6512
6513    #[tokio::test]
6514    async fn test_agent_loop_without_queue() {
6515        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6516            "Hello",
6517        )]));
6518        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6519        let config = AgentConfig::default();
6520
6521        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6522
6523        assert!(agent.command_queue.is_none());
6524    }
6525
6526    // ========================================================================
6527    // Parallel Plan Execution Tests
6528    // ========================================================================
6529
    /// Independent plan steps all execute, each emitting StepStart/StepEnd.
    #[tokio::test]
    async fn test_execute_plan_parallel_independent() {
        use crate::planning::{Complexity, ExecutionPlan, Task};

        // 3 independent steps (no dependencies) — should all execute.
        // MockLlmClient needs one response per execute_loop call per step.
        let mock_client = Arc::new(MockLlmClient::new(vec![
            MockLlmClient::text_response("Step 1 done"),
            MockLlmClient::text_response("Step 2 done"),
            MockLlmClient::text_response("Step 3 done"),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let config = AgentConfig::default();
        let agent = AgentLoop::new(
            mock_client.clone(),
            tool_executor,
            test_tool_context(),
            config,
        );

        let mut plan = ExecutionPlan::new("Test parallel", Complexity::Simple);
        plan.add_step(Task::new("s1", "First step"));
        plan.add_step(Task::new("s2", "Second step"));
        plan.add_step(Task::new("s3", "Third step"));

        let (tx, mut rx) = mpsc::channel(100);
        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();

        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
        // NOTE(review): the 15-tokens-per-response figure comes from the
        // mock's canned usage — confirm against MockLlmClient if this drifts.
        assert_eq!(result.usage.total_tokens, 45);

        // Verify we received StepStart and StepEnd events for all 3 steps.
        // close() stops further sends but lets already-buffered events drain,
        // so the recv loop terminates instead of waiting forever.
        let mut step_starts = Vec::new();
        let mut step_ends = Vec::new();
        rx.close();
        while let Some(event) = rx.recv().await {
            match event {
                AgentEvent::StepStart { step_id, .. } => step_starts.push(step_id),
                AgentEvent::StepEnd {
                    step_id, status, ..
                } => {
                    // Every step must have finished successfully.
                    assert_eq!(status, TaskStatus::Completed);
                    step_ends.push(step_id);
                }
                _ => {}
            }
        }
        assert_eq!(step_starts.len(), 3);
        assert_eq!(step_ends.len(), 3);
    }
6581
    /// A dependent step's StepStart must come after its dependencies' StepEnd.
    #[tokio::test]
    async fn test_execute_plan_respects_dependencies() {
        use crate::planning::{Complexity, ExecutionPlan, Task};

        // s1 and s2 are independent (wave 1), s3 depends on both (wave 2).
        // This requires 3 responses total.
        let mock_client = Arc::new(MockLlmClient::new(vec![
            MockLlmClient::text_response("Step 1 done"),
            MockLlmClient::text_response("Step 2 done"),
            MockLlmClient::text_response("Step 3 done"),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let config = AgentConfig::default();
        let agent = AgentLoop::new(
            mock_client.clone(),
            tool_executor,
            test_tool_context(),
            config,
        );

        let mut plan = ExecutionPlan::new("Test deps", Complexity::Medium);
        plan.add_step(Task::new("s1", "Independent A"));
        plan.add_step(Task::new("s2", "Independent B"));
        plan.add_step(
            Task::new("s3", "Depends on A+B")
                .with_dependencies(vec!["s1".to_string(), "s2".to_string()]),
        );

        let (tx, mut rx) = mpsc::channel(100);
        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();

        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
        assert_eq!(result.usage.total_tokens, 45);

        // Verify ordering: s3's StepStart must come after s1 and s2's StepEnd.
        // Flatten the event stream into "start:<id>" / "end:<id>" markers so
        // relative order can be compared by index position.
        let mut events = Vec::new();
        // close() stops further sends but lets buffered events drain, so the
        // recv loop below terminates.
        rx.close();
        while let Some(event) = rx.recv().await {
            match &event {
                AgentEvent::StepStart { step_id, .. } => {
                    events.push(format!("start:{}", step_id));
                }
                AgentEvent::StepEnd { step_id, .. } => {
                    events.push(format!("end:{}", step_id));
                }
                _ => {}
            }
        }

        // s3 start must occur after both s1 end and s2 end
        let s1_end = events.iter().position(|e| e == "end:s1").unwrap();
        let s2_end = events.iter().position(|e| e == "end:s2").unwrap();
        let s3_start = events.iter().position(|e| e == "start:s3").unwrap();
        assert!(
            s3_start > s1_end,
            "s3 started before s1 ended: {:?}",
            events
        );
        assert!(
            s3_start > s2_end,
            "s3 started before s2 ended: {:?}",
            events
        );

        // Final result should reflect step 3 (last sequential step)
        assert!(result.text.contains("Step 3 done") || !result.text.is_empty());
    }
6650
    /// A failed step marks itself Failed and its dependents never start.
    #[tokio::test]
    async fn test_execute_plan_handles_step_failure() {
        use crate::planning::{Complexity, ExecutionPlan, Task};

        // Plan layout:
        //   wave 1: s1, s3 (independent, run in parallel) — both succeed
        //   wave 2: s2 (depends on s1) — fails: no mock response remains, so
        //           the mock errors (its "no more mock responses" path)
        //   wave 3: s4 (depends on s2) — must never start, because its
        //           dependency failed rather than completed
        let mock_client = Arc::new(MockLlmClient::new(vec![
            // Wave 1: s1 and s3 execute in parallel
            MockLlmClient::text_response("s1 done"),
            MockLlmClient::text_response("s3 done"),
            // Wave 2: s2 executes — deliberately no response queued, causing failure
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let config = AgentConfig::default();
        let agent = AgentLoop::new(
            mock_client.clone(),
            tool_executor,
            test_tool_context(),
            config,
        );

        let mut plan = ExecutionPlan::new("Test failure", Complexity::Medium);
        plan.add_step(Task::new("s1", "Independent step"));
        plan.add_step(Task::new("s2", "Depends on s1").with_dependencies(vec!["s1".to_string()]));
        plan.add_step(Task::new("s3", "Another independent"));
        plan.add_step(Task::new("s4", "Depends on s2").with_dependencies(vec!["s2".to_string()]));

        let (tx, mut rx) = mpsc::channel(100);
        let _result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();

        // Bucket StepEnd events by outcome so each step's fate can be checked.
        let mut completed_steps = Vec::new();
        let mut failed_steps = Vec::new();
        // close() stops further sends but lets buffered events drain.
        rx.close();
        while let Some(event) = rx.recv().await {
            if let AgentEvent::StepEnd {
                step_id, status, ..
            } = event
            {
                match status {
                    TaskStatus::Completed => completed_steps.push(step_id),
                    TaskStatus::Failed => failed_steps.push(step_id),
                    _ => {}
                }
            }
        }

        assert!(
            completed_steps.contains(&"s1".to_string()),
            "s1 should complete"
        );
        assert!(
            completed_steps.contains(&"s3".to_string()),
            "s3 should complete"
        );
        assert!(failed_steps.contains(&"s2".to_string()), "s2 should fail");
        // s4 should NOT appear in either list — it was never started
        assert!(
            !completed_steps.contains(&"s4".to_string()),
            "s4 should not complete"
        );
        assert!(
            !failed_steps.contains(&"s4".to_string()),
            "s4 should not fail (never started)"
        );
    }
6727
6728    // ========================================================================
6729    // Phase 4: Error Recovery & Resilience Tests
6730    // ========================================================================
6731
6732    #[test]
6733    fn test_agent_config_resilience_defaults() {
6734        let config = AgentConfig::default();
6735        assert_eq!(config.max_parse_retries, 2);
6736        assert_eq!(config.tool_timeout_ms, None);
6737        assert_eq!(config.circuit_breaker_threshold, 3);
6738    }
6739
    /// 4.1 — Parse error recovery: bails after max_parse_retries exceeded
    #[tokio::test]
    async fn test_parse_error_recovery_bails_after_threshold() {
        // 3 parse errors with max_parse_retries=2: count reaches 3 > 2 → bail
        // NOTE(review): the "__parse_error" key appears to be the mock's
        // marker for malformed tool arguments — confirm in MockLlmClient.
        let mock_client = Arc::new(MockLlmClient::new(vec![
            MockLlmClient::tool_call_response(
                "c1",
                "bash",
                serde_json::json!({"__parse_error": "unexpected token at position 5"}),
            ),
            MockLlmClient::tool_call_response(
                "c2",
                "bash",
                serde_json::json!({"__parse_error": "missing closing brace"}),
            ),
            MockLlmClient::tool_call_response(
                "c3",
                "bash",
                serde_json::json!({"__parse_error": "still broken"}),
            ),
            MockLlmClient::text_response("Done"), // never reached
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let config = AgentConfig {
            max_parse_retries: 2,
            ..AgentConfig::default()
        };
        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
        let result = agent.execute(&[], "Do something", None).await;
        assert!(result.is_err(), "should bail after parse error threshold");
        // The bail-out error message must identify the cause for the caller.
        let err = result.unwrap_err().to_string();
        assert!(
            err.contains("malformed tool arguments"),
            "error should mention malformed tool arguments, got: {}",
            err
        );
    }
6778
    /// 4.1 — Parse error recovery: counter resets after a valid tool execution
    #[tokio::test]
    async fn test_parse_error_counter_resets_on_success() {
        // 2 parse errors (= max_parse_retries, not yet exceeded)
        // Then a valid tool call (resets counter)
        // Then final text — should NOT bail
        let mock_client = Arc::new(MockLlmClient::new(vec![
            MockLlmClient::tool_call_response(
                "c1",
                "bash",
                serde_json::json!({"__parse_error": "bad args"}),
            ),
            MockLlmClient::tool_call_response(
                "c2",
                "bash",
                serde_json::json!({"__parse_error": "bad args again"}),
            ),
            // Valid call — resets parse_error_count to 0
            MockLlmClient::tool_call_response(
                "c3",
                "bash",
                serde_json::json!({"command": "echo ok"}),
            ),
            MockLlmClient::text_response("All done"),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let config = AgentConfig {
            max_parse_retries: 2,
            ..AgentConfig::default()
        };
        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
        let result = agent.execute(&[], "Do something", None).await;
        assert!(
            result.is_ok(),
            "should not bail — counter reset after successful tool, got: {:?}",
            result.err()
        );
        // The loop must reach the final text response once recovery succeeds.
        assert_eq!(result.unwrap().text, "All done");
    }
6819
    /// 4.2 — Tool timeout: slow tool produces a timeout error result; session continues
    ///
    /// Timing-sensitive: runs a real `bash sleep 10` against a 50 ms timeout,
    /// so the tool can never finish before the deadline.
    #[tokio::test]
    async fn test_tool_timeout_produces_error_result() {
        let mock_client = Arc::new(MockLlmClient::new(vec![
            MockLlmClient::tool_call_response(
                "t1",
                "bash",
                serde_json::json!({"command": "sleep 10"}),
            ),
            // Response the LLM gives after seeing the timeout error result.
            MockLlmClient::text_response("The command timed out."),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let config = AgentConfig {
            // 50ms — sleep 10 will never finish
            tool_timeout_ms: Some(50),
            ..AgentConfig::default()
        };
        let agent = AgentLoop::new(
            mock_client.clone(),
            tool_executor,
            test_tool_context(),
            config,
        );
        let result = agent.execute(&[], "Run sleep", None).await;
        // A tool timeout must not abort the session — it becomes an error
        // result fed back to the LLM.
        assert!(
            result.is_ok(),
            "session should continue after tool timeout: {:?}",
            result.err()
        );
        assert_eq!(result.unwrap().text, "The command timed out.");
        // LLM called twice: initial request + response after timeout error
        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
    }
6854
6855    /// 4.2 — Tool timeout: tool that finishes before the deadline succeeds normally
6856    #[tokio::test]
6857    async fn test_tool_within_timeout_succeeds() {
6858        let mock_client = Arc::new(MockLlmClient::new(vec![
6859            MockLlmClient::tool_call_response(
6860                "t1",
6861                "bash",
6862                serde_json::json!({"command": "echo fast"}),
6863            ),
6864            MockLlmClient::text_response("Command succeeded."),
6865        ]));
6866
6867        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6868        let config = AgentConfig {
6869            tool_timeout_ms: Some(5_000), // 5 s — echo completes in <100ms
6870            ..AgentConfig::default()
6871        };
6872        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6873        let result = agent.execute(&[], "Run something fast", None).await;
6874        assert!(
6875            result.is_ok(),
6876            "fast tool should succeed: {:?}",
6877            result.err()
6878        );
6879        assert_eq!(result.unwrap().text, "Command succeeded.");
6880    }
6881
6882    /// 4.3 — Circuit breaker: retries non-streaming LLM failures up to threshold
6883    #[tokio::test]
6884    async fn test_circuit_breaker_retries_non_streaming() {
6885        // Empty response list → every call bails with "No more mock responses"
6886        // threshold=2 → tries twice, then bails with circuit-breaker message
6887        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6888
6889        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6890        let config = AgentConfig {
6891            circuit_breaker_threshold: 2,
6892            ..AgentConfig::default()
6893        };
6894        let agent = AgentLoop::new(
6895            mock_client.clone(),
6896            tool_executor,
6897            test_tool_context(),
6898            config,
6899        );
6900        let result = agent.execute(&[], "Hello", None).await;
6901        assert!(result.is_err(), "should fail when LLM always errors");
6902        let err = result.unwrap_err().to_string();
6903        assert!(
6904            err.contains("circuit breaker"),
6905            "error should mention circuit breaker, got: {}",
6906            err
6907        );
6908        assert_eq!(
6909            mock_client.call_count.load(Ordering::SeqCst),
6910            2,
6911            "should make exactly threshold=2 LLM calls"
6912        );
6913    }
6914
6915    /// 4.3 — Circuit breaker: threshold=1 bails on the very first failure
6916    #[tokio::test]
6917    async fn test_circuit_breaker_threshold_one_no_retry() {
6918        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6919
6920        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6921        let config = AgentConfig {
6922            circuit_breaker_threshold: 1,
6923            ..AgentConfig::default()
6924        };
6925        let agent = AgentLoop::new(
6926            mock_client.clone(),
6927            tool_executor,
6928            test_tool_context(),
6929            config,
6930        );
6931        let result = agent.execute(&[], "Hello", None).await;
6932        assert!(result.is_err());
6933        assert_eq!(
6934            mock_client.call_count.load(Ordering::SeqCst),
6935            1,
6936            "with threshold=1 exactly one attempt should be made"
6937        );
6938    }
6939
6940    /// 4.3 — Circuit breaker: succeeds when LLM recovers before hitting threshold
6941    #[tokio::test]
6942    async fn test_circuit_breaker_succeeds_if_llm_recovers() {
6943        // First call fails, second call succeeds; threshold=3 — recovery within threshold
6944        struct FailOnceThenSucceed {
6945            inner: MockLlmClient,
6946            failed_once: std::sync::atomic::AtomicBool,
6947            call_count: AtomicUsize,
6948        }
6949
6950        #[async_trait::async_trait]
6951        impl LlmClient for FailOnceThenSucceed {
6952            async fn complete(
6953                &self,
6954                messages: &[Message],
6955                system: Option<&str>,
6956                tools: &[ToolDefinition],
6957            ) -> Result<LlmResponse> {
6958                self.call_count.fetch_add(1, Ordering::SeqCst);
6959                let already_failed = self
6960                    .failed_once
6961                    .swap(true, std::sync::atomic::Ordering::SeqCst);
6962                if !already_failed {
6963                    anyhow::bail!("transient network error");
6964                }
6965                self.inner.complete(messages, system, tools).await
6966            }
6967
6968            async fn complete_streaming(
6969                &self,
6970                messages: &[Message],
6971                system: Option<&str>,
6972                tools: &[ToolDefinition],
6973            ) -> Result<tokio::sync::mpsc::Receiver<crate::llm::StreamEvent>> {
6974                self.inner.complete_streaming(messages, system, tools).await
6975            }
6976        }
6977
6978        let mock = Arc::new(FailOnceThenSucceed {
6979            inner: MockLlmClient::new(vec![MockLlmClient::text_response("Recovered!")]),
6980            failed_once: std::sync::atomic::AtomicBool::new(false),
6981            call_count: AtomicUsize::new(0),
6982        });
6983
6984        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6985        let config = AgentConfig {
6986            circuit_breaker_threshold: 3,
6987            ..AgentConfig::default()
6988        };
6989        let agent = AgentLoop::new(mock.clone(), tool_executor, test_tool_context(), config);
6990        let result = agent.execute(&[], "Hello", None).await;
6991        assert!(
6992            result.is_ok(),
6993            "should succeed when LLM recovers within threshold: {:?}",
6994            result.err()
6995        );
6996        assert_eq!(result.unwrap().text, "Recovered!");
6997        assert_eq!(
6998            mock.call_count.load(Ordering::SeqCst),
6999            2,
7000            "should have made exactly 2 calls (1 fail + 1 success)"
7001        );
7002    }
7003
7004    // ── Continuation detection tests ─────────────────────────────────────────
7005
7006    #[test]
7007    fn test_looks_incomplete_empty() {
7008        assert!(AgentLoop::looks_incomplete(""));
7009        assert!(AgentLoop::looks_incomplete("   "));
7010    }
7011
7012    #[test]
7013    fn test_looks_incomplete_trailing_colon() {
7014        assert!(AgentLoop::looks_incomplete("Let me check the file:"));
7015        assert!(AgentLoop::looks_incomplete("Next steps:"));
7016    }
7017
7018    #[test]
7019    fn test_looks_incomplete_ellipsis() {
7020        assert!(AgentLoop::looks_incomplete("Working on it..."));
7021        assert!(AgentLoop::looks_incomplete("Processing…"));
7022    }
7023
7024    #[test]
7025    fn test_looks_incomplete_intent_phrases() {
7026        assert!(AgentLoop::looks_incomplete(
7027            "I'll start by reading the file."
7028        ));
7029        assert!(AgentLoop::looks_incomplete(
7030            "Let me check the configuration."
7031        ));
7032        assert!(AgentLoop::looks_incomplete("I will now run the tests."));
7033        assert!(AgentLoop::looks_incomplete(
7034            "I need to update the Cargo.toml."
7035        ));
7036    }
7037
7038    #[test]
7039    fn test_looks_complete_final_answer() {
7040        // Clear final answers should NOT trigger continuation
7041        assert!(!AgentLoop::looks_incomplete(
7042            "The tests pass. All changes have been applied successfully."
7043        ));
7044        assert!(!AgentLoop::looks_incomplete(
7045            "Done. I've updated the three files and verified the build succeeds."
7046        ));
7047        assert!(!AgentLoop::looks_incomplete("42"));
7048        assert!(!AgentLoop::looks_incomplete("Yes."));
7049    }
7050
7051    #[test]
7052    fn test_looks_incomplete_multiline_complete() {
7053        let text = "Here is the summary:\n\n- Fixed the bug in agent.rs\n- All tests pass\n- Build succeeds";
7054        assert!(!AgentLoop::looks_incomplete(text));
7055    }
7056}