Skip to main content

a3s_code_core/
agent.rs

1//! Agent Loop Implementation
2//!
3//! The agent loop handles the core conversation cycle:
4//! 1. User sends a prompt
5//! 2. LLM generates a response (possibly with tool calls)
6//! 3. If tool calls present, execute them and send results back
7//! 4. Repeat until LLM returns without tool calls
8//!
//! This implements agentic behavior where the LLM can use tools
//! autonomously to accomplish tasks.
11
12#[cfg(feature = "ahp")]
13use crate::ahp::InjectedContext;
14#[cfg(feature = "ahp")]
15use crate::context::{ContextItem, ContextType};
16use crate::context::{ContextProvider, ContextQuery, ContextResult};
17use crate::hitl::ConfirmationProvider;
18use crate::hooks::{
19    ErrorType, GenerateEndEvent, GenerateStartEvent, HookEvent, HookExecutor, HookResult,
20    IntentDetectionEvent, OnErrorEvent, PostResponseEvent, PostToolUseEvent,
21    PreContextPerceptionEvent, PrePromptEvent, PreToolUseEvent, TokenUsageInfo, ToolCallInfo,
22    ToolResultData,
23};
24use crate::llm::{LlmClient, LlmResponse, Message, TokenUsage, ToolDefinition};
25use crate::permissions::{PermissionChecker, PermissionDecision};
26use crate::planning::{AgentGoal, ExecutionPlan, LlmPlanner, PreAnalysis, TaskStatus};
27use crate::prompts::{AgentStyle, DetectionConfidence, PlanningMode, SystemPromptSlots};
28use crate::queue::SessionCommand;
29use crate::session_lane_queue::SessionLaneQueue;
30use crate::text::truncate_utf8;
31use crate::tool_search::ToolIndex;
32use crate::tools::{ToolContext, ToolExecutor, ToolStreamEvent};
33use anyhow::{Context, Result};
34use async_trait::async_trait;
35use futures::future::join_all;
36use serde::{Deserialize, Serialize};
37use serde_json::{json, Value};
38use std::sync::Arc;
39use std::time::Duration;
40use tokio::sync::{mpsc, RwLock};
41
/// Maximum number of tool execution rounds before stopping.
///
/// This is the built-in default; it can be overridden per agent via
/// [`AgentConfig::max_tool_rounds`].
const MAX_TOOL_ROUNDS: usize = 50;
44
/// Result of a single parallel step execution, emitted as structured JSON
/// so the frontend can render it dynamically in the user's language.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParallelStepResult {
    /// Identifier of the plan step this result belongs to.
    pub step_id: String,
    /// Position of the step within the plan.
    pub step_number: u32,
    pub status: String, // "completed" | "failed"
    /// Brief summary of what this step did (in the LLM's language).
    pub summary: String,
    /// Notable findings surfaced by the step, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub key_findings: Option<Vec<String>>,
    /// Error description (expected when `status` is "failed").
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// Optional structured payload produced by the step.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub data: Option<Value>,
}
61
62impl ParallelStepResult {
63    /// Build the full parallel-results JSON envelope injected into history.
64    pub fn build_envelope(results: Vec<ParallelStepResult>) -> Value {
65        json!({
66            "type": "parallel_results",
67            "steps": results
68        })
69    }
70}
71
/// Agent configuration
#[derive(Clone)]
pub struct AgentConfig {
    /// Slot-based system prompt customization.
    ///
    /// Users can customize specific parts (role, guidelines, response style, extra)
    /// without overriding the core agentic capabilities. The default agentic core
    /// (tool usage, autonomous behavior, completion criteria) is always preserved.
    pub prompt_slots: SystemPromptSlots,
    /// Tool definitions advertised to the LLM (typically supplied by the
    /// `ToolExecutor`; the default is an empty list).
    pub tools: Vec<ToolDefinition>,
    /// Maximum number of tool execution rounds before stopping
    /// (defaults to [`MAX_TOOL_ROUNDS`]).
    pub max_tool_rounds: usize,
    /// Optional security provider for input taint tracking and output sanitization
    pub security_provider: Option<Arc<dyn crate::security::SecurityProvider>>,
    /// Optional permission checker for tool execution control
    pub permission_checker: Option<Arc<dyn PermissionChecker>>,
    /// Optional confirmation manager for HITL (Human-in-the-Loop)
    pub confirmation_manager: Option<Arc<dyn ConfirmationProvider>>,
    /// Context providers for augmenting prompts with external context
    pub context_providers: Vec<Arc<dyn ContextProvider>>,
    /// Planning mode — Auto (detect from message), Enabled, or Disabled.
    pub planning_mode: PlanningMode,
    /// Enable goal tracking
    pub goal_tracking: bool,
    /// Optional hook engine for firing lifecycle events (PreToolUse, PostToolUse, etc.)
    pub hook_engine: Option<Arc<dyn HookExecutor>>,
    /// Optional skill registry for tool permission enforcement
    pub skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    /// Max consecutive malformed-tool-args errors before aborting (default: 2).
    ///
    /// When the LLM returns tool arguments with `__parse_error`, the error is
    /// fed back as a tool result. After this many consecutive parse errors the
    /// loop bails instead of retrying indefinitely.
    pub max_parse_retries: u32,
    /// Per-tool execution timeout in milliseconds (`None` = no timeout).
    ///
    /// When set, each tool execution is wrapped in `tokio::time::timeout`.
    /// A timeout produces an error result sent back to the LLM rather than
    /// crashing the session.
    pub tool_timeout_ms: Option<u64>,
    /// Circuit-breaker threshold: max consecutive LLM API failures before
    /// aborting (default: 3).
    ///
    /// In non-streaming mode, transient LLM failures are retried up to this
    /// many times (with short exponential backoff) before the loop bails.
    /// In streaming mode, any failure is fatal (events cannot be replayed).
    pub circuit_breaker_threshold: u32,
    /// Max consecutive identical tool signatures before aborting (default: 3).
    ///
    /// A tool signature is the exact combination of tool name + compact JSON
    /// arguments. This prevents the agent from getting stuck repeating the same
    /// tool call in a loop, for example repeatedly fetching the same URL.
    pub duplicate_tool_call_threshold: u32,
    /// Enable auto-compaction when context usage exceeds threshold.
    pub auto_compact: bool,
    /// Context usage percentage threshold to trigger auto-compaction (0.0 - 1.0).
    /// Default: 0.80 (80%).
    pub auto_compact_threshold: f32,
    /// Maximum context window size in tokens (used for auto-compact calculation).
    /// Default: 200_000.
    pub max_context_tokens: usize,
    /// LLM client reference for auto-compaction (needs to call LLM for summarization).
    pub llm_client: Option<Arc<dyn LlmClient>>,
    /// Optional agent memory for auto-remember after tool execution and recall before prompts.
    pub memory: Option<Arc<crate::memory::AgentMemory>>,
    /// Inject a continuation message when the LLM stops calling tools before the
    /// task is complete. Enabled by default. Set to `false` to disable.
    ///
    /// When enabled, if the LLM produces a response with no tool calls but the
    /// response text looks like an intermediate step (not a final answer), the
    /// loop injects [`crate::prompts::CONTINUATION`] as a user message and
    /// continues for up to `max_continuation_turns` additional turns.
    pub continuation_enabled: bool,
    /// Maximum number of continuation injections per execution (default: 3).
    ///
    /// Prevents infinite loops when the LLM repeatedly stops without completing.
    pub max_continuation_turns: u32,
    /// Optional tool search index for filtering tools per-turn.
    ///
    /// When set, only tools matching the user prompt are sent to the LLM,
    /// reducing context usage when many MCP tools are registered.
    pub tool_index: Option<ToolIndex>,
    /// Optional subagent registry for auto-delegation.
    ///
    /// When set, the agent loop can auto-detect when to launch subagents
    /// based on prompt patterns or agent style.
    pub subagent_registry: Option<Arc<crate::subagent::AgentRegistry>>,
    /// Callback for when a subagent should be launched.
    ///
    /// When `should_launch_subagent` returns Some, this callback is invoked
    /// with the agent definition and prompt. The callback should return
    /// `Some(result)` if it handled the subagent launch, or `None` to
    /// fall back to normal execution.
    #[allow(clippy::type_complexity)]
    pub on_subagent_launch: Option<
        Arc<
            dyn Fn(&crate::subagent::AgentDefinition, &str) -> Option<Result<AgentResult>>
                + Send
                + Sync,
        >,
    >,
}
173
174impl std::fmt::Debug for AgentConfig {
175    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176        f.debug_struct("AgentConfig")
177            .field("prompt_slots", &self.prompt_slots)
178            .field("tools", &self.tools)
179            .field("max_tool_rounds", &self.max_tool_rounds)
180            .field("security_provider", &self.security_provider.is_some())
181            .field("permission_checker", &self.permission_checker.is_some())
182            .field("confirmation_manager", &self.confirmation_manager.is_some())
183            .field("context_providers", &self.context_providers.len())
184            .field("planning_mode", &self.planning_mode)
185            .field("goal_tracking", &self.goal_tracking)
186            .field("hook_engine", &self.hook_engine.is_some())
187            .field(
188                "skill_registry",
189                &self.skill_registry.as_ref().map(|r| r.len()),
190            )
191            .field("max_parse_retries", &self.max_parse_retries)
192            .field("tool_timeout_ms", &self.tool_timeout_ms)
193            .field("circuit_breaker_threshold", &self.circuit_breaker_threshold)
194            .field(
195                "duplicate_tool_call_threshold",
196                &self.duplicate_tool_call_threshold,
197            )
198            .field("auto_compact", &self.auto_compact)
199            .field("auto_compact_threshold", &self.auto_compact_threshold)
200            .field("max_context_tokens", &self.max_context_tokens)
201            .field("continuation_enabled", &self.continuation_enabled)
202            .field("max_continuation_turns", &self.max_continuation_turns)
203            .field("memory", &self.memory.is_some())
204            .field("tool_index", &self.tool_index.as_ref().map(|i| i.len()))
205            .field(
206                "subagent_registry",
207                &self.subagent_registry.as_ref().map(|r| r.len()),
208            )
209            .field("on_subagent_launch", &self.on_subagent_launch.is_some())
210            .finish()
211    }
212}
213
214impl Default for AgentConfig {
215    fn default() -> Self {
216        Self {
217            prompt_slots: SystemPromptSlots::default(),
218            tools: Vec::new(), // Tools are provided by ToolExecutor
219            max_tool_rounds: MAX_TOOL_ROUNDS,
220            security_provider: None,
221            permission_checker: None,
222            confirmation_manager: None,
223            context_providers: Vec::new(),
224            planning_mode: PlanningMode::default(),
225            goal_tracking: false,
226            hook_engine: None,
227            skill_registry: Some(Arc::new(crate::skills::SkillRegistry::with_builtins())),
228            max_parse_retries: 2,
229            tool_timeout_ms: None,
230            circuit_breaker_threshold: 3,
231            duplicate_tool_call_threshold: 3,
232            auto_compact: false,
233            auto_compact_threshold: 0.80,
234            max_context_tokens: 200_000,
235            llm_client: None,
236            memory: None,
237            continuation_enabled: true,
238            max_continuation_turns: 3,
239            tool_index: None,
240            subagent_registry: None,
241            on_subagent_launch: None,
242        }
243    }
244}
245
/// Events emitted during agent execution
///
/// Subscribe via [`Session::subscribe_events()`](crate::session::Session::subscribe_events).
/// New variants may be added in minor releases — always include a wildcard arm
/// (`_ => {}`) when matching.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum AgentEvent {
    /// Agent started processing
    #[serde(rename = "agent_start")]
    Start { prompt: String },

    /// Runtime agent style/mode selected for the current execution.
    #[serde(rename = "agent_mode_changed")]
    AgentModeChanged {
        /// Stable UI/runtime mode label, e.g. "general", "planning", "explore".
        mode: String,
        /// Canonical built-in agent name associated with this mode.
        agent: String,
        /// Human-readable explanation of the selected style.
        description: String,
    },

    /// LLM turn started
    #[serde(rename = "turn_start")]
    TurnStart { turn: usize },

    /// Text delta from streaming
    #[serde(rename = "text_delta")]
    TextDelta { text: String },

    /// Reasoning/thinking delta from streaming (for models like kimi, deepseek)
    #[serde(rename = "reasoning_delta")]
    ReasoningDelta { text: String },

    /// Tool execution started
    #[serde(rename = "tool_start")]
    ToolStart { id: String, name: String },

    /// Tool input delta from streaming (partial JSON arguments)
    #[serde(rename = "tool_input_delta")]
    ToolInputDelta { delta: String },

    /// Tool execution completed
    #[serde(rename = "tool_end")]
    ToolEnd {
        /// Tool call identifier.
        id: String,
        /// Tool name.
        name: String,
        /// Tool output text.
        output: String,
        /// Exit-code-style status of the tool run.
        exit_code: i32,
        /// Optional structured metadata produced by the tool.
        #[serde(skip_serializing_if = "Option::is_none")]
        metadata: Option<serde_json::Value>,
    },

    /// Intermediate tool output (streaming delta)
    #[serde(rename = "tool_output_delta")]
    ToolOutputDelta {
        id: String,
        name: String,
        delta: String,
    },

    /// LLM turn completed
    #[serde(rename = "turn_end")]
    TurnEnd { turn: usize, usage: TokenUsage },

    /// Agent completed
    #[serde(rename = "agent_end")]
    End {
        text: String,
        usage: TokenUsage,
        #[serde(skip_serializing_if = "Option::is_none")]
        meta: Option<crate::llm::LlmResponseMeta>,
    },

    /// Error occurred
    #[serde(rename = "error")]
    Error { message: String },

    /// Tool execution requires confirmation (HITL)
    #[serde(rename = "confirmation_required")]
    ConfirmationRequired {
        /// Identifier of the tool call awaiting confirmation.
        tool_id: String,
        /// Name of the tool to be executed.
        tool_name: String,
        /// Arguments the tool would be invoked with.
        args: serde_json::Value,
        /// How long the confirmation request stays open, in milliseconds.
        timeout_ms: u64,
    },

    /// Confirmation received from user (HITL)
    #[serde(rename = "confirmation_received")]
    ConfirmationReceived {
        tool_id: String,
        approved: bool,
        reason: Option<String>,
    },

    /// Confirmation timed out (HITL)
    #[serde(rename = "confirmation_timeout")]
    ConfirmationTimeout {
        tool_id: String,
        action_taken: String, // "rejected" or "auto_approved"
    },

    /// External task pending (needs SDK processing)
    #[serde(rename = "external_task_pending")]
    ExternalTaskPending {
        task_id: String,
        session_id: String,
        lane: crate::hitl::SessionLane,
        command_type: String,
        payload: serde_json::Value,
        timeout_ms: u64,
    },

    /// External task completed
    #[serde(rename = "external_task_completed")]
    ExternalTaskCompleted {
        task_id: String,
        session_id: String,
        success: bool,
    },

    /// Tool execution denied by permission policy
    #[serde(rename = "permission_denied")]
    PermissionDenied {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        reason: String,
    },

    /// Context resolution started
    #[serde(rename = "context_resolving")]
    ContextResolving { providers: Vec<String> },

    /// Context resolution completed
    #[serde(rename = "context_resolved")]
    ContextResolved {
        total_items: usize,
        total_tokens: usize,
    },

    // ========================================================================
    // a3s-lane integration events
    // ========================================================================
    /// Command moved to dead letter queue after exhausting retries
    #[serde(rename = "command_dead_lettered")]
    CommandDeadLettered {
        command_id: String,
        command_type: String,
        lane: String,
        error: String,
        attempts: u32,
    },

    /// Command retry attempt
    #[serde(rename = "command_retry")]
    CommandRetry {
        command_id: String,
        command_type: String,
        lane: String,
        attempt: u32,
        delay_ms: u64,
    },

    /// Queue alert (depth warning, latency alert, etc.)
    #[serde(rename = "queue_alert")]
    QueueAlert {
        level: String,
        alert_type: String,
        message: String,
    },

    // ========================================================================
    // Task tracking events
    // ========================================================================
    /// Task list updated
    #[serde(rename = "task_updated")]
    TaskUpdated {
        session_id: String,
        tasks: Vec<crate::planning::Task>,
    },

    // ========================================================================
    // Memory System events (Phase 3)
    // ========================================================================
    /// Memory stored
    #[serde(rename = "memory_stored")]
    MemoryStored {
        memory_id: String,
        memory_type: String,
        importance: f32,
        tags: Vec<String>,
    },

    /// Memory recalled
    #[serde(rename = "memory_recalled")]
    MemoryRecalled {
        memory_id: String,
        content: String,
        relevance: f32,
    },

    /// Memories searched
    #[serde(rename = "memories_searched")]
    MemoriesSearched {
        query: Option<String>,
        tags: Vec<String>,
        result_count: usize,
    },

    /// Memory cleared
    #[serde(rename = "memory_cleared")]
    MemoryCleared {
        tier: String, // "long_term", "short_term", "working"
        count: u64,
    },

    // ========================================================================
    // Subagent events
    // ========================================================================
    /// Subagent task started
    #[serde(rename = "subagent_start")]
    SubagentStart {
        /// Unique task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Parent session ID
        parent_session_id: String,
        /// Agent type (e.g., "explore", "general")
        agent: String,
        /// Short description of the task
        description: String,
    },

    /// Subagent task progress update
    #[serde(rename = "subagent_progress")]
    SubagentProgress {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Progress status message
        status: String,
        /// Additional metadata
        metadata: serde_json::Value,
    },

    /// Subagent task completed
    #[serde(rename = "subagent_end")]
    SubagentEnd {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Agent type
        agent: String,
        /// Task output/result
        output: String,
        /// Whether the task succeeded
        success: bool,
    },

    // ========================================================================
    // Planning and Goal Tracking Events (Phase 1)
    // ========================================================================
    /// Planning phase started
    #[serde(rename = "planning_start")]
    PlanningStart { prompt: String },

    /// Planning phase completed
    #[serde(rename = "planning_end")]
    PlanningEnd {
        plan: ExecutionPlan,
        estimated_steps: usize,
    },

    /// Step execution started
    #[serde(rename = "step_start")]
    StepStart {
        step_id: String,
        description: String,
        step_number: usize,
        total_steps: usize,
    },

    /// Step execution completed
    #[serde(rename = "step_end")]
    StepEnd {
        step_id: String,
        status: TaskStatus,
        step_number: usize,
        total_steps: usize,
    },

    /// Goal extracted from prompt
    #[serde(rename = "goal_extracted")]
    GoalExtracted { goal: AgentGoal },

    /// Goal progress update
    #[serde(rename = "goal_progress")]
    GoalProgress {
        goal: String,
        progress: f32,
        completed_steps: usize,
        total_steps: usize,
    },

    /// Goal achieved
    #[serde(rename = "goal_achieved")]
    GoalAchieved {
        goal: String,
        total_steps: usize,
        duration_ms: i64,
    },

    // ========================================================================
    // Context Compaction events
    // ========================================================================
    /// Context automatically compacted due to high usage
    #[serde(rename = "context_compacted")]
    ContextCompacted {
        session_id: String,
        before_messages: usize,
        after_messages: usize,
        percent_before: f32,
    },

    // ========================================================================
    // Persistence events
    // ========================================================================
    /// Session persistence failed — SDK clients should handle this
    #[serde(rename = "persistence_failed")]
    PersistenceFailed {
        session_id: String,
        operation: String,
        error: String,
    },

    // ========================================================================
    // Side question (btw)
    // ========================================================================
    /// Ephemeral side question answered.
    ///
    /// Emitted by [`crate::agent_api::AgentSession::btw()`] in streaming mode.
    /// The answer is never added to conversation history.
    #[serde(rename = "btw_answer")]
    BtwAnswer {
        question: String,
        answer: String,
        usage: TokenUsage,
    },
}
601
/// Result of agent execution
#[derive(Debug, Clone)]
pub struct AgentResult {
    /// Final assistant text produced by the execution.
    pub text: String,
    /// Conversation messages accumulated during the run.
    pub messages: Vec<Message>,
    /// Token usage reported for the execution.
    pub usage: TokenUsage,
    /// Number of tool calls made during the run.
    pub tool_calls_count: usize,
}
610
611// ============================================================================
612// ToolCommand — bridges ToolExecutor to SessionCommand for queue submission
613// ============================================================================
614
/// Adapter that implements `SessionCommand` for tool execution via the queue.
///
/// Wraps a `ToolExecutor` call so it can be submitted to `SessionLaneQueue`.
pub struct ToolCommand {
    /// Executor that actually runs the tool.
    tool_executor: Arc<ToolExecutor>,
    /// Name of the tool to invoke.
    tool_name: String,
    /// JSON arguments passed to the tool.
    tool_args: Value,
    /// Context handed to the tool execution.
    tool_context: ToolContext,
    /// Optional registry used to enforce skill-based tool restrictions.
    skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
}
625
626impl ToolCommand {
627    /// Create a new ToolCommand
628    pub fn new(
629        tool_executor: Arc<ToolExecutor>,
630        tool_name: String,
631        tool_args: Value,
632        tool_context: ToolContext,
633        skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
634    ) -> Self {
635        Self {
636            tool_executor,
637            tool_name,
638            tool_args,
639            tool_context,
640            skill_registry,
641        }
642    }
643}
644
645#[async_trait]
646impl SessionCommand for ToolCommand {
647    async fn execute(&self) -> Result<Value> {
648        // Check skill-based tool permissions
649        if let Some(registry) = &self.skill_registry {
650            let instruction_skills = registry.by_kind(crate::skills::SkillKind::Instruction);
651
652            // If there are instruction skills with tool restrictions, check permissions
653            let has_restrictions = instruction_skills.iter().any(|s| s.allowed_tools.is_some());
654
655            if has_restrictions {
656                let mut allowed = false;
657
658                for skill in &instruction_skills {
659                    if skill.is_tool_allowed(&self.tool_name) {
660                        allowed = true;
661                        break;
662                    }
663                }
664
665                if !allowed {
666                    return Err(anyhow::anyhow!(
667                        "Tool '{}' is not allowed by any active skill. Active skills restrict tools to their allowed-tools lists.",
668                        self.tool_name
669                    ));
670                }
671            }
672        }
673
674        // Execute the tool
675        let result = self
676            .tool_executor
677            .execute_with_context(&self.tool_name, &self.tool_args, &self.tool_context)
678            .await?;
679        Ok(serde_json::json!({
680            "output": result.output,
681            "exit_code": result.exit_code,
682            "metadata": result.metadata,
683        }))
684    }
685
686    fn command_type(&self) -> &str {
687        &self.tool_name
688    }
689
690    fn payload(&self) -> Value {
691        self.tool_args.clone()
692    }
693}
694
695// ============================================================================
696// AgentLoop
697// ============================================================================
698
/// Agent loop executor
///
/// Drives the conversation cycle described in the module docs. `Clone` is
/// cheap: shared state lives behind `Arc`s.
#[derive(Clone)]
pub struct AgentLoop {
    /// LLM client used to generate responses.
    llm_client: Arc<dyn LlmClient>,
    /// Executes tool calls requested by the LLM.
    tool_executor: Arc<ToolExecutor>,
    /// Context handed to each tool execution.
    tool_context: ToolContext,
    /// Behavior configuration for this loop.
    config: AgentConfig,
    /// Optional per-session tool metrics collector
    tool_metrics: Option<Arc<RwLock<crate::telemetry::ToolMetrics>>>,
    /// Optional lane queue for priority-based tool execution
    command_queue: Option<Arc<SessionLaneQueue>>,
    /// Optional progress tracker for real-time tool/token usage tracking
    progress_tracker: Option<Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>>,
    /// Optional task manager for centralized task lifecycle tracking
    task_manager: Option<Arc<crate::task::TaskManager>>,
}
715
716// ============================================================================
717// Intent Detection Helpers (for AHP Context Perception)
718// ============================================================================
719
/// Extract a target name from the prompt (e.g., function name, file path).
///
/// Resolution order:
/// 1. The first span wrapped in double quotes, single quotes, or backticks.
/// 2. Otherwise (for prompts of more than two words) the first word longer
///    than three characters that is not a common question/stop word.
/// 3. Otherwise an empty string.
///
/// `_patterns` is currently unused and kept for interface compatibility.
fn extract_target_name_from_prompt(prompt: &str, _patterns: &[&str]) -> String {
    // Quoted spans are the strongest signal; check each delimiter in turn.
    // (Previously three copy-pasted blocks; the unused `'a` lifetime and its
    // clippy allow are gone as well.)
    for delim in ['"', '\'', '`'] {
        if let Some(start) = prompt.find(delim) {
            if let Some(len) = prompt[start + 1..].find(delim) {
                return prompt[start + 1..start + 1 + len].to_string();
            }
        }
    }

    // Fall back to the first "interesting" word: longer than three characters
    // and not a common question word. Only attempted for prompts > 2 words.
    const STOP_WORDS: [&str; 7] = ["where", "what", "find", "the", "how", "is", "are"];
    let words: Vec<&str> = prompt.split_whitespace().collect();
    if words.len() > 2 {
        for word in &words {
            if word.len() > 3 && !STOP_WORDS.contains(word) {
                return word.to_string();
            }
        }
    }

    String::new()
}
759
/// Detect the domain from prompt keywords.
///
/// Longer/more specific keywords are checked first (e.g. "javascript" before
/// "java"). Short ambiguous keywords ("go", "api", "rest") are matched on
/// word boundaries so that words like "good", "rapid", or "restore" no longer
/// trigger false positives (previously plain substring matches).
fn detect_domain_from_prompt(prompt: &str) -> String {
    // True when `word` appears as a whole alphanumeric token in `text`.
    fn has_word(text: &str, word: &str) -> bool {
        text.split(|c: char| !c.is_alphanumeric()).any(|w| w == word)
    }

    let lower = prompt.to_lowercase();

    if lower.contains("rust") || lower.contains("cargo") || lower.contains(".rs") {
        "rust".to_string()
    } else if lower.contains("javascript")
        || lower.contains("typescript")
        || lower.contains("node")
        || lower.contains(".js")
        || lower.contains(".ts")
    {
        "javascript".to_string()
    } else if lower.contains("python") || lower.contains(".py") {
        "python".to_string()
    } else if has_word(&lower, "go") || has_word(&lower, "golang") || lower.contains(".go") {
        "go".to_string()
    } else if lower.contains("java") || lower.contains(".java") {
        "java".to_string()
    } else if lower.contains("docker") || lower.contains("container") {
        "docker".to_string()
    } else if lower.contains("kubernetes") || lower.contains("k8s") {
        "kubernetes".to_string()
    } else if lower.contains("sql")
        || lower.contains("database")
        || lower.contains("postgres")
        || lower.contains("mysql")
    {
        "database".to_string()
    } else if has_word(&lower, "api") || has_word(&lower, "rest") || lower.contains("grpc") {
        "api".to_string()
    } else if lower.contains("auth")
        || lower.contains("login")
        || lower.contains("password")
        || lower.contains("token")
    {
        "security".to_string()
    } else if lower.contains("test") || lower.contains("spec") || lower.contains("mock") {
        "testing".to_string()
    } else {
        "general".to_string()
    }
}
803
/// Result from IntentDetection harness
///
/// When `target_hints` (or any of its fields) is absent, the caller falls
/// back to heuristic extraction from the prompt (see
/// `build_pre_context_perception_from_intent`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntentDetectionResult {
    /// Detected intent: "locate" | "understand" | "retrieve" | "explore" | "reason" | "validate" | "compare" | "track"
    pub detected_intent: String,
    /// Confidence score 0.0 - 1.0
    pub confidence: f32,
    /// Optional target hints from the harness
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_hints: Option<TargetHints>,
}
815
/// Target hints from IntentDetection harness
///
/// Every field is optional; missing values are back-filled from the prompt by
/// heuristic extraction in `build_pre_context_perception_from_intent`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TargetHints {
    /// Kind of the target, when the harness reports one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_type: Option<String>,
    /// Name of the target (e.g. a function name or file path), if reported.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_name: Option<String>,
    /// Domain classification of the target, if reported.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub domain: Option<String>,
}
826
/// Detect language hint from prompt characters.
///
/// Scripts are checked in a fixed priority order (zh, ja, ko, ar, ru); the
/// first script with at least one matching character wins. Returns `None`
/// when no known non-Latin script is present.
fn detect_language_hint(prompt: &str) -> Option<String> {
    // (language tag, Unicode ranges belonging to that script)
    let scripts: &[(&str, &[std::ops::RangeInclusive<char>])] = &[
        // CJK Unified Ideographs
        ("zh", &['\u{4e00}'..='\u{9fff}']),
        // Hiragana and Katakana
        ("ja", &['\u{3040}'..='\u{309f}', '\u{30a0}'..='\u{30ff}']),
        // Hangul Syllables
        ("ko", &['\u{ac00}'..='\u{d7af}']),
        // Arabic
        ("ar", &['\u{0600}'..='\u{06ff}']),
        // Cyrillic
        ("ru", &['\u{0400}'..='\u{04ff}']),
    ];

    for (tag, ranges) in scripts {
        if prompt
            .chars()
            .any(|c| ranges.iter().any(|range| range.contains(&c)))
        {
            return Some((*tag).to_string());
        }
    }
    None
}
866
867/// Build PreContextPerceptionEvent from IntentDetection result.
868fn build_pre_context_perception_from_intent(
869    result: IntentDetectionResult,
870    prompt: &str,
871    session_id: &str,
872    workspace: &str,
873) -> PreContextPerceptionEvent {
874    let target_hints = result.target_hints;
875    PreContextPerceptionEvent {
876        session_id: session_id.to_string(),
877        intent: result.detected_intent,
878        target_type: target_hints
879            .as_ref()
880            .and_then(|h| h.target_type.clone())
881            .unwrap_or_else(|| "unknown".to_string()),
882        target_name: target_hints
883            .as_ref()
884            .and_then(|h| h.target_name.clone())
885            .unwrap_or_else(|| extract_target_name_from_prompt(prompt, &[])),
886        domain: target_hints
887            .as_ref()
888            .and_then(|h| h.domain.clone())
889            .unwrap_or_else(|| detect_domain_from_prompt(prompt)),
890        query: Some(prompt.to_string()),
891        working_directory: workspace.to_string(),
892        urgency: "normal".to_string(),
893    }
894}
895
/// Rough token estimation (~4 chars per token for English/code).
///
/// Heuristic only: uses byte length (`str::len`), so multi-byte (non-ASCII)
/// text skews the estimate upward relative to actual tokenizer output.
#[cfg(feature = "ahp")]
fn estimate_tokens(text: &str) -> usize {
    text.len() / 4
}
901
902impl AgentLoop {
    /// Create a new agent loop from its required collaborators.
    ///
    /// Optional facilities (tool metrics, lane queue, progress tracker,
    /// task manager) start as `None`; attach them with the `with_*`
    /// builder methods.
    pub fn new(
        llm_client: Arc<dyn LlmClient>,
        tool_executor: Arc<ToolExecutor>,
        tool_context: ToolContext,
        config: AgentConfig,
    ) -> Self {
        Self {
            llm_client,
            tool_executor,
            tool_context,
            config,
            tool_metrics: None,
            command_queue: None,
            progress_tracker: None,
            task_manager: None,
        }
    }
920
921    /// Set the progress tracker for real-time tool/token usage tracking.
922    pub fn with_progress_tracker(
923        mut self,
924        tracker: Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>,
925    ) -> Self {
926        self.progress_tracker = Some(tracker);
927        self
928    }
929
930    /// Set the task manager for centralized task lifecycle tracking.
931    pub fn with_task_manager(mut self, manager: Arc<crate::task::TaskManager>) -> Self {
932        self.task_manager = Some(manager);
933        self
934    }
935
936    /// Set the tool metrics collector for this agent loop
937    pub fn with_tool_metrics(
938        mut self,
939        metrics: Arc<RwLock<crate::telemetry::ToolMetrics>>,
940    ) -> Self {
941        self.tool_metrics = Some(metrics);
942        self
943    }
944
945    /// Set the lane queue for priority-based tool execution.
946    ///
947    /// When set, tools are routed through the lane queue which supports
948    /// External task handling for multi-machine parallel processing.
949    pub fn with_queue(mut self, queue: Arc<SessionLaneQueue>) -> Self {
950        self.command_queue = Some(queue);
951        self
952    }
953
954    /// Track a tool call result in the progress tracker.
955    fn track_tool_result(&self, tool_name: &str, args: &serde_json::Value, exit_code: i32) {
956        if let Some(ref tracker) = self.progress_tracker {
957            let args_summary = Self::compact_json_args(args);
958            let success = exit_code == 0;
959            if let Ok(mut guard) = tracker.try_write() {
960                guard.track_tool_call(tool_name, args_summary, success);
961            }
962        }
963    }
964
965    /// Compact JSON arguments to a short summary string for tracking.
966    fn compact_json_args(args: &serde_json::Value) -> String {
967        let raw = match args {
968            serde_json::Value::Null => String::new(),
969            serde_json::Value::String(s) => s.clone(),
970            _ => serde_json::to_string(args).unwrap_or_default(),
971        };
972        let compact = raw.split_whitespace().collect::<Vec<_>>().join(" ");
973        if compact.len() > 180 {
974            format!("{}...", truncate_utf8(&compact, 180))
975        } else {
976            compact
977        }
978    }
979
980    /// Execute a tool, applying the configured timeout if set.
981    ///
982    /// On timeout, returns an error describing which tool timed out and after
983    /// how many milliseconds. The caller converts this to a tool-result error
984    /// message that is fed back to the LLM.
985    async fn execute_tool_timed(
986        &self,
987        name: &str,
988        args: &serde_json::Value,
989        ctx: &ToolContext,
990    ) -> anyhow::Result<crate::tools::ToolResult> {
991        let fut = self.tool_executor.execute_with_context(name, args, ctx);
992        if let Some(timeout_ms) = self.config.tool_timeout_ms {
993            match tokio::time::timeout(Duration::from_millis(timeout_ms), fut).await {
994                Ok(result) => result,
995                Err(_) => Err(anyhow::anyhow!(
996                    "Tool '{}' timed out after {}ms",
997                    name,
998                    timeout_ms
999                )),
1000            }
1001        } else {
1002            fut.await
1003        }
1004    }
1005
1006    /// Convert a tool execution result into the (output, exit_code, is_error, metadata, images) tuple.
1007    fn tool_result_to_tuple(
1008        result: anyhow::Result<crate::tools::ToolResult>,
1009    ) -> (
1010        String,
1011        i32,
1012        bool,
1013        Option<serde_json::Value>,
1014        Vec<crate::llm::Attachment>,
1015    ) {
1016        match result {
1017            Ok(r) => (
1018                r.output,
1019                r.exit_code,
1020                r.exit_code != 0,
1021                r.metadata,
1022                r.images,
1023            ),
1024            Err(e) => {
1025                let msg = e.to_string();
1026                // Classify the error so the LLM knows whether retrying makes sense.
1027                let hint = if Self::is_transient_error(&msg) {
1028                    " [transient — you may retry this tool call]"
1029                } else {
1030                    " [permanent — do not retry without changing the arguments]"
1031                };
1032                (
1033                    format!("Tool execution error: {}{}", msg, hint),
1034                    1,
1035                    true,
1036                    None,
1037                    Vec::new(),
1038                )
1039            }
1040        }
1041    }
1042
1043    /// Inspect the workspace for well-known project marker files and return a short
1044    /// `## Project Context` section that the agent can use without any manual configuration.
1045    /// Returns an empty string when the workspace type cannot be determined.
1046    fn detect_project_hint(workspace: &std::path::Path) -> String {
1047        struct Marker {
1048            file: &'static str,
1049            lang: &'static str,
1050            tip: &'static str,
1051        }
1052
1053        let markers = [
1054            Marker {
1055                file: "Cargo.toml",
1056                lang: "Rust",
1057                tip: "Use `cargo build`, `cargo test`, `cargo clippy`, and `cargo fmt`. \
1058                  Prefer `anyhow` / `thiserror` for error handling. \
1059                  Follow the Microsoft Rust Guidelines (no panics in library code, \
1060                  async-first with Tokio).",
1061            },
1062            Marker {
1063                file: "package.json",
1064                lang: "Node.js / TypeScript",
1065                tip: "Check `package.json` for the package manager (npm/yarn/pnpm/bun) \
1066                  and available scripts. Prefer TypeScript with strict mode. \
1067                  Use ESM imports unless the project is CommonJS.",
1068            },
1069            Marker {
1070                file: "pyproject.toml",
1071                lang: "Python",
1072                tip: "Use the package manager declared in `pyproject.toml` \
1073                  (uv, poetry, hatch, etc.). Prefer type hints and async/await for I/O.",
1074            },
1075            Marker {
1076                file: "setup.py",
1077                lang: "Python",
1078                tip: "Legacy Python project. Prefer type hints and async/await for I/O.",
1079            },
1080            Marker {
1081                file: "requirements.txt",
1082                lang: "Python",
1083                tip: "Python project with pip-style dependencies. \
1084                  Prefer type hints and async/await for I/O.",
1085            },
1086            Marker {
1087                file: "go.mod",
1088                lang: "Go",
1089                tip: "Use `go build ./...` and `go test ./...`. \
1090                  Follow standard Go project layout. Use `gofmt` for formatting.",
1091            },
1092            Marker {
1093                file: "pom.xml",
1094                lang: "Java / Maven",
1095                tip: "Use `mvn compile`, `mvn test`, `mvn package`. \
1096                  Follow standard Maven project structure.",
1097            },
1098            Marker {
1099                file: "build.gradle",
1100                lang: "Java / Gradle",
1101                tip: "Use `./gradlew build` and `./gradlew test`. \
1102                  Follow standard Gradle project structure.",
1103            },
1104            Marker {
1105                file: "build.gradle.kts",
1106                lang: "Kotlin / Gradle",
1107                tip: "Use `./gradlew build` and `./gradlew test`. \
1108                  Prefer Kotlin coroutines for async work.",
1109            },
1110            Marker {
1111                file: "CMakeLists.txt",
1112                lang: "C / C++",
1113                tip: "Use `cmake -B build && cmake --build build`. \
1114                  Check for `compile_commands.json` for IDE tooling.",
1115            },
1116            Marker {
1117                file: "Makefile",
1118                lang: "C / C++ (or generic)",
1119                tip: "Use `make` or `make <target>`. \
1120                  Check available targets with `make help` or by reading the Makefile.",
1121            },
1122        ];
1123
1124        // Check for C# / .NET — no single fixed filename, so glob for *.csproj / *.sln
1125        let is_dotnet = workspace.join("*.csproj").exists() || {
1126            // Fast check: look for any .csproj or .sln in the workspace root
1127            std::fs::read_dir(workspace)
1128                .map(|entries| {
1129                    entries.flatten().any(|e| {
1130                        let name = e.file_name();
1131                        let s = name.to_string_lossy();
1132                        s.ends_with(".csproj") || s.ends_with(".sln")
1133                    })
1134                })
1135                .unwrap_or(false)
1136        };
1137
1138        if is_dotnet {
1139            return "## Project Context\n\nThis is a **C# / .NET** project. \
1140             Use `dotnet build`, `dotnet test`, and `dotnet run`. \
1141             Follow C# coding conventions and async/await patterns."
1142                .to_string();
1143        }
1144
1145        for marker in &markers {
1146            if workspace.join(marker.file).exists() {
1147                return format!(
1148                    "## Project Context\n\nThis is a **{}** project. {}",
1149                    marker.lang, marker.tip
1150                );
1151            }
1152        }
1153
1154        String::new()
1155    }
1156
1157    /// Returns `true` for errors that are likely transient (network, timeout, I/O contention).
1158    /// Used to annotate tool error messages so the LLM knows whether retrying is safe.
1159    fn is_transient_error(msg: &str) -> bool {
1160        let lower = msg.to_lowercase();
1161        lower.contains("timeout")
1162        || lower.contains("timed out")
1163        || lower.contains("connection refused")
1164        || lower.contains("connection reset")
1165        || lower.contains("broken pipe")
1166        || lower.contains("temporarily unavailable")
1167        || lower.contains("resource temporarily unavailable")
1168        || lower.contains("os error 11")  // EAGAIN
1169        || lower.contains("os error 35")  // EAGAIN on macOS
1170        || lower.contains("rate limit")
1171        || lower.contains("too many requests")
1172        || lower.contains("service unavailable")
1173        || lower.contains("network unreachable")
1174    }
1175
1176    /// Returns `true` when a tool writes a file and is safe to run concurrently with other
1177    /// independent writes (no ordering dependencies, no side-channel output).
1178    fn is_parallel_safe_write(name: &str, _args: &serde_json::Value) -> bool {
1179        matches!(
1180            name,
1181            "write_file" | "edit_file" | "create_file" | "append_to_file" | "replace_in_file"
1182        )
1183    }
1184
1185    /// Extract the target file path from write-tool arguments so we can check for conflicts.
1186    fn extract_write_path(args: &serde_json::Value) -> Option<String> {
1187        // write_file / create_file / append_to_file / replace_in_file use "path"
1188        // edit_file uses "path" as well
1189        args.get("path")
1190            .and_then(|v| v.as_str())
1191            .map(|s| s.to_string())
1192    }
1193
1194    /// Execute a tool through the lane queue (if configured) or directly.
1195    /// Wraps execution in task lifecycle if task_manager is configured.
1196    async fn execute_tool_queued_or_direct(
1197        &self,
1198        name: &str,
1199        args: &serde_json::Value,
1200        ctx: &ToolContext,
1201    ) -> anyhow::Result<crate::tools::ToolResult> {
1202        // Create task for this tool execution if task_manager is available
1203        let task_id = if let Some(ref tm) = self.task_manager {
1204            let task = crate::task::Task::tool(name, args.clone());
1205            let id = task.id;
1206            tm.spawn(task);
1207            // Start the task immediately
1208            let _ = tm.start(id);
1209            Some(id)
1210        } else {
1211            None
1212        };
1213
1214        let result = self
1215            .execute_tool_queued_or_direct_inner(name, args, ctx)
1216            .await;
1217
1218        // Complete or fail the task based on result
1219        if let Some(ref tm) = self.task_manager {
1220            if let Some(tid) = task_id {
1221                match &result {
1222                    Ok(r) => {
1223                        let output = serde_json::json!({
1224                            "output": r.output.clone(),
1225                            "exit_code": r.exit_code,
1226                        });
1227                        let _ = tm.complete(tid, Some(output));
1228                    }
1229                    Err(e) => {
1230                        let _ = tm.fail(tid, e.to_string());
1231                    }
1232                }
1233            }
1234        }
1235
1236        result
1237    }
1238
    /// Inner execution without task lifecycle wrapping.
    ///
    /// Routing: when a lane queue is configured, the tool is submitted there
    /// first and the result channel awaited. If the queue reports an execution
    /// error or the channel closes, a warning is logged and execution falls
    /// back to the direct, timeout-guarded path. Without a queue, the direct
    /// path is used immediately.
    async fn execute_tool_queued_or_direct_inner(
        &self,
        name: &str,
        args: &serde_json::Value,
        ctx: &ToolContext,
    ) -> anyhow::Result<crate::tools::ToolResult> {
        if let Some(ref queue) = self.command_queue {
            let command = ToolCommand::new(
                Arc::clone(&self.tool_executor),
                name.to_string(),
                args.clone(),
                ctx.clone(),
                self.config.skill_registry.clone(),
            );
            // The queue picks the lane from the tool name.
            let rx = queue.submit_by_tool(name, Box::new(command)).await;
            match rx.await {
                Ok(Ok(value)) => {
                    // Queue results arrive as JSON: {"output": ..., "exit_code": ...}.
                    let output = value["output"]
                        .as_str()
                        .ok_or_else(|| {
                            anyhow::anyhow!(
                                "Queue result missing 'output' field for tool '{}'",
                                name
                            )
                        })?
                        .to_string();
                    let exit_code = value["exit_code"].as_i64().unwrap_or(0) as i32;
                    return Ok(crate::tools::ToolResult {
                        name: name.to_string(),
                        output,
                        exit_code,
                        metadata: None,
                        images: Vec::new(),
                    });
                }
                Ok(Err(e)) => {
                    // Queue accepted the command but execution failed — fall back.
                    tracing::warn!(
                        "Queue execution failed for tool '{}', falling back to direct: {}",
                        name,
                        e
                    );
                }
                Err(_) => {
                    // Result sender dropped (queue shut down mid-flight) — fall back.
                    tracing::warn!(
                        "Queue channel closed for tool '{}', falling back to direct",
                        name
                    );
                }
            }
        }
        self.execute_tool_timed(name, args, ctx).await
    }
1292
    /// Call the LLM, handling streaming vs non-streaming internally.
    ///
    /// Streaming events (`TextDelta`, `ToolStart`) are forwarded to `event_tx`
    /// as they arrive. Non-streaming mode simply awaits the complete response.
    ///
    /// When a `ToolIndex` is configured, tools are filtered per-turn based on
    /// the last user message, reducing context usage with large tool sets.
    ///
    /// Returns `Err` on any LLM API failure. The circuit breaker in
    /// `execute_loop` wraps this call with retry logic for non-streaming mode.
    async fn call_llm(
        &self,
        messages: &[Message],
        system: Option<&str>,
        event_tx: &Option<mpsc::Sender<AgentEvent>>,
        cancel_token: &tokio_util::sync::CancellationToken,
    ) -> anyhow::Result<LlmResponse> {
        // Filter tools through ToolIndex if configured
        let tools = if let Some(ref index) = self.config.tool_index {
            // Use the text of the most recent user message as the search query;
            // an empty query is used when no user text message exists.
            let query = messages
                .iter()
                .rev()
                .find(|m| m.role == "user")
                .and_then(|m| {
                    m.content.iter().find_map(|b| match b {
                        crate::llm::ContentBlock::Text { text } => Some(text.as_str()),
                        _ => None,
                    })
                })
                .unwrap_or("");
            let matches = index.search(query, index.len());
            let matched_names: std::collections::HashSet<&str> =
                matches.iter().map(|m| m.name.as_str()).collect();
            // Keep only the configured tool definitions whose names matched.
            self.config
                .tools
                .iter()
                .filter(|t| matched_names.contains(t.name.as_str()))
                .cloned()
                .collect::<Vec<_>>()
        } else {
            self.config.tools.clone()
        };

        // Streaming mode is selected by the presence of an event channel.
        if event_tx.is_some() {
            let mut stream_rx = match self
                .llm_client
                .complete_streaming(messages, system, &tools, cancel_token.clone())
                .await
            {
                Ok(rx) => rx,
                Err(stream_error) => {
                    // Do not fall back to non-streaming if cancelled — propagate cancellation
                    if cancel_token.is_cancelled() {
                        anyhow::bail!("Operation cancelled by user");
                    }
                    tracing::warn!(
                        error = %stream_error,
                        "LLM streaming setup failed; falling back to non-streaming completion"
                    );
                    return self
                        .llm_client
                        .complete(messages, system, &tools)
                        .await
                        .with_context(|| {
                            format!(
                                "LLM streaming call failed ({stream_error}); non-streaming fallback also failed"
                            )
                        });
                }
            };

            // Pump stream events to the agent channel until Done/close/cancel.
            let mut final_response: Option<LlmResponse> = None;
            loop {
                tokio::select! {
                    _ = cancel_token.cancelled() => {
                        tracing::info!("🛑 LLM streaming cancelled by CancellationToken");
                        anyhow::bail!("Operation cancelled by user");
                    }
                    event = stream_rx.recv() => {
                        match event {
                            Some(crate::llm::StreamEvent::TextDelta(text)) => {
                                if let Some(tx) = event_tx {
                                    tx.send(AgentEvent::TextDelta { text }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::ReasoningDelta(text)) => {
                                if let Some(tx) = event_tx {
                                    tx.send(AgentEvent::ReasoningDelta { text }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::ToolUseStart { id, name }) => {
                                if let Some(tx) = event_tx {
                                    tx.send(AgentEvent::ToolStart { id, name }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::ToolUseInputDelta(delta)) => {
                                if let Some(tx) = event_tx {
                                    tx.send(AgentEvent::ToolInputDelta { delta }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::Done(resp)) => {
                                final_response = Some(resp);
                                break;
                            }
                            // Sender dropped without a Done event — surfaced below.
                            None => break,
                        }
                    }
                }
            }
            final_response.context("Stream ended without final response")
        } else {
            self.llm_client
                .complete(messages, system, &tools)
                .await
                .context("LLM call failed")
        }
    }
1410
1411    /// Create a tool context with streaming support.
1412    ///
1413    /// When `event_tx` is Some, spawns a forwarder task that converts
1414    /// `ToolStreamEvent::OutputDelta` into `AgentEvent::ToolOutputDelta`
1415    /// and sends them to the agent event channel.
1416    ///
1417    /// Returns the augmented `ToolContext`. The forwarder task runs until
1418    /// the tool-side sender is dropped (i.e., tool execution finishes).
1419    fn streaming_tool_context(
1420        &self,
1421        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1422        tool_id: &str,
1423        tool_name: &str,
1424    ) -> ToolContext {
1425        let mut ctx = self.tool_context.clone();
1426        if let Some(agent_tx) = event_tx {
1427            let (tool_tx, mut tool_rx) = mpsc::channel::<ToolStreamEvent>(64);
1428            ctx.event_tx = Some(tool_tx);
1429
1430            let agent_tx = agent_tx.clone();
1431            let tool_id = tool_id.to_string();
1432            let tool_name = tool_name.to_string();
1433            tokio::spawn(async move {
1434                while let Some(event) = tool_rx.recv().await {
1435                    match event {
1436                        ToolStreamEvent::OutputDelta(delta) => {
1437                            agent_tx
1438                                .send(AgentEvent::ToolOutputDelta {
1439                                    id: tool_id.clone(),
1440                                    name: tool_name.clone(),
1441                                    delta,
1442                                })
1443                                .await
1444                                .ok();
1445                        }
1446                    }
1447                }
1448            });
1449        }
1450        ctx
1451    }
1452
1453    /// Resolve context from all providers for a given prompt
1454    ///
1455    /// Returns aggregated context results from all configured providers.
1456    async fn resolve_context(&self, prompt: &str, session_id: Option<&str>) -> Vec<ContextResult> {
1457        if self.config.context_providers.is_empty() {
1458            return Vec::new();
1459        }
1460
1461        let query = ContextQuery::new(prompt).with_session_id(session_id.unwrap_or(""));
1462
1463        let futures = self
1464            .config
1465            .context_providers
1466            .iter()
1467            .map(|p| p.query(&query));
1468        let outcomes = join_all(futures).await;
1469
1470        outcomes
1471            .into_iter()
1472            .enumerate()
1473            .filter_map(|(i, r)| match r {
1474                Ok(result) if !result.is_empty() => Some(result),
1475                Ok(_) => None,
1476                Err(e) => {
1477                    tracing::warn!(
1478                        "Context provider '{}' failed: {}",
1479                        self.config.context_providers[i].name(),
1480                        e
1481                    );
1482                    None
1483                }
1484            })
1485            .collect()
1486    }
1487
1488    /// Detect whether the LLM's no-tool-call response looks like an intermediate
1489    /// step rather than a genuine final answer.
1490    ///
1491    /// Returns `true` when continuation should be injected. Heuristics:
1492    /// - Response ends with a colon or ellipsis (mid-thought)
1493    /// - Response contains phrases that signal incomplete work
1494    /// - Response is very short (< 80 chars) and doesn't look like a summary
1495    fn looks_incomplete(text: &str) -> bool {
1496        let t = text.trim();
1497        if t.is_empty() {
1498            return true;
1499        }
1500        // Very short responses that aren't clearly a final answer
1501        if t.len() < 80 && !t.contains('\n') {
1502            // Short single-line — could be a genuine one-liner answer, keep going
1503            // only if it ends with punctuation that signals continuation
1504            let ends_continuation =
1505                t.ends_with(':') || t.ends_with("...") || t.ends_with('…') || t.ends_with(',');
1506            if ends_continuation {
1507                return true;
1508            }
1509        }
1510        // Phrases that signal the LLM is describing what it will do rather than doing it
1511        let incomplete_phrases = [
1512            "i'll ",
1513            "i will ",
1514            "let me ",
1515            "i need to ",
1516            "i should ",
1517            "next, i",
1518            "first, i",
1519            "now i",
1520            "i'll start",
1521            "i'll begin",
1522            "i'll now",
1523            "let's start",
1524            "let's begin",
1525            "to do this",
1526            "i'm going to",
1527        ];
1528        let lower = t.to_lowercase();
1529        for phrase in &incomplete_phrases {
1530            if lower.contains(phrase) {
1531                return true;
1532            }
1533        }
1534        false
1535    }
1536
    /// Get the assembled system prompt from slots.
    ///
    /// NOTE(review): marked `dead_code` — presumably callers go through
    /// `system_prompt_for_style` instead; confirm before removing.
    #[allow(dead_code)]
    fn system_prompt(&self) -> String {
        self.config.prompt_slots.build()
    }
1542
1543    /// Get the assembled system prompt from slots with an explicit style.
1544    fn system_prompt_for_style(&self, style: AgentStyle) -> String {
1545        let mut slots = self.config.prompt_slots.clone();
1546        slots.style = Some(style);
1547        slots.build()
1548    }
1549
1550    async fn resolve_effective_style(&self, prompt: &str) -> AgentStyle {
1551        if let Some(style) = self.config.prompt_slots.style {
1552            return style;
1553        }
1554
1555        let (style, confidence) = AgentStyle::detect_with_confidence(prompt);
1556        if confidence != DetectionConfidence::Low {
1557            return style;
1558        }
1559
1560        match AgentStyle::detect_with_llm(self.llm_client.as_ref(), prompt).await {
1561            Ok(classified_style) => {
1562                tracing::debug!(
1563                    intent.classification = ?classified_style,
1564                    intent.source = "llm",
1565                    "Intent classified via LLM"
1566                );
1567                classified_style
1568            }
1569            Err(e) => {
1570                tracing::warn!(error = %e, "LLM intent classification failed, using keyword detection");
1571                style
1572            }
1573        }
1574    }
1575
1576    /// Detect if a subagent should be launched for this task.
1577    ///
1578    /// Checks for explicit `[subagent:name]` syntax first, then falls back
1579    /// to style-based auto-detection if a registry is configured.
1580    ///
1581    /// Returns `Some((AgentDefinition, cleaned_prompt))` if subagent should be launched,
1582    /// or `None` if normal execution should continue.
1583    fn should_launch_subagent(
1584        &self,
1585        prompt: &str,
1586        style: AgentStyle,
1587    ) -> Option<(Arc<crate::subagent::AgentDefinition>, String)> {
1588        let registry = self.config.subagent_registry.as_ref()?;
1589
1590        // Step 1: Check for explicit [subagent:name] syntax
1591        if let Some(caps) = prompt.find("[subagent:") {
1592            // `end_offset` is offset from `caps` to ']'
1593            if let Some(end_offset) = prompt[caps..].find(']') {
1594                // `end_abs` is the absolute index of ']'
1595                let end_abs = caps + end_offset;
1596                // name is from after "[subagent:" to ']'
1597                let name = &prompt[caps + 10..end_abs];
1598                if let Some(agent_def) = registry.get(name) {
1599                    // Remove the [subagent:name] tag from the prompt
1600                    let after_tag = prompt[end_abs + 1..].trim();
1601                    let cleaned = if caps > 0 {
1602                        format!("{} {}", &prompt[..caps].trim(), after_tag)
1603                    } else {
1604                        after_tag.to_string()
1605                    };
1606                    tracing::info!(subagent = %name, "Explicit subagent request detected");
1607                    return Some((Arc::new(agent_def), cleaned.trim().to_string()));
1608                }
1609            }
1610        }
1611
1612        // Step 2: Style-based auto-detection
1613        let agent_name = match style {
1614            AgentStyle::Explore => "explore",
1615            AgentStyle::Plan => "plan",
1616            AgentStyle::Verification => "verification",
1617            AgentStyle::CodeReview => "review",
1618            AgentStyle::GeneralPurpose => return None,
1619        };
1620
1621        if let Some(agent_def) = registry.get(agent_name) {
1622            tracing::info!(
1623                subagent = %agent_name,
1624                style = ?style,
1625                "Auto-detected subagent launch based on style"
1626            );
1627            return Some((Arc::new(agent_def), prompt.to_string()));
1628        }
1629
1630        None
1631    }
1632
1633    /// Detect if context perception is needed based on user prompt.
1634    ///
1635    /// Returns `Some(PreContextPerceptionEvent)` if the prompt suggests the model
1636    /// needs workspace knowledge (finding files, understanding code, etc.).
1637    pub fn detect_context_perception_intent(
1638        &self,
1639        prompt: &str,
1640        session_id: &str,
1641        workspace: &str,
1642    ) -> Option<PreContextPerceptionEvent> {
1643        let lower = prompt.to_lowercase();
1644
1645        // Pattern matching for different intents that suggest context perception is needed
1646        let intents: &[(&[&str], &str)] = &[
1647            // Locate: finding files, functions, resources
1648            (
1649                &[
1650                    "where is",
1651                    "where are",
1652                    "find the file",
1653                    "find all",
1654                    "find files",
1655                    "who wrote",
1656                    "locate",
1657                    "search for",
1658                    "look for",
1659                    "search",
1660                ],
1661                "locate",
1662            ),
1663            // Understand: explaining how something works
1664            (
1665                &[
1666                    "how does",
1667                    "what does",
1668                    "explain",
1669                    "understand",
1670                    "what is this",
1671                    "how does this work",
1672                ],
1673                "understand",
1674            ),
1675            // Retrieve: recalling from memory/past
1676            (
1677                &[
1678                    "remember",
1679                    "earlier",
1680                    "before",
1681                    "previously",
1682                    "last time",
1683                    "past",
1684                    "previous",
1685                ],
1686                "retrieve",
1687            ),
1688            // Explore: understanding structure
1689            (
1690                &[
1691                    "how is organized",
1692                    "project structure",
1693                    "what files",
1694                    "show me the structure",
1695                    "explore",
1696                ],
1697                "explore",
1698            ),
1699            // Reason: asking why/causality
1700            (
1701                &[
1702                    "why did",
1703                    "why is",
1704                    "cause",
1705                    "reason",
1706                    "what happened",
1707                    "why does",
1708                ],
1709                "reason",
1710            ),
1711            // Validate: checking correctness
1712            (
1713                &["is this correct", "verify", "validate", "check if", "debug"],
1714                "validate",
1715            ),
1716            // Compare: comparing things
1717            (
1718                &[
1719                    "difference between",
1720                    "compare",
1721                    "versus",
1722                    " vs ",
1723                    "different from",
1724                ],
1725                "compare",
1726            ),
1727            // Track: status/history
1728            (
1729                &[
1730                    "status",
1731                    "progress",
1732                    "how far",
1733                    "history",
1734                    "what's the current",
1735                ],
1736                "track",
1737            ),
1738        ];
1739
1740        // Detect target type from keywords
1741        let target_type = if lower.contains("function") || lower.contains("method") {
1742            "function"
1743        } else if lower.contains("file") || lower.contains("config") {
1744            "file"
1745        } else if lower.contains("class") {
1746            "entity"
1747        } else if lower.contains("module") || lower.contains("package") {
1748            "module"
1749        } else if lower.contains("test") {
1750            "test"
1751        } else {
1752            "unknown"
1753        };
1754
1755        // Find matching intent
1756        let matched_intent = intents
1757            .iter()
1758            .find(|(patterns, _)| patterns.iter().any(|p| lower.contains(p)));
1759
1760        matched_intent.map(|(patterns, intent)| {
1761            // Extract target name if possible (simplified extraction)
1762            let target_name = extract_target_name_from_prompt(prompt, patterns);
1763
1764            PreContextPerceptionEvent {
1765                session_id: session_id.to_string(),
1766                intent: intent.to_string(),
1767                target_type: target_type.to_string(),
1768                target_name,
1769                domain: detect_domain_from_prompt(prompt),
1770                query: Some(prompt.to_string()),
1771                working_directory: workspace.to_string(),
1772                urgency: "normal".to_string(),
1773            }
1774        })
1775    }
1776
1777    /// Fire PreContextPerception hook and wait for harness decision.
1778    async fn fire_pre_context_perception(&self, event: &PreContextPerceptionEvent) -> HookResult {
1779        if let Some(he) = &self.config.hook_engine {
1780            let hook_event = HookEvent::PreContextPerception(event.clone());
1781            he.fire(&hook_event).await
1782        } else {
1783            HookResult::continue_()
1784        }
1785    }
1786
1787    /// Fire IntentDetection hook and wait for harness decision.
1788    ///
1789    /// This is called on every prompt to detect user intent via the AHP harness.
1790    /// Returns the detected intent if the harness provides one, or None if blocked/failed.
1791    async fn fire_intent_detection(
1792        &self,
1793        prompt: &str,
1794        session_id: &str,
1795        workspace: &str,
1796    ) -> Option<IntentDetectionResult> {
1797        let event = IntentDetectionEvent {
1798            session_id: session_id.to_string(),
1799            prompt: prompt.to_string(),
1800            workspace: workspace.to_string(),
1801            language_hint: detect_language_hint(prompt),
1802        };
1803
1804        let hook_result = if let Some(he) = &self.config.hook_engine {
1805            let hook_event = HookEvent::IntentDetection(event);
1806            he.fire(&hook_event).await
1807        } else {
1808            return None;
1809        };
1810
1811        match hook_result {
1812            HookResult::Continue(Some(modified)) => {
1813                // Parse the intent detection result
1814                serde_json::from_value::<IntentDetectionResult>(modified).ok()
1815            }
1816            HookResult::Block(_) => {
1817                // Harness blocked intent detection - use fallback
1818                tracing::info!("AHP harness blocked intent detection");
1819                None
1820            }
1821            _ => None,
1822        }
1823    }
1824
1825    /// Apply injected context from AHP harness decision.
1826    #[cfg(feature = "ahp")]
1827    fn apply_injected_context(&self, injected: InjectedContext) -> Vec<ContextResult> {
1828        let mut results = Vec::new();
1829
1830        // Convert facts to ContextResult
1831        if !injected.facts.is_empty() {
1832            let items: Vec<ContextItem> = injected
1833                .facts
1834                .into_iter()
1835                .map(|f| {
1836                    let token_count = estimate_tokens(&f.content);
1837                    ContextItem {
1838                        id: uuid::Uuid::new_v4().to_string(),
1839                        context_type: ContextType::Resource,
1840                        content: f.content,
1841                        token_count,
1842                        relevance: f.confidence,
1843                        source: Some(f.source),
1844                        metadata: std::collections::HashMap::new(),
1845                    }
1846                })
1847                .collect();
1848
1849            let total_tokens: usize = items.iter().map(|i| i.token_count).sum();
1850
1851            results.push(ContextResult {
1852                items,
1853                total_tokens,
1854                provider: "ahp_harness".to_string(),
1855                truncated: false,
1856            });
1857        }
1858
1859        // Handle file_contents
1860        if let Some(file_contents) = injected.file_contents {
1861            let items: Vec<ContextItem> = file_contents
1862                .into_iter()
1863                .map(|f| {
1864                    let token_count = estimate_tokens(&f.snippet);
1865                    ContextItem {
1866                        id: uuid::Uuid::new_v4().to_string(),
1867                        context_type: ContextType::Resource,
1868                        content: f.snippet,
1869                        token_count,
1870                        relevance: f.relevance_score,
1871                        source: Some(f.path),
1872                        metadata: std::collections::HashMap::new(),
1873                    }
1874                })
1875                .collect();
1876
1877            let total_tokens: usize = items.iter().map(|i| i.token_count).sum();
1878
1879            results.push(ContextResult {
1880                items,
1881                total_tokens,
1882                provider: "ahp_harness".to_string(),
1883                truncated: false,
1884            });
1885        }
1886
1887        // Handle project_summary
1888        if let Some(summary) = injected.project_summary {
1889            let content = format!(
1890                "Project: {}\n{}",
1891                summary.project_name, summary.structure_description
1892            );
1893            let token_count = estimate_tokens(&content);
1894
1895            results.push(ContextResult {
1896                items: vec![ContextItem {
1897                    id: uuid::Uuid::new_v4().to_string(),
1898                    context_type: ContextType::Resource,
1899                    content,
1900                    token_count,
1901                    relevance: 0.9,
1902                    source: Some("ahp://project-summary".to_string()),
1903                    metadata: std::collections::HashMap::new(),
1904                }],
1905                total_tokens: token_count,
1906                provider: "ahp_harness".to_string(),
1907                truncated: false,
1908            });
1909        }
1910
1911        // Handle knowledge
1912        if let Some(knowledge) = injected.knowledge {
1913            let items: Vec<ContextItem> = knowledge
1914                .into_iter()
1915                .map(|k| {
1916                    let token_count = estimate_tokens(&k);
1917                    ContextItem {
1918                        id: uuid::Uuid::new_v4().to_string(),
1919                        context_type: ContextType::Resource,
1920                        content: k,
1921                        token_count,
1922                        relevance: 0.8,
1923                        source: Some("ahp://knowledge".to_string()),
1924                        metadata: std::collections::HashMap::new(),
1925                    }
1926                })
1927                .collect();
1928
1929            let total_tokens: usize = items.iter().map(|i| i.token_count).sum();
1930
1931            results.push(ContextResult {
1932                items,
1933                total_tokens,
1934                provider: "ahp_harness".to_string(),
1935                truncated: false,
1936            });
1937        }
1938
1939        results
1940    }
1941
1942    /// Build augmented system prompt with context
1943    #[allow(dead_code)]
1944    fn build_augmented_system_prompt(&self, context_results: &[ContextResult]) -> Option<String> {
1945        let base = self.system_prompt();
1946        self.build_augmented_system_prompt_with_base(&base, context_results)
1947    }
1948
1949    fn build_augmented_system_prompt_with_base(
1950        &self,
1951        base: &str,
1952        context_results: &[ContextResult],
1953    ) -> Option<String> {
1954        let base = base.to_string();
1955
1956        // Use live tool executor definitions so tools added via add_mcp_server() are included
1957        let live_tools = self.tool_executor.definitions();
1958        let mcp_tools: Vec<&ToolDefinition> = live_tools
1959            .iter()
1960            .filter(|t| t.name.starts_with("mcp__"))
1961            .collect();
1962
1963        let mcp_section = if mcp_tools.is_empty() {
1964            String::new()
1965        } else {
1966            let mut lines = vec![
1967                "## MCP Tools".to_string(),
1968                String::new(),
1969                "The following MCP (Model Context Protocol) tools are available. Use them when the task requires external capabilities beyond the built-in tools:".to_string(),
1970                String::new(),
1971            ];
1972            for tool in &mcp_tools {
1973                let display = format!("- `{}` — {}", tool.name, tool.description);
1974                lines.push(display);
1975            }
1976            lines.join("\n")
1977        };
1978
1979        let parts: Vec<&str> = [base.as_str(), mcp_section.as_str()]
1980            .iter()
1981            .filter(|s| !s.is_empty())
1982            .copied()
1983            .collect();
1984
1985        // Auto-detect project type from workspace and inject language-specific guidelines,
1986        // but only when the user hasn't already set a custom `guidelines` slot.
1987        let project_hint = if self.config.prompt_slots.guidelines.is_none() {
1988            Self::detect_project_hint(&self.tool_context.workspace)
1989        } else {
1990            String::new()
1991        };
1992
1993        if context_results.is_empty() {
1994            if project_hint.is_empty() {
1995                return Some(parts.join("\n\n"));
1996            }
1997            return Some(format!("{}\n\n{}", parts.join("\n\n"), project_hint));
1998        }
1999
2000        // Build context XML block
2001        let context_xml: String = context_results
2002            .iter()
2003            .map(|r| r.to_xml())
2004            .collect::<Vec<_>>()
2005            .join("\n\n");
2006
2007        if project_hint.is_empty() {
2008            Some(format!("{}\n\n{}", parts.join("\n\n"), context_xml))
2009        } else {
2010            Some(format!(
2011                "{}\n\n{}\n\n{}",
2012                parts.join("\n\n"),
2013                project_hint,
2014                context_xml
2015            ))
2016        }
2017    }
2018
2019    /// Notify providers of turn completion for memory extraction
2020    async fn notify_turn_complete(&self, session_id: &str, prompt: &str, response: &str) {
2021        let futures = self
2022            .config
2023            .context_providers
2024            .iter()
2025            .map(|p| p.on_turn_complete(session_id, prompt, response));
2026        let outcomes = join_all(futures).await;
2027
2028        for (i, result) in outcomes.into_iter().enumerate() {
2029            if let Err(e) = result {
2030                tracing::warn!(
2031                    "Context provider '{}' on_turn_complete failed: {}",
2032                    self.config.context_providers[i].name(),
2033                    e
2034                );
2035            }
2036        }
2037    }
2038
2039    /// Fire PreToolUse hook event before tool execution.
2040    /// Returns the HookResult which may block the tool call.
2041    async fn fire_pre_tool_use(
2042        &self,
2043        session_id: &str,
2044        tool_name: &str,
2045        args: &serde_json::Value,
2046        recent_tools: Vec<String>,
2047    ) -> Option<HookResult> {
2048        if let Some(he) = &self.config.hook_engine {
2049            // Convert null args to empty object so JS callbacks don't get null.input errors
2050            let safe_args = if args.is_null() {
2051                serde_json::Value::Object(Default::default())
2052            } else {
2053                args.clone()
2054            };
2055            let event = HookEvent::PreToolUse(PreToolUseEvent {
2056                session_id: session_id.to_string(),
2057                tool: tool_name.to_string(),
2058                args: safe_args,
2059                working_directory: self.tool_context.workspace.to_string_lossy().to_string(),
2060                recent_tools,
2061            });
2062            let result = he.fire(&event).await;
2063            if result.is_block() {
2064                return Some(result);
2065            }
2066        }
2067        None
2068    }
2069
2070    /// Fire PostToolUse hook event after tool execution (fire-and-forget).
2071    async fn fire_post_tool_use(
2072        &self,
2073        session_id: &str,
2074        tool_name: &str,
2075        args: &serde_json::Value,
2076        output: &str,
2077        success: bool,
2078        duration_ms: u64,
2079    ) {
2080        if let Some(he) = &self.config.hook_engine {
2081            // Convert null args to empty object so JS callbacks don't get null.input errors
2082            let safe_args = if args.is_null() {
2083                serde_json::Value::Object(Default::default())
2084            } else {
2085                args.clone()
2086            };
2087            let event = HookEvent::PostToolUse(PostToolUseEvent {
2088                session_id: session_id.to_string(),
2089                tool: tool_name.to_string(),
2090                args: safe_args,
2091                result: ToolResultData {
2092                    success,
2093                    output: output.to_string(),
2094                    exit_code: if success { Some(0) } else { Some(1) },
2095                    duration_ms,
2096                },
2097            });
2098            let he = Arc::clone(he);
2099            tokio::spawn(async move {
2100                let _ = he.fire(&event).await;
2101            });
2102        }
2103    }
2104
2105    /// Fire GenerateStart hook event before an LLM call.
2106    async fn fire_generate_start(
2107        &self,
2108        session_id: &str,
2109        prompt: &str,
2110        system_prompt: &Option<String>,
2111    ) {
2112        if let Some(he) = &self.config.hook_engine {
2113            let event = HookEvent::GenerateStart(GenerateStartEvent {
2114                session_id: session_id.to_string(),
2115                prompt: prompt.to_string(),
2116                system_prompt: system_prompt.clone(),
2117                model_provider: String::new(),
2118                model_name: String::new(),
2119                available_tools: self.config.tools.iter().map(|t| t.name.clone()).collect(),
2120            });
2121            let _ = he.fire(&event).await;
2122        }
2123    }
2124
2125    /// Fire GenerateEnd hook event after an LLM call.
2126    async fn fire_generate_end(
2127        &self,
2128        session_id: &str,
2129        prompt: &str,
2130        response: &LlmResponse,
2131        duration_ms: u64,
2132    ) {
2133        if let Some(he) = &self.config.hook_engine {
2134            let tool_calls: Vec<ToolCallInfo> = response
2135                .tool_calls()
2136                .iter()
2137                .map(|tc| {
2138                    let args = if tc.args.is_null() {
2139                        serde_json::Value::Object(Default::default())
2140                    } else {
2141                        tc.args.clone()
2142                    };
2143                    ToolCallInfo {
2144                        name: tc.name.clone(),
2145                        args,
2146                    }
2147                })
2148                .collect();
2149
2150            let event = HookEvent::GenerateEnd(GenerateEndEvent {
2151                session_id: session_id.to_string(),
2152                prompt: prompt.to_string(),
2153                response_text: response.text().to_string(),
2154                tool_calls,
2155                usage: TokenUsageInfo {
2156                    prompt_tokens: response.usage.prompt_tokens as i32,
2157                    completion_tokens: response.usage.completion_tokens as i32,
2158                    total_tokens: response.usage.total_tokens as i32,
2159                },
2160                duration_ms,
2161            });
2162            let _ = he.fire(&event).await;
2163        }
2164    }
2165
2166    /// Fire PrePrompt hook event before prompt augmentation.
2167    /// Returns optional modified prompt text from the hook.
2168    async fn fire_pre_prompt(
2169        &self,
2170        session_id: &str,
2171        prompt: &str,
2172        system_prompt: &Option<String>,
2173        message_count: usize,
2174    ) -> Option<String> {
2175        if let Some(he) = &self.config.hook_engine {
2176            let event = HookEvent::PrePrompt(PrePromptEvent {
2177                session_id: session_id.to_string(),
2178                prompt: prompt.to_string(),
2179                system_prompt: system_prompt.clone(),
2180                message_count,
2181            });
2182            let result = he.fire(&event).await;
2183            if let HookResult::Continue(Some(modified)) = result {
2184                // Extract modified prompt from hook response
2185                if let Some(new_prompt) = modified.get("prompt").and_then(|v| v.as_str()) {
2186                    return Some(new_prompt.to_string());
2187                }
2188            }
2189        }
2190        None
2191    }
2192
2193    /// Fire PostResponse hook event after the agent loop completes.
2194    async fn fire_post_response(
2195        &self,
2196        session_id: &str,
2197        response_text: &str,
2198        tool_calls_count: usize,
2199        usage: &TokenUsage,
2200        duration_ms: u64,
2201    ) {
2202        if let Some(he) = &self.config.hook_engine {
2203            let event = HookEvent::PostResponse(PostResponseEvent {
2204                session_id: session_id.to_string(),
2205                response_text: response_text.to_string(),
2206                tool_calls_count,
2207                usage: TokenUsageInfo {
2208                    prompt_tokens: usage.prompt_tokens as i32,
2209                    completion_tokens: usage.completion_tokens as i32,
2210                    total_tokens: usage.total_tokens as i32,
2211                },
2212                duration_ms,
2213            });
2214            let he = Arc::clone(he);
2215            tokio::spawn(async move {
2216                let _ = he.fire(&event).await;
2217            });
2218        }
2219    }
2220
2221    /// Fire OnError hook event when an error occurs.
2222    async fn fire_on_error(
2223        &self,
2224        session_id: &str,
2225        error_type: ErrorType,
2226        error_message: &str,
2227        context: serde_json::Value,
2228    ) {
2229        if let Some(he) = &self.config.hook_engine {
2230            let event = HookEvent::OnError(OnErrorEvent {
2231                session_id: session_id.to_string(),
2232                error_type,
2233                error_message: error_message.to_string(),
2234                context,
2235            });
2236            let he = Arc::clone(he);
2237            tokio::spawn(async move {
2238                let _ = he.fire(&event).await;
2239            });
2240        }
2241    }
2242
2243    /// Execute the agent loop for a prompt
2244    ///
2245    /// Takes the conversation history and a new user prompt.
2246    /// Returns the agent result and updated message history.
2247    /// When event_tx is provided, uses streaming LLM API for real-time text output.
2248    pub async fn execute(
2249        &self,
2250        history: &[Message],
2251        prompt: &str,
2252        event_tx: Option<mpsc::Sender<AgentEvent>>,
2253    ) -> Result<AgentResult> {
2254        self.execute_with_session(history, prompt, None, event_tx, None)
2255            .await
2256    }
2257
2258    /// Execute the agent loop with pre-built messages (user message already included).
2259    ///
2260    /// Used by `send_with_attachments` / `stream_with_attachments` where the
2261    /// user message contains multi-modal content and is already appended to
2262    /// the messages vec.
2263    pub async fn execute_from_messages(
2264        &self,
2265        messages: Vec<Message>,
2266        session_id: Option<&str>,
2267        event_tx: Option<mpsc::Sender<AgentEvent>>,
2268        cancel_token: Option<&tokio_util::sync::CancellationToken>,
2269    ) -> Result<AgentResult> {
2270        let default_token = tokio_util::sync::CancellationToken::new();
2271        let token = cancel_token.unwrap_or(&default_token);
2272        tracing::info!(
2273            a3s.session.id = session_id.unwrap_or("none"),
2274            a3s.agent.max_turns = self.config.max_tool_rounds,
2275            "a3s.agent.execute_from_messages started"
2276        );
2277
2278        // Extract the last user message text for hooks, memory recall, and events.
2279        // Pass empty prompt so execute_loop skips adding a duplicate user message,
2280        // but provide effective_prompt for hook/memory/event purposes.
2281        let effective_prompt = messages
2282            .iter()
2283            .rev()
2284            .find(|m| m.role == "user")
2285            .map(|m| m.text())
2286            .unwrap_or_default();
2287
2288        let result = self
2289            .execute_loop_inner(
2290                &messages,
2291                "",
2292                &effective_prompt,
2293                None, // no pre-computed style; resolve inside the loop
2294                session_id,
2295                event_tx,
2296                token,
2297                true, // emit_end: this is a standalone execution
2298            )
2299            .await;
2300
2301        match &result {
2302            Ok(r) => tracing::info!(
2303                a3s.agent.tool_calls_count = r.tool_calls_count,
2304                a3s.llm.total_tokens = r.usage.total_tokens,
2305                "a3s.agent.execute_from_messages completed"
2306            ),
2307            Err(e) => tracing::warn!(
2308                error = %e,
2309                "a3s.agent.execute_from_messages failed"
2310            ),
2311        }
2312
2313        result
2314    }
2315
2316    /// Execute the agent loop for a prompt with session context
2317    ///
2318    /// Takes the conversation history, user prompt, and optional session ID.
2319    /// When session_id is provided, context providers can use it for session-specific context.
2320    pub async fn execute_with_session(
2321        &self,
2322        history: &[Message],
2323        prompt: &str,
2324        session_id: Option<&str>,
2325        event_tx: Option<mpsc::Sender<AgentEvent>>,
2326        cancel_token: Option<&tokio_util::sync::CancellationToken>,
2327    ) -> Result<AgentResult> {
2328        let default_token = tokio_util::sync::CancellationToken::new();
2329        let token = cancel_token.unwrap_or(&default_token);
2330        tracing::info!(
2331            a3s.session.id = session_id.unwrap_or("none"),
2332            a3s.agent.max_turns = self.config.max_tool_rounds,
2333            "a3s.agent.execute started"
2334        );
2335
2336        // Step 1: keyword-based detection (always fast, no LLM).
2337        // Only call LLM for style classification when confidence is Low.
2338        let (keyword_style, confidence) = AgentStyle::detect_with_confidence(prompt);
2339        let effective_style = if confidence == DetectionConfidence::Low {
2340            match AgentStyle::detect_with_llm(self.llm_client.as_ref(), prompt).await {
2341                Ok(classified_style) => {
2342                    tracing::debug!(
2343                        intent.classification = ?classified_style,
2344                        intent.source = "llm",
2345                        "Intent classified via LLM"
2346                    );
2347                    classified_style
2348                }
2349                Err(e) => {
2350                    tracing::warn!(error = %e, "LLM intent classification failed, using keyword detection");
2351                    keyword_style
2352                }
2353            }
2354        } else {
2355            keyword_style
2356        };
2357
2358        // Step 2: pre-analysis — intent + goal + plan + input optimization in ONE LLM call.
2359        // Skipped for High-confidence simple queries (just keyword intent, no planning needed).
2360        let pre_analysis: Option<PreAnalysis> = {
2361            let needs_llm_prep = effective_style.requires_planning()
2362                || confidence == DetectionConfidence::Low
2363                || self.config.planning_mode == PlanningMode::Enabled;
2364
2365            if !needs_llm_prep {
2366                None
2367            } else {
2368                match LlmPlanner::pre_analyze(&self.llm_client.clone(), prompt).await {
2369                    Ok(analysis) => {
2370                        tracing::debug!(
2371                            intent = ?analysis.intent,
2372                            requires_planning = analysis.requires_planning,
2373                            plan_steps = analysis.execution_plan.steps.len(),
2374                            "Pre-analysis completed"
2375                        );
2376                        Some(analysis)
2377                    }
2378                    Err(e) => {
2379                        tracing::warn!(error = %e, "Pre-analysis failed, falling back to keyword intent");
2380                        None
2381                    }
2382                }
2383            }
2384        };
2385
2386        // Use pre-analysis style if available, otherwise use resolved style.
2387        let exec_style = pre_analysis
2388            .as_ref()
2389            .map(|a| &a.intent)
2390            .unwrap_or(&effective_style);
2391
2392        // Check if a subagent should be launched for this task
2393        if let Some((subagent_def, cleaned_prompt)) =
2394            self.should_launch_subagent(prompt, *exec_style)
2395        {
2396            tracing::info!(subagent = %subagent_def.name, "Subagent launch requested");
2397
2398            // If callback is configured, use it to handle subagent launch
2399            if let Some(ref callback) = self.config.on_subagent_launch {
2400                if let Some(result) = callback(&subagent_def, &cleaned_prompt) {
2401                    tracing::info!(subagent = %subagent_def.name, "Subagent executed successfully");
2402                    return result;
2403                }
2404            }
2405            // If callback not configured or returned None, fall through to normal execution
2406            tracing::debug!(subagent = %subagent_def.name, "No callback or callback returned None, continuing with normal execution");
2407        }
2408
2409        // Determine whether to use planning mode.
2410        // Prefer pre-analysis result if available (from the single LLM pre-analysis call).
2411        let use_planning = if let Some(ref analysis) = pre_analysis {
2412            analysis.requires_planning
2413        } else if self.config.planning_mode == PlanningMode::Auto {
2414            exec_style.requires_planning()
2415        } else {
2416            // Explicit mode: Enabled or Disabled
2417            self.config.planning_mode.should_plan(prompt)
2418        };
2419
2420        // Create agent task if task_manager is available
2421        let task_id = if let Some(ref tm) = self.task_manager {
2422            let workspace = self.tool_context.workspace.display().to_string();
2423            let task = crate::task::Task::agent("agent", &workspace, prompt);
2424            let id = task.id;
2425            tm.spawn(task);
2426            let _ = tm.start(id);
2427            Some(id)
2428        } else {
2429            None
2430        };
2431
2432        // Determine the effective prompt: use optimized input from pre-analysis if available.
2433        // Clone to avoid borrow conflict when pre_analysis is moved.
2434        let effective_prompt: String = match pre_analysis.as_ref() {
2435            Some(a) => a.optimized_input.clone(),
2436            None => prompt.to_string(),
2437        };
2438
2439        let result = if use_planning {
2440            self.execute_with_planning(history, &effective_prompt, event_tx, pre_analysis)
2441                .await
2442        } else {
2443            self.execute_loop(
2444                history,
2445                &effective_prompt,
2446                *exec_style,
2447                session_id,
2448                event_tx,
2449                token,
2450                true,
2451            )
2452            .await
2453        };
2454
2455        // Complete or fail agent task based on result
2456        if let Some(ref tm) = self.task_manager {
2457            if let Some(tid) = task_id {
2458                match &result {
2459                    Ok(r) => {
2460                        let output = serde_json::json!({
2461                            "text": r.text,
2462                            "tool_calls_count": r.tool_calls_count,
2463                            "usage": r.usage,
2464                        });
2465                        let _ = tm.complete(tid, Some(output));
2466                    }
2467                    Err(e) => {
2468                        let _ = tm.fail(tid, e.to_string());
2469                    }
2470                }
2471            }
2472        }
2473
2474        match &result {
2475            Ok(r) => {
2476                tracing::info!(
2477                    a3s.agent.tool_calls_count = r.tool_calls_count,
2478                    a3s.llm.total_tokens = r.usage.total_tokens,
2479                    "a3s.agent.execute completed"
2480                );
2481                // Fire PostResponse hook
2482                self.fire_post_response(
2483                    session_id.unwrap_or(""),
2484                    &r.text,
2485                    r.tool_calls_count,
2486                    &r.usage,
2487                    0, // duration tracked externally
2488                )
2489                .await;
2490            }
2491            Err(e) => {
2492                tracing::warn!(
2493                    error = %e,
2494                    "a3s.agent.execute failed"
2495                );
2496                // Fire OnError hook
2497                self.fire_on_error(
2498                    session_id.unwrap_or(""),
2499                    ErrorType::Other,
2500                    &e.to_string(),
2501                    serde_json::json!({"phase": "execute"}),
2502                )
2503                .await;
2504            }
2505        }
2506
2507        result
2508    }
2509
2510    /// Core execution loop (without planning routing).
2511    ///
2512    /// This is the inner loop that runs LLM calls and tool executions.
2513    /// Called directly by `execute_with_session` (after planning check)
2514    /// and by `execute_plan` (for individual steps, bypassing planning).
2515    #[allow(clippy::too_many_arguments)]
2516    async fn execute_loop(
2517        &self,
2518        history: &[Message],
2519        prompt: &str,
2520        effective_style: AgentStyle,
2521        session_id: Option<&str>,
2522        event_tx: Option<mpsc::Sender<AgentEvent>>,
2523        cancel_token: &tokio_util::sync::CancellationToken,
2524        emit_end: bool,
2525    ) -> Result<AgentResult> {
2526        // When called via execute_loop, the prompt is used for both
2527        // message-adding and hook/memory/event purposes.
2528        self.execute_loop_inner(
2529            history,
2530            prompt,
2531            prompt,
2532            Some(effective_style),
2533            session_id,
2534            event_tx,
2535            cancel_token,
2536            emit_end,
2537        )
2538        .await
2539    }
2540
2541    /// Inner execution loop.
2542    ///
2543    /// `msg_prompt` controls whether a user message is appended (empty = skip).
2544    /// `effective_prompt` is used for hooks, memory recall, taint tracking, and events.
2545    /// `effective_style` pre-computed style to skip redundant LLM-based intent detection.
2546    /// `emit_end` controls whether to send `AgentEvent::End` when the loop completes
2547    /// (should be false when called from `execute_plan` to avoid duplicate End events).
2548    #[allow(clippy::too_many_arguments)]
2549    async fn execute_loop_inner(
2550        &self,
2551        history: &[Message],
2552        msg_prompt: &str,
2553        effective_prompt: &str,
2554        effective_style: Option<AgentStyle>,
2555        session_id: Option<&str>,
2556        event_tx: Option<mpsc::Sender<AgentEvent>>,
2557        cancel_token: &tokio_util::sync::CancellationToken,
2558        emit_end: bool,
2559    ) -> Result<AgentResult> {
2560        let mut messages = history.to_vec();
2561        let mut total_usage = TokenUsage::default();
2562        let mut tool_calls_count = 0;
2563        let mut turn = 0;
2564        // Consecutive malformed-tool-args errors (4.1 parse error recovery)
2565        let mut parse_error_count: u32 = 0;
2566        // Continuation injection counter
2567        let mut continuation_count: u32 = 0;
2568        let mut recent_tool_signatures: Vec<String> = Vec::new();
2569        let style_prompt = if effective_prompt.is_empty() {
2570            msg_prompt
2571        } else {
2572            effective_prompt
2573        };
2574        let effective_style = match effective_style {
2575            Some(s) => s,
2576            None => self.resolve_effective_style(style_prompt).await,
2577        };
2578        let effective_system_prompt = self.system_prompt_for_style(effective_style);
2579        if let Some(tx) = &event_tx {
2580            tx.send(AgentEvent::AgentModeChanged {
2581                mode: effective_style.runtime_mode().to_string(),
2582                agent: effective_style.builtin_agent_name().to_string(),
2583                description: effective_style.description().to_string(),
2584            })
2585            .await
2586            .ok();
2587        }
2588
2589        // Send start event
2590        if let Some(tx) = &event_tx {
2591            tx.send(AgentEvent::Start {
2592                prompt: effective_prompt.to_string(),
2593            })
2594            .await
2595            .ok();
2596        }
2597
2598        // Forward queue events (CommandDeadLettered, CommandRetry, QueueAlert) to event stream
2599        let _queue_forward_handle =
2600            if let (Some(ref queue), Some(ref tx)) = (&self.command_queue, &event_tx) {
2601                let mut rx = queue.subscribe();
2602                let tx = tx.clone();
2603                Some(tokio::spawn(async move {
2604                    while let Ok(event) = rx.recv().await {
2605                        if tx.send(event).await.is_err() {
2606                            break;
2607                        }
2608                    }
2609                }))
2610            } else {
2611                None
2612            };
2613
2614        // Fire PrePrompt hook (may modify the prompt)
2615        let built_system_prompt = Some(effective_system_prompt.clone());
2616        let hooked_prompt = if let Some(modified) = self
2617            .fire_pre_prompt(
2618                session_id.unwrap_or(""),
2619                effective_prompt,
2620                &built_system_prompt,
2621                messages.len(),
2622            )
2623            .await
2624        {
2625            modified
2626        } else {
2627            effective_prompt.to_string()
2628        };
2629        let effective_prompt = hooked_prompt.as_str();
2630
2631        // Taint-track the incoming prompt for sensitive data detection
2632        if let Some(ref sp) = self.config.security_provider {
2633            sp.taint_input(effective_prompt);
2634        }
2635
2636        // Recall relevant memories and inject into system prompt
2637        let system_with_memory = if let Some(ref memory) = self.config.memory {
2638            match memory.recall_similar(effective_prompt, 5).await {
2639                Ok(items) if !items.is_empty() => {
2640                    if let Some(tx) = &event_tx {
2641                        for item in &items {
2642                            tx.send(AgentEvent::MemoryRecalled {
2643                                memory_id: item.id.clone(),
2644                                content: item.content.clone(),
2645                                relevance: item.relevance_score(),
2646                            })
2647                            .await
2648                            .ok();
2649                        }
2650                        tx.send(AgentEvent::MemoriesSearched {
2651                            query: Some(effective_prompt.to_string()),
2652                            tags: Vec::new(),
2653                            result_count: items.len(),
2654                        })
2655                        .await
2656                        .ok();
2657                    }
2658                    let memory_context = items
2659                        .iter()
2660                        .map(|i| format!("- {}", i.content))
2661                        .collect::<Vec<_>>()
2662                        .join(
2663                            "
2664",
2665                        );
2666                    let base = effective_system_prompt.clone();
2667                    Some(format!(
2668                        "{}
2669
2670## Relevant past experience
2671{}",
2672                        base, memory_context
2673                    ))
2674                }
2675                _ => Some(effective_system_prompt.clone()),
2676            }
2677        } else {
2678            Some(effective_system_prompt.clone())
2679        };
2680
2681        // Resolve context from providers on first turn (before adding user message)
2682        // Intent-driven: detect context perception need first via AHP harness, then fire hook
2683        let workspace = self.tool_context.workspace.display().to_string();
2684        let session_id_str = session_id.unwrap_or("");
2685        let context_results = if !self.config.context_providers.is_empty() {
2686            // Step 1: Fire IntentDetection harness point on EVERY prompt
2687            #[allow(clippy::needless_borrow)]
2688            let harness_intent = self
2689                .fire_intent_detection(effective_prompt, &session_id_str, &workspace)
2690                .await;
2691
2692            // Step 2: Build perception event from harness result, or fallback to local detection
2693            #[allow(clippy::needless_borrow)]
2694            let perception_event = if let Some(detected) = harness_intent {
2695                tracing::info!(
2696                    intent = %detected.detected_intent,
2697                    confidence = %detected.confidence,
2698                    "Intent detected from AHP harness"
2699                );
2700                Some(build_pre_context_perception_from_intent(
2701                    detected,
2702                    effective_prompt,
2703                    &session_id_str,
2704                    &workspace,
2705                ))
2706            } else {
2707                // Fallback to local keyword detection
2708                tracing::debug!("No intent from harness, using local keyword detection");
2709                self.detect_context_perception_intent(effective_prompt, &session_id_str, &workspace)
2710            };
2711
2712            if let Some(perception_event) = perception_event {
2713                // Step 3: Fire PreContextPerception hook to get harness decision
2714                tracing::info!(
2715                    intent = %perception_event.intent,
2716                    target_type = %perception_event.target_type,
2717                    "Context perception intent detected, firing AHP hook"
2718                );
2719
2720                let hook_result = self.fire_pre_context_perception(&perception_event).await;
2721
2722                match hook_result {
2723                    HookResult::Continue(Some(modified_context)) => {
2724                        // AHP harness returned injected context - parse and use it
2725                        #[cfg(feature = "ahp")]
2726                        {
2727                            if let Ok(injected) =
2728                                serde_json::from_value::<InjectedContext>(modified_context)
2729                            {
2730                                tracing::info!(
2731                                    facts = injected.facts.len(),
2732                                    "Using injected context from AHP harness"
2733                                );
2734                                self.apply_injected_context(injected)
2735                            } else {
2736                                // Fall back to normal providers if parsing fails
2737                                tracing::warn!(
2738                                    "Failed to parse injected context, falling back to providers"
2739                                );
2740                                self.resolve_context(effective_prompt, session_id).await
2741                            }
2742                        }
2743                        #[cfg(not(feature = "ahp"))]
2744                        {
2745                            // Without AHP, fall back to normal providers
2746                            let _ = modified_context; // suppress unused warning
2747                            self.resolve_context(effective_prompt, session_id).await
2748                        }
2749                    }
2750                    HookResult::Block(_) => {
2751                        // Harness blocked context injection - skip
2752                        tracing::info!("AHP harness blocked context injection");
2753                        Vec::new()
2754                    }
2755                    _ => {
2756                        // No modification or unknown result, proceed with normal providers
2757                        self.resolve_context(effective_prompt, session_id).await
2758                    }
2759                }
2760            } else {
2761                // No intent detected, proceed with normal providers
2762                self.resolve_context(effective_prompt, session_id).await
2763            }
2764        } else {
2765            Vec::new()
2766        };
2767
2768        // Send context resolved event
2769        if let Some(tx) = &event_tx {
2770            let total_items: usize = context_results.iter().map(|r| r.items.len()).sum();
2771            let total_tokens: usize = context_results.iter().map(|r| r.total_tokens).sum();
2772
2773            tracing::info!(
2774                context_items = total_items,
2775                context_tokens = total_tokens,
2776                "Context resolution completed"
2777            );
2778
2779            tx.send(AgentEvent::ContextResolved {
2780                total_items,
2781                total_tokens,
2782            })
2783            .await
2784            .ok();
2785        }
2786
2787        let augmented_system = self
2788            .build_augmented_system_prompt_with_base(&effective_system_prompt, &context_results);
2789
2790        // Merge memory context into system prompt
2791        let base_prompt = effective_system_prompt.clone();
2792        let augmented_system = match (augmented_system, system_with_memory) {
2793            (Some(ctx), Some(mem)) if ctx != mem => Some(ctx.replacen(&base_prompt, &mem, 1)),
2794            (Some(ctx), _) => Some(ctx),
2795            (None, mem) => mem,
2796        };
2797
2798        // Add user message
2799        if !msg_prompt.is_empty() {
2800            messages.push(Message::user(msg_prompt));
2801        }
2802
2803        loop {
2804            turn += 1;
2805
2806            if turn > self.config.max_tool_rounds {
2807                let error = format!("Max tool rounds ({}) exceeded", self.config.max_tool_rounds);
2808                if let Some(tx) = &event_tx {
2809                    tx.send(AgentEvent::Error {
2810                        message: error.clone(),
2811                    })
2812                    .await
2813                    .ok();
2814                }
2815                anyhow::bail!(error);
2816            }
2817
2818            // Send turn start event
2819            if let Some(tx) = &event_tx {
2820                tx.send(AgentEvent::TurnStart { turn }).await.ok();
2821            }
2822
2823            tracing::info!(
2824                turn = turn,
2825                max_turns = self.config.max_tool_rounds,
2826                "Agent turn started"
2827            );
2828
2829            // Call LLM - use streaming if we have an event channel
2830            tracing::info!(
2831                a3s.llm.streaming = event_tx.is_some(),
2832                "LLM completion started"
2833            );
2834
2835            // Fire GenerateStart hook
2836            self.fire_generate_start(
2837                session_id.unwrap_or(""),
2838                effective_prompt,
2839                &augmented_system,
2840            )
2841            .await;
2842
2843            let llm_start = std::time::Instant::now();
2844            // Circuit breaker (4.3): retry non-streaming LLM calls on transient failures.
2845            // Each failure increments `consecutive_llm_errors`; on success it resets to 0.
2846            // Streaming mode bails immediately on failure (events can't be replayed).
2847            let response = {
2848                let threshold = self.config.circuit_breaker_threshold.max(1);
2849                let mut attempt = 0u32;
2850                loop {
2851                    attempt += 1;
2852                    let result = self
2853                        .call_llm(
2854                            &messages,
2855                            augmented_system.as_deref(),
2856                            &event_tx,
2857                            cancel_token,
2858                        )
2859                        .await;
2860                    match result {
2861                        Ok(r) => {
2862                            break r;
2863                        }
2864                        // Never retry if cancelled
2865                        Err(e) if cancel_token.is_cancelled() => {
2866                            anyhow::bail!(e);
2867                        }
2868                        // Retry when: non-streaming under threshold, OR first streaming attempt
2869                        Err(e) if attempt < threshold && (event_tx.is_none() || attempt == 1) => {
2870                            tracing::warn!(
2871                                turn = turn,
2872                                attempt = attempt,
2873                                threshold = threshold,
2874                                error = %e,
2875                                "LLM call failed, will retry"
2876                            );
2877                            tokio::time::sleep(Duration::from_millis(100 * attempt as u64)).await;
2878                        }
2879                        // Threshold exceeded or streaming mid-stream: bail
2880                        Err(e) => {
2881                            let msg = if attempt > 1 {
2882                                format!(
2883                                    "LLM circuit breaker triggered: failed after {} attempt(s): {}",
2884                                    attempt, e
2885                                )
2886                            } else {
2887                                format!("LLM call failed: {}", e)
2888                            };
2889                            tracing::error!(turn = turn, attempt = attempt, "{}", msg);
2890                            // Fire OnError hook for LLM failure
2891                            self.fire_on_error(
2892                                session_id.unwrap_or(""),
2893                                ErrorType::LlmFailure,
2894                                &msg,
2895                                serde_json::json!({"turn": turn, "attempt": attempt}),
2896                            )
2897                            .await;
2898                            if let Some(tx) = &event_tx {
2899                                tx.send(AgentEvent::Error {
2900                                    message: msg.clone(),
2901                                })
2902                                .await
2903                                .ok();
2904                            }
2905                            anyhow::bail!(msg);
2906                        }
2907                    }
2908                }
2909            };
2910
2911            // Update usage
2912            total_usage.prompt_tokens += response.usage.prompt_tokens;
2913            total_usage.completion_tokens += response.usage.completion_tokens;
2914            total_usage.total_tokens += response.usage.total_tokens;
2915
2916            // Track token usage in progress tracker
2917            if let Some(ref tracker) = self.progress_tracker {
2918                let token_usage = crate::task::TaskTokenUsage {
2919                    input_tokens: response.usage.prompt_tokens as u64,
2920                    output_tokens: response.usage.completion_tokens as u64,
2921                    cache_read_tokens: response.usage.cache_read_tokens.unwrap_or(0) as u64,
2922                    cache_write_tokens: response.usage.cache_write_tokens.unwrap_or(0) as u64,
2923                };
2924                if let Ok(mut guard) = tracker.try_write() {
2925                    guard.track_tokens(token_usage);
2926                }
2927            }
2928
2929            // Record LLM completion telemetry
2930            let llm_duration = llm_start.elapsed();
2931            tracing::info!(
2932                turn = turn,
2933                streaming = event_tx.is_some(),
2934                prompt_tokens = response.usage.prompt_tokens,
2935                completion_tokens = response.usage.completion_tokens,
2936                total_tokens = response.usage.total_tokens,
2937                stop_reason = response.stop_reason.as_deref().unwrap_or("unknown"),
2938                duration_ms = llm_duration.as_millis() as u64,
2939                "LLM completion finished"
2940            );
2941
2942            // Fire GenerateEnd hook
2943            self.fire_generate_end(
2944                session_id.unwrap_or(""),
2945                effective_prompt,
2946                &response,
2947                llm_duration.as_millis() as u64,
2948            )
2949            .await;
2950
2951            // Record LLM usage on the llm span
2952            crate::telemetry::record_llm_usage(
2953                response.usage.prompt_tokens,
2954                response.usage.completion_tokens,
2955                response.usage.total_tokens,
2956                response.stop_reason.as_deref(),
2957            );
2958            // Log turn token usage
2959            tracing::info!(
2960                turn = turn,
2961                a3s.llm.total_tokens = response.usage.total_tokens,
2962                "Turn token usage"
2963            );
2964
2965            // Add assistant message to history
2966            messages.push(response.message.clone());
2967
2968            // Check for tool calls
2969            let tool_calls = response.tool_calls();
2970
2971            // Send turn end event
2972            if let Some(tx) = &event_tx {
2973                tx.send(AgentEvent::TurnEnd {
2974                    turn,
2975                    usage: response.usage.clone(),
2976                })
2977                .await
2978                .ok();
2979            }
2980
2981            // Auto-compact: check if context usage exceeds threshold
2982            if self.config.auto_compact {
2983                let used = response.usage.prompt_tokens;
2984                let max = self.config.max_context_tokens;
2985                let threshold = self.config.auto_compact_threshold;
2986
2987                if crate::session::compaction::should_auto_compact(used, max, threshold) {
2988                    let before_len = messages.len();
2989                    let percent_before = used as f32 / max as f32;
2990
2991                    tracing::info!(
2992                        used_tokens = used,
2993                        max_tokens = max,
2994                        percent = percent_before,
2995                        threshold = threshold,
2996                        "Auto-compact triggered"
2997                    );
2998
2999                    // Step 1: Prune large tool outputs first (cheap, no LLM call)
3000                    if let Some(pruned) = crate::session::compaction::prune_tool_outputs(&messages)
3001                    {
3002                        messages = pruned;
3003                        tracing::info!("Tool output pruning applied");
3004                    }
3005
3006                    // Step 2: Full summarization using the agent's LLM client
3007                    if let Ok(Some(compacted)) = crate::session::compaction::compact_messages(
3008                        session_id.unwrap_or(""),
3009                        &messages,
3010                        &self.llm_client,
3011                    )
3012                    .await
3013                    {
3014                        messages = compacted;
3015                    }
3016
3017                    // Emit compaction event
3018                    if let Some(tx) = &event_tx {
3019                        tx.send(AgentEvent::ContextCompacted {
3020                            session_id: session_id.unwrap_or("").to_string(),
3021                            before_messages: before_len,
3022                            after_messages: messages.len(),
3023                            percent_before,
3024                        })
3025                        .await
3026                        .ok();
3027                    }
3028                }
3029            }
3030
3031            if tool_calls.is_empty() {
3032                // No tool calls — check if we should inject a continuation message
3033                // before treating this as a final answer.
3034                let final_text = response.text();
3035
3036                if self.config.continuation_enabled
3037                    && continuation_count < self.config.max_continuation_turns
3038                    && turn < self.config.max_tool_rounds  // never inject past the turn limit
3039                    && Self::looks_incomplete(&final_text)
3040                {
3041                    continuation_count += 1;
3042                    tracing::info!(
3043                        turn = turn,
3044                        continuation = continuation_count,
3045                        max_continuation = self.config.max_continuation_turns,
3046                        "Injecting continuation message — response looks incomplete"
3047                    );
3048                    // Inject continuation as a user message and keep looping
3049                    messages.push(Message::user(crate::prompts::CONTINUATION));
3050                    continue;
3051                }
3052
3053                // Sanitize output to redact any sensitive data before returning
3054                let final_text = if let Some(ref sp) = self.config.security_provider {
3055                    sp.sanitize_output(&final_text)
3056                } else {
3057                    final_text
3058                };
3059
3060                // Record final totals
3061                tracing::info!(
3062                    tool_calls_count = tool_calls_count,
3063                    total_prompt_tokens = total_usage.prompt_tokens,
3064                    total_completion_tokens = total_usage.completion_tokens,
3065                    total_tokens = total_usage.total_tokens,
3066                    turns = turn,
3067                    "Agent execution completed"
3068                );
3069
3070                if emit_end {
3071                    if let Some(tx) = &event_tx {
3072                        tx.send(AgentEvent::End {
3073                            text: final_text.clone(),
3074                            usage: total_usage.clone(),
3075                            meta: response.meta.clone(),
3076                        })
3077                        .await
3078                        .ok();
3079                    }
3080                }
3081
3082                // Notify context providers of turn completion for memory extraction
3083                if let Some(sid) = session_id {
3084                    self.notify_turn_complete(sid, effective_prompt, &final_text)
3085                        .await;
3086                }
3087
3088                return Ok(AgentResult {
3089                    text: final_text,
3090                    messages,
3091                    usage: total_usage,
3092                    tool_calls_count,
3093                });
3094            }
3095
3096            // Execute tools sequentially
3097            // Fast path: when all tool calls are independent file writes and no hooks/HITL
3098            // are configured, execute them concurrently to avoid serial I/O bottleneck.
3099            let tool_calls = if self.config.hook_engine.is_none()
3100                && self.config.confirmation_manager.is_none()
3101                && tool_calls.len() > 1
3102                && tool_calls
3103                    .iter()
3104                    .all(|tc| Self::is_parallel_safe_write(&tc.name, &tc.args))
3105                && {
3106                    // All target paths must be distinct (no write-write conflicts)
3107                    let paths: Vec<_> = tool_calls
3108                        .iter()
3109                        .filter_map(|tc| Self::extract_write_path(&tc.args))
3110                        .collect();
3111                    paths.len() == tool_calls.len()
3112                        && paths.iter().collect::<std::collections::HashSet<_>>().len()
3113                            == paths.len()
3114                } {
3115                tracing::info!(
3116                    count = tool_calls.len(),
3117                    "Parallel write batch: executing {} independent file writes concurrently",
3118                    tool_calls.len()
3119                );
3120
3121                let futures: Vec<_> = tool_calls
3122                    .iter()
3123                    .map(|tc| {
3124                        let ctx = self.tool_context.clone();
3125                        let executor = Arc::clone(&self.tool_executor);
3126                        let name = tc.name.clone();
3127                        let args = tc.args.clone();
3128                        async move { executor.execute_with_context(&name, &args, &ctx).await }
3129                    })
3130                    .collect();
3131
3132                let results = join_all(futures).await;
3133
3134                // Post-process results in original order (sequential, preserves message ordering)
3135                for (tc, result) in tool_calls.iter().zip(results) {
3136                    tool_calls_count += 1;
3137                    let (output, exit_code, is_error, metadata, images) =
3138                        Self::tool_result_to_tuple(result);
3139
3140                    // Track tool call in progress tracker
3141                    self.track_tool_result(&tc.name, &tc.args, exit_code);
3142
3143                    let output = if let Some(ref sp) = self.config.security_provider {
3144                        sp.sanitize_output(&output)
3145                    } else {
3146                        output
3147                    };
3148
3149                    if let Some(tx) = &event_tx {
3150                        tx.send(AgentEvent::ToolEnd {
3151                            id: tc.id.clone(),
3152                            name: tc.name.clone(),
3153                            output: output.clone(),
3154                            exit_code,
3155                            metadata,
3156                        })
3157                        .await
3158                        .ok();
3159                    }
3160
3161                    if images.is_empty() {
3162                        messages.push(Message::tool_result(&tc.id, &output, is_error));
3163                    } else {
3164                        messages.push(Message::tool_result_with_images(
3165                            &tc.id, &output, &images, is_error,
3166                        ));
3167                    }
3168                }
3169
3170                // Skip the sequential loop below
3171                continue;
3172            } else {
3173                tool_calls
3174            };
3175
3176            for tool_call in tool_calls {
3177                tool_calls_count += 1;
3178
3179                let tool_start = std::time::Instant::now();
3180
3181                tracing::info!(
3182                    tool_name = tool_call.name.as_str(),
3183                    tool_id = tool_call.id.as_str(),
3184                    "Tool execution started"
3185                );
3186
3187                // Send tool start event (only if not already sent during streaming)
3188                // In streaming mode, ToolStart is sent when we receive ToolUseStart from LLM
3189                // But we still need to send ToolEnd after execution
3190
3191                // Check for malformed tool arguments from LLM (4.1 parse error recovery)
3192                if let Some(parse_error) =
3193                    tool_call.args.get("__parse_error").and_then(|v| v.as_str())
3194                {
3195                    parse_error_count += 1;
3196                    let error_msg = format!("Error: {}", parse_error);
3197                    tracing::warn!(
3198                        tool = tool_call.name.as_str(),
3199                        parse_error_count = parse_error_count,
3200                        max_parse_retries = self.config.max_parse_retries,
3201                        "Malformed tool arguments from LLM"
3202                    );
3203
3204                    if let Some(tx) = &event_tx {
3205                        tx.send(AgentEvent::ToolEnd {
3206                            id: tool_call.id.clone(),
3207                            name: tool_call.name.clone(),
3208                            output: error_msg.clone(),
3209                            exit_code: 1,
3210                            metadata: None,
3211                        })
3212                        .await
3213                        .ok();
3214                    }
3215
3216                    messages.push(Message::tool_result(&tool_call.id, &error_msg, true));
3217
3218                    if parse_error_count > self.config.max_parse_retries {
3219                        let msg = format!(
3220                            "LLM produced malformed tool arguments {} time(s) in a row \
3221                             (max_parse_retries={}); giving up",
3222                            parse_error_count, self.config.max_parse_retries
3223                        );
3224                        tracing::error!("{}", msg);
3225                        if let Some(tx) = &event_tx {
3226                            tx.send(AgentEvent::Error {
3227                                message: msg.clone(),
3228                            })
3229                            .await
3230                            .ok();
3231                        }
3232                        anyhow::bail!(msg);
3233                    }
3234                    continue;
3235                }
3236
3237                // Tool args are valid — reset parse error counter
3238                parse_error_count = 0;
3239
3240                // Check skill-based tool permissions
3241                if let Some(ref registry) = self.config.skill_registry {
3242                    let instruction_skills =
3243                        registry.by_kind(crate::skills::SkillKind::Instruction);
3244                    let has_restrictions =
3245                        instruction_skills.iter().any(|s| s.allowed_tools.is_some());
3246                    if has_restrictions {
3247                        let allowed = instruction_skills
3248                            .iter()
3249                            .any(|s| s.is_tool_allowed(&tool_call.name));
3250                        if !allowed {
3251                            let msg = format!(
3252                                "Tool '{}' is not allowed by any active skill.",
3253                                tool_call.name
3254                            );
3255                            tracing::info!(
3256                                tool_name = tool_call.name.as_str(),
3257                                "Tool blocked by skill registry"
3258                            );
3259                            if let Some(tx) = &event_tx {
3260                                tx.send(AgentEvent::PermissionDenied {
3261                                    tool_id: tool_call.id.clone(),
3262                                    tool_name: tool_call.name.clone(),
3263                                    args: tool_call.args.clone(),
3264                                    reason: msg.clone(),
3265                                })
3266                                .await
3267                                .ok();
3268                            }
3269                            messages.push(Message::tool_result(&tool_call.id, &msg, true));
3270                            continue;
3271                        }
3272                    }
3273                }
3274
3275                // Fire PreToolUse hook (may block the tool call)
3276                if let Some(HookResult::Block(reason)) = self
3277                    .fire_pre_tool_use(
3278                        session_id.unwrap_or(""),
3279                        &tool_call.name,
3280                        &tool_call.args,
3281                        recent_tool_signatures.clone(),
3282                    )
3283                    .await
3284                {
3285                    let msg = format!("Tool '{}' blocked by hook: {}", tool_call.name, reason);
3286                    tracing::info!(
3287                        tool_name = tool_call.name.as_str(),
3288                        "Tool blocked by PreToolUse hook"
3289                    );
3290
3291                    if let Some(tx) = &event_tx {
3292                        tx.send(AgentEvent::PermissionDenied {
3293                            tool_id: tool_call.id.clone(),
3294                            tool_name: tool_call.name.clone(),
3295                            args: tool_call.args.clone(),
3296                            reason: reason.clone(),
3297                        })
3298                        .await
3299                        .ok();
3300                    }
3301
3302                    messages.push(Message::tool_result(&tool_call.id, &msg, true));
3303                    continue;
3304                }
3305
3306                // Check permission before executing tool
3307                let permission_decision = if let Some(checker) = &self.config.permission_checker {
3308                    checker.check(&tool_call.name, &tool_call.args)
3309                } else {
3310                    // No policy configured — default to Ask so HITL can still intervene
3311                    PermissionDecision::Ask
3312                };
3313
3314                let (output, exit_code, is_error, metadata, images) = match permission_decision {
3315                    PermissionDecision::Deny => {
3316                        tracing::info!(
3317                            tool_name = tool_call.name.as_str(),
3318                            permission = "deny",
3319                            "Tool permission denied"
3320                        );
3321                        // Tool execution denied by permission policy
3322                        let denial_msg = format!(
3323                            "Permission denied: Tool '{}' is blocked by permission policy.",
3324                            tool_call.name
3325                        );
3326
3327                        // Send permission denied event
3328                        if let Some(tx) = &event_tx {
3329                            tx.send(AgentEvent::PermissionDenied {
3330                                tool_id: tool_call.id.clone(),
3331                                tool_name: tool_call.name.clone(),
3332                                args: tool_call.args.clone(),
3333                                reason: "Blocked by deny rule in permission policy".to_string(),
3334                            })
3335                            .await
3336                            .ok();
3337                        }
3338
3339                        (denial_msg, 1, true, None, Vec::new())
3340                    }
3341                    PermissionDecision::Allow => {
3342                        tracing::info!(
3343                            tool_name = tool_call.name.as_str(),
3344                            permission = "allow",
3345                            "Tool permission: allow"
3346                        );
3347                        // Permission explicitly allows — execute directly, no HITL
3348                        let stream_ctx =
3349                            self.streaming_tool_context(&event_tx, &tool_call.id, &tool_call.name);
3350                        let result = self
3351                            .execute_tool_queued_or_direct(
3352                                &tool_call.name,
3353                                &tool_call.args,
3354                                &stream_ctx,
3355                            )
3356                            .await;
3357
3358                        let tuple = Self::tool_result_to_tuple(result);
3359                        // Track tool call in progress tracker
3360                        let (_, exit_code, _, _, _) = tuple;
3361                        self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
3362                        tuple
3363                    }
3364                    PermissionDecision::Ask => {
3365                        tracing::info!(
3366                            tool_name = tool_call.name.as_str(),
3367                            permission = "ask",
3368                            "Tool permission: ask"
3369                        );
3370                        // Permission says Ask — delegate to HITL confirmation manager
3371                        if let Some(cm) = &self.config.confirmation_manager {
3372                            // Check YOLO lanes: if the tool's lane is in YOLO mode, skip confirmation
3373                            if !cm.requires_confirmation(&tool_call.name).await {
3374                                let stream_ctx = self.streaming_tool_context(
3375                                    &event_tx,
3376                                    &tool_call.id,
3377                                    &tool_call.name,
3378                                );
3379                                let result = self
3380                                    .execute_tool_queued_or_direct(
3381                                        &tool_call.name,
3382                                        &tool_call.args,
3383                                        &stream_ctx,
3384                                    )
3385                                    .await;
3386
3387                                let (output, exit_code, is_error, metadata, images) =
3388                                    Self::tool_result_to_tuple(result);
3389
3390                                // Track tool call in progress tracker
3391                                self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
3392
3393                                // Add tool result to messages
3394                                if images.is_empty() {
3395                                    messages.push(Message::tool_result(
3396                                        &tool_call.id,
3397                                        &output,
3398                                        is_error,
3399                                    ));
3400                                } else {
3401                                    messages.push(Message::tool_result_with_images(
3402                                        &tool_call.id,
3403                                        &output,
3404                                        &images,
3405                                        is_error,
3406                                    ));
3407                                }
3408
3409                                // Record tool result on the tool span for early exit
3410                                let tool_duration = tool_start.elapsed();
3411                                crate::telemetry::record_tool_result(exit_code, tool_duration);
3412
3413                                // Send ToolEnd event
3414                                if let Some(tx) = &event_tx {
3415                                    tx.send(AgentEvent::ToolEnd {
3416                                        id: tool_call.id.clone(),
3417                                        name: tool_call.name.clone(),
3418                                        output: output.clone(),
3419                                        exit_code,
3420                                        metadata,
3421                                    })
3422                                    .await
3423                                    .ok();
3424                                }
3425
3426                                // Fire PostToolUse hook (fire-and-forget)
3427                                self.fire_post_tool_use(
3428                                    session_id.unwrap_or(""),
3429                                    &tool_call.name,
3430                                    &tool_call.args,
3431                                    &output,
3432                                    exit_code == 0,
3433                                    tool_duration.as_millis() as u64,
3434                                )
3435                                .await;
3436
3437                                continue; // Skip the rest, move to next tool call
3438                            }
3439
3440                            // Get timeout from policy
3441                            let policy = cm.policy().await;
3442                            let timeout_ms = policy.default_timeout_ms;
3443                            let timeout_action = policy.timeout_action;
3444
3445                            // Request confirmation (this emits ConfirmationRequired event)
3446                            let rx = cm
3447                                .request_confirmation(
3448                                    &tool_call.id,
3449                                    &tool_call.name,
3450                                    &tool_call.args,
3451                                )
3452                                .await;
3453
3454                            // Forward ConfirmationRequired to the streaming event channel
3455                            // so external consumers (e.g. SafeClaw engine) can relay it
3456                            // to the browser UI.
3457                            if let Some(tx) = &event_tx {
3458                                tx.send(AgentEvent::ConfirmationRequired {
3459                                    tool_id: tool_call.id.clone(),
3460                                    tool_name: tool_call.name.clone(),
3461                                    args: tool_call.args.clone(),
3462                                    timeout_ms,
3463                                })
3464                                .await
3465                                .ok();
3466                            }
3467
3468                            // Wait for confirmation with timeout
3469                            let confirmation_result =
3470                                tokio::time::timeout(Duration::from_millis(timeout_ms), rx).await;
3471
3472                            match confirmation_result {
3473                                Ok(Ok(response)) => {
3474                                    // Forward ConfirmationReceived
3475                                    if let Some(tx) = &event_tx {
3476                                        tx.send(AgentEvent::ConfirmationReceived {
3477                                            tool_id: tool_call.id.clone(),
3478                                            approved: response.approved,
3479                                            reason: response.reason.clone(),
3480                                        })
3481                                        .await
3482                                        .ok();
3483                                    }
3484                                    if response.approved {
3485                                        let stream_ctx = self.streaming_tool_context(
3486                                            &event_tx,
3487                                            &tool_call.id,
3488                                            &tool_call.name,
3489                                        );
3490                                        let result = self
3491                                            .execute_tool_queued_or_direct(
3492                                                &tool_call.name,
3493                                                &tool_call.args,
3494                                                &stream_ctx,
3495                                            )
3496                                            .await;
3497
3498                                        let tuple = Self::tool_result_to_tuple(result);
3499                                        // Track tool call in progress tracker
3500                                        let (_, exit_code, _, _, _) = tuple;
3501                                        self.track_tool_result(
3502                                            &tool_call.name,
3503                                            &tool_call.args,
3504                                            exit_code,
3505                                        );
3506                                        tuple
3507                                    } else {
3508                                        let rejection_msg = format!(
3509                                            "Tool '{}' execution was REJECTED by the user. Reason: {}. \
3510                                             DO NOT retry this tool call unless the user explicitly asks you to.",
3511                                            tool_call.name,
3512                                            response.reason.unwrap_or_else(|| "No reason provided".to_string())
3513                                        );
3514                                        (rejection_msg, 1, true, None, Vec::new())
3515                                    }
3516                                }
3517                                Ok(Err(_)) => {
3518                                    // Forward ConfirmationTimeout (channel closed = effectively timed out)
3519                                    if let Some(tx) = &event_tx {
3520                                        tx.send(AgentEvent::ConfirmationTimeout {
3521                                            tool_id: tool_call.id.clone(),
3522                                            action_taken: "rejected".to_string(),
3523                                        })
3524                                        .await
3525                                        .ok();
3526                                    }
3527                                    let msg = format!(
3528                                        "Tool '{}' confirmation failed: confirmation channel closed",
3529                                        tool_call.name
3530                                    );
3531                                    (msg, 1, true, None, Vec::new())
3532                                }
3533                                Err(_) => {
3534                                    cm.check_timeouts().await;
3535
3536                                    // Forward ConfirmationTimeout
3537                                    if let Some(tx) = &event_tx {
3538                                        tx.send(AgentEvent::ConfirmationTimeout {
3539                                            tool_id: tool_call.id.clone(),
3540                                            action_taken: match timeout_action {
3541                                                crate::hitl::TimeoutAction::Reject => {
3542                                                    "rejected".to_string()
3543                                                }
3544                                                crate::hitl::TimeoutAction::AutoApprove => {
3545                                                    "auto_approved".to_string()
3546                                                }
3547                                            },
3548                                        })
3549                                        .await
3550                                        .ok();
3551                                    }
3552
3553                                    match timeout_action {
3554                                        crate::hitl::TimeoutAction::Reject => {
3555                                            let msg = format!(
3556                                                "Tool '{}' execution was REJECTED: user confirmation timed out after {}ms. \
3557                                                 DO NOT retry this tool call — the user did not approve it. \
3558                                                 Inform the user that the operation requires their approval and ask them to try again.",
3559                                                tool_call.name, timeout_ms
3560                                            );
3561                                            (msg, 1, true, None, Vec::new())
3562                                        }
3563                                        crate::hitl::TimeoutAction::AutoApprove => {
3564                                            let stream_ctx = self.streaming_tool_context(
3565                                                &event_tx,
3566                                                &tool_call.id,
3567                                                &tool_call.name,
3568                                            );
3569                                            let result = self
3570                                                .execute_tool_queued_or_direct(
3571                                                    &tool_call.name,
3572                                                    &tool_call.args,
3573                                                    &stream_ctx,
3574                                                )
3575                                                .await;
3576
3577                                            let tuple = Self::tool_result_to_tuple(result);
3578                                            // Track tool call in progress tracker
3579                                            let (_, exit_code, _, _, _) = tuple;
3580                                            self.track_tool_result(
3581                                                &tool_call.name,
3582                                                &tool_call.args,
3583                                                exit_code,
3584                                            );
3585                                            tuple
3586                                        }
3587                                    }
3588                                }
3589                            }
3590                        } else {
3591                            // Ask without confirmation manager — safe deny
3592                            let msg = format!(
3593                                "Tool '{}' requires confirmation but no HITL confirmation manager is configured. \
3594                                 Configure a confirmation policy to enable tool execution.",
3595                                tool_call.name
3596                            );
3597                            tracing::warn!(
3598                                tool_name = tool_call.name.as_str(),
3599                                "Tool requires confirmation but no HITL manager configured"
3600                            );
3601                            (msg, 1, true, None, Vec::new())
3602                        }
3603                    }
3604                };
3605
3606                let tool_duration = tool_start.elapsed();
3607                crate::telemetry::record_tool_result(exit_code, tool_duration);
3608
3609                // Sanitize tool output for sensitive data before it enters the message history
3610                let output = if let Some(ref sp) = self.config.security_provider {
3611                    sp.sanitize_output(&output)
3612                } else {
3613                    output
3614                };
3615
3616                recent_tool_signatures.push(format!(
3617                    "{}:{} => {}",
3618                    tool_call.name,
3619                    serde_json::to_string(&tool_call.args).unwrap_or_default(),
3620                    if is_error { "error" } else { "ok" }
3621                ));
3622                if recent_tool_signatures.len() > 8 {
3623                    let overflow = recent_tool_signatures.len() - 8;
3624                    recent_tool_signatures.drain(0..overflow);
3625                }
3626
3627                // Fire PostToolUse hook (fire-and-forget)
3628                self.fire_post_tool_use(
3629                    session_id.unwrap_or(""),
3630                    &tool_call.name,
3631                    &tool_call.args,
3632                    &output,
3633                    exit_code == 0,
3634                    tool_duration.as_millis() as u64,
3635                )
3636                .await;
3637
3638                // Auto-remember tool result in long-term memory
3639                if let Some(ref memory) = self.config.memory {
3640                    let tools_used = [tool_call.name.clone()];
3641                    let remember_result = if exit_code == 0 {
3642                        memory
3643                            .remember_success(effective_prompt, &tools_used, &output)
3644                            .await
3645                    } else {
3646                        memory
3647                            .remember_failure(effective_prompt, &output, &tools_used)
3648                            .await
3649                    };
3650                    match remember_result {
3651                        Ok(()) => {
3652                            if let Some(tx) = &event_tx {
3653                                let item_type = if exit_code == 0 { "success" } else { "failure" };
3654                                tx.send(AgentEvent::MemoryStored {
3655                                    memory_id: uuid::Uuid::new_v4().to_string(),
3656                                    memory_type: item_type.to_string(),
3657                                    importance: if exit_code == 0 { 0.8 } else { 0.9 },
3658                                    tags: vec![item_type.to_string(), tool_call.name.clone()],
3659                                })
3660                                .await
3661                                .ok();
3662                            }
3663                        }
3664                        Err(e) => {
3665                            tracing::warn!("Failed to store memory after tool execution: {}", e);
3666                        }
3667                    }
3668                }
3669
3670                // Send tool end event
3671                if let Some(tx) = &event_tx {
3672                    tx.send(AgentEvent::ToolEnd {
3673                        id: tool_call.id.clone(),
3674                        name: tool_call.name.clone(),
3675                        output: output.clone(),
3676                        exit_code,
3677                        metadata,
3678                    })
3679                    .await
3680                    .ok();
3681                }
3682
3683                // Add tool result to messages
3684                if images.is_empty() {
3685                    messages.push(Message::tool_result(&tool_call.id, &output, is_error));
3686                } else {
3687                    messages.push(Message::tool_result_with_images(
3688                        &tool_call.id,
3689                        &output,
3690                        &images,
3691                        is_error,
3692                    ));
3693                }
3694            }
3695        }
3696    }
3697
3698    /// Execute with streaming events
3699    pub async fn execute_streaming(
3700        &self,
3701        history: &[Message],
3702        prompt: &str,
3703    ) -> Result<(
3704        mpsc::Receiver<AgentEvent>,
3705        tokio::task::JoinHandle<Result<AgentResult>>,
3706        tokio_util::sync::CancellationToken,
3707    )> {
3708        let (tx, rx) = mpsc::channel(100);
3709        let cancel_token = tokio_util::sync::CancellationToken::new();
3710
3711        let llm_client = self.llm_client.clone();
3712        let tool_executor = self.tool_executor.clone();
3713        let tool_context = self.tool_context.clone();
3714        let config = self.config.clone();
3715        let tool_metrics = self.tool_metrics.clone();
3716        let command_queue = self.command_queue.clone();
3717        let history = history.to_vec();
3718        let prompt = prompt.to_string();
3719        let token_clone = cancel_token.clone();
3720
3721        let handle = tokio::spawn(async move {
3722            let mut agent = AgentLoop::new(llm_client, tool_executor, tool_context, config);
3723            if let Some(metrics) = tool_metrics {
3724                agent = agent.with_tool_metrics(metrics);
3725            }
3726            if let Some(queue) = command_queue {
3727                agent = agent.with_queue(queue);
3728            }
3729            agent
3730                .execute_with_session(&history, &prompt, None, Some(tx), Some(&token_clone))
3731                .await
3732        });
3733
3734        Ok((rx, handle, cancel_token))
3735    }
3736
3737    /// Create an execution plan for a prompt
3738    ///
3739    /// Delegates to [`LlmPlanner`] for structured JSON plan generation,
3740    /// falling back to heuristic planning if the LLM call fails.
3741    pub async fn plan(&self, prompt: &str, _context: Option<&str>) -> Result<ExecutionPlan> {
3742        use crate::planning::LlmPlanner;
3743
3744        match LlmPlanner::create_plan(&self.llm_client, prompt).await {
3745            Ok(plan) => Ok(plan),
3746            Err(e) => {
3747                tracing::warn!("LLM plan creation failed, using fallback: {}", e);
3748                Ok(LlmPlanner::fallback_plan(prompt))
3749            }
3750        }
3751    }
3752
3753    /// Execute with planning phase.
3754    ///
3755    /// If `pre_analysis` is provided (from a single pre-analysis LLM call in
3756    /// `execute_with_session`), the goal and plan are already available and no
3757    /// additional LLM calls are needed for planning. Otherwise, falls back to
3758    /// calling `extract_goal` and `plan` individually.
3759    pub async fn execute_with_planning(
3760        &self,
3761        history: &[Message],
3762        prompt: &str,
3763        event_tx: Option<mpsc::Sender<AgentEvent>>,
3764        pre_analysis: Option<PreAnalysis>,
3765    ) -> Result<AgentResult> {
3766        // Send planning start event
3767        if let Some(tx) = &event_tx {
3768            tx.send(AgentEvent::PlanningStart {
3769                prompt: prompt.to_string(),
3770            })
3771            .await
3772            .ok();
3773        }
3774
3775        // Use pre-analysis result if available (goal + plan already computed in one LLM call).
3776        let (goal, plan) = if let Some(analysis) = pre_analysis {
3777            (Some(analysis.goal.clone()), analysis.execution_plan.clone())
3778        } else {
3779            // Fall back: extract goal and create plan via separate LLM calls.
3780            let g = if self.config.goal_tracking {
3781                Some(self.extract_goal(prompt).await?)
3782            } else {
3783                None
3784            };
3785            let p = self.plan(prompt, None).await?;
3786            (g, p)
3787        };
3788
3789        // Send GoalExtracted event if goal_tracking is enabled.
3790        if self.config.goal_tracking {
3791            if let Some(ref g) = goal {
3792                if let Some(tx) = &event_tx {
3793                    tx.send(AgentEvent::GoalExtracted { goal: g.clone() })
3794                        .await
3795                        .ok();
3796                }
3797            }
3798        }
3799
3800        // Send planning end event
3801        if let Some(tx) = &event_tx {
3802            tx.send(AgentEvent::PlanningEnd {
3803                estimated_steps: plan.steps.len(),
3804                plan: plan.clone(),
3805            })
3806            .await
3807            .ok();
3808        }
3809
3810        let plan_start = std::time::Instant::now();
3811
3812        // Execute the plan step by step
3813        let result = self.execute_plan(history, &plan, event_tx.clone()).await?;
3814
3815        // Emit the final End event (execute_loop_inner does not emit End in planning mode)
3816        if let Some(tx) = &event_tx {
3817            tx.send(AgentEvent::End {
3818                text: result.text.clone(),
3819                usage: result.usage.clone(),
3820                meta: None,
3821            })
3822            .await
3823            .ok();
3824        }
3825
3826        // Check goal achievement when goal_tracking is enabled
3827        if self.config.goal_tracking {
3828            if let Some(ref g) = goal {
3829                let achieved = self.check_goal_achievement(g, &result.text).await?;
3830                if achieved {
3831                    if let Some(tx) = &event_tx {
3832                        tx.send(AgentEvent::GoalAchieved {
3833                            goal: g.description.clone(),
3834                            total_steps: result.messages.len(),
3835                            duration_ms: plan_start.elapsed().as_millis() as i64,
3836                        })
3837                        .await
3838                        .ok();
3839                    }
3840                }
3841            }
3842        }
3843
3844        Ok(result)
3845    }
3846
    /// Execute an execution plan using wave-based dependency-aware scheduling.
    ///
    /// Steps with no unmet dependencies are grouped into "waves". A wave with
    /// a single step executes sequentially (preserving the history chain). A
    /// wave with multiple independent steps executes them in parallel via
    /// `JoinSet`, then merges their results back into the shared history.
    ///
    /// Returns an [`AgentResult`] whose `text` is the last assistant message
    /// in the accumulated history and whose `usage`/`tool_calls_count` are
    /// summed over all executed steps (sequential and parallel alike).
    async fn execute_plan(
        &self,
        history: &[Message],
        plan: &ExecutionPlan,
        event_tx: Option<mpsc::Sender<AgentEvent>>,
    ) -> Result<AgentResult> {
        // Work on a private copy of the plan so step statuses can be mutated.
        let mut plan = plan.clone();
        let mut current_history = history.to_vec();
        let mut total_usage = TokenUsage::default();
        let mut tool_calls_count = 0;
        let total_steps = plan.steps.len();

        // Add initial user message with the goal and a numbered step list so
        // the LLM sees the whole plan up front.
        let steps_text = plan
            .steps
            .iter()
            .enumerate()
            .map(|(i, step)| format!("{}. {}", i + 1, step.content))
            .collect::<Vec<_>>()
            .join("\n");
        current_history.push(Message::user(&crate::prompts::render(
            crate::prompts::PLAN_EXECUTE_GOAL,
            &[("goal", &plan.goal), ("steps", &steps_text)],
        )));

        // Wave loop: each iteration runs every currently-ready step.
        // NOTE(review): termination relies on `get_ready_steps` never
        // re-returning steps marked Completed/Failed — confirm in the
        // planning module.
        loop {
            let ready: Vec<String> = plan
                .get_ready_steps()
                .iter()
                .map(|s| s.id.clone())
                .collect();

            if ready.is_empty() {
                // All done or deadlock (pending steps whose dependencies can
                // never complete). Deadlock is logged, not surfaced as Err.
                if plan.has_deadlock() {
                    tracing::warn!(
                        "Plan deadlock detected: {} pending steps with unresolvable dependencies",
                        plan.pending_count()
                    );
                }
                break;
            }

            if ready.len() == 1 {
                // === Single step: sequential execution (preserves history chain) ===
                let step_id = &ready[0];
                let step = plan
                    .steps
                    .iter()
                    .find(|s| s.id == *step_id)
                    .ok_or_else(|| anyhow::anyhow!("step '{}' not found in plan", step_id))?
                    .clone();
                // 1-based position in the plan, for user-facing events.
                let step_number = plan
                    .steps
                    .iter()
                    .position(|s| s.id == *step_id)
                    .unwrap_or(0)
                    + 1;

                // Send step start event
                if let Some(tx) = &event_tx {
                    tx.send(AgentEvent::StepStart {
                        step_id: step.id.clone(),
                        description: step.content.clone(),
                        step_number,
                        total_steps,
                    })
                    .await
                    .ok();
                }

                plan.mark_status(&step.id, TaskStatus::InProgress);

                let step_prompt = crate::prompts::render(
                    crate::prompts::PLAN_EXECUTE_STEP,
                    &[
                        ("step_num", &step_number.to_string()),
                        ("description", &step.content),
                    ],
                );

                match self
                    .execute_loop(
                        &current_history,
                        &step_prompt,
                        AgentStyle::GeneralPurpose,
                        None,
                        event_tx.clone(),
                        &tokio_util::sync::CancellationToken::new(),
                        false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
                    )
                    .await
                {
                    Ok(result) => {
                        // Sequential steps replace the shared history, so the
                        // next step sees this step's full conversation.
                        current_history = result.messages.clone();
                        total_usage.prompt_tokens += result.usage.prompt_tokens;
                        total_usage.completion_tokens += result.usage.completion_tokens;
                        total_usage.total_tokens += result.usage.total_tokens;
                        tool_calls_count += result.tool_calls_count;
                        plan.mark_status(&step.id, TaskStatus::Completed);

                        if let Some(tx) = &event_tx {
                            tx.send(AgentEvent::StepEnd {
                                step_id: step.id.clone(),
                                status: TaskStatus::Completed,
                                step_number,
                                total_steps,
                            })
                            .await
                            .ok();
                        }
                    }
                    Err(e) => {
                        // A failed step is marked Failed and the loop carries
                        // on; its dependents will never become ready.
                        tracing::error!("Plan step '{}' failed: {}", step.id, e);
                        plan.mark_status(&step.id, TaskStatus::Failed);

                        if let Some(tx) = &event_tx {
                            tx.send(AgentEvent::StepEnd {
                                step_id: step.id.clone(),
                                status: TaskStatus::Failed,
                                step_number,
                                total_steps,
                            })
                            .await
                            .ok();
                        }
                    }
                }
            } else {
                // === Multiple steps: parallel execution via JoinSet ===
                // NOTE: Each parallel branch gets a clone of the base history.
                // Individual branch histories (tool calls, LLM turns) are NOT merged
                // back — only a summary message is appended. This is a deliberate
                // trade-off: merging divergent histories in a deterministic order is
                // complex and the summary approach keeps the context window manageable.
                let ready_steps: Vec<_> = ready
                    .iter()
                    .filter_map(|id| {
                        let step = plan.steps.iter().find(|s| s.id == *id)?.clone();
                        let step_number =
                            plan.steps.iter().position(|s| s.id == *id).unwrap_or(0) + 1;
                        Some((step, step_number))
                    })
                    .collect();

                // Mark all as InProgress and emit StepStart events
                for (step, step_number) in &ready_steps {
                    plan.mark_status(&step.id, TaskStatus::InProgress);
                    if let Some(tx) = &event_tx {
                        tx.send(AgentEvent::StepStart {
                            step_id: step.id.clone(),
                            description: step.content.clone(),
                            step_number: *step_number,
                            total_steps,
                        })
                        .await
                        .ok();
                    }
                }

                // Spawn all into JoinSet, each with a clone of the base history
                let mut join_set = tokio::task::JoinSet::new();
                for (step, step_number) in &ready_steps {
                    let base_history = current_history.clone();
                    let agent_clone = self.clone();
                    let tx = event_tx.clone();
                    let step_clone = step.clone();
                    let sn = *step_number;

                    join_set.spawn(async move {
                        let prompt = crate::prompts::render(
                            crate::prompts::PLAN_EXECUTE_STEP,
                            &[
                                ("step_num", &sn.to_string()),
                                ("description", &step_clone.content),
                            ],
                        );
                        let result = agent_clone
                            .execute_loop(
                                &base_history,
                                &prompt,
                                AgentStyle::GeneralPurpose,
                                None,
                                tx,
                                &tokio_util::sync::CancellationToken::new(),
                                false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
                            )
                            .await;
                        (step_clone.id, sn, result)
                    });
                }

                // Collect results; completion order is nondeterministic, so
                // results are sorted by step_number before merging below.
                let mut parallel_results: Vec<ParallelStepResult> = Vec::new();
                while let Some(join_result) = join_set.join_next().await {
                    match join_result {
                        Ok((step_id, step_number, step_result)) => match step_result {
                            Ok(result) => {
                                total_usage.prompt_tokens += result.usage.prompt_tokens;
                                total_usage.completion_tokens += result.usage.completion_tokens;
                                total_usage.total_tokens += result.usage.total_tokens;
                                tool_calls_count += result.tool_calls_count;
                                plan.mark_status(&step_id, TaskStatus::Completed);

                                parallel_results.push(ParallelStepResult {
                                    step_id: step_id.clone(),
                                    step_number: step_number as u32,
                                    status: "completed".to_string(),
                                    summary: result.text.trim().to_string(),
                                    key_findings: None,
                                    error: None,
                                    data: None,
                                });

                                if let Some(tx) = &event_tx {
                                    tx.send(AgentEvent::StepEnd {
                                        step_id,
                                        status: TaskStatus::Completed,
                                        step_number,
                                        total_steps,
                                    })
                                    .await
                                    .ok();
                                }
                            }
                            Err(e) => {
                                tracing::error!("Plan step '{}' failed: {}", step_id, e);
                                plan.mark_status(&step_id, TaskStatus::Failed);

                                parallel_results.push(ParallelStepResult {
                                    step_id: step_id.clone(),
                                    step_number: step_number as u32,
                                    status: "failed".to_string(),
                                    summary: String::new(),
                                    key_findings: None,
                                    error: Some(e.to_string()),
                                    data: None,
                                });

                                if let Some(tx) = &event_tx {
                                    tx.send(AgentEvent::StepEnd {
                                        step_id,
                                        status: TaskStatus::Failed,
                                        step_number,
                                        total_steps,
                                    })
                                    .await
                                    .ok();
                                }
                            }
                        },
                        Err(e) => {
                            // A panicked task is logged and dropped: its step
                            // stays InProgress and no StepEnd is emitted.
                            tracing::error!("JoinSet task panicked: {}", e);
                        }
                    }
                }

                // Merge parallel results into history for subsequent steps.
                // Emit as a structured JSON USER message so the frontend can
                // parse and render it in the user's language.
                if !parallel_results.is_empty() {
                    parallel_results.sort_by_key(|r| r.step_number);
                    let envelope = ParallelStepResult::build_envelope(parallel_results);
                    current_history.push(Message::user(
                        &serde_json::to_string(&envelope).unwrap_or_default(),
                    ));
                }
            }

            // Emit GoalProgress after each wave
            if self.config.goal_tracking {
                let completed = plan
                    .steps
                    .iter()
                    .filter(|s| s.status == TaskStatus::Completed)
                    .count();
                if let Some(tx) = &event_tx {
                    tx.send(AgentEvent::GoalProgress {
                        goal: plan.goal.clone(),
                        progress: plan.progress(),
                        completed_steps: completed,
                        total_steps,
                    })
                    .await
                    .ok();
                }
            }
        }

        // Get final response — find the last assistant message (not the last history
        // entry, which may be a user-summary injected after parallel execution)
        let final_text = current_history
            .iter()
            .rev()
            .find(|m| m.role == "assistant")
            .map(|m| {
                m.content
                    .iter()
                    .filter_map(|block| {
                        if let crate::llm::ContentBlock::Text { text } = block {
                            Some(text.as_str())
                        } else {
                            None
                        }
                    })
                    .collect::<Vec<_>>()
                    .join("\n")
            })
            .unwrap_or_default();

        Ok(AgentResult {
            text: final_text,
            messages: current_history,
            usage: total_usage,
            tool_calls_count,
        })
    }
4170
4171    /// Extract goal from prompt
4172    ///
4173    /// Delegates to [`LlmPlanner`] for structured JSON goal extraction,
4174    /// falling back to heuristic logic if the LLM call fails.
4175    pub async fn extract_goal(&self, prompt: &str) -> Result<AgentGoal> {
4176        use crate::planning::LlmPlanner;
4177
4178        match LlmPlanner::extract_goal(&self.llm_client, prompt).await {
4179            Ok(goal) => Ok(goal),
4180            Err(e) => {
4181                tracing::warn!("LLM goal extraction failed, using fallback: {}", e);
4182                Ok(LlmPlanner::fallback_goal(prompt))
4183            }
4184        }
4185    }
4186
4187    /// Check if goal is achieved
4188    ///
4189    /// Delegates to [`LlmPlanner`] for structured JSON achievement check,
4190    /// falling back to heuristic logic if the LLM call fails.
4191    pub async fn check_goal_achievement(
4192        &self,
4193        goal: &AgentGoal,
4194        current_state: &str,
4195    ) -> Result<bool> {
4196        use crate::planning::LlmPlanner;
4197
4198        match LlmPlanner::check_achievement(&self.llm_client, goal, current_state).await {
4199            Ok(result) => Ok(result.achieved),
4200            Err(e) => {
4201                tracing::warn!("LLM achievement check failed, using fallback: {}", e);
4202                let result = LlmPlanner::fallback_check_achievement(goal, current_state);
4203                Ok(result.achieved)
4204            }
4205        }
4206    }
4207}
4208
4209#[cfg(test)]
4210mod tests {
4211    use super::*;
4212    use crate::llm::{ContentBlock, StreamEvent};
4213    use crate::permissions::PermissionPolicy;
4214    use crate::tools::ToolExecutor;
4215    use std::path::PathBuf;
4216    use std::sync::atomic::{AtomicUsize, Ordering};
4217
4218    /// Create a default ToolContext for tests
4219    fn test_tool_context() -> ToolContext {
4220        ToolContext::new(PathBuf::from("/tmp"))
4221    }
4222
4223    #[test]
4224    fn test_agent_config_default() {
4225        let config = AgentConfig::default();
4226        assert!(config.prompt_slots.is_empty());
4227        assert!(config.tools.is_empty()); // Tools are provided externally
4228        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
4229        assert!(config.permission_checker.is_none());
4230        assert!(config.context_providers.is_empty());
4231        // Built-in skills are always present by default
4232        let registry = config
4233            .skill_registry
4234            .expect("skill_registry must be Some by default");
4235        assert!(registry.len() >= 7, "expected at least 7 built-in skills");
4236        assert!(registry.get("code-search").is_some());
4237        assert!(registry.get("find-bugs").is_some());
4238    }
4239
4240    // ========================================================================
4241    // Mock LLM Client for Testing
4242    // ========================================================================
4243
    /// Mock LLM client that returns predefined responses
    ///
    /// Used by the agent-loop tests to script an exact sequence of LLM turns
    /// without any network access.
    pub(crate) struct MockLlmClient {
        /// Responses to return (consumed in order, front first)
        responses: std::sync::Mutex<Vec<LlmResponse>>,
        /// Number of calls made (incremented by both `complete` and
        /// `complete_streaming`)
        pub(crate) call_count: AtomicUsize,
    }
4251
4252    impl MockLlmClient {
4253        pub(crate) fn new(responses: Vec<LlmResponse>) -> Self {
4254            Self {
4255                responses: std::sync::Mutex::new(responses),
4256                call_count: AtomicUsize::new(0),
4257            }
4258        }
4259
4260        /// Create a response with text only (no tool calls)
4261        pub(crate) fn text_response(text: &str) -> LlmResponse {
4262            LlmResponse {
4263                message: Message {
4264                    role: "assistant".to_string(),
4265                    content: vec![ContentBlock::Text {
4266                        text: text.to_string(),
4267                    }],
4268                    reasoning_content: None,
4269                },
4270                usage: TokenUsage {
4271                    prompt_tokens: 10,
4272                    completion_tokens: 5,
4273                    total_tokens: 15,
4274                    cache_read_tokens: None,
4275                    cache_write_tokens: None,
4276                },
4277                stop_reason: Some("end_turn".to_string()),
4278                meta: None,
4279            }
4280        }
4281
4282        /// Create a response with a tool call
4283        pub(crate) fn tool_call_response(
4284            tool_id: &str,
4285            tool_name: &str,
4286            args: serde_json::Value,
4287        ) -> LlmResponse {
4288            LlmResponse {
4289                message: Message {
4290                    role: "assistant".to_string(),
4291                    content: vec![ContentBlock::ToolUse {
4292                        id: tool_id.to_string(),
4293                        name: tool_name.to_string(),
4294                        input: args,
4295                    }],
4296                    reasoning_content: None,
4297                },
4298                usage: TokenUsage {
4299                    prompt_tokens: 10,
4300                    completion_tokens: 5,
4301                    total_tokens: 15,
4302                    cache_read_tokens: None,
4303                    cache_write_tokens: None,
4304                },
4305                stop_reason: Some("tool_use".to_string()),
4306                meta: None,
4307            }
4308        }
4309    }
4310
4311    #[async_trait::async_trait]
4312    impl LlmClient for MockLlmClient {
4313        async fn complete(
4314            &self,
4315            _messages: &[Message],
4316            _system: Option<&str>,
4317            _tools: &[ToolDefinition],
4318        ) -> Result<LlmResponse> {
4319            self.call_count.fetch_add(1, Ordering::SeqCst);
4320            let mut responses = self.responses.lock().unwrap();
4321            if responses.is_empty() {
4322                anyhow::bail!("No more mock responses available");
4323            }
4324            Ok(responses.remove(0))
4325        }
4326
4327        async fn complete_streaming(
4328            &self,
4329            _messages: &[Message],
4330            _system: Option<&str>,
4331            _tools: &[ToolDefinition],
4332            _cancel_token: tokio_util::sync::CancellationToken,
4333        ) -> Result<mpsc::Receiver<StreamEvent>> {
4334            self.call_count.fetch_add(1, Ordering::SeqCst);
4335            let mut responses = self.responses.lock().unwrap();
4336            if responses.is_empty() {
4337                anyhow::bail!("No more mock responses available");
4338            }
4339            let response = responses.remove(0);
4340
4341            let (tx, rx) = mpsc::channel(10);
4342            tokio::spawn(async move {
4343                // Send text deltas if any
4344                for block in &response.message.content {
4345                    if let ContentBlock::Text { text } = block {
4346                        tx.send(StreamEvent::TextDelta(text.clone())).await.ok();
4347                    }
4348                }
4349                tx.send(StreamEvent::Done(response)).await.ok();
4350            });
4351
4352            Ok(rx)
4353        }
4354    }
4355
4356    // ========================================================================
4357    // Agent Loop Tests
4358    // ========================================================================
4359
4360    #[tokio::test]
4361    async fn test_agent_simple_response() {
4362        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4363            "Hello, I'm an AI assistant.",
4364        )]));
4365
4366        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4367        let config = AgentConfig::default();
4368
4369        let agent = AgentLoop::new(
4370            mock_client.clone(),
4371            tool_executor,
4372            test_tool_context(),
4373            config,
4374        );
4375        let result = agent.execute(&[], "Hello", None).await.unwrap();
4376
4377        assert_eq!(result.text, "Hello, I'm an AI assistant.");
4378        assert_eq!(result.tool_calls_count, 0);
4379        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
4380    }
4381
4382    #[tokio::test]
4383    async fn test_agent_with_tool_call() {
4384        let mock_client = Arc::new(MockLlmClient::new(vec![
4385            // First response: tool call
4386            MockLlmClient::tool_call_response(
4387                "tool-1",
4388                "bash",
4389                serde_json::json!({"command": "echo hello"}),
4390            ),
4391            // Second response: final text
4392            MockLlmClient::text_response("The command output was: hello"),
4393        ]));
4394
4395        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4396        let config = AgentConfig::default();
4397
4398        let agent = AgentLoop::new(
4399            mock_client.clone(),
4400            tool_executor,
4401            test_tool_context(),
4402            config,
4403        );
4404        let result = agent.execute(&[], "Run echo hello", None).await.unwrap();
4405
4406        assert_eq!(result.text, "The command output was: hello");
4407        assert_eq!(result.tool_calls_count, 1);
4408        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
4409    }
4410
4411    #[tokio::test]
4412    async fn test_agent_permission_deny() {
4413        let mock_client = Arc::new(MockLlmClient::new(vec![
4414            // First response: tool call that will be denied
4415            MockLlmClient::tool_call_response(
4416                "tool-1",
4417                "bash",
4418                serde_json::json!({"command": "rm -rf /tmp/test"}),
4419            ),
4420            // Second response: LLM responds to the denial
4421            MockLlmClient::text_response(
4422                "I cannot execute that command due to permission restrictions.",
4423            ),
4424        ]));
4425
4426        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4427
4428        // Create permission policy that denies rm commands
4429        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4430
4431        let config = AgentConfig {
4432            permission_checker: Some(Arc::new(permission_policy)),
4433            ..Default::default()
4434        };
4435
4436        let (tx, mut rx) = mpsc::channel(100);
4437        let agent = AgentLoop::new(
4438            mock_client.clone(),
4439            tool_executor,
4440            test_tool_context(),
4441            config,
4442        );
4443        let result = agent.execute(&[], "Delete files", Some(tx)).await.unwrap();
4444
4445        // Check that we received a PermissionDenied event
4446        let mut found_permission_denied = false;
4447        while let Ok(event) = rx.try_recv() {
4448            if let AgentEvent::PermissionDenied { tool_name, .. } = event {
4449                assert_eq!(tool_name, "bash");
4450                found_permission_denied = true;
4451            }
4452        }
4453        assert!(
4454            found_permission_denied,
4455            "Should have received PermissionDenied event"
4456        );
4457
4458        assert_eq!(result.tool_calls_count, 1);
4459    }
4460
4461    #[tokio::test]
4462    async fn test_agent_permission_allow() {
4463        let mock_client = Arc::new(MockLlmClient::new(vec![
4464            // First response: tool call that will be allowed
4465            MockLlmClient::tool_call_response(
4466                "tool-1",
4467                "bash",
4468                serde_json::json!({"command": "echo hello"}),
4469            ),
4470            // Second response: final text
4471            MockLlmClient::text_response("Done!"),
4472        ]));
4473
4474        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4475
4476        // Create permission policy that allows echo commands
4477        let permission_policy = PermissionPolicy::new()
4478            .allow("bash(echo:*)")
4479            .deny("bash(rm:*)");
4480
4481        let config = AgentConfig {
4482            permission_checker: Some(Arc::new(permission_policy)),
4483            ..Default::default()
4484        };
4485
4486        let agent = AgentLoop::new(
4487            mock_client.clone(),
4488            tool_executor,
4489            test_tool_context(),
4490            config,
4491        );
4492        let result = agent.execute(&[], "Echo hello", None).await.unwrap();
4493
4494        assert_eq!(result.text, "Done!");
4495        assert_eq!(result.tool_calls_count, 1);
4496    }
4497
4498    #[tokio::test]
4499    async fn test_agent_streaming_events() {
4500        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4501            "Hello!",
4502        )]));
4503
4504        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4505        let config = AgentConfig::default();
4506
4507        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4508        let (mut rx, handle, _cancel_token) = agent.execute_streaming(&[], "Hi").await.unwrap();
4509
4510        // Collect events
4511        let mut events = Vec::new();
4512        while let Some(event) = rx.recv().await {
4513            events.push(event);
4514        }
4515
4516        let result = handle.await.unwrap().unwrap();
4517        assert_eq!(result.text, "Hello!");
4518
4519        // Check we received Start and End events
4520        assert!(events.iter().any(|e| matches!(e, AgentEvent::Start { .. })));
4521        assert!(events.iter().any(|e| matches!(e, AgentEvent::End { .. })));
4522    }
4523
4524    #[tokio::test]
4525    async fn test_agent_max_tool_rounds() {
4526        // Create a mock that always returns tool calls (infinite loop)
4527        let responses: Vec<LlmResponse> = (0..100)
4528            .map(|i| {
4529                MockLlmClient::tool_call_response(
4530                    &format!("tool-{}", i),
4531                    "bash",
4532                    serde_json::json!({"command": "echo loop"}),
4533                )
4534            })
4535            .collect();
4536
4537        let mock_client = Arc::new(MockLlmClient::new(responses));
4538        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4539
4540        let config = AgentConfig {
4541            max_tool_rounds: 3,
4542            ..Default::default()
4543        };
4544
4545        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4546        let result = agent.execute(&[], "Loop forever", None).await;
4547
4548        // Should fail due to max tool rounds exceeded
4549        assert!(result.is_err());
4550        assert!(result.unwrap_err().to_string().contains("Max tool rounds"));
4551    }
4552
4553    #[tokio::test]
4554    async fn test_agent_no_permission_policy_defaults_to_ask() {
4555        // When no permission policy is set, tools default to Ask.
4556        // Without a confirmation manager, Ask = safe deny.
4557        let mock_client = Arc::new(MockLlmClient::new(vec![
4558            MockLlmClient::tool_call_response(
4559                "tool-1",
4560                "bash",
4561                serde_json::json!({"command": "rm -rf /tmp/test"}),
4562            ),
4563            MockLlmClient::text_response("Denied!"),
4564        ]));
4565
4566        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4567        let config = AgentConfig {
4568            permission_checker: None, // No policy → defaults to Ask
4569            // No confirmation_manager → safe deny
4570            ..Default::default()
4571        };
4572
4573        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4574        let result = agent.execute(&[], "Delete", None).await.unwrap();
4575
4576        // Should be denied (no policy + no CM = safe deny)
4577        assert_eq!(result.text, "Denied!");
4578        assert_eq!(result.tool_calls_count, 1);
4579    }
4580
4581    #[tokio::test]
4582    async fn test_agent_permission_ask_without_cm_denies() {
4583        // When permission is Ask and no confirmation manager configured,
4584        // tool execution should be denied (safe default).
4585        let mock_client = Arc::new(MockLlmClient::new(vec![
4586            MockLlmClient::tool_call_response(
4587                "tool-1",
4588                "bash",
4589                serde_json::json!({"command": "echo test"}),
4590            ),
4591            MockLlmClient::text_response("Denied!"),
4592        ]));
4593
4594        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4595
4596        // Create policy where bash falls through to Ask (default)
4597        let permission_policy = PermissionPolicy::new(); // Default decision is Ask
4598
4599        let config = AgentConfig {
4600            permission_checker: Some(Arc::new(permission_policy)),
4601            // No confirmation_manager — safe deny
4602            ..Default::default()
4603        };
4604
4605        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4606        let result = agent.execute(&[], "Echo", None).await.unwrap();
4607
4608        // Should deny (Ask without CM = safe deny)
4609        assert_eq!(result.text, "Denied!");
4610        // The tool result should contain the denial message
4611        assert!(result.tool_calls_count >= 1);
4612    }
4613
4614    // ========================================================================
4615    // HITL (Human-in-the-Loop) Tests
4616    // ========================================================================
4617
    #[tokio::test]
    async fn test_agent_hitl_approved() {
        // Happy path: a permission decision of Ask routes through HITL, a
        // background task approves the pending confirmation, and the tool
        // then executes so the LLM can deliver its final answer.
        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
        use tokio::sync::broadcast;

        let mock_client = Arc::new(MockLlmClient::new(vec![
            MockLlmClient::tool_call_response(
                "tool-1",
                "bash",
                serde_json::json!({"command": "echo hello"}),
            ),
            MockLlmClient::text_response("Command executed!"),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));

        // Create HITL confirmation manager with policy enabled
        let (event_tx, _event_rx) = broadcast::channel(100);
        let hitl_policy = ConfirmationPolicy {
            enabled: true,
            ..Default::default()
        };
        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));

        // Create permission policy that returns Ask for bash
        let permission_policy = PermissionPolicy::new(); // Default is Ask

        let config = AgentConfig {
            permission_checker: Some(Arc::new(permission_policy)),
            confirmation_manager: Some(confirmation_manager.clone()),
            ..Default::default()
        };

        // Spawn a task to approve the confirmation
        // NOTE(review): the fixed 50ms sleep assumes the confirmation request
        // exists by then — a slow machine could race this; consider polling
        // for the pending request instead of sleeping.
        let cm_clone = confirmation_manager.clone();
        tokio::spawn(async move {
            // Wait a bit for the confirmation request to be created
            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
            // Approve it
            cm_clone.confirm("tool-1", true, None).await.ok();
        });

        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
        let result = agent.execute(&[], "Run echo", None).await.unwrap();

        // Approval let the tool run; the LLM's second response closes the loop.
        assert_eq!(result.text, "Command executed!");
        assert_eq!(result.tool_calls_count, 1);
    }
4666
    #[tokio::test]
    async fn test_agent_hitl_rejected() {
        // A human rejection: the pending HITL confirmation is answered with
        // `false` plus a reason, and the LLM responds to the refusal.
        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
        use tokio::sync::broadcast;

        let mock_client = Arc::new(MockLlmClient::new(vec![
            MockLlmClient::tool_call_response(
                "tool-1",
                "bash",
                serde_json::json!({"command": "rm -rf /"}),
            ),
            MockLlmClient::text_response("Understood, I won't do that."),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));

        // Create HITL confirmation manager
        let (event_tx, _event_rx) = broadcast::channel(100);
        let hitl_policy = ConfirmationPolicy {
            enabled: true,
            ..Default::default()
        };
        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));

        // Permission policy returns Ask
        let permission_policy = PermissionPolicy::new();

        let config = AgentConfig {
            permission_checker: Some(Arc::new(permission_policy)),
            confirmation_manager: Some(confirmation_manager.clone()),
            ..Default::default()
        };

        // Spawn a task to reject the confirmation
        // NOTE(review): fixed 50ms sleep assumes the confirmation request is
        // pending by then — timing-sensitive; confirm acceptable on slow CI.
        let cm_clone = confirmation_manager.clone();
        tokio::spawn(async move {
            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
            cm_clone
                .confirm("tool-1", false, Some("Too dangerous".to_string()))
                .await
                .ok();
        });

        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
        let result = agent.execute(&[], "Delete everything", None).await.unwrap();

        // LLM should respond to the rejection
        assert_eq!(result.text, "Understood, I won't do that.");
    }
4716
4717    #[tokio::test]
4718    async fn test_agent_hitl_timeout_reject() {
4719        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
4720        use tokio::sync::broadcast;
4721
4722        let mock_client = Arc::new(MockLlmClient::new(vec![
4723            MockLlmClient::tool_call_response(
4724                "tool-1",
4725                "bash",
4726                serde_json::json!({"command": "echo test"}),
4727            ),
4728            MockLlmClient::text_response("Timed out, I understand."),
4729        ]));
4730
4731        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4732
4733        // Create HITL with very short timeout and Reject action
4734        let (event_tx, _event_rx) = broadcast::channel(100);
4735        let hitl_policy = ConfirmationPolicy {
4736            enabled: true,
4737            default_timeout_ms: 50, // Very short timeout
4738            timeout_action: TimeoutAction::Reject,
4739            ..Default::default()
4740        };
4741        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4742
4743        let permission_policy = PermissionPolicy::new();
4744
4745        let config = AgentConfig {
4746            permission_checker: Some(Arc::new(permission_policy)),
4747            confirmation_manager: Some(confirmation_manager),
4748            ..Default::default()
4749        };
4750
4751        // Don't approve - let it timeout
4752        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4753        let result = agent.execute(&[], "Echo", None).await.unwrap();
4754
4755        // Should get timeout rejection response from LLM
4756        assert_eq!(result.text, "Timed out, I understand.");
4757    }
4758
4759    #[tokio::test]
4760    async fn test_agent_hitl_timeout_auto_approve() {
4761        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
4762        use tokio::sync::broadcast;
4763
4764        let mock_client = Arc::new(MockLlmClient::new(vec![
4765            MockLlmClient::tool_call_response(
4766                "tool-1",
4767                "bash",
4768                serde_json::json!({"command": "echo hello"}),
4769            ),
4770            MockLlmClient::text_response("Auto-approved and executed!"),
4771        ]));
4772
4773        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4774
4775        // Create HITL with very short timeout and AutoApprove action
4776        let (event_tx, _event_rx) = broadcast::channel(100);
4777        let hitl_policy = ConfirmationPolicy {
4778            enabled: true,
4779            default_timeout_ms: 50, // Very short timeout
4780            timeout_action: TimeoutAction::AutoApprove,
4781            ..Default::default()
4782        };
4783        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4784
4785        let permission_policy = PermissionPolicy::new();
4786
4787        let config = AgentConfig {
4788            permission_checker: Some(Arc::new(permission_policy)),
4789            confirmation_manager: Some(confirmation_manager),
4790            ..Default::default()
4791        };
4792
4793        // Don't approve - let it timeout and auto-approve
4794        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4795        let result = agent.execute(&[], "Echo", None).await.unwrap();
4796
4797        // Should auto-approve on timeout and execute
4798        assert_eq!(result.text, "Auto-approved and executed!");
4799        assert_eq!(result.tool_calls_count, 1);
4800    }
4801
    #[tokio::test]
    async fn test_agent_hitl_confirmation_events() {
        // Verifies the HITL event protocol on the broadcast channel: a
        // ConfirmationRequired event is emitted for the pending tool, and
        // after approval a ConfirmationReceived { approved: true } follows.
        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
        use tokio::sync::broadcast;

        let mock_client = Arc::new(MockLlmClient::new(vec![
            MockLlmClient::tool_call_response(
                "tool-1",
                "bash",
                serde_json::json!({"command": "echo test"}),
            ),
            MockLlmClient::text_response("Done!"),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));

        // Create HITL confirmation manager
        let (event_tx, mut event_rx) = broadcast::channel(100);
        let hitl_policy = ConfirmationPolicy {
            enabled: true,
            default_timeout_ms: 5000, // Long enough timeout
            ..Default::default()
        };
        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));

        let permission_policy = PermissionPolicy::new();

        let config = AgentConfig {
            permission_checker: Some(Arc::new(permission_policy)),
            confirmation_manager: Some(confirmation_manager.clone()),
            ..Default::default()
        };

        // Spawn task to approve and collect events. This must run concurrently
        // with the agent: it reacts to the ConfirmationRequired event by
        // approving the named tool, which unblocks the agent's execute().
        let cm_clone = confirmation_manager.clone();
        let event_handle = tokio::spawn(async move {
            let mut events = Vec::new();
            // Wait for ConfirmationRequired event
            while let Ok(event) = event_rx.recv().await {
                events.push(event.clone());
                if let AgentEvent::ConfirmationRequired { tool_id, .. } = event {
                    // Approve it
                    cm_clone.confirm(&tool_id, true, None).await.ok();
                    // Wait for ConfirmationReceived
                    if let Ok(recv_event) = event_rx.recv().await {
                        events.push(recv_event);
                    }
                    break;
                }
            }
            events
        });

        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
        let _result = agent.execute(&[], "Echo", None).await.unwrap();

        // Check events collected by the approval task.
        let events = event_handle.await.unwrap();
        assert!(
            events
                .iter()
                .any(|e| matches!(e, AgentEvent::ConfirmationRequired { .. })),
            "Should have ConfirmationRequired event"
        );
        assert!(
            events
                .iter()
                .any(|e| matches!(e, AgentEvent::ConfirmationReceived { approved: true, .. })),
            "Should have ConfirmationReceived event with approved=true"
        );
    }
4873
4874    #[tokio::test]
4875    async fn test_agent_hitl_disabled_auto_executes() {
4876        // When HITL is disabled, tools should execute automatically even with Ask permission
4877        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4878        use tokio::sync::broadcast;
4879
4880        let mock_client = Arc::new(MockLlmClient::new(vec![
4881            MockLlmClient::tool_call_response(
4882                "tool-1",
4883                "bash",
4884                serde_json::json!({"command": "echo auto"}),
4885            ),
4886            MockLlmClient::text_response("Auto executed!"),
4887        ]));
4888
4889        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4890
4891        // Create HITL with enabled=false
4892        let (event_tx, _event_rx) = broadcast::channel(100);
4893        let hitl_policy = ConfirmationPolicy {
4894            enabled: false, // HITL disabled
4895            ..Default::default()
4896        };
4897        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4898
4899        let permission_policy = PermissionPolicy::new(); // Default is Ask
4900
4901        let config = AgentConfig {
4902            permission_checker: Some(Arc::new(permission_policy)),
4903            confirmation_manager: Some(confirmation_manager),
4904            ..Default::default()
4905        };
4906
4907        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4908        let result = agent.execute(&[], "Echo", None).await.unwrap();
4909
4910        // Should execute without waiting for confirmation
4911        assert_eq!(result.text, "Auto executed!");
4912        assert_eq!(result.tool_calls_count, 1);
4913    }
4914
4915    #[tokio::test]
4916    async fn test_agent_hitl_with_permission_deny_skips_hitl() {
4917        // When permission is Deny, HITL should not be triggered
4918        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4919        use tokio::sync::broadcast;
4920
4921        let mock_client = Arc::new(MockLlmClient::new(vec![
4922            MockLlmClient::tool_call_response(
4923                "tool-1",
4924                "bash",
4925                serde_json::json!({"command": "rm -rf /"}),
4926            ),
4927            MockLlmClient::text_response("Blocked by permission."),
4928        ]));
4929
4930        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4931
4932        // Create HITL enabled
4933        let (event_tx, mut event_rx) = broadcast::channel(100);
4934        let hitl_policy = ConfirmationPolicy {
4935            enabled: true,
4936            ..Default::default()
4937        };
4938        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4939
4940        // Permission policy denies rm commands
4941        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4942
4943        let config = AgentConfig {
4944            permission_checker: Some(Arc::new(permission_policy)),
4945            confirmation_manager: Some(confirmation_manager),
4946            ..Default::default()
4947        };
4948
4949        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4950        let result = agent.execute(&[], "Delete", None).await.unwrap();
4951
4952        // Should be denied without HITL
4953        assert_eq!(result.text, "Blocked by permission.");
4954
4955        // Should NOT have any ConfirmationRequired events
4956        let mut found_confirmation = false;
4957        while let Ok(event) = event_rx.try_recv() {
4958            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4959                found_confirmation = true;
4960            }
4961        }
4962        assert!(
4963            !found_confirmation,
4964            "HITL should not be triggered when permission is Deny"
4965        );
4966    }
4967
4968    #[tokio::test]
4969    async fn test_agent_hitl_with_permission_allow_skips_hitl() {
4970        // When permission is Allow, HITL confirmation is skipped entirely.
4971        // PermissionPolicy is the declarative rule engine; Allow = execute directly.
4972        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4973        use tokio::sync::broadcast;
4974
4975        let mock_client = Arc::new(MockLlmClient::new(vec![
4976            MockLlmClient::tool_call_response(
4977                "tool-1",
4978                "bash",
4979                serde_json::json!({"command": "echo hello"}),
4980            ),
4981            MockLlmClient::text_response("Allowed!"),
4982        ]));
4983
4984        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4985
4986        // Create HITL enabled
4987        let (event_tx, mut event_rx) = broadcast::channel(100);
4988        let hitl_policy = ConfirmationPolicy {
4989            enabled: true,
4990            ..Default::default()
4991        };
4992        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4993
4994        // Permission policy allows echo commands
4995        let permission_policy = PermissionPolicy::new().allow("bash(echo:*)");
4996
4997        let config = AgentConfig {
4998            permission_checker: Some(Arc::new(permission_policy)),
4999            confirmation_manager: Some(confirmation_manager.clone()),
5000            ..Default::default()
5001        };
5002
5003        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5004        let result = agent.execute(&[], "Echo", None).await.unwrap();
5005
5006        // Should execute directly without HITL (permission Allow skips confirmation)
5007        assert_eq!(result.text, "Allowed!");
5008
5009        // Should NOT have ConfirmationRequired event (Allow bypasses HITL)
5010        let mut found_confirmation = false;
5011        while let Ok(event) = event_rx.try_recv() {
5012            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
5013                found_confirmation = true;
5014            }
5015        }
5016        assert!(
5017            !found_confirmation,
5018            "Permission Allow should skip HITL confirmation"
5019        );
5020    }
5021
    #[tokio::test]
    async fn test_agent_hitl_multiple_tool_calls() {
        // Test multiple tool calls in sequence with HITL: one assistant turn
        // carrying two tool calls, both of which are approved by a background
        // task, so both execute and count toward the total.
        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
        use tokio::sync::broadcast;

        let mock_client = Arc::new(MockLlmClient::new(vec![
            // First response: two tool calls in a single assistant message
            LlmResponse {
                message: Message {
                    role: "assistant".to_string(),
                    content: vec![
                        ContentBlock::ToolUse {
                            id: "tool-1".to_string(),
                            name: "bash".to_string(),
                            input: serde_json::json!({"command": "echo first"}),
                        },
                        ContentBlock::ToolUse {
                            id: "tool-2".to_string(),
                            name: "bash".to_string(),
                            input: serde_json::json!({"command": "echo second"}),
                        },
                    ],
                    reasoning_content: None,
                },
                usage: TokenUsage {
                    prompt_tokens: 10,
                    completion_tokens: 5,
                    total_tokens: 15,
                    cache_read_tokens: None,
                    cache_write_tokens: None,
                },
                stop_reason: Some("tool_use".to_string()),
                meta: None,
            },
            MockLlmClient::text_response("Both executed!"),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));

        // Create HITL
        let (event_tx, _event_rx) = broadcast::channel(100);
        let hitl_policy = ConfirmationPolicy {
            enabled: true,
            default_timeout_ms: 5000,
            ..Default::default()
        };
        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));

        let permission_policy = PermissionPolicy::new(); // Default Ask

        let config = AgentConfig {
            permission_checker: Some(Arc::new(permission_policy)),
            confirmation_manager: Some(confirmation_manager.clone()),
            ..Default::default()
        };

        // Spawn task to approve both tools
        // NOTE(review): the staggered 30ms sleeps assume both confirmation
        // requests become pending in order within that window — timing-
        // sensitive; confirm acceptable on slow CI.
        let cm_clone = confirmation_manager.clone();
        tokio::spawn(async move {
            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
            cm_clone.confirm("tool-1", true, None).await.ok();
            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
            cm_clone.confirm("tool-2", true, None).await.ok();
        });

        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
        let result = agent.execute(&[], "Run both", None).await.unwrap();

        // Both approvals landed, so both calls executed.
        assert_eq!(result.text, "Both executed!");
        assert_eq!(result.tool_calls_count, 2);
    }
5094
    #[tokio::test]
    async fn test_agent_hitl_partial_approval() {
        // Test: first tool approved, second rejected. Both still count as
        // tool calls; the LLM's follow-up reflects the mixed outcome.
        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
        use tokio::sync::broadcast;

        let mock_client = Arc::new(MockLlmClient::new(vec![
            // First response: two tool calls in a single assistant message
            LlmResponse {
                message: Message {
                    role: "assistant".to_string(),
                    content: vec![
                        ContentBlock::ToolUse {
                            id: "tool-1".to_string(),
                            name: "bash".to_string(),
                            input: serde_json::json!({"command": "echo safe"}),
                        },
                        ContentBlock::ToolUse {
                            id: "tool-2".to_string(),
                            name: "bash".to_string(),
                            input: serde_json::json!({"command": "rm -rf /"}),
                        },
                    ],
                    reasoning_content: None,
                },
                usage: TokenUsage {
                    prompt_tokens: 10,
                    completion_tokens: 5,
                    total_tokens: 15,
                    cache_read_tokens: None,
                    cache_write_tokens: None,
                },
                stop_reason: Some("tool_use".to_string()),
                meta: None,
            },
            MockLlmClient::text_response("First worked, second rejected."),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));

        let (event_tx, _event_rx) = broadcast::channel(100);
        let hitl_policy = ConfirmationPolicy {
            enabled: true,
            default_timeout_ms: 5000,
            ..Default::default()
        };
        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));

        let permission_policy = PermissionPolicy::new();

        let config = AgentConfig {
            permission_checker: Some(Arc::new(permission_policy)),
            confirmation_manager: Some(confirmation_manager.clone()),
            ..Default::default()
        };

        // Approve first, reject second
        // NOTE(review): staggered 30ms sleeps are timing-sensitive, same as
        // the multiple-tool-calls test; confirm acceptable on slow CI.
        let cm_clone = confirmation_manager.clone();
        tokio::spawn(async move {
            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
            cm_clone.confirm("tool-1", true, None).await.ok();
            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
            cm_clone
                .confirm("tool-2", false, Some("Dangerous".to_string()))
                .await
                .ok();
        });

        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
        let result = agent.execute(&[], "Run both", None).await.unwrap();

        // Rejection of tool-2 still counts it as a tool call.
        assert_eq!(result.text, "First worked, second rejected.");
        assert_eq!(result.tool_calls_count, 2);
    }
5169
5170    #[tokio::test]
5171    async fn test_agent_hitl_yolo_mode_auto_approves() {
5172        // YOLO mode: specific lanes auto-approve without confirmation
5173        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, SessionLane};
5174        use tokio::sync::broadcast;
5175
5176        let mock_client = Arc::new(MockLlmClient::new(vec![
5177            MockLlmClient::tool_call_response(
5178                "tool-1",
5179                "read", // Query lane tool
5180                serde_json::json!({"path": "/tmp/test.txt"}),
5181            ),
5182            MockLlmClient::text_response("File read!"),
5183        ]));
5184
5185        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5186
5187        // YOLO mode for Query lane (read, glob, ls, grep)
5188        let (event_tx, mut event_rx) = broadcast::channel(100);
5189        let mut yolo_lanes = std::collections::HashSet::new();
5190        yolo_lanes.insert(SessionLane::Query);
5191        let hitl_policy = ConfirmationPolicy {
5192            enabled: true,
5193            yolo_lanes, // Auto-approve query operations
5194            ..Default::default()
5195        };
5196        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5197
5198        let permission_policy = PermissionPolicy::new();
5199
5200        let config = AgentConfig {
5201            permission_checker: Some(Arc::new(permission_policy)),
5202            confirmation_manager: Some(confirmation_manager),
5203            ..Default::default()
5204        };
5205
5206        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5207        let result = agent.execute(&[], "Read file", None).await.unwrap();
5208
5209        // Should auto-execute without confirmation (YOLO mode)
5210        assert_eq!(result.text, "File read!");
5211
5212        // Should NOT have ConfirmationRequired for yolo lane
5213        let mut found_confirmation = false;
5214        while let Ok(event) = event_rx.try_recv() {
5215            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
5216                found_confirmation = true;
5217            }
5218        }
5219        assert!(
5220            !found_confirmation,
5221            "YOLO mode should not trigger confirmation"
5222        );
5223    }
5224
5225    #[tokio::test]
5226    async fn test_agent_config_with_all_options() {
5227        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5228        use tokio::sync::broadcast;
5229
5230        let (event_tx, _) = broadcast::channel(100);
5231        let hitl_policy = ConfirmationPolicy::default();
5232        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5233
5234        let permission_policy = PermissionPolicy::new().allow("bash(*)");
5235
5236        let config = AgentConfig {
5237            prompt_slots: SystemPromptSlots {
5238                extra: Some("Test system prompt".to_string()),
5239                ..Default::default()
5240            },
5241            tools: vec![],
5242            max_tool_rounds: 10,
5243            permission_checker: Some(Arc::new(permission_policy)),
5244            confirmation_manager: Some(confirmation_manager),
5245            context_providers: vec![],
5246            planning_mode: PlanningMode::default(),
5247            goal_tracking: false,
5248            hook_engine: None,
5249            skill_registry: None,
5250            ..AgentConfig::default()
5251        };
5252
5253        assert!(config.prompt_slots.build().contains("Test system prompt"));
5254        assert_eq!(config.max_tool_rounds, 10);
5255        assert!(config.permission_checker.is_some());
5256        assert!(config.confirmation_manager.is_some());
5257        assert!(config.context_providers.is_empty());
5258
5259        // Test Debug trait
5260        let debug_str = format!("{:?}", config);
5261        assert!(debug_str.contains("AgentConfig"));
5262        assert!(debug_str.contains("permission_checker: true"));
5263        assert!(debug_str.contains("confirmation_manager: true"));
5264        assert!(debug_str.contains("context_providers: 0"));
5265    }
5266
5267    // ========================================================================
5268    // Context Provider Tests
5269    // ========================================================================
5270
5271    use crate::context::{ContextItem, ContextType};
5272
    /// Mock context provider for testing.
    ///
    /// Serves a fixed set of canned items from `query` and records every
    /// `on_turn_complete` invocation so tests can assert that (and with
    /// what arguments) the hook was called.
    struct MockContextProvider {
        // Name reported via `ContextProvider::name`.
        name: String,
        // Canned items returned (cloned) from every `query` call.
        items: Vec<ContextItem>,
        // Recorded (session_id, prompt, response) tuples; wrapped in
        // Arc<RwLock<..>> so tests can keep a handle after moving the
        // provider into the agent config.
        on_turn_calls: std::sync::Arc<tokio::sync::RwLock<Vec<(String, String, String)>>>,
    }

    impl MockContextProvider {
        /// Create a provider with the given name and no canned items.
        fn new(name: &str) -> Self {
            Self {
                name: name.to_string(),
                items: Vec::new(),
                on_turn_calls: std::sync::Arc::new(tokio::sync::RwLock::new(Vec::new())),
            }
        }

        /// Builder-style setter replacing the canned items wholesale.
        fn with_items(mut self, items: Vec<ContextItem>) -> Self {
            self.items = items;
            self
        }
    }
5294
5295    #[async_trait::async_trait]
5296    impl ContextProvider for MockContextProvider {
5297        fn name(&self) -> &str {
5298            &self.name
5299        }
5300
5301        async fn query(&self, _query: &ContextQuery) -> anyhow::Result<ContextResult> {
5302            let mut result = ContextResult::new(&self.name);
5303            for item in &self.items {
5304                result.add_item(item.clone());
5305            }
5306            Ok(result)
5307        }
5308
5309        async fn on_turn_complete(
5310            &self,
5311            session_id: &str,
5312            prompt: &str,
5313            response: &str,
5314        ) -> anyhow::Result<()> {
5315            let mut calls = self.on_turn_calls.write().await;
5316            calls.push((
5317                session_id.to_string(),
5318                prompt.to_string(),
5319                response.to_string(),
5320            ));
5321            Ok(())
5322        }
5323    }
5324
5325    #[tokio::test]
5326    async fn test_agent_with_context_provider() {
5327        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5328            "Response using context",
5329        )]));
5330
5331        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5332
5333        let provider =
5334            MockContextProvider::new("test-provider").with_items(vec![ContextItem::new(
5335                "ctx-1",
5336                ContextType::Resource,
5337                "Relevant context here",
5338            )
5339            .with_source("test://docs/example")]);
5340
5341        let config = AgentConfig {
5342            prompt_slots: SystemPromptSlots {
5343                extra: Some("You are helpful.".to_string()),
5344                ..Default::default()
5345            },
5346            context_providers: vec![Arc::new(provider)],
5347            ..Default::default()
5348        };
5349
5350        let agent = AgentLoop::new(
5351            mock_client.clone(),
5352            tool_executor,
5353            test_tool_context(),
5354            config,
5355        );
5356        let result = agent.execute(&[], "What is X?", None).await.unwrap();
5357
5358        assert_eq!(result.text, "Response using context");
5359        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
5360    }
5361
    #[tokio::test]
    async fn test_agent_context_provider_events() {
        // One provider contributing a single 50-token Memory item; the agent
        // must emit ContextResolving and then ContextResolved whose totals
        // reflect exactly that one item.
        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
            "Answer",
        )]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));

        let provider =
            MockContextProvider::new("event-provider").with_items(vec![ContextItem::new(
                "item-1",
                ContextType::Memory,
                "Memory content",
            )
            .with_token_count(50)]);

        let config = AgentConfig {
            context_providers: vec![Arc::new(provider)],
            ..Default::default()
        };

        let (tx, mut rx) = mpsc::channel(100);
        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
        let _result = agent.execute(&[], "Test prompt", Some(tx)).await.unwrap();

        // Collect events — tx was moved into execute() and dropped when it
        // returned, so everything is already buffered in the channel.
        let mut events = Vec::new();
        while let Ok(event) = rx.try_recv() {
            events.push(event);
        }

        // Should have ContextResolving and ContextResolved events
        assert!(
            events
                .iter()
                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
            "Should have ContextResolving event"
        );
        assert!(
            events
                .iter()
                .any(|e| matches!(e, AgentEvent::ContextResolved { .. })),
            "Should have ContextResolved event"
        );

        // Check context resolved values: one item, 50 tokens total.
        for event in &events {
            if let AgentEvent::ContextResolved {
                total_items,
                total_tokens,
            } = event
            {
                assert_eq!(*total_items, 1);
                assert_eq!(*total_tokens, 50);
            }
        }
    }
5419
    #[tokio::test]
    async fn test_agent_multiple_context_providers() {
        // Two providers (1 item + 2 items) — the ContextResolved event must
        // report the combined item count and summed token counts.
        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
            "Combined response",
        )]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));

        // Provider 1: one 100-token resource item.
        let provider1 = MockContextProvider::new("provider-1").with_items(vec![ContextItem::new(
            "p1-1",
            ContextType::Resource,
            "Resource from P1",
        )
        .with_token_count(100)]);

        // Provider 2: two items totalling 125 tokens.
        let provider2 = MockContextProvider::new("provider-2").with_items(vec![
            ContextItem::new("p2-1", ContextType::Memory, "Memory from P2").with_token_count(50),
            ContextItem::new("p2-2", ContextType::Skill, "Skill from P2").with_token_count(75),
        ]);

        let config = AgentConfig {
            prompt_slots: SystemPromptSlots {
                extra: Some("Base system prompt.".to_string()),
                ..Default::default()
            },
            context_providers: vec![Arc::new(provider1), Arc::new(provider2)],
            ..Default::default()
        };

        let (tx, mut rx) = mpsc::channel(100);
        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
        let result = agent.execute(&[], "Query", Some(tx)).await.unwrap();

        assert_eq!(result.text, "Combined response");

        // Check context resolved event has combined totals
        while let Ok(event) = rx.try_recv() {
            if let AgentEvent::ContextResolved {
                total_items,
                total_tokens,
            } = event
            {
                assert_eq!(total_items, 3); // 1 + 2
                assert_eq!(total_tokens, 225); // 100 + 50 + 75
            }
        }
    }
5467
5468    #[tokio::test]
5469    async fn test_agent_no_context_providers() {
5470        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5471            "No context",
5472        )]));
5473
5474        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5475
5476        // No context providers
5477        let config = AgentConfig::default();
5478
5479        let (tx, mut rx) = mpsc::channel(100);
5480        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5481        let result = agent.execute(&[], "Simple prompt", Some(tx)).await.unwrap();
5482
5483        assert_eq!(result.text, "No context");
5484
5485        // Should NOT have context events when no providers
5486        let mut events = Vec::new();
5487        while let Ok(event) = rx.try_recv() {
5488            events.push(event);
5489        }
5490
5491        assert!(
5492            !events
5493                .iter()
5494                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
5495            "Should NOT have ContextResolving event"
5496        );
5497    }
5498
    #[tokio::test]
    async fn test_agent_context_on_turn_complete() {
        // When a session id is supplied, each provider's on_turn_complete
        // hook must be invoked once with (session_id, prompt, response).
        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
            "Final response",
        )]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));

        let provider = Arc::new(MockContextProvider::new("memory-provider"));
        // Keep a handle on the call log before moving the provider away.
        let on_turn_calls = provider.on_turn_calls.clone();

        let config = AgentConfig {
            context_providers: vec![provider],
            ..Default::default()
        };

        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);

        // Execute with session ID
        let result = agent
            .execute_with_session(&[], "User prompt", Some("sess-123"), None, None)
            .await
            .unwrap();

        assert_eq!(result.text, "Final response");

        // Check on_turn_complete was called exactly once with the turn data.
        let calls = on_turn_calls.read().await;
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].0, "sess-123");
        assert_eq!(calls[0].1, "User prompt");
        assert_eq!(calls[0].2, "Final response");
    }
5532
5533    #[tokio::test]
5534    async fn test_agent_context_on_turn_complete_no_session() {
5535        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5536            "Response",
5537        )]));
5538
5539        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5540
5541        let provider = Arc::new(MockContextProvider::new("memory-provider"));
5542        let on_turn_calls = provider.on_turn_calls.clone();
5543
5544        let config = AgentConfig {
5545            context_providers: vec![provider],
5546            ..Default::default()
5547        };
5548
5549        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5550
5551        // Execute without session ID (uses execute() which passes None)
5552        let _result = agent.execute(&[], "Prompt", None).await.unwrap();
5553
5554        // on_turn_complete should NOT be called when session_id is None
5555        let calls = on_turn_calls.read().await;
5556        assert!(calls.is_empty());
5557    }
5558
    #[tokio::test]
    async fn test_agent_build_augmented_system_prompt() {
        // Calls the agent's internal resolve_context /
        // build_augmented_system_prompt helpers directly and checks that the
        // base prompt and a <context> block for the item both appear.
        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response("OK")]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));

        let provider = MockContextProvider::new("test").with_items(vec![ContextItem::new(
            "doc-1",
            ContextType::Resource,
            "Auth uses JWT tokens.",
        )
        .with_source("viking://docs/auth")]);

        let config = AgentConfig {
            prompt_slots: SystemPromptSlots {
                extra: Some("You are helpful.".to_string()),
                ..Default::default()
            },
            context_providers: vec![Arc::new(provider)],
            ..Default::default()
        };

        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);

        // Test building augmented prompt (no event channel needed).
        let context_results = agent.resolve_context("test", None).await;
        let augmented = agent.build_augmented_system_prompt(&context_results);

        // Some(..) is expected because both a base prompt and context exist.
        let augmented_str = augmented.unwrap();
        assert!(augmented_str.contains("You are helpful."));
        assert!(augmented_str.contains("<context source=\"viking://docs/auth\" type=\"Resource\">"));
        assert!(augmented_str.contains("Auth uses JWT tokens."));
    }
5592
5593    // ========================================================================
5594    // Agentic Loop Integration Tests
5595    // ========================================================================
5596
5597    /// Helper: collect all events from a channel
5598    async fn collect_events(mut rx: mpsc::Receiver<AgentEvent>) -> Vec<AgentEvent> {
5599        let mut events = Vec::new();
5600        while let Ok(event) = rx.try_recv() {
5601            events.push(event);
5602        }
5603        // Drain remaining
5604        while let Some(event) = rx.recv().await {
5605            events.push(event);
5606        }
5607        events
5608    }
5609
    #[tokio::test]
    async fn test_agent_multi_turn_tool_chain() {
        // LLM calls tool A → sees result → calls tool B → sees result → final answer
        let mock_client = Arc::new(MockLlmClient::new(vec![
            // Turn 1: call ls
            MockLlmClient::tool_call_response(
                "t1",
                "bash",
                serde_json::json!({"command": "echo step1"}),
            ),
            // Turn 2: call another tool based on first result
            MockLlmClient::tool_call_response(
                "t2",
                "bash",
                serde_json::json!({"command": "echo step2"}),
            ),
            // Turn 3: final answer
            MockLlmClient::text_response("Completed both steps: step1 then step2"),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let config = AgentConfig::default();

        let agent = AgentLoop::new(
            mock_client.clone(),
            tool_executor,
            test_tool_context(),
            config,
        );
        let result = agent.execute(&[], "Run two steps", None).await.unwrap();

        // Final text comes from the third canned response; two tools ran
        // across three LLM round-trips.
        assert_eq!(result.text, "Completed both steps: step1 then step2");
        assert_eq!(result.tool_calls_count, 2);
        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 3);

        // Verify message history: user → assistant(tool_use) → user(tool_result) → assistant(tool_use) → user(tool_result) → assistant(text)
        assert_eq!(result.messages[0].role, "user");
        assert_eq!(result.messages[1].role, "assistant"); // tool call 1
        assert_eq!(result.messages[2].role, "user"); // tool result 1 (Anthropic convention)
        assert_eq!(result.messages[3].role, "assistant"); // tool call 2
        assert_eq!(result.messages[4].role, "user"); // tool result 2
        assert_eq!(result.messages[5].role, "assistant"); // final text
        assert_eq!(result.messages.len(), 6);
    }
5654
5655    #[tokio::test]
5656    async fn test_agent_conversation_history_preserved() {
5657        // Pass existing history, verify it's preserved in output
5658        let existing_history = vec![
5659            Message::user("What is Rust?"),
5660            Message {
5661                role: "assistant".to_string(),
5662                content: vec![ContentBlock::Text {
5663                    text: "Rust is a systems programming language.".to_string(),
5664                }],
5665                reasoning_content: None,
5666            },
5667        ];
5668
5669        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5670            "Rust was created by Graydon Hoare at Mozilla.",
5671        )]));
5672
5673        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5674        let agent = AgentLoop::new(
5675            mock_client.clone(),
5676            tool_executor,
5677            test_tool_context(),
5678            AgentConfig::default(),
5679        );
5680
5681        let result = agent
5682            .execute(&existing_history, "Who created it?", None)
5683            .await
5684            .unwrap();
5685
5686        // History should contain: old user + old assistant + new user + new assistant
5687        assert_eq!(result.messages.len(), 4);
5688        assert_eq!(result.messages[0].text(), "What is Rust?");
5689        assert_eq!(
5690            result.messages[1].text(),
5691            "Rust is a systems programming language."
5692        );
5693        assert_eq!(result.messages[2].text(), "Who created it?");
5694        assert_eq!(
5695            result.messages[3].text(),
5696            "Rust was created by Graydon Hoare at Mozilla."
5697        );
5698    }
5699
    #[tokio::test]
    async fn test_agent_event_stream_completeness() {
        // Verify full event sequence for a single tool call loop:
        // Start first, End last, two turns, exactly one ToolEnd.
        let mock_client = Arc::new(MockLlmClient::new(vec![
            MockLlmClient::tool_call_response(
                "t1",
                "bash",
                serde_json::json!({"command": "echo hi"}),
            ),
            MockLlmClient::text_response("Done"),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let agent = AgentLoop::new(
            mock_client,
            tool_executor,
            test_tool_context(),
            AgentConfig::default(),
        );

        let (tx, rx) = mpsc::channel(100);
        let result = agent.execute(&[], "Say hi", Some(tx)).await.unwrap();
        assert_eq!(result.text, "Done");

        let events = collect_events(rx).await;

        // Project each event onto a coarse label for order/count checks.
        let event_types: Vec<&str> = events
            .iter()
            .map(|e| match e {
                AgentEvent::Start { .. } => "Start",
                AgentEvent::TurnStart { .. } => "TurnStart",
                AgentEvent::TurnEnd { .. } => "TurnEnd",
                AgentEvent::ToolEnd { .. } => "ToolEnd",
                AgentEvent::End { .. } => "End",
                _ => "Other",
            })
            .collect();

        // Must start with Start, end with End
        assert_eq!(event_types.first(), Some(&"Start"));
        assert_eq!(event_types.last(), Some(&"End"));

        // Must have 2 TurnStarts (tool call turn + final answer turn)
        let turn_starts = event_types.iter().filter(|&&t| t == "TurnStart").count();
        assert_eq!(turn_starts, 2);

        // Must have 1 ToolEnd
        let tool_ends = event_types.iter().filter(|&&t| t == "ToolEnd").count();
        assert_eq!(tool_ends, 1);
    }
5751
    #[tokio::test]
    async fn test_agent_multiple_tools_single_turn() {
        // LLM returns 2 tool calls in one response; both must execute in a
        // single round (only two LLM calls total).
        let mock_client = Arc::new(MockLlmClient::new(vec![
            // Hand-built response with two ToolUse blocks in one message.
            LlmResponse {
                message: Message {
                    role: "assistant".to_string(),
                    content: vec![
                        ContentBlock::ToolUse {
                            id: "t1".to_string(),
                            name: "bash".to_string(),
                            input: serde_json::json!({"command": "echo first"}),
                        },
                        ContentBlock::ToolUse {
                            id: "t2".to_string(),
                            name: "bash".to_string(),
                            input: serde_json::json!({"command": "echo second"}),
                        },
                    ],
                    reasoning_content: None,
                },
                usage: TokenUsage {
                    prompt_tokens: 10,
                    completion_tokens: 5,
                    total_tokens: 15,
                    cache_read_tokens: None,
                    cache_write_tokens: None,
                },
                stop_reason: Some("tool_use".to_string()),
                meta: None,
            },
            MockLlmClient::text_response("Both commands ran"),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let agent = AgentLoop::new(
            mock_client.clone(),
            tool_executor,
            test_tool_context(),
            AgentConfig::default(),
        );

        let result = agent.execute(&[], "Run both", None).await.unwrap();

        assert_eq!(result.text, "Both commands ran");
        assert_eq!(result.tool_calls_count, 2);
        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2); // Only 2 LLM calls

        // Messages: user → assistant(2 tools) → user(tool_result) → user(tool_result) → assistant(text)
        assert_eq!(result.messages[0].role, "user");
        assert_eq!(result.messages[1].role, "assistant");
        assert_eq!(result.messages[2].role, "user"); // tool result 1
        assert_eq!(result.messages[3].role, "user"); // tool result 2
        assert_eq!(result.messages[4].role, "assistant");
    }
5807
5808    #[tokio::test]
5809    async fn test_agent_token_usage_accumulation() {
5810        // Verify usage sums across multiple turns
5811        let mock_client = Arc::new(MockLlmClient::new(vec![
5812            MockLlmClient::tool_call_response(
5813                "t1",
5814                "bash",
5815                serde_json::json!({"command": "echo x"}),
5816            ),
5817            MockLlmClient::text_response("Done"),
5818        ]));
5819
5820        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5821        let agent = AgentLoop::new(
5822            mock_client,
5823            tool_executor,
5824            test_tool_context(),
5825            AgentConfig::default(),
5826        );
5827
5828        let result = agent.execute(&[], "test", None).await.unwrap();
5829
5830        // Each mock response has prompt=10, completion=5, total=15
5831        // 2 LLM calls → 20 prompt, 10 completion, 30 total
5832        assert_eq!(result.usage.prompt_tokens, 20);
5833        assert_eq!(result.usage.completion_tokens, 10);
5834        assert_eq!(result.usage.total_tokens, 30);
5835    }
5836
5837    #[tokio::test]
5838    async fn test_agent_system_prompt_passed() {
5839        // Verify system prompt is used (MockLlmClient captures calls)
5840        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5841            "I am a coding assistant.",
5842        )]));
5843
5844        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5845        let config = AgentConfig {
5846            prompt_slots: SystemPromptSlots {
5847                extra: Some("You are a coding assistant.".to_string()),
5848                ..Default::default()
5849            },
5850            ..Default::default()
5851        };
5852
5853        let agent = AgentLoop::new(
5854            mock_client.clone(),
5855            tool_executor,
5856            test_tool_context(),
5857            config,
5858        );
5859        let result = agent.execute(&[], "What are you?", None).await.unwrap();
5860
5861        assert_eq!(result.text, "I am a coding assistant.");
5862        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
5863    }
5864
5865    #[tokio::test]
5866    async fn test_agent_max_rounds_with_persistent_tool_calls() {
5867        // LLM keeps calling tools forever — should hit max_tool_rounds
5868        let mut responses = Vec::new();
5869        for i in 0..15 {
5870            responses.push(MockLlmClient::tool_call_response(
5871                &format!("t{}", i),
5872                "bash",
5873                serde_json::json!({"command": format!("echo round{}", i)}),
5874            ));
5875        }
5876
5877        let mock_client = Arc::new(MockLlmClient::new(responses));
5878        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5879        let config = AgentConfig {
5880            max_tool_rounds: 5,
5881            ..Default::default()
5882        };
5883
5884        let agent = AgentLoop::new(
5885            mock_client.clone(),
5886            tool_executor,
5887            test_tool_context(),
5888            config,
5889        );
5890        let result = agent.execute(&[], "Loop forever", None).await;
5891
5892        assert!(result.is_err());
5893        let err = result.unwrap_err().to_string();
5894        assert!(err.contains("Max tool rounds (5) exceeded"));
5895    }
5896
    #[tokio::test]
    async fn test_agent_end_event_contains_final_text() {
        // The terminal End event must echo the final response text and the
        // accumulated token usage (one mock call → 15 total tokens).
        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
            "Final answer here",
        )]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let agent = AgentLoop::new(
            mock_client,
            tool_executor,
            test_tool_context(),
            AgentConfig::default(),
        );

        let (tx, rx) = mpsc::channel(100);
        agent.execute(&[], "test", Some(tx)).await.unwrap();

        let events = collect_events(rx).await;
        let end_event = events.iter().find(|e| matches!(e, AgentEvent::End { .. }));
        assert!(end_event.is_some());

        if let AgentEvent::End { text, usage, .. } = end_event.unwrap() {
            assert_eq!(text, "Final answer here");
            assert_eq!(usage.total_tokens, 15);
        }
    }
5923}
5924
5925#[cfg(test)]
5926mod extra_agent_tests {
5927    use super::*;
5928    use crate::agent::tests::MockLlmClient;
5929    use crate::queue::SessionQueueConfig;
5930    use crate::tools::ToolExecutor;
5931    use std::path::PathBuf;
5932    use std::sync::atomic::{AtomicUsize, Ordering};
5933
5934    fn test_tool_context() -> ToolContext {
5935        ToolContext::new(PathBuf::from("/tmp"))
5936    }
5937
5938    // ========================================================================
5939    // AgentConfig
5940    // ========================================================================
5941
5942    #[test]
5943    fn test_agent_config_debug() {
5944        let config = AgentConfig {
5945            prompt_slots: SystemPromptSlots {
5946                extra: Some("You are helpful".to_string()),
5947                ..Default::default()
5948            },
5949            tools: vec![],
5950            max_tool_rounds: 10,
5951            permission_checker: None,
5952            confirmation_manager: None,
5953            context_providers: vec![],
5954            planning_mode: PlanningMode::Enabled,
5955            goal_tracking: false,
5956            hook_engine: None,
5957            skill_registry: None,
5958            ..AgentConfig::default()
5959        };
5960        let debug = format!("{:?}", config);
5961        assert!(debug.contains("AgentConfig"));
5962        assert!(debug.contains("planning_mode"));
5963    }
5964
5965    #[test]
5966    fn test_agent_config_default_values() {
5967        let config = AgentConfig::default();
5968        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
5969        assert_eq!(config.planning_mode, PlanningMode::Auto);
5970        assert!(!config.goal_tracking);
5971        assert!(config.context_providers.is_empty());
5972    }
5973
5974    // ========================================================================
5975    // AgentEvent serialization
5976    // ========================================================================
5977
5978    #[test]
5979    fn test_agent_event_serialize_start() {
5980        let event = AgentEvent::Start {
5981            prompt: "Hello".to_string(),
5982        };
5983        let json = serde_json::to_string(&event).unwrap();
5984        assert!(json.contains("agent_start"));
5985        assert!(json.contains("Hello"));
5986    }
5987
5988    #[test]
5989    fn test_agent_event_serialize_text_delta() {
5990        let event = AgentEvent::TextDelta {
5991            text: "chunk".to_string(),
5992        };
5993        let json = serde_json::to_string(&event).unwrap();
5994        assert!(json.contains("text_delta"));
5995    }
5996
5997    #[test]
5998    fn test_agent_event_serialize_tool_start() {
5999        let event = AgentEvent::ToolStart {
6000            id: "t1".to_string(),
6001            name: "bash".to_string(),
6002        };
6003        let json = serde_json::to_string(&event).unwrap();
6004        assert!(json.contains("tool_start"));
6005        assert!(json.contains("bash"));
6006    }
6007
6008    #[test]
6009    fn test_agent_event_serialize_tool_end() {
6010        let event = AgentEvent::ToolEnd {
6011            id: "t1".to_string(),
6012            name: "bash".to_string(),
6013            output: "hello".to_string(),
6014            exit_code: 0,
6015            metadata: None,
6016        };
6017        let json = serde_json::to_string(&event).unwrap();
6018        assert!(json.contains("tool_end"));
6019    }
6020
6021    #[test]
6022    fn test_agent_event_tool_end_has_metadata_field() {
6023        let event = AgentEvent::ToolEnd {
6024            id: "t1".to_string(),
6025            name: "write".to_string(),
6026            output: "Wrote 5 bytes".to_string(),
6027            exit_code: 0,
6028            metadata: Some(
6029                serde_json::json!({ "before": "old", "after": "new", "file_path": "f.txt" }),
6030            ),
6031        };
6032        let json = serde_json::to_string(&event).unwrap();
6033        assert!(json.contains("\"before\""));
6034    }
6035
6036    #[test]
6037    fn test_agent_event_serialize_error() {
6038        let event = AgentEvent::Error {
6039            message: "oops".to_string(),
6040        };
6041        let json = serde_json::to_string(&event).unwrap();
6042        assert!(json.contains("error"));
6043        assert!(json.contains("oops"));
6044    }
6045
6046    #[test]
6047    fn test_agent_event_serialize_confirmation_required() {
6048        let event = AgentEvent::ConfirmationRequired {
6049            tool_id: "t1".to_string(),
6050            tool_name: "bash".to_string(),
6051            args: serde_json::json!({"cmd": "rm"}),
6052            timeout_ms: 30000,
6053        };
6054        let json = serde_json::to_string(&event).unwrap();
6055        assert!(json.contains("confirmation_required"));
6056    }
6057
6058    #[test]
6059    fn test_agent_event_serialize_confirmation_received() {
6060        let event = AgentEvent::ConfirmationReceived {
6061            tool_id: "t1".to_string(),
6062            approved: true,
6063            reason: Some("safe".to_string()),
6064        };
6065        let json = serde_json::to_string(&event).unwrap();
6066        assert!(json.contains("confirmation_received"));
6067    }
6068
6069    #[test]
6070    fn test_agent_event_serialize_confirmation_timeout() {
6071        let event = AgentEvent::ConfirmationTimeout {
6072            tool_id: "t1".to_string(),
6073            action_taken: "rejected".to_string(),
6074        };
6075        let json = serde_json::to_string(&event).unwrap();
6076        assert!(json.contains("confirmation_timeout"));
6077    }
6078
6079    #[test]
6080    fn test_agent_event_serialize_external_task_pending() {
6081        let event = AgentEvent::ExternalTaskPending {
6082            task_id: "task-1".to_string(),
6083            session_id: "sess-1".to_string(),
6084            lane: crate::hitl::SessionLane::Execute,
6085            command_type: "bash".to_string(),
6086            payload: serde_json::json!({}),
6087            timeout_ms: 60000,
6088        };
6089        let json = serde_json::to_string(&event).unwrap();
6090        assert!(json.contains("external_task_pending"));
6091    }
6092
6093    #[test]
6094    fn test_agent_event_serialize_external_task_completed() {
6095        let event = AgentEvent::ExternalTaskCompleted {
6096            task_id: "task-1".to_string(),
6097            session_id: "sess-1".to_string(),
6098            success: false,
6099        };
6100        let json = serde_json::to_string(&event).unwrap();
6101        assert!(json.contains("external_task_completed"));
6102    }
6103
6104    #[test]
6105    fn test_agent_event_serialize_permission_denied() {
6106        let event = AgentEvent::PermissionDenied {
6107            tool_id: "t1".to_string(),
6108            tool_name: "bash".to_string(),
6109            args: serde_json::json!({}),
6110            reason: "denied".to_string(),
6111        };
6112        let json = serde_json::to_string(&event).unwrap();
6113        assert!(json.contains("permission_denied"));
6114    }
6115
6116    #[test]
6117    fn test_agent_event_serialize_context_compacted() {
6118        let event = AgentEvent::ContextCompacted {
6119            session_id: "sess-1".to_string(),
6120            before_messages: 100,
6121            after_messages: 20,
6122            percent_before: 0.85,
6123        };
6124        let json = serde_json::to_string(&event).unwrap();
6125        assert!(json.contains("context_compacted"));
6126    }
6127
6128    #[test]
6129    fn test_agent_event_serialize_turn_start() {
6130        let event = AgentEvent::TurnStart { turn: 3 };
6131        let json = serde_json::to_string(&event).unwrap();
6132        assert!(json.contains("turn_start"));
6133    }
6134
6135    #[test]
6136    fn test_agent_event_serialize_turn_end() {
6137        let event = AgentEvent::TurnEnd {
6138            turn: 3,
6139            usage: TokenUsage::default(),
6140        };
6141        let json = serde_json::to_string(&event).unwrap();
6142        assert!(json.contains("turn_end"));
6143    }
6144
6145    #[test]
6146    fn test_agent_event_serialize_end() {
6147        let event = AgentEvent::End {
6148            text: "Done".to_string(),
6149            usage: TokenUsage {
6150                prompt_tokens: 100,
6151                completion_tokens: 50,
6152                total_tokens: 150,
6153                cache_read_tokens: None,
6154                cache_write_tokens: None,
6155            },
6156            meta: None,
6157        };
6158        let json = serde_json::to_string(&event).unwrap();
6159        assert!(json.contains("agent_end"));
6160    }
6161
6162    // ========================================================================
6163    // AgentResult
6164    // ========================================================================
6165
6166    #[test]
6167    fn test_agent_result_fields() {
6168        let result = AgentResult {
6169            text: "output".to_string(),
6170            messages: vec![Message::user("hello")],
6171            usage: TokenUsage::default(),
6172            tool_calls_count: 3,
6173        };
6174        assert_eq!(result.text, "output");
6175        assert_eq!(result.messages.len(), 1);
6176        assert_eq!(result.tool_calls_count, 3);
6177    }
6178
6179    // ========================================================================
    // Additional AgentEvent serialization tests (variants not covered above)
6181    // ========================================================================
6182
6183    #[test]
6184    fn test_agent_event_serialize_context_resolving() {
6185        let event = AgentEvent::ContextResolving {
6186            providers: vec!["provider1".to_string(), "provider2".to_string()],
6187        };
6188        let json = serde_json::to_string(&event).unwrap();
6189        assert!(json.contains("context_resolving"));
6190        assert!(json.contains("provider1"));
6191    }
6192
6193    #[test]
6194    fn test_agent_event_serialize_context_resolved() {
6195        let event = AgentEvent::ContextResolved {
6196            total_items: 5,
6197            total_tokens: 1000,
6198        };
6199        let json = serde_json::to_string(&event).unwrap();
6200        assert!(json.contains("context_resolved"));
6201        assert!(json.contains("1000"));
6202    }
6203
6204    #[test]
6205    fn test_agent_event_serialize_command_dead_lettered() {
6206        let event = AgentEvent::CommandDeadLettered {
6207            command_id: "cmd-1".to_string(),
6208            command_type: "bash".to_string(),
6209            lane: "execute".to_string(),
6210            error: "timeout".to_string(),
6211            attempts: 3,
6212        };
6213        let json = serde_json::to_string(&event).unwrap();
6214        assert!(json.contains("command_dead_lettered"));
6215        assert!(json.contains("cmd-1"));
6216    }
6217
6218    #[test]
6219    fn test_agent_event_serialize_command_retry() {
6220        let event = AgentEvent::CommandRetry {
6221            command_id: "cmd-2".to_string(),
6222            command_type: "read".to_string(),
6223            lane: "query".to_string(),
6224            attempt: 2,
6225            delay_ms: 1000,
6226        };
6227        let json = serde_json::to_string(&event).unwrap();
6228        assert!(json.contains("command_retry"));
6229        assert!(json.contains("cmd-2"));
6230    }
6231
6232    #[test]
6233    fn test_agent_event_serialize_queue_alert() {
6234        let event = AgentEvent::QueueAlert {
6235            level: "warning".to_string(),
6236            alert_type: "depth".to_string(),
6237            message: "Queue depth exceeded".to_string(),
6238        };
6239        let json = serde_json::to_string(&event).unwrap();
6240        assert!(json.contains("queue_alert"));
6241        assert!(json.contains("warning"));
6242    }
6243
6244    #[test]
6245    fn test_agent_event_serialize_task_updated() {
6246        let event = AgentEvent::TaskUpdated {
6247            session_id: "sess-1".to_string(),
6248            tasks: vec![],
6249        };
6250        let json = serde_json::to_string(&event).unwrap();
6251        assert!(json.contains("task_updated"));
6252        assert!(json.contains("sess-1"));
6253    }
6254
6255    #[test]
6256    fn test_agent_event_serialize_memory_stored() {
6257        let event = AgentEvent::MemoryStored {
6258            memory_id: "mem-1".to_string(),
6259            memory_type: "conversation".to_string(),
6260            importance: 0.8,
6261            tags: vec!["important".to_string()],
6262        };
6263        let json = serde_json::to_string(&event).unwrap();
6264        assert!(json.contains("memory_stored"));
6265        assert!(json.contains("mem-1"));
6266    }
6267
6268    #[test]
6269    fn test_agent_event_serialize_memory_recalled() {
6270        let event = AgentEvent::MemoryRecalled {
6271            memory_id: "mem-2".to_string(),
6272            content: "Previous conversation".to_string(),
6273            relevance: 0.9,
6274        };
6275        let json = serde_json::to_string(&event).unwrap();
6276        assert!(json.contains("memory_recalled"));
6277        assert!(json.contains("mem-2"));
6278    }
6279
6280    #[test]
6281    fn test_agent_event_serialize_memories_searched() {
6282        let event = AgentEvent::MemoriesSearched {
6283            query: Some("search term".to_string()),
6284            tags: vec!["tag1".to_string()],
6285            result_count: 5,
6286        };
6287        let json = serde_json::to_string(&event).unwrap();
6288        assert!(json.contains("memories_searched"));
6289        assert!(json.contains("search term"));
6290    }
6291
6292    #[test]
6293    fn test_agent_event_serialize_memory_cleared() {
6294        let event = AgentEvent::MemoryCleared {
6295            tier: "short_term".to_string(),
6296            count: 10,
6297        };
6298        let json = serde_json::to_string(&event).unwrap();
6299        assert!(json.contains("memory_cleared"));
6300        assert!(json.contains("short_term"));
6301    }
6302
6303    #[test]
6304    fn test_agent_event_serialize_subagent_start() {
6305        let event = AgentEvent::SubagentStart {
6306            task_id: "task-1".to_string(),
6307            session_id: "child-sess".to_string(),
6308            parent_session_id: "parent-sess".to_string(),
6309            agent: "explore".to_string(),
6310            description: "Explore codebase".to_string(),
6311        };
6312        let json = serde_json::to_string(&event).unwrap();
6313        assert!(json.contains("subagent_start"));
6314        assert!(json.contains("explore"));
6315    }
6316
6317    #[test]
6318    fn test_agent_event_serialize_subagent_progress() {
6319        let event = AgentEvent::SubagentProgress {
6320            task_id: "task-1".to_string(),
6321            session_id: "child-sess".to_string(),
6322            status: "processing".to_string(),
6323            metadata: serde_json::json!({"progress": 50}),
6324        };
6325        let json = serde_json::to_string(&event).unwrap();
6326        assert!(json.contains("subagent_progress"));
6327        assert!(json.contains("processing"));
6328    }
6329
6330    #[test]
6331    fn test_agent_event_serialize_subagent_end() {
6332        let event = AgentEvent::SubagentEnd {
6333            task_id: "task-1".to_string(),
6334            session_id: "child-sess".to_string(),
6335            agent: "explore".to_string(),
6336            output: "Found 10 files".to_string(),
6337            success: true,
6338        };
6339        let json = serde_json::to_string(&event).unwrap();
6340        assert!(json.contains("subagent_end"));
6341        assert!(json.contains("Found 10 files"));
6342    }
6343
6344    #[test]
6345    fn test_agent_event_serialize_planning_start() {
6346        let event = AgentEvent::PlanningStart {
6347            prompt: "Build a web app".to_string(),
6348        };
6349        let json = serde_json::to_string(&event).unwrap();
6350        assert!(json.contains("planning_start"));
6351        assert!(json.contains("Build a web app"));
6352    }
6353
6354    #[test]
6355    fn test_agent_event_serialize_planning_end() {
6356        use crate::planning::{Complexity, ExecutionPlan};
6357        let plan = ExecutionPlan::new("Test goal".to_string(), Complexity::Simple);
6358        let event = AgentEvent::PlanningEnd {
6359            plan,
6360            estimated_steps: 3,
6361        };
6362        let json = serde_json::to_string(&event).unwrap();
6363        assert!(json.contains("planning_end"));
6364        assert!(json.contains("estimated_steps"));
6365    }
6366
6367    #[test]
6368    fn test_agent_event_serialize_step_start() {
6369        let event = AgentEvent::StepStart {
6370            step_id: "step-1".to_string(),
6371            description: "Initialize project".to_string(),
6372            step_number: 1,
6373            total_steps: 5,
6374        };
6375        let json = serde_json::to_string(&event).unwrap();
6376        assert!(json.contains("step_start"));
6377        assert!(json.contains("Initialize project"));
6378    }
6379
6380    #[test]
6381    fn test_agent_event_serialize_step_end() {
6382        let event = AgentEvent::StepEnd {
6383            step_id: "step-1".to_string(),
6384            status: TaskStatus::Completed,
6385            step_number: 1,
6386            total_steps: 5,
6387        };
6388        let json = serde_json::to_string(&event).unwrap();
6389        assert!(json.contains("step_end"));
6390        assert!(json.contains("step-1"));
6391    }
6392
6393    #[test]
6394    fn test_agent_event_serialize_goal_extracted() {
6395        use crate::planning::AgentGoal;
6396        let goal = AgentGoal::new("Complete the task".to_string());
6397        let event = AgentEvent::GoalExtracted { goal };
6398        let json = serde_json::to_string(&event).unwrap();
6399        assert!(json.contains("goal_extracted"));
6400    }
6401
6402    #[test]
6403    fn test_agent_event_serialize_goal_progress() {
6404        let event = AgentEvent::GoalProgress {
6405            goal: "Build app".to_string(),
6406            progress: 0.5,
6407            completed_steps: 2,
6408            total_steps: 4,
6409        };
6410        let json = serde_json::to_string(&event).unwrap();
6411        assert!(json.contains("goal_progress"));
6412        assert!(json.contains("0.5"));
6413    }
6414
6415    #[test]
6416    fn test_agent_event_serialize_goal_achieved() {
6417        let event = AgentEvent::GoalAchieved {
6418            goal: "Build app".to_string(),
6419            total_steps: 4,
6420            duration_ms: 5000,
6421        };
6422        let json = serde_json::to_string(&event).unwrap();
6423        assert!(json.contains("goal_achieved"));
6424        assert!(json.contains("5000"));
6425    }
6426
6427    #[tokio::test]
6428    async fn test_extract_goal_with_json_response() {
6429        // LlmPlanner expects JSON with "description" and "success_criteria" fields
6430        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6431            r#"{"description": "Build web app", "success_criteria": ["App runs on port 3000", "Has login page"]}"#,
6432        )]));
6433        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6434        let agent = AgentLoop::new(
6435            mock_client,
6436            tool_executor,
6437            test_tool_context(),
6438            AgentConfig::default(),
6439        );
6440
6441        let goal = agent.extract_goal("Build a web app").await.unwrap();
6442        assert_eq!(goal.description, "Build web app");
6443        assert_eq!(goal.success_criteria.len(), 2);
6444        assert_eq!(goal.success_criteria[0], "App runs on port 3000");
6445    }
6446
6447    #[tokio::test]
6448    async fn test_extract_goal_fallback_on_non_json() {
6449        // Non-JSON response triggers fallback: returns the original prompt as goal
6450        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6451            "Some non-JSON response",
6452        )]));
6453        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6454        let agent = AgentLoop::new(
6455            mock_client,
6456            tool_executor,
6457            test_tool_context(),
6458            AgentConfig::default(),
6459        );
6460
6461        let goal = agent.extract_goal("Do something").await.unwrap();
6462        // Fallback uses the original prompt as description
6463        assert_eq!(goal.description, "Do something");
6464        // Fallback adds 2 generic criteria
6465        assert_eq!(goal.success_criteria.len(), 2);
6466    }
6467
6468    #[tokio::test]
6469    async fn test_check_goal_achievement_json_yes() {
6470        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6471            r#"{"achieved": true, "progress": 1.0, "remaining_criteria": []}"#,
6472        )]));
6473        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6474        let agent = AgentLoop::new(
6475            mock_client,
6476            tool_executor,
6477            test_tool_context(),
6478            AgentConfig::default(),
6479        );
6480
6481        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
6482        let achieved = agent
6483            .check_goal_achievement(&goal, "All done")
6484            .await
6485            .unwrap();
6486        assert!(achieved);
6487    }
6488
6489    #[tokio::test]
6490    async fn test_check_goal_achievement_fallback_not_done() {
6491        // Non-JSON response triggers heuristic fallback
6492        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6493            "invalid json",
6494        )]));
6495        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6496        let agent = AgentLoop::new(
6497            mock_client,
6498            tool_executor,
6499            test_tool_context(),
6500            AgentConfig::default(),
6501        );
6502
6503        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
6504        // "still working" doesn't contain "complete"/"done"/"finished"
6505        let achieved = agent
6506            .check_goal_achievement(&goal, "still working")
6507            .await
6508            .unwrap();
6509        assert!(!achieved);
6510    }
6511
6512    // ========================================================================
6513    // build_augmented_system_prompt Tests
6514    // ========================================================================
6515
6516    #[test]
6517    fn test_build_augmented_system_prompt_empty_context() {
6518        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6519        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6520        let config = AgentConfig {
6521            prompt_slots: SystemPromptSlots {
6522                extra: Some("Base prompt".to_string()),
6523                ..Default::default()
6524            },
6525            ..Default::default()
6526        };
6527        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6528
6529        let result = agent.build_augmented_system_prompt(&[]);
6530        assert!(result.unwrap().contains("Base prompt"));
6531    }
6532
6533    #[test]
6534    fn test_build_augmented_system_prompt_no_custom_slots() {
6535        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6536        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6537        let agent = AgentLoop::new(
6538            mock_client,
6539            tool_executor,
6540            test_tool_context(),
6541            AgentConfig::default(),
6542        );
6543
6544        let result = agent.build_augmented_system_prompt(&[]);
6545        // Default slots still produce the default agentic prompt
6546        assert!(result.is_some());
6547        assert!(result.unwrap().contains("Core Behaviour"));
6548    }
6549
6550    #[test]
6551    fn test_build_augmented_system_prompt_with_context_no_base() {
6552        use crate::context::{ContextItem, ContextResult, ContextType};
6553
6554        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6555        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6556        let agent = AgentLoop::new(
6557            mock_client,
6558            tool_executor,
6559            test_tool_context(),
6560            AgentConfig::default(),
6561        );
6562
6563        let context = vec![ContextResult {
6564            provider: "test".to_string(),
6565            items: vec![ContextItem::new("id1", ContextType::Resource, "Content")],
6566            total_tokens: 10,
6567            truncated: false,
6568        }];
6569
6570        let result = agent.build_augmented_system_prompt(&context);
6571        assert!(result.is_some());
6572        let text = result.unwrap();
6573        assert!(text.contains("<context"));
6574        assert!(text.contains("Content"));
6575    }
6576
6577    // ========================================================================
6578    // AgentResult Clone and Debug
6579    // ========================================================================
6580
6581    #[test]
6582    fn test_agent_result_clone() {
6583        let result = AgentResult {
6584            text: "output".to_string(),
6585            messages: vec![Message::user("hello")],
6586            usage: TokenUsage::default(),
6587            tool_calls_count: 3,
6588        };
6589        let cloned = result.clone();
6590        assert_eq!(cloned.text, result.text);
6591        assert_eq!(cloned.tool_calls_count, result.tool_calls_count);
6592    }
6593
6594    #[test]
6595    fn test_agent_result_debug() {
6596        let result = AgentResult {
6597            text: "output".to_string(),
6598            messages: vec![Message::user("hello")],
6599            usage: TokenUsage::default(),
6600            tool_calls_count: 3,
6601        };
6602        let debug = format!("{:?}", result);
6603        assert!(debug.contains("AgentResult"));
6604        assert!(debug.contains("output"));
6605    }
6606
6607    // ========================================================================
    // handle_post_execution_metadata Tests (placeholder — no tests yet)
6609    // ========================================================================
6610
6611    // ========================================================================
6612    // ToolCommand adapter tests
6613    // ========================================================================
6614
6615    #[tokio::test]
6616    async fn test_tool_command_command_type() {
6617        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6618        let cmd = ToolCommand {
6619            tool_executor: executor,
6620            tool_name: "read".to_string(),
6621            tool_args: serde_json::json!({"file": "test.rs"}),
6622            skill_registry: None,
6623            tool_context: test_tool_context(),
6624        };
6625        assert_eq!(cmd.command_type(), "read");
6626    }
6627
6628    #[tokio::test]
6629    async fn test_tool_command_payload() {
6630        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6631        let args = serde_json::json!({"file": "test.rs", "offset": 10});
6632        let cmd = ToolCommand {
6633            tool_executor: executor,
6634            tool_name: "read".to_string(),
6635            tool_args: args.clone(),
6636            skill_registry: None,
6637            tool_context: test_tool_context(),
6638        };
6639        assert_eq!(cmd.payload(), args);
6640    }
6641
6642    // ========================================================================
6643    // AgentLoop with queue builder tests
6644    // ========================================================================
6645
6646    #[tokio::test(flavor = "multi_thread")]
6647    async fn test_agent_loop_with_queue() {
6648        use tokio::sync::broadcast;
6649
6650        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6651            "Hello",
6652        )]));
6653        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6654        let config = AgentConfig::default();
6655
6656        let (event_tx, _) = broadcast::channel(100);
6657        let queue = SessionLaneQueue::new("test-session", SessionQueueConfig::default(), event_tx)
6658            .await
6659            .unwrap();
6660
6661        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config)
6662            .with_queue(Arc::new(queue));
6663
6664        assert!(agent.command_queue.is_some());
6665    }
6666
6667    #[tokio::test]
6668    async fn test_agent_loop_without_queue() {
6669        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6670            "Hello",
6671        )]));
6672        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6673        let config = AgentConfig::default();
6674
6675        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6676
6677        assert!(agent.command_queue.is_none());
6678    }
6679
6680    // ========================================================================
6681    // Parallel Plan Execution Tests
6682    // ========================================================================
6683
6684    #[tokio::test]
6685    async fn test_execute_plan_parallel_independent() {
6686        use crate::planning::{Complexity, ExecutionPlan, Task};
6687
6688        // 3 independent steps (no dependencies) — should all execute.
6689        // MockLlmClient needs one response per execute_loop call per step.
6690        let mock_client = Arc::new(MockLlmClient::new(vec![
6691            MockLlmClient::text_response("Step 1 done"),
6692            MockLlmClient::text_response("Step 2 done"),
6693            MockLlmClient::text_response("Step 3 done"),
6694        ]));
6695
6696        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6697        let config = AgentConfig::default();
6698        let agent = AgentLoop::new(
6699            mock_client.clone(),
6700            tool_executor,
6701            test_tool_context(),
6702            config,
6703        );
6704
6705        let mut plan = ExecutionPlan::new("Test parallel", Complexity::Simple);
6706        plan.add_step(Task::new("s1", "First step"));
6707        plan.add_step(Task::new("s2", "Second step"));
6708        plan.add_step(Task::new("s3", "Third step"));
6709
6710        let (tx, mut rx) = mpsc::channel(100);
6711        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
6712
6713        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
6714        assert_eq!(result.usage.total_tokens, 45);
6715
6716        // Verify we received StepStart and StepEnd events for all 3 steps
6717        let mut step_starts = Vec::new();
6718        let mut step_ends = Vec::new();
6719        rx.close();
6720        while let Some(event) = rx.recv().await {
6721            match event {
6722                AgentEvent::StepStart { step_id, .. } => step_starts.push(step_id),
6723                AgentEvent::StepEnd {
6724                    step_id, status, ..
6725                } => {
6726                    assert_eq!(status, TaskStatus::Completed);
6727                    step_ends.push(step_id);
6728                }
6729                _ => {}
6730            }
6731        }
6732        assert_eq!(step_starts.len(), 3);
6733        assert_eq!(step_ends.len(), 3);
6734    }
6735
6736    #[tokio::test]
6737    async fn test_execute_plan_respects_dependencies() {
6738        use crate::planning::{Complexity, ExecutionPlan, Task};
6739
6740        // s1 and s2 are independent (wave 1), s3 depends on both (wave 2).
6741        // This requires 3 responses total.
6742        let mock_client = Arc::new(MockLlmClient::new(vec![
6743            MockLlmClient::text_response("Step 1 done"),
6744            MockLlmClient::text_response("Step 2 done"),
6745            MockLlmClient::text_response("Step 3 done"),
6746        ]));
6747
6748        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6749        let config = AgentConfig::default();
6750        let agent = AgentLoop::new(
6751            mock_client.clone(),
6752            tool_executor,
6753            test_tool_context(),
6754            config,
6755        );
6756
6757        let mut plan = ExecutionPlan::new("Test deps", Complexity::Medium);
6758        plan.add_step(Task::new("s1", "Independent A"));
6759        plan.add_step(Task::new("s2", "Independent B"));
6760        plan.add_step(
6761            Task::new("s3", "Depends on A+B")
6762                .with_dependencies(vec!["s1".to_string(), "s2".to_string()]),
6763        );
6764
6765        let (tx, mut rx) = mpsc::channel(100);
6766        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
6767
6768        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
6769        assert_eq!(result.usage.total_tokens, 45);
6770
6771        // Verify ordering: s3's StepStart must come after s1 and s2's StepEnd
6772        let mut events = Vec::new();
6773        rx.close();
6774        while let Some(event) = rx.recv().await {
6775            match &event {
6776                AgentEvent::StepStart { step_id, .. } => {
6777                    events.push(format!("start:{}", step_id));
6778                }
6779                AgentEvent::StepEnd { step_id, .. } => {
6780                    events.push(format!("end:{}", step_id));
6781                }
6782                _ => {}
6783            }
6784        }
6785
6786        // s3 start must occur after both s1 end and s2 end
6787        let s1_end = events.iter().position(|e| e == "end:s1").unwrap();
6788        let s2_end = events.iter().position(|e| e == "end:s2").unwrap();
6789        let s3_start = events.iter().position(|e| e == "start:s3").unwrap();
6790        assert!(
6791            s3_start > s1_end,
6792            "s3 started before s1 ended: {:?}",
6793            events
6794        );
6795        assert!(
6796            s3_start > s2_end,
6797            "s3 started before s2 ended: {:?}",
6798            events
6799        );
6800
6801        // Final result should reflect step 3 (last sequential step)
6802        assert!(result.text.contains("Step 3 done") || !result.text.is_empty());
6803    }
6804
6805    #[tokio::test]
6806    async fn test_execute_plan_handles_step_failure() {
6807        use crate::planning::{Complexity, ExecutionPlan, Task};
6808
6809        // s1 succeeds, s2 depends on s1 (succeeds), s3 depends on nothing (succeeds),
6810        // s4 depends on a step that will fail (s_fail).
6811        // We simulate failure by providing no responses for s_fail's execute_loop.
6812        //
6813        // Simpler approach: s1 succeeds, s2 depends on s1 (will fail because no
6814        // mock response left), s3 is independent.
6815        // Layout: s1 (independent), s3 (independent) → wave 1 parallel
6816        //         s2 depends on s1 → wave 2
6817        //         s4 depends on s2 → wave 3 (should deadlock since s2 fails)
6818        let mock_client = Arc::new(MockLlmClient::new(vec![
6819            // Wave 1: s1 and s3 execute in parallel
6820            MockLlmClient::text_response("s1 done"),
6821            MockLlmClient::text_response("s3 done"),
6822            // Wave 2: s2 executes — but we give it no response, causing failure
6823            // Actually the MockLlmClient will fail with "No more mock responses"
6824        ]));
6825
6826        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6827        let config = AgentConfig::default();
6828        let agent = AgentLoop::new(
6829            mock_client.clone(),
6830            tool_executor,
6831            test_tool_context(),
6832            config,
6833        );
6834
6835        let mut plan = ExecutionPlan::new("Test failure", Complexity::Medium);
6836        plan.add_step(Task::new("s1", "Independent step"));
6837        plan.add_step(Task::new("s2", "Depends on s1").with_dependencies(vec!["s1".to_string()]));
6838        plan.add_step(Task::new("s3", "Another independent"));
6839        plan.add_step(Task::new("s4", "Depends on s2").with_dependencies(vec!["s2".to_string()]));
6840
6841        let (tx, mut rx) = mpsc::channel(100);
6842        let _result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
6843
6844        // s1 and s3 should succeed (wave 1), s2 should fail (wave 2),
6845        // s4 should never execute (deadlock — dep s2 failed, not completed)
6846        let mut completed_steps = Vec::new();
6847        let mut failed_steps = Vec::new();
6848        rx.close();
6849        while let Some(event) = rx.recv().await {
6850            if let AgentEvent::StepEnd {
6851                step_id, status, ..
6852            } = event
6853            {
6854                match status {
6855                    TaskStatus::Completed => completed_steps.push(step_id),
6856                    TaskStatus::Failed => failed_steps.push(step_id),
6857                    _ => {}
6858                }
6859            }
6860        }
6861
6862        assert!(
6863            completed_steps.contains(&"s1".to_string()),
6864            "s1 should complete"
6865        );
6866        assert!(
6867            completed_steps.contains(&"s3".to_string()),
6868            "s3 should complete"
6869        );
6870        assert!(failed_steps.contains(&"s2".to_string()), "s2 should fail");
6871        // s4 should NOT appear in either list — it was never started
6872        assert!(
6873            !completed_steps.contains(&"s4".to_string()),
6874            "s4 should not complete"
6875        );
6876        assert!(
6877            !failed_steps.contains(&"s4".to_string()),
6878            "s4 should not fail (never started)"
6879        );
6880    }
6881
6882    // ========================================================================
6883    // Phase 4: Error Recovery & Resilience Tests
6884    // ========================================================================
6885
6886    #[test]
6887    fn test_agent_config_resilience_defaults() {
6888        let config = AgentConfig::default();
6889        assert_eq!(config.max_parse_retries, 2);
6890        assert_eq!(config.tool_timeout_ms, None);
6891        assert_eq!(config.circuit_breaker_threshold, 3);
6892    }
6893
6894    /// 4.1 — Parse error recovery: bails after max_parse_retries exceeded
6895    #[tokio::test]
6896    async fn test_parse_error_recovery_bails_after_threshold() {
6897        // 3 parse errors with max_parse_retries=2: count reaches 3 > 2 → bail
6898        let mock_client = Arc::new(MockLlmClient::new(vec![
6899            MockLlmClient::tool_call_response(
6900                "c1",
6901                "bash",
6902                serde_json::json!({"__parse_error": "unexpected token at position 5"}),
6903            ),
6904            MockLlmClient::tool_call_response(
6905                "c2",
6906                "bash",
6907                serde_json::json!({"__parse_error": "missing closing brace"}),
6908            ),
6909            MockLlmClient::tool_call_response(
6910                "c3",
6911                "bash",
6912                serde_json::json!({"__parse_error": "still broken"}),
6913            ),
6914            MockLlmClient::text_response("Done"), // never reached
6915        ]));
6916
6917        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6918        let config = AgentConfig {
6919            max_parse_retries: 2,
6920            ..AgentConfig::default()
6921        };
6922        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6923        let result = agent.execute(&[], "Do something", None).await;
6924        assert!(result.is_err(), "should bail after parse error threshold");
6925        let err = result.unwrap_err().to_string();
6926        assert!(
6927            err.contains("malformed tool arguments"),
6928            "error should mention malformed tool arguments, got: {}",
6929            err
6930        );
6931    }
6932
6933    /// 4.1 — Parse error recovery: counter resets after a valid tool execution
6934    #[tokio::test]
6935    async fn test_parse_error_counter_resets_on_success() {
6936        // 2 parse errors (= max_parse_retries, not yet exceeded)
6937        // Then a valid tool call (resets counter)
6938        // Then final text — should NOT bail
6939        let mock_client = Arc::new(MockLlmClient::new(vec![
6940            MockLlmClient::tool_call_response(
6941                "c1",
6942                "bash",
6943                serde_json::json!({"__parse_error": "bad args"}),
6944            ),
6945            MockLlmClient::tool_call_response(
6946                "c2",
6947                "bash",
6948                serde_json::json!({"__parse_error": "bad args again"}),
6949            ),
6950            // Valid call — resets parse_error_count to 0
6951            MockLlmClient::tool_call_response(
6952                "c3",
6953                "bash",
6954                serde_json::json!({"command": "echo ok"}),
6955            ),
6956            MockLlmClient::text_response("All done"),
6957        ]));
6958
6959        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6960        let config = AgentConfig {
6961            max_parse_retries: 2,
6962            ..AgentConfig::default()
6963        };
6964        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6965        let result = agent.execute(&[], "Do something", None).await;
6966        assert!(
6967            result.is_ok(),
6968            "should not bail — counter reset after successful tool, got: {:?}",
6969            result.err()
6970        );
6971        assert_eq!(result.unwrap().text, "All done");
6972    }
6973
6974    /// 4.2 — Tool timeout: slow tool produces a timeout error result; session continues
6975    #[tokio::test]
6976    async fn test_tool_timeout_produces_error_result() {
6977        let mock_client = Arc::new(MockLlmClient::new(vec![
6978            MockLlmClient::tool_call_response(
6979                "t1",
6980                "bash",
6981                serde_json::json!({"command": "sleep 10"}),
6982            ),
6983            MockLlmClient::text_response("The command timed out."),
6984        ]));
6985
6986        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6987        let config = AgentConfig {
6988            // 50ms — sleep 10 will never finish
6989            tool_timeout_ms: Some(50),
6990            ..AgentConfig::default()
6991        };
6992        let agent = AgentLoop::new(
6993            mock_client.clone(),
6994            tool_executor,
6995            test_tool_context(),
6996            config,
6997        );
6998        let result = agent.execute(&[], "Run sleep", None).await;
6999        assert!(
7000            result.is_ok(),
7001            "session should continue after tool timeout: {:?}",
7002            result.err()
7003        );
7004        assert_eq!(result.unwrap().text, "The command timed out.");
7005        // LLM called twice: initial request + response after timeout error
7006        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
7007    }
7008
7009    /// 4.2 — Tool timeout: tool that finishes before the deadline succeeds normally
7010    #[tokio::test]
7011    async fn test_tool_within_timeout_succeeds() {
7012        let mock_client = Arc::new(MockLlmClient::new(vec![
7013            MockLlmClient::tool_call_response(
7014                "t1",
7015                "bash",
7016                serde_json::json!({"command": "echo fast"}),
7017            ),
7018            MockLlmClient::text_response("Command succeeded."),
7019        ]));
7020
7021        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7022        let config = AgentConfig {
7023            tool_timeout_ms: Some(5_000), // 5 s — echo completes in <100ms
7024            ..AgentConfig::default()
7025        };
7026        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
7027        let result = agent.execute(&[], "Run something fast", None).await;
7028        assert!(
7029            result.is_ok(),
7030            "fast tool should succeed: {:?}",
7031            result.err()
7032        );
7033        assert_eq!(result.unwrap().text, "Command succeeded.");
7034    }
7035
7036    /// 4.3 — Circuit breaker: retries non-streaming LLM failures up to threshold
7037    #[tokio::test]
7038    async fn test_circuit_breaker_retries_non_streaming() {
7039        // Empty response list → every call bails with "No more mock responses"
7040        // threshold=2 → tries twice, then bails with circuit-breaker message
7041        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7042
7043        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7044        let config = AgentConfig {
7045            circuit_breaker_threshold: 2,
7046            ..AgentConfig::default()
7047        };
7048        let agent = AgentLoop::new(
7049            mock_client.clone(),
7050            tool_executor,
7051            test_tool_context(),
7052            config,
7053        );
7054        let result = agent.execute(&[], "Hello", None).await;
7055        assert!(result.is_err(), "should fail when LLM always errors");
7056        let err = result.unwrap_err().to_string();
7057        assert!(
7058            err.contains("circuit breaker"),
7059            "error should mention circuit breaker, got: {}",
7060            err
7061        );
7062        assert_eq!(
7063            mock_client.call_count.load(Ordering::SeqCst),
7064            2,
7065            "should make exactly threshold=2 LLM calls"
7066        );
7067    }
7068
7069    /// 4.3 — Circuit breaker: threshold=1 bails on the very first failure
7070    #[tokio::test]
7071    async fn test_circuit_breaker_threshold_one_no_retry() {
7072        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7073
7074        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7075        let config = AgentConfig {
7076            circuit_breaker_threshold: 1,
7077            ..AgentConfig::default()
7078        };
7079        let agent = AgentLoop::new(
7080            mock_client.clone(),
7081            tool_executor,
7082            test_tool_context(),
7083            config,
7084        );
7085        let result = agent.execute(&[], "Hello", None).await;
7086        assert!(result.is_err());
7087        assert_eq!(
7088            mock_client.call_count.load(Ordering::SeqCst),
7089            1,
7090            "with threshold=1 exactly one attempt should be made"
7091        );
7092    }
7093
    /// 4.3 — Circuit breaker: succeeds when LLM recovers before hitting threshold
    #[tokio::test]
    async fn test_circuit_breaker_succeeds_if_llm_recovers() {
        // First call fails, second call succeeds; threshold=3 — recovery within threshold
        //
        // Test double: errors once on `complete`, then delegates every later
        // call to the wrapped MockLlmClient.
        struct FailOnceThenSucceed {
            // Supplies the responses used once the simulated outage is over.
            inner: MockLlmClient,
            // Latched to true by the first `complete` call (the failing one).
            failed_once: std::sync::atomic::AtomicBool,
            // Counts every `complete` invocation, successful or not.
            call_count: AtomicUsize,
        }

        #[async_trait::async_trait]
        impl LlmClient for FailOnceThenSucceed {
            async fn complete(
                &self,
                messages: &[Message],
                system: Option<&str>,
                tools: &[ToolDefinition],
            ) -> Result<LlmResponse> {
                self.call_count.fetch_add(1, Ordering::SeqCst);
                // `swap` atomically latches the flag: the first caller reads
                // `false` (and fails below); every later caller reads `true`.
                let already_failed = self
                    .failed_once
                    .swap(true, std::sync::atomic::Ordering::SeqCst);
                if !already_failed {
                    anyhow::bail!("transient network error");
                }
                self.inner.complete(messages, system, tools).await
            }

            // Streaming path is not exercised by this test; forward to the mock.
            async fn complete_streaming(
                &self,
                messages: &[Message],
                system: Option<&str>,
                tools: &[ToolDefinition],
                cancel_token: tokio_util::sync::CancellationToken,
            ) -> Result<tokio::sync::mpsc::Receiver<crate::llm::StreamEvent>> {
                self.inner
                    .complete_streaming(messages, system, tools, cancel_token)
                    .await
            }
        }

        let mock = Arc::new(FailOnceThenSucceed {
            inner: MockLlmClient::new(vec![MockLlmClient::text_response("Recovered!")]),
            failed_once: std::sync::atomic::AtomicBool::new(false),
            call_count: AtomicUsize::new(0),
        });

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let config = AgentConfig {
            circuit_breaker_threshold: 3,
            ..AgentConfig::default()
        };
        let agent = AgentLoop::new(mock.clone(), tool_executor, test_tool_context(), config);
        let result = agent.execute(&[], "Hello", None).await;
        assert!(
            result.is_ok(),
            "should succeed when LLM recovers within threshold: {:?}",
            result.err()
        );
        assert_eq!(result.unwrap().text, "Recovered!");
        assert_eq!(
            mock.call_count.load(Ordering::SeqCst),
            2,
            "should have made exactly 2 calls (1 fail + 1 success)"
        );
    }
7160
7161    // ── Continuation detection tests ─────────────────────────────────────────
7162
7163    #[test]
7164    fn test_looks_incomplete_empty() {
7165        assert!(AgentLoop::looks_incomplete(""));
7166        assert!(AgentLoop::looks_incomplete("   "));
7167    }
7168
7169    #[test]
7170    fn test_looks_incomplete_trailing_colon() {
7171        assert!(AgentLoop::looks_incomplete("Let me check the file:"));
7172        assert!(AgentLoop::looks_incomplete("Next steps:"));
7173    }
7174
7175    #[test]
7176    fn test_looks_incomplete_ellipsis() {
7177        assert!(AgentLoop::looks_incomplete("Working on it..."));
7178        assert!(AgentLoop::looks_incomplete("Processing…"));
7179    }
7180
7181    #[test]
7182    fn test_looks_incomplete_intent_phrases() {
7183        assert!(AgentLoop::looks_incomplete(
7184            "I'll start by reading the file."
7185        ));
7186        assert!(AgentLoop::looks_incomplete(
7187            "Let me check the configuration."
7188        ));
7189        assert!(AgentLoop::looks_incomplete("I will now run the tests."));
7190        assert!(AgentLoop::looks_incomplete(
7191            "I need to update the Cargo.toml."
7192        ));
7193    }
7194
7195    #[test]
7196    fn test_looks_complete_final_answer() {
7197        // Clear final answers should NOT trigger continuation
7198        assert!(!AgentLoop::looks_incomplete(
7199            "The tests pass. All changes have been applied successfully."
7200        ));
7201        assert!(!AgentLoop::looks_incomplete(
7202            "Done. I've updated the three files and verified the build succeeds."
7203        ));
7204        assert!(!AgentLoop::looks_incomplete("42"));
7205        assert!(!AgentLoop::looks_incomplete("Yes."));
7206    }
7207
7208    #[test]
7209    fn test_looks_incomplete_multiline_complete() {
7210        let text = "Here is the summary:\n\n- Fixed the bug in agent.rs\n- All tests pass\n- Build succeeds";
7211        assert!(!AgentLoop::looks_incomplete(text));
7212    }
7213}