// a3s_code_core/agent.rs

//! Agent Loop Implementation
//!
//! The agent loop handles the core conversation cycle:
//! 1. User sends a prompt
//! 2. LLM generates a response (possibly with tool calls)
//! 3. If tool calls present, execute them and send results back
//! 4. Repeat until LLM returns without tool calls
//!
//! This implements agentic behavior: the LLM can invoke tools
//! autonomously, round after round, until the task is complete.
11
12#[cfg(feature = "ahp")]
13use crate::ahp::InjectedContext;
14#[cfg(feature = "ahp")]
15use crate::context::{ContextItem, ContextType};
16use crate::context::{ContextProvider, ContextQuery, ContextResult};
17use crate::hitl::ConfirmationProvider;
18use crate::hooks::{
19    ErrorType, GenerateEndEvent, GenerateStartEvent, HookEvent, HookExecutor, HookResult,
20    IntentDetectionEvent, OnErrorEvent, PostResponseEvent, PostToolUseEvent,
21    PreContextPerceptionEvent, PrePromptEvent, PreToolUseEvent, TokenUsageInfo, ToolCallInfo,
22    ToolResultData,
23};
24use crate::llm::{LlmClient, LlmResponse, Message, TokenUsage, ToolDefinition};
25use crate::permissions::{PermissionChecker, PermissionDecision};
26use crate::planning::{AgentGoal, ExecutionPlan, LlmPlanner, PreAnalysis, TaskStatus};
27use crate::prompts::{AgentStyle, DetectionConfidence, PlanningMode, SystemPromptSlots};
28use crate::queue::SessionCommand;
29use crate::session_lane_queue::SessionLaneQueue;
30use crate::text::truncate_utf8;
31use crate::tool_search::ToolIndex;
32use crate::tools::{ToolContext, ToolExecutor, ToolStreamEvent};
33use anyhow::{Context, Result};
34use async_trait::async_trait;
35use futures::future::join_all;
36use serde::{Deserialize, Serialize};
37use serde_json::{json, Value};
38use std::sync::Arc;
39use std::time::Duration;
40use tokio::sync::{mpsc, RwLock};
41
/// Maximum number of tool execution rounds before stopping.
///
/// Used as the default value for `AgentConfig::max_tool_rounds`, bounding
/// how many LLM → tool → LLM cycles a single execution may perform.
const MAX_TOOL_ROUNDS: usize = 50;
44
/// Result of a single parallel step execution, emitted as structured JSON
/// so the frontend can render it dynamically in the user's language.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParallelStepResult {
    /// Identifier of the plan step this result belongs to.
    pub step_id: String,
    /// Position of the step within the plan.
    pub step_number: u32,
    pub status: String, // "completed" | "failed"
    /// Brief summary of what this step did (in the LLM's language).
    pub summary: String,
    /// Notable findings surfaced by the step; omitted from JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub key_findings: Option<Vec<String>>,
    /// Error details (populated on failure); omitted from JSON when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// Arbitrary structured payload attached to the step result.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub data: Option<Value>,
}
61
62impl ParallelStepResult {
63    /// Build the full parallel-results JSON envelope injected into history.
64    pub fn build_envelope(results: Vec<ParallelStepResult>) -> Value {
65        json!({
66            "type": "parallel_results",
67            "steps": results
68        })
69    }
70}
71
/// Agent configuration
#[derive(Clone)]
pub struct AgentConfig {
    /// Slot-based system prompt customization.
    ///
    /// Users can customize specific parts (role, guidelines, response style, extra)
    /// without overriding the core agentic capabilities. The default agentic core
    /// (tool usage, autonomous behavior, completion criteria) is always preserved.
    pub prompt_slots: SystemPromptSlots,
    /// Tool definitions advertised to the LLM. Empty by default — tools are
    /// normally provided by the `ToolExecutor` (see `Default` impl).
    pub tools: Vec<ToolDefinition>,
    /// Maximum LLM → tool rounds per execution (defaults to `MAX_TOOL_ROUNDS`).
    pub max_tool_rounds: usize,
    /// Optional security provider for input taint tracking and output sanitization
    pub security_provider: Option<Arc<dyn crate::security::SecurityProvider>>,
    /// Optional permission checker for tool execution control
    pub permission_checker: Option<Arc<dyn PermissionChecker>>,
    /// Optional confirmation manager for HITL (Human-in-the-Loop)
    pub confirmation_manager: Option<Arc<dyn ConfirmationProvider>>,
    /// Context providers for augmenting prompts with external context
    pub context_providers: Vec<Arc<dyn ContextProvider>>,
    /// Planning mode — Auto (detect from message), Enabled, or Disabled.
    pub planning_mode: PlanningMode,
    /// Enable goal tracking
    pub goal_tracking: bool,
    /// Optional hook engine for firing lifecycle events (PreToolUse, PostToolUse, etc.)
    pub hook_engine: Option<Arc<dyn HookExecutor>>,
    /// Optional skill registry for tool permission enforcement
    pub skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    /// Max consecutive malformed-tool-args errors before aborting (default: 2).
    ///
    /// When the LLM returns tool arguments with `__parse_error`, the error is
    /// fed back as a tool result. After this many consecutive parse errors the
    /// loop bails instead of retrying indefinitely.
    pub max_parse_retries: u32,
    /// Per-tool execution timeout in milliseconds (`None` = no timeout).
    ///
    /// When set, each tool execution is wrapped in `tokio::time::timeout`.
    /// A timeout produces an error result sent back to the LLM rather than
    /// crashing the session.
    pub tool_timeout_ms: Option<u64>,
    /// Circuit-breaker threshold: max consecutive LLM API failures before
    /// aborting (default: 3).
    ///
    /// In non-streaming mode, transient LLM failures are retried up to this
    /// many times (with short exponential backoff) before the loop bails.
    /// In streaming mode, any failure is fatal (events cannot be replayed).
    pub circuit_breaker_threshold: u32,
    /// Max consecutive identical tool signatures before aborting (default: 3).
    ///
    /// A tool signature is the exact combination of tool name + compact JSON
    /// arguments. This prevents the agent from getting stuck repeating the same
    /// tool call in a loop, for example repeatedly fetching the same URL.
    pub duplicate_tool_call_threshold: u32,
    /// Enable auto-compaction when context usage exceeds threshold.
    pub auto_compact: bool,
    /// Context usage percentage threshold to trigger auto-compaction (0.0 - 1.0).
    /// Default: 0.80 (80%).
    pub auto_compact_threshold: f32,
    /// Maximum context window size in tokens (used for auto-compact calculation).
    /// Default: 200_000.
    pub max_context_tokens: usize,
    /// LLM client reference for auto-compaction (needs to call LLM for summarization).
    pub llm_client: Option<Arc<dyn LlmClient>>,
    /// Optional agent memory for auto-remember after tool execution and recall before prompts.
    pub memory: Option<Arc<crate::memory::AgentMemory>>,
    /// Inject a continuation message when the LLM stops calling tools before the
    /// task is complete. Enabled by default. Set to `false` to disable.
    ///
    /// When enabled, if the LLM produces a response with no tool calls but the
    /// response text looks like an intermediate step (not a final answer), the
    /// loop injects [`crate::prompts::CONTINUATION`] as a user message and
    /// continues for up to `max_continuation_turns` additional turns.
    pub continuation_enabled: bool,
    /// Maximum number of continuation injections per execution (default: 3).
    ///
    /// Prevents infinite loops when the LLM repeatedly stops without completing.
    pub max_continuation_turns: u32,
    /// Optional tool search index for filtering tools per-turn.
    ///
    /// When set, only tools matching the user prompt are sent to the LLM,
    /// reducing context usage when many MCP tools are registered.
    pub tool_index: Option<ToolIndex>,
    /// Optional subagent registry for auto-delegation.
    ///
    /// When set, the agent loop can auto-detect when to launch subagents
    /// based on prompt patterns or agent style.
    pub subagent_registry: Option<Arc<crate::subagent::AgentRegistry>>,
    /// Callback for when a subagent should be launched.
    ///
    /// When `should_launch_subagent` returns Some, this callback is invoked
    /// with the agent definition and prompt. The callback should return
    /// `Some(result)` if it handled the subagent launch, or `None` to
    /// fall back to normal execution.
    #[allow(clippy::type_complexity)]
    pub on_subagent_launch: Option<
        Arc<
            dyn Fn(&crate::subagent::AgentDefinition, &str) -> Option<Result<AgentResult>>
                + Send
                + Sync,
        >,
    >,
}
173
174impl std::fmt::Debug for AgentConfig {
175    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
176        f.debug_struct("AgentConfig")
177            .field("prompt_slots", &self.prompt_slots)
178            .field("tools", &self.tools)
179            .field("max_tool_rounds", &self.max_tool_rounds)
180            .field("security_provider", &self.security_provider.is_some())
181            .field("permission_checker", &self.permission_checker.is_some())
182            .field("confirmation_manager", &self.confirmation_manager.is_some())
183            .field("context_providers", &self.context_providers.len())
184            .field("planning_mode", &self.planning_mode)
185            .field("goal_tracking", &self.goal_tracking)
186            .field("hook_engine", &self.hook_engine.is_some())
187            .field(
188                "skill_registry",
189                &self.skill_registry.as_ref().map(|r| r.len()),
190            )
191            .field("max_parse_retries", &self.max_parse_retries)
192            .field("tool_timeout_ms", &self.tool_timeout_ms)
193            .field("circuit_breaker_threshold", &self.circuit_breaker_threshold)
194            .field(
195                "duplicate_tool_call_threshold",
196                &self.duplicate_tool_call_threshold,
197            )
198            .field("auto_compact", &self.auto_compact)
199            .field("auto_compact_threshold", &self.auto_compact_threshold)
200            .field("max_context_tokens", &self.max_context_tokens)
201            .field("continuation_enabled", &self.continuation_enabled)
202            .field("max_continuation_turns", &self.max_continuation_turns)
203            .field("memory", &self.memory.is_some())
204            .field("tool_index", &self.tool_index.as_ref().map(|i| i.len()))
205            .field(
206                "subagent_registry",
207                &self.subagent_registry.as_ref().map(|r| r.len()),
208            )
209            .field("on_subagent_launch", &self.on_subagent_launch.is_some())
210            .finish()
211    }
212}
213
214impl Default for AgentConfig {
215    fn default() -> Self {
216        Self {
217            prompt_slots: SystemPromptSlots::default(),
218            tools: Vec::new(), // Tools are provided by ToolExecutor
219            max_tool_rounds: MAX_TOOL_ROUNDS,
220            security_provider: None,
221            permission_checker: None,
222            confirmation_manager: None,
223            context_providers: Vec::new(),
224            planning_mode: PlanningMode::default(),
225            goal_tracking: false,
226            hook_engine: None,
227            skill_registry: Some(Arc::new(crate::skills::SkillRegistry::with_builtins())),
228            max_parse_retries: 2,
229            tool_timeout_ms: None,
230            circuit_breaker_threshold: 3,
231            duplicate_tool_call_threshold: 3,
232            auto_compact: false,
233            auto_compact_threshold: 0.80,
234            max_context_tokens: 200_000,
235            llm_client: None,
236            memory: None,
237            continuation_enabled: true,
238            max_continuation_turns: 3,
239            tool_index: None,
240            subagent_registry: None,
241            on_subagent_launch: None,
242        }
243    }
244}
245
/// Events emitted during agent execution
///
/// Subscribe via [`Session::subscribe_events()`](crate::session::Session::subscribe_events).
/// New variants may be added in minor releases — always include a wildcard arm
/// (`_ => {}`) when matching.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum AgentEvent {
    /// Agent started processing
    #[serde(rename = "agent_start")]
    Start {
        /// The user prompt that started this execution.
        prompt: String,
    },

    /// Runtime agent style/mode selected for the current execution.
    #[serde(rename = "agent_mode_changed")]
    AgentModeChanged {
        /// Stable UI/runtime mode label, e.g. "general", "planning", "explore".
        mode: String,
        /// Canonical built-in agent name associated with this mode.
        agent: String,
        /// Human-readable explanation of the selected style.
        description: String,
    },

    /// LLM turn started
    #[serde(rename = "turn_start")]
    TurnStart { turn: usize },

    /// Text delta from streaming
    #[serde(rename = "text_delta")]
    TextDelta { text: String },

    /// Reasoning/thinking delta from streaming (for models like kimi, deepseek)
    #[serde(rename = "reasoning_delta")]
    ReasoningDelta { text: String },

    /// Tool execution started
    #[serde(rename = "tool_start")]
    ToolStart {
        /// Unique tool-call identifier (matched by the later `ToolEnd`).
        id: String,
        /// Name of the tool being invoked.
        name: String,
    },

    /// Tool input delta from streaming (partial JSON arguments)
    #[serde(rename = "tool_input_delta")]
    ToolInputDelta { delta: String },

    /// Tool execution completed
    #[serde(rename = "tool_end")]
    ToolEnd {
        /// Tool-call identifier matching the corresponding `ToolStart`.
        id: String,
        /// Name of the tool that ran.
        name: String,
        /// Final output text produced by the tool.
        output: String,
        /// Exit code reported by the tool.
        exit_code: i32,
        /// Optional structured metadata attached by the tool.
        #[serde(skip_serializing_if = "Option::is_none")]
        metadata: Option<serde_json::Value>,
    },

    /// Intermediate tool output (streaming delta)
    #[serde(rename = "tool_output_delta")]
    ToolOutputDelta {
        id: String,
        name: String,
        delta: String,
    },

    /// LLM turn completed
    #[serde(rename = "turn_end")]
    TurnEnd { turn: usize, usage: TokenUsage },

    /// Agent completed
    #[serde(rename = "agent_end")]
    End {
        /// Final response text.
        text: String,
        /// Token usage for the execution.
        usage: TokenUsage,
        /// Optional provider-specific response metadata.
        #[serde(skip_serializing_if = "Option::is_none")]
        meta: Option<crate::llm::LlmResponseMeta>,
    },

    /// Error occurred
    #[serde(rename = "error")]
    Error { message: String },

    /// Tool execution requires confirmation (HITL)
    #[serde(rename = "confirmation_required")]
    ConfirmationRequired {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        /// How long (ms) the confirmation stays open before `ConfirmationTimeout`.
        timeout_ms: u64,
    },

    /// Confirmation received from user (HITL)
    #[serde(rename = "confirmation_received")]
    ConfirmationReceived {
        tool_id: String,
        approved: bool,
        /// Optional user-supplied reason for the decision.
        reason: Option<String>,
    },

    /// Confirmation timed out (HITL)
    #[serde(rename = "confirmation_timeout")]
    ConfirmationTimeout {
        tool_id: String,
        action_taken: String, // "rejected" or "auto_approved"
    },

    /// External task pending (needs SDK processing)
    #[serde(rename = "external_task_pending")]
    ExternalTaskPending {
        task_id: String,
        session_id: String,
        lane: crate::hitl::SessionLane,
        command_type: String,
        payload: serde_json::Value,
        timeout_ms: u64,
    },

    /// External task completed
    #[serde(rename = "external_task_completed")]
    ExternalTaskCompleted {
        task_id: String,
        session_id: String,
        success: bool,
    },

    /// Tool execution denied by permission policy
    #[serde(rename = "permission_denied")]
    PermissionDenied {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        /// Human-readable explanation of why the policy denied the call.
        reason: String,
    },

    /// Context resolution started
    #[serde(rename = "context_resolving")]
    ContextResolving {
        /// Names of the context providers being queried.
        providers: Vec<String>,
    },

    /// Context resolution completed
    #[serde(rename = "context_resolved")]
    ContextResolved {
        total_items: usize,
        total_tokens: usize,
    },

    // ========================================================================
    // a3s-lane integration events
    // ========================================================================
    /// Command moved to dead letter queue after exhausting retries
    #[serde(rename = "command_dead_lettered")]
    CommandDeadLettered {
        command_id: String,
        command_type: String,
        lane: String,
        /// Last error that caused the command to be dead-lettered.
        error: String,
        /// Total number of attempts made before giving up.
        attempts: u32,
    },

    /// Command retry attempt
    #[serde(rename = "command_retry")]
    CommandRetry {
        command_id: String,
        command_type: String,
        lane: String,
        /// Which retry attempt this is.
        attempt: u32,
        /// Delay before this retry is executed.
        delay_ms: u64,
    },

    /// Queue alert (depth warning, latency alert, etc.)
    #[serde(rename = "queue_alert")]
    QueueAlert {
        level: String,
        alert_type: String,
        message: String,
    },

    // ========================================================================
    // Task tracking events
    // ========================================================================
    /// Task list updated
    #[serde(rename = "task_updated")]
    TaskUpdated {
        session_id: String,
        tasks: Vec<crate::planning::Task>,
    },

    // ========================================================================
    // Memory System events (Phase 3)
    // ========================================================================
    /// Memory stored
    #[serde(rename = "memory_stored")]
    MemoryStored {
        memory_id: String,
        memory_type: String,
        importance: f32,
        tags: Vec<String>,
    },

    /// Memory recalled
    #[serde(rename = "memory_recalled")]
    MemoryRecalled {
        memory_id: String,
        content: String,
        relevance: f32,
    },

    /// Memories searched
    #[serde(rename = "memories_searched")]
    MemoriesSearched {
        query: Option<String>,
        tags: Vec<String>,
        result_count: usize,
    },

    /// Memory cleared
    #[serde(rename = "memory_cleared")]
    MemoryCleared {
        tier: String, // "long_term", "short_term", "working"
        /// Number of memories removed.
        count: u64,
    },

    // ========================================================================
    // Subagent events
    // ========================================================================
    /// Subagent task started
    #[serde(rename = "subagent_start")]
    SubagentStart {
        /// Unique task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Parent session ID
        parent_session_id: String,
        /// Agent type (e.g., "explore", "general")
        agent: String,
        /// Short description of the task
        description: String,
    },

    /// Subagent task progress update
    #[serde(rename = "subagent_progress")]
    SubagentProgress {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Progress status message
        status: String,
        /// Additional metadata
        metadata: serde_json::Value,
    },

    /// Subagent task completed
    #[serde(rename = "subagent_end")]
    SubagentEnd {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Agent type
        agent: String,
        /// Task output/result
        output: String,
        /// Whether the task succeeded
        success: bool,
    },

    // ========================================================================
    // Planning and Goal Tracking Events (Phase 1)
    // ========================================================================
    /// Planning phase started
    #[serde(rename = "planning_start")]
    PlanningStart { prompt: String },

    /// Planning phase completed
    #[serde(rename = "planning_end")]
    PlanningEnd {
        plan: ExecutionPlan,
        estimated_steps: usize,
    },

    /// Step execution started
    #[serde(rename = "step_start")]
    StepStart {
        step_id: String,
        description: String,
        step_number: usize,
        total_steps: usize,
    },

    /// Step execution completed
    #[serde(rename = "step_end")]
    StepEnd {
        step_id: String,
        status: TaskStatus,
        step_number: usize,
        total_steps: usize,
    },

    /// Goal extracted from prompt
    #[serde(rename = "goal_extracted")]
    GoalExtracted { goal: AgentGoal },

    /// Goal progress update
    #[serde(rename = "goal_progress")]
    GoalProgress {
        goal: String,
        /// Fractional progress toward the goal.
        progress: f32,
        completed_steps: usize,
        total_steps: usize,
    },

    /// Goal achieved
    #[serde(rename = "goal_achieved")]
    GoalAchieved {
        goal: String,
        total_steps: usize,
        duration_ms: i64,
    },

    // ========================================================================
    // Context Compaction events
    // ========================================================================
    /// Context automatically compacted due to high usage
    #[serde(rename = "context_compacted")]
    ContextCompacted {
        session_id: String,
        /// Message count before compaction.
        before_messages: usize,
        /// Message count after compaction.
        after_messages: usize,
        /// Context usage percentage that triggered compaction.
        percent_before: f32,
    },

    // ========================================================================
    // Persistence events
    // ========================================================================
    /// Session persistence failed — SDK clients should handle this
    #[serde(rename = "persistence_failed")]
    PersistenceFailed {
        session_id: String,
        /// The persistence operation that failed.
        operation: String,
        error: String,
    },

    // ========================================================================
    // Side question (btw)
    // ========================================================================
    /// Ephemeral side question answered.
    ///
    /// Emitted by [`crate::agent_api::AgentSession::btw()`] in streaming mode.
    /// The answer is never added to conversation history.
    #[serde(rename = "btw_answer")]
    BtwAnswer {
        question: String,
        answer: String,
        usage: TokenUsage,
    },
}
601
/// Result of agent execution
#[derive(Debug, Clone)]
pub struct AgentResult {
    /// Final response text produced by the agent.
    pub text: String,
    /// Conversation messages accumulated during the run.
    pub messages: Vec<Message>,
    /// Token usage for the execution.
    pub usage: TokenUsage,
    /// Number of tool calls made during the run.
    pub tool_calls_count: usize,
}
610
611// ============================================================================
612// ToolCommand — bridges ToolExecutor to SessionCommand for queue submission
613// ============================================================================
614
/// Adapter that implements `SessionCommand` for tool execution via the queue.
///
/// Wraps a `ToolExecutor` call so it can be submitted to `SessionLaneQueue`.
pub struct ToolCommand {
    /// Executor that actually runs the tool.
    tool_executor: Arc<ToolExecutor>,
    /// Name of the tool to invoke.
    tool_name: String,
    /// JSON arguments forwarded to the tool.
    tool_args: Value,
    /// Per-run execution context handed to the tool.
    tool_context: ToolContext,
    /// Optional registry used to enforce skill-level tool allow-lists.
    skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
}
625
impl ToolCommand {
    /// Create a new ToolCommand
    ///
    /// # Arguments
    /// * `tool_executor` — executor that will run the tool.
    /// * `tool_name` — name of the tool to invoke.
    /// * `tool_args` — JSON arguments forwarded to the tool.
    /// * `tool_context` — execution context for the run.
    /// * `skill_registry` — optional registry for skill-based permission checks.
    pub fn new(
        tool_executor: Arc<ToolExecutor>,
        tool_name: String,
        tool_args: Value,
        tool_context: ToolContext,
        skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    ) -> Self {
        Self {
            tool_executor,
            tool_name,
            tool_args,
            tool_context,
            skill_registry,
        }
    }
}
644
645#[async_trait]
646impl SessionCommand for ToolCommand {
647    async fn execute(&self) -> Result<Value> {
648        // Check skill-based tool permissions
649        if let Some(registry) = &self.skill_registry {
650            let instruction_skills = registry.by_kind(crate::skills::SkillKind::Instruction);
651
652            // If there are instruction skills with tool restrictions, check permissions
653            let has_restrictions = instruction_skills.iter().any(|s| s.allowed_tools.is_some());
654
655            if has_restrictions {
656                let mut allowed = false;
657
658                for skill in &instruction_skills {
659                    if skill.is_tool_allowed(&self.tool_name) {
660                        allowed = true;
661                        break;
662                    }
663                }
664
665                if !allowed {
666                    return Err(anyhow::anyhow!(
667                        "Tool '{}' is not allowed by any active skill. Active skills restrict tools to their allowed-tools lists.",
668                        self.tool_name
669                    ));
670                }
671            }
672        }
673
674        // Execute the tool
675        let result = self
676            .tool_executor
677            .execute_with_context(&self.tool_name, &self.tool_args, &self.tool_context)
678            .await?;
679        Ok(serde_json::json!({
680            "output": result.output,
681            "exit_code": result.exit_code,
682            "metadata": result.metadata,
683        }))
684    }
685
686    fn command_type(&self) -> &str {
687        &self.tool_name
688    }
689
690    fn payload(&self) -> Value {
691        self.tool_args.clone()
692    }
693}
694
695// ============================================================================
696// AgentLoop
697// ============================================================================
698
/// Agent loop executor
///
/// Owns the LLM client, tool executor, and configuration needed to drive the
/// prompt → LLM → tool → LLM conversation cycle for one session.
#[derive(Clone)]
pub struct AgentLoop {
    /// Client used for all LLM calls.
    llm_client: Arc<dyn LlmClient>,
    /// Executes tool calls requested by the LLM.
    tool_executor: Arc<ToolExecutor>,
    /// Context handed to each tool execution.
    tool_context: ToolContext,
    /// Behavior knobs for the loop (limits, providers, hooks, ...).
    config: AgentConfig,
    /// Optional per-session tool metrics collector
    tool_metrics: Option<Arc<RwLock<crate::telemetry::ToolMetrics>>>,
    /// Optional lane queue for priority-based tool execution
    command_queue: Option<Arc<SessionLaneQueue>>,
    /// Optional progress tracker for real-time tool/token usage tracking
    progress_tracker: Option<Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>>,
    /// Optional task manager for centralized task lifecycle tracking
    task_manager: Option<Arc<crate::task::TaskManager>>,
}
715
716// ============================================================================
717// Intent Detection Helpers (for AHP Context Perception)
718// ============================================================================
719
/// Extract a target name from the prompt (e.g., function name, file path).
///
/// Prefers an explicitly quoted token — double quotes, single quotes, then
/// backticks, in that order. Falls back to the first "content" word (longer
/// than three characters and not a common question/filler word) when the
/// prompt has more than two words. Returns an empty string when nothing
/// suitable is found.
fn extract_target_name_from_prompt(prompt: &str, _patterns: &[&str]) -> String {
    // A quoted span is the strongest signal for an explicit target.
    // (One loop replaces three copy-pasted branches; an unmatched opening
    // quote falls through to the next quote style, as before.)
    for quote in ['"', '\'', '`'] {
        if let Some(start) = prompt.find(quote) {
            if let Some(len) = prompt[start + 1..].find(quote) {
                return prompt[start + 1..start + 1 + len].to_string();
            }
        }
    }

    // Fall back to extracting a reasonable word boundary.
    const STOP_WORDS: [&str; 7] = ["where", "what", "find", "the", "how", "is", "are"];
    let words: Vec<&str> = prompt.split_whitespace().collect();
    if words.len() > 2 {
        // Look for a likely target word, skipping question/filler words.
        // The comparison is case-insensitive so sentence-initial capitals
        // ("Where", "Find") no longer slip through the filter.
        for word in &words {
            if word.len() > 3 && !STOP_WORDS.contains(&word.to_lowercase().as_str()) {
                return word.to_string();
            }
        }
    }

    String::new()
}
759
/// Detect the domain from prompt keywords.
///
/// Matching is a substring heuristic over the lowercased prompt, with one
/// refinement: short, ambiguous keywords ("go", "java", "api", "rest") are
/// matched as whole words to avoid false positives such as
/// "algorithm" → "go" or "capital" → "api". The first matching branch wins;
/// defaults to "general".
fn detect_domain_from_prompt(prompt: &str) -> String {
    let lower = prompt.to_lowercase();
    // Whole-word check for short keywords that are common English substrings.
    let has_word = |kw: &str| {
        lower
            .split(|c: char| !c.is_ascii_alphanumeric())
            .any(|w| w == kw)
    };

    if lower.contains("rust") || lower.contains("cargo") || lower.contains(".rs") {
        "rust".to_string()
    } else if lower.contains("javascript")
        || lower.contains("typescript")
        || lower.contains("node")
        || lower.contains(".js")
        || lower.contains(".ts")
    {
        "javascript".to_string()
    } else if lower.contains("python") || lower.contains(".py") {
        "python".to_string()
    } else if has_word("go") || lower.contains("golang") || lower.contains(".go") {
        "go".to_string()
    } else if has_word("java") || lower.contains(".java") {
        "java".to_string()
    } else if lower.contains("docker") || lower.contains("container") {
        "docker".to_string()
    } else if lower.contains("kubernetes") || lower.contains("k8s") {
        "kubernetes".to_string()
    } else if lower.contains("sql")
        || lower.contains("database")
        || lower.contains("postgres")
        || lower.contains("mysql")
    {
        "database".to_string()
    } else if has_word("api") || has_word("rest") || lower.contains("grpc") {
        "api".to_string()
    } else if lower.contains("auth")
        || lower.contains("login")
        || lower.contains("password")
        || lower.contains("token")
    {
        "security".to_string()
    } else if lower.contains("test") || lower.contains("spec") || lower.contains("mock") {
        "testing".to_string()
    } else {
        "general".to_string()
    }
}
803
/// Result from IntentDetection harness
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntentDetectionResult {
    /// Detected intent: "locate" | "understand" | "retrieve" | "explore" | "reason" | "validate" | "compare" | "track"
    pub detected_intent: String,
    /// Confidence score 0.0 - 1.0
    pub confidence: f32,
    /// Optional target hints from the harness
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_hints: Option<TargetHints>,
}
815
/// Target hints from IntentDetection harness
///
/// All fields are optional; missing hints are filled in from the prompt by
/// `build_pre_context_perception_from_intent`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TargetHints {
    /// Kind of target the intent refers to (e.g., a function or file).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_type: Option<String>,
    /// Name of the target, when the harness could identify one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_name: Option<String>,
    /// Domain hint (e.g., a language or technology area).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub domain: Option<String>,
}
826
/// Detect language hint from prompt characters.
///
/// Returns a two-letter code based on the scripts present: "ja" (kana),
/// "zh" (CJK ideographs), "ko" (Hangul), "ar" (Arabic), "ru" (Cyrillic),
/// or `None` for anything else (e.g. Latin scripts).
///
/// Kana is checked *before* CJK ideographs: Japanese text normally mixes
/// kanji (which fall inside the CJK Unified Ideographs range) with
/// hiragana/katakana, so checking the CJK range first would misclassify
/// Japanese text as Chinese.
fn detect_language_hint(prompt: &str) -> Option<String> {
    // Japanese: Hiragana (U+3040–U+309F) or Katakana (U+30A0–U+30FF).
    if prompt
        .chars()
        .any(|c| ('\u{3040}'..='\u{309f}').contains(&c) || ('\u{30a0}'..='\u{30ff}').contains(&c))
    {
        return Some("ja".to_string());
    }
    // Chinese: CJK Unified Ideographs with no kana present.
    if prompt
        .chars()
        .any(|c| ('\u{4e00}'..='\u{9fff}').contains(&c))
    {
        return Some("zh".to_string());
    }
    // Korean: Hangul syllables.
    if prompt
        .chars()
        .any(|c| ('\u{ac00}'..='\u{d7af}').contains(&c))
    {
        return Some("ko".to_string());
    }
    // Arabic script.
    if prompt
        .chars()
        .any(|c| ('\u{0600}'..='\u{06ff}').contains(&c))
    {
        return Some("ar".to_string());
    }
    // Cyrillic (Russian and related languages).
    if prompt
        .chars()
        .any(|c| ('\u{0400}'..='\u{04ff}').contains(&c))
    {
        return Some("ru".to_string());
    }
    None
}
866
867/// Build PreContextPerceptionEvent from IntentDetection result.
868fn build_pre_context_perception_from_intent(
869    result: IntentDetectionResult,
870    prompt: &str,
871    session_id: &str,
872    workspace: &str,
873) -> PreContextPerceptionEvent {
874    let target_hints = result.target_hints;
875    PreContextPerceptionEvent {
876        session_id: session_id.to_string(),
877        intent: result.detected_intent,
878        target_type: target_hints
879            .as_ref()
880            .and_then(|h| h.target_type.clone())
881            .unwrap_or_else(|| "unknown".to_string()),
882        target_name: target_hints
883            .as_ref()
884            .and_then(|h| h.target_name.clone())
885            .unwrap_or_else(|| extract_target_name_from_prompt(prompt, &[])),
886        domain: target_hints
887            .as_ref()
888            .and_then(|h| h.domain.clone())
889            .unwrap_or_else(|| detect_domain_from_prompt(prompt)),
890        query: Some(prompt.to_string()),
891        working_directory: workspace.to_string(),
892        urgency: "normal".to_string(),
893    }
894}
895
/// Rough token estimation (~4 chars per token for English/code).
///
/// Intentionally cheap: integer division floors, so inputs shorter than
/// four bytes estimate to zero tokens.
#[cfg(feature = "ahp")]
fn estimate_tokens(text: &str) -> usize {
    // Heuristic: one token per four bytes of UTF-8 input.
    const APPROX_BYTES_PER_TOKEN: usize = 4;
    text.len() / APPROX_BYTES_PER_TOKEN
}
901
902impl AgentLoop {
    /// Create a new agent loop from its required collaborators.
    ///
    /// Optional facilities (tool metrics, lane queue, progress tracker,
    /// task manager) start unset; attach them via the `with_*` builders.
    pub fn new(
        llm_client: Arc<dyn LlmClient>,
        tool_executor: Arc<ToolExecutor>,
        tool_context: ToolContext,
        config: AgentConfig,
    ) -> Self {
        Self {
            llm_client,
            tool_executor,
            tool_context,
            config,
            tool_metrics: None,
            command_queue: None,
            progress_tracker: None,
            task_manager: None,
        }
    }
920
921    /// Set the progress tracker for real-time tool/token usage tracking.
922    pub fn with_progress_tracker(
923        mut self,
924        tracker: Arc<tokio::sync::RwLock<crate::task::ProgressTracker>>,
925    ) -> Self {
926        self.progress_tracker = Some(tracker);
927        self
928    }
929
930    /// Set the task manager for centralized task lifecycle tracking.
931    pub fn with_task_manager(mut self, manager: Arc<crate::task::TaskManager>) -> Self {
932        self.task_manager = Some(manager);
933        self
934    }
935
936    /// Set the tool metrics collector for this agent loop
937    pub fn with_tool_metrics(
938        mut self,
939        metrics: Arc<RwLock<crate::telemetry::ToolMetrics>>,
940    ) -> Self {
941        self.tool_metrics = Some(metrics);
942        self
943    }
944
945    /// Set the lane queue for priority-based tool execution.
946    ///
947    /// When set, tools are routed through the lane queue which supports
948    /// External task handling for multi-machine parallel processing.
949    pub fn with_queue(mut self, queue: Arc<SessionLaneQueue>) -> Self {
950        self.command_queue = Some(queue);
951        self
952    }
953
954    /// Track a tool call result in the progress tracker.
955    fn track_tool_result(&self, tool_name: &str, args: &serde_json::Value, exit_code: i32) {
956        if let Some(ref tracker) = self.progress_tracker {
957            let args_summary = Self::compact_json_args(args);
958            let success = exit_code == 0;
959            if let Ok(mut guard) = tracker.try_write() {
960                guard.track_tool_call(tool_name, args_summary, success);
961            }
962        }
963    }
964
965    /// Compact JSON arguments to a short summary string for tracking.
966    fn compact_json_args(args: &serde_json::Value) -> String {
967        let raw = match args {
968            serde_json::Value::Null => String::new(),
969            serde_json::Value::String(s) => s.clone(),
970            _ => serde_json::to_string(args).unwrap_or_default(),
971        };
972        let compact = raw.split_whitespace().collect::<Vec<_>>().join(" ");
973        if compact.len() > 180 {
974            format!("{}...", truncate_utf8(&compact, 180))
975        } else {
976            compact
977        }
978    }
979
980    /// Execute a tool, applying the configured timeout if set.
981    ///
982    /// On timeout, returns an error describing which tool timed out and after
983    /// how many milliseconds. The caller converts this to a tool-result error
984    /// message that is fed back to the LLM.
985    async fn execute_tool_timed(
986        &self,
987        name: &str,
988        args: &serde_json::Value,
989        ctx: &ToolContext,
990    ) -> anyhow::Result<crate::tools::ToolResult> {
991        let fut = self.tool_executor.execute_with_context(name, args, ctx);
992        if let Some(timeout_ms) = self.config.tool_timeout_ms {
993            match tokio::time::timeout(Duration::from_millis(timeout_ms), fut).await {
994                Ok(result) => result,
995                Err(_) => Err(anyhow::anyhow!(
996                    "Tool '{}' timed out after {}ms",
997                    name,
998                    timeout_ms
999                )),
1000            }
1001        } else {
1002            fut.await
1003        }
1004    }
1005
1006    /// Convert a tool execution result into the (output, exit_code, is_error, metadata, images) tuple.
1007    fn tool_result_to_tuple(
1008        result: anyhow::Result<crate::tools::ToolResult>,
1009    ) -> (
1010        String,
1011        i32,
1012        bool,
1013        Option<serde_json::Value>,
1014        Vec<crate::llm::Attachment>,
1015    ) {
1016        match result {
1017            Ok(r) => (
1018                r.output,
1019                r.exit_code,
1020                r.exit_code != 0,
1021                r.metadata,
1022                r.images,
1023            ),
1024            Err(e) => {
1025                let msg = e.to_string();
1026                // Classify the error so the LLM knows whether retrying makes sense.
1027                let hint = if Self::is_transient_error(&msg) {
1028                    " [transient — you may retry this tool call]"
1029                } else {
1030                    " [permanent — do not retry without changing the arguments]"
1031                };
1032                (
1033                    format!("Tool execution error: {}{}", msg, hint),
1034                    1,
1035                    true,
1036                    None,
1037                    Vec::new(),
1038                )
1039            }
1040        }
1041    }
1042
1043    /// Inspect the workspace for well-known project marker files and return a short
1044    /// `## Project Context` section that the agent can use without any manual configuration.
1045    /// Returns an empty string when the workspace type cannot be determined.
1046    fn detect_project_hint(workspace: &std::path::Path) -> String {
1047        struct Marker {
1048            file: &'static str,
1049            lang: &'static str,
1050            tip: &'static str,
1051        }
1052
1053        let markers = [
1054            Marker {
1055                file: "Cargo.toml",
1056                lang: "Rust",
1057                tip: "Use `cargo build`, `cargo test`, `cargo clippy`, and `cargo fmt`. \
1058                  Prefer `anyhow` / `thiserror` for error handling. \
1059                  Follow the Microsoft Rust Guidelines (no panics in library code, \
1060                  async-first with Tokio).",
1061            },
1062            Marker {
1063                file: "package.json",
1064                lang: "Node.js / TypeScript",
1065                tip: "Check `package.json` for the package manager (npm/yarn/pnpm/bun) \
1066                  and available scripts. Prefer TypeScript with strict mode. \
1067                  Use ESM imports unless the project is CommonJS.",
1068            },
1069            Marker {
1070                file: "pyproject.toml",
1071                lang: "Python",
1072                tip: "Use the package manager declared in `pyproject.toml` \
1073                  (uv, poetry, hatch, etc.). Prefer type hints and async/await for I/O.",
1074            },
1075            Marker {
1076                file: "setup.py",
1077                lang: "Python",
1078                tip: "Legacy Python project. Prefer type hints and async/await for I/O.",
1079            },
1080            Marker {
1081                file: "requirements.txt",
1082                lang: "Python",
1083                tip: "Python project with pip-style dependencies. \
1084                  Prefer type hints and async/await for I/O.",
1085            },
1086            Marker {
1087                file: "go.mod",
1088                lang: "Go",
1089                tip: "Use `go build ./...` and `go test ./...`. \
1090                  Follow standard Go project layout. Use `gofmt` for formatting.",
1091            },
1092            Marker {
1093                file: "pom.xml",
1094                lang: "Java / Maven",
1095                tip: "Use `mvn compile`, `mvn test`, `mvn package`. \
1096                  Follow standard Maven project structure.",
1097            },
1098            Marker {
1099                file: "build.gradle",
1100                lang: "Java / Gradle",
1101                tip: "Use `./gradlew build` and `./gradlew test`. \
1102                  Follow standard Gradle project structure.",
1103            },
1104            Marker {
1105                file: "build.gradle.kts",
1106                lang: "Kotlin / Gradle",
1107                tip: "Use `./gradlew build` and `./gradlew test`. \
1108                  Prefer Kotlin coroutines for async work.",
1109            },
1110            Marker {
1111                file: "CMakeLists.txt",
1112                lang: "C / C++",
1113                tip: "Use `cmake -B build && cmake --build build`. \
1114                  Check for `compile_commands.json` for IDE tooling.",
1115            },
1116            Marker {
1117                file: "Makefile",
1118                lang: "C / C++ (or generic)",
1119                tip: "Use `make` or `make <target>`. \
1120                  Check available targets with `make help` or by reading the Makefile.",
1121            },
1122        ];
1123
1124        // Check for C# / .NET — no single fixed filename, so glob for *.csproj / *.sln
1125        let is_dotnet = workspace.join("*.csproj").exists() || {
1126            // Fast check: look for any .csproj or .sln in the workspace root
1127            std::fs::read_dir(workspace)
1128                .map(|entries| {
1129                    entries.flatten().any(|e| {
1130                        let name = e.file_name();
1131                        let s = name.to_string_lossy();
1132                        s.ends_with(".csproj") || s.ends_with(".sln")
1133                    })
1134                })
1135                .unwrap_or(false)
1136        };
1137
1138        if is_dotnet {
1139            return "## Project Context\n\nThis is a **C# / .NET** project. \
1140             Use `dotnet build`, `dotnet test`, and `dotnet run`. \
1141             Follow C# coding conventions and async/await patterns."
1142                .to_string();
1143        }
1144
1145        for marker in &markers {
1146            if workspace.join(marker.file).exists() {
1147                return format!(
1148                    "## Project Context\n\nThis is a **{}** project. {}",
1149                    marker.lang, marker.tip
1150                );
1151            }
1152        }
1153
1154        String::new()
1155    }
1156
1157    /// Returns `true` for errors that are likely transient (network, timeout, I/O contention).
1158    /// Used to annotate tool error messages so the LLM knows whether retrying is safe.
1159    fn is_transient_error(msg: &str) -> bool {
1160        let lower = msg.to_lowercase();
1161        lower.contains("timeout")
1162        || lower.contains("timed out")
1163        || lower.contains("connection refused")
1164        || lower.contains("connection reset")
1165        || lower.contains("broken pipe")
1166        || lower.contains("temporarily unavailable")
1167        || lower.contains("resource temporarily unavailable")
1168        || lower.contains("os error 11")  // EAGAIN
1169        || lower.contains("os error 35")  // EAGAIN on macOS
1170        || lower.contains("rate limit")
1171        || lower.contains("too many requests")
1172        || lower.contains("service unavailable")
1173        || lower.contains("network unreachable")
1174    }
1175
1176    /// Returns `true` when a tool writes a file and is safe to run concurrently with other
1177    /// independent writes (no ordering dependencies, no side-channel output).
1178    fn is_parallel_safe_write(name: &str, _args: &serde_json::Value) -> bool {
1179        matches!(
1180            name,
1181            "write_file" | "edit_file" | "create_file" | "append_to_file" | "replace_in_file"
1182        )
1183    }
1184
1185    /// Extract the target file path from write-tool arguments so we can check for conflicts.
1186    fn extract_write_path(args: &serde_json::Value) -> Option<String> {
1187        // write_file / create_file / append_to_file / replace_in_file use "path"
1188        // edit_file uses "path" as well
1189        args.get("path")
1190            .and_then(|v| v.as_str())
1191            .map(|s| s.to_string())
1192    }
1193
1194    /// Execute a tool through the lane queue (if configured) or directly.
1195    /// Wraps execution in task lifecycle if task_manager is configured.
1196    async fn execute_tool_queued_or_direct(
1197        &self,
1198        name: &str,
1199        args: &serde_json::Value,
1200        ctx: &ToolContext,
1201    ) -> anyhow::Result<crate::tools::ToolResult> {
1202        // Create task for this tool execution if task_manager is available
1203        let task_id = if let Some(ref tm) = self.task_manager {
1204            let task = crate::task::Task::tool(name, args.clone());
1205            let id = task.id;
1206            tm.spawn(task);
1207            // Start the task immediately
1208            let _ = tm.start(id);
1209            Some(id)
1210        } else {
1211            None
1212        };
1213
1214        let result = self
1215            .execute_tool_queued_or_direct_inner(name, args, ctx)
1216            .await;
1217
1218        // Complete or fail the task based on result
1219        if let Some(ref tm) = self.task_manager {
1220            if let Some(tid) = task_id {
1221                match &result {
1222                    Ok(r) => {
1223                        let output = serde_json::json!({
1224                            "output": r.output.clone(),
1225                            "exit_code": r.exit_code,
1226                        });
1227                        let _ = tm.complete(tid, Some(output));
1228                    }
1229                    Err(e) => {
1230                        let _ = tm.fail(tid, e.to_string());
1231                    }
1232                }
1233            }
1234        }
1235
1236        result
1237    }
1238
1239    /// Inner execution without task lifecycle wrapping.
1240    async fn execute_tool_queued_or_direct_inner(
1241        &self,
1242        name: &str,
1243        args: &serde_json::Value,
1244        ctx: &ToolContext,
1245    ) -> anyhow::Result<crate::tools::ToolResult> {
1246        if let Some(ref queue) = self.command_queue {
1247            let command = ToolCommand::new(
1248                Arc::clone(&self.tool_executor),
1249                name.to_string(),
1250                args.clone(),
1251                ctx.clone(),
1252                self.config.skill_registry.clone(),
1253            );
1254            let rx = queue.submit_by_tool(name, Box::new(command)).await;
1255            match rx.await {
1256                Ok(Ok(value)) => {
1257                    let output = value["output"]
1258                        .as_str()
1259                        .ok_or_else(|| {
1260                            anyhow::anyhow!(
1261                                "Queue result missing 'output' field for tool '{}'",
1262                                name
1263                            )
1264                        })?
1265                        .to_string();
1266                    let exit_code = value["exit_code"].as_i64().unwrap_or(0) as i32;
1267                    return Ok(crate::tools::ToolResult {
1268                        name: name.to_string(),
1269                        output,
1270                        exit_code,
1271                        metadata: None,
1272                        images: Vec::new(),
1273                    });
1274                }
1275                Ok(Err(e)) => {
1276                    tracing::warn!(
1277                        "Queue execution failed for tool '{}', falling back to direct: {}",
1278                        name,
1279                        e
1280                    );
1281                }
1282                Err(_) => {
1283                    tracing::warn!(
1284                        "Queue channel closed for tool '{}', falling back to direct",
1285                        name
1286                    );
1287                }
1288            }
1289        }
1290        self.execute_tool_timed(name, args, ctx).await
1291    }
1292
    /// Call the LLM, handling streaming vs non-streaming internally.
    ///
    /// Streaming events (`TextDelta`, `ToolStart`) are forwarded to `event_tx`
    /// as they arrive. Non-streaming mode simply awaits the complete response.
    ///
    /// When a `ToolIndex` is configured, tools are filtered per-turn based on
    /// the last user message, reducing context usage with large tool sets.
    ///
    /// Returns `Err` on any LLM API failure. The circuit breaker in
    /// `execute_loop` wraps this call with retry logic for non-streaming mode.
    async fn call_llm(
        &self,
        messages: &[Message],
        system: Option<&str>,
        event_tx: &Option<mpsc::Sender<AgentEvent>>,
        cancel_token: &tokio_util::sync::CancellationToken,
    ) -> anyhow::Result<LlmResponse> {
        // Filter tools through ToolIndex if configured
        let tools = if let Some(ref index) = self.config.tool_index {
            // The relevance query is the text of the most recent user message
            // (first Text content block found, searching messages in reverse).
            let query = messages
                .iter()
                .rev()
                .find(|m| m.role == "user")
                .and_then(|m| {
                    m.content.iter().find_map(|b| match b {
                        crate::llm::ContentBlock::Text { text } => Some(text.as_str()),
                        _ => None,
                    })
                })
                .unwrap_or("");
            let matches = index.search(query, index.len());
            let matched_names: std::collections::HashSet<&str> =
                matches.iter().map(|m| m.name.as_str()).collect();
            // Keep only the tool definitions whose names the index matched.
            self.config
                .tools
                .iter()
                .filter(|t| matched_names.contains(t.name.as_str()))
                .cloned()
                .collect::<Vec<_>>()
        } else {
            // No index configured: expose the full tool set every turn.
            self.config.tools.clone()
        };

        // Streaming mode is selected purely by the presence of an event channel.
        if event_tx.is_some() {
            let mut stream_rx = match self
                .llm_client
                .complete_streaming(messages, system, &tools, cancel_token.clone())
                .await
            {
                Ok(rx) => rx,
                Err(stream_error) => {
                    // Do not fall back to non-streaming if cancelled — propagate cancellation
                    if cancel_token.is_cancelled() {
                        anyhow::bail!("Operation cancelled by user");
                    }
                    tracing::warn!(
                        error = %stream_error,
                        "LLM streaming setup failed; falling back to non-streaming completion"
                    );
                    // The original streaming error is preserved in the context
                    // message if the non-streaming fallback also fails.
                    return self
                        .llm_client
                        .complete(messages, system, &tools)
                        .await
                        .with_context(|| {
                            format!(
                                "LLM streaming call failed ({stream_error}); non-streaming fallback also failed"
                            )
                        });
                }
            };

            // Drain the stream, forwarding deltas to the event channel; the
            // `Done` event carries the aggregated response and ends the loop.
            let mut final_response: Option<LlmResponse> = None;
            loop {
                tokio::select! {
                    _ = cancel_token.cancelled() => {
                        tracing::info!("🛑 LLM streaming cancelled by CancellationToken");
                        anyhow::bail!("Operation cancelled by user");
                    }
                    event = stream_rx.recv() => {
                        match event {
                            Some(crate::llm::StreamEvent::TextDelta(text)) => {
                                if let Some(tx) = event_tx {
                                    tx.send(AgentEvent::TextDelta { text }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::ReasoningDelta(text)) => {
                                if let Some(tx) = event_tx {
                                    tx.send(AgentEvent::ReasoningDelta { text }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::ToolUseStart { id, name }) => {
                                if let Some(tx) = event_tx {
                                    tx.send(AgentEvent::ToolStart { id, name }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::ToolUseInputDelta(delta)) => {
                                if let Some(tx) = event_tx {
                                    tx.send(AgentEvent::ToolInputDelta { delta }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::Done(resp)) => {
                                final_response = Some(resp);
                                break;
                            }
                            // Channel closed without a `Done` event — the
                            // missing-response error is produced below.
                            None => break,
                        }
                    }
                }
            }
            final_response.context("Stream ended without final response")
        } else {
            self.llm_client
                .complete(messages, system, &tools)
                .await
                .context("LLM call failed")
        }
    }
1410
1411    /// Create a tool context with streaming support.
1412    ///
1413    /// When `event_tx` is Some, spawns a forwarder task that converts
1414    /// `ToolStreamEvent::OutputDelta` into `AgentEvent::ToolOutputDelta`
1415    /// and sends them to the agent event channel.
1416    ///
1417    /// Returns the augmented `ToolContext`. The forwarder task runs until
1418    /// the tool-side sender is dropped (i.e., tool execution finishes).
1419    fn streaming_tool_context(
1420        &self,
1421        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1422        tool_id: &str,
1423        tool_name: &str,
1424    ) -> ToolContext {
1425        let mut ctx = self.tool_context.clone();
1426        if let Some(agent_tx) = event_tx {
1427            let (tool_tx, mut tool_rx) = mpsc::channel::<ToolStreamEvent>(64);
1428            ctx.event_tx = Some(tool_tx);
1429
1430            let agent_tx = agent_tx.clone();
1431            let tool_id = tool_id.to_string();
1432            let tool_name = tool_name.to_string();
1433            tokio::spawn(async move {
1434                while let Some(event) = tool_rx.recv().await {
1435                    match event {
1436                        ToolStreamEvent::OutputDelta(delta) => {
1437                            agent_tx
1438                                .send(AgentEvent::ToolOutputDelta {
1439                                    id: tool_id.clone(),
1440                                    name: tool_name.clone(),
1441                                    delta,
1442                                })
1443                                .await
1444                                .ok();
1445                        }
1446                    }
1447                }
1448            });
1449        }
1450        ctx
1451    }
1452
1453    /// Resolve context from all providers for a given prompt
1454    ///
1455    /// Returns aggregated context results from all configured providers.
1456    async fn resolve_context(&self, prompt: &str, session_id: Option<&str>) -> Vec<ContextResult> {
1457        if self.config.context_providers.is_empty() {
1458            return Vec::new();
1459        }
1460
1461        let query = ContextQuery::new(prompt).with_session_id(session_id.unwrap_or(""));
1462
1463        let futures = self
1464            .config
1465            .context_providers
1466            .iter()
1467            .map(|p| p.query(&query));
1468        let outcomes = join_all(futures).await;
1469
1470        outcomes
1471            .into_iter()
1472            .enumerate()
1473            .filter_map(|(i, r)| match r {
1474                Ok(result) if !result.is_empty() => Some(result),
1475                Ok(_) => None,
1476                Err(e) => {
1477                    tracing::warn!(
1478                        "Context provider '{}' failed: {}",
1479                        self.config.context_providers[i].name(),
1480                        e
1481                    );
1482                    None
1483                }
1484            })
1485            .collect()
1486    }
1487
1488    /// Detect whether the LLM's no-tool-call response looks like an intermediate
1489    /// step rather than a genuine final answer.
1490    ///
1491    /// Returns `true` when continuation should be injected. Heuristics:
1492    /// - Response ends with a colon or ellipsis (mid-thought)
1493    /// - Response contains phrases that signal incomplete work
1494    /// - Response is very short (< 80 chars) and doesn't look like a summary
1495    fn looks_incomplete(text: &str) -> bool {
1496        let t = text.trim();
1497        if t.is_empty() {
1498            return true;
1499        }
1500        // Very short responses that aren't clearly a final answer
1501        if t.len() < 80 && !t.contains('\n') {
1502            // Short single-line — could be a genuine one-liner answer, keep going
1503            // only if it ends with punctuation that signals continuation
1504            let ends_continuation =
1505                t.ends_with(':') || t.ends_with("...") || t.ends_with('…') || t.ends_with(',');
1506            if ends_continuation {
1507                return true;
1508            }
1509        }
1510        // Phrases that signal the LLM is describing what it will do rather than doing it
1511        let incomplete_phrases = [
1512            "i'll ",
1513            "i will ",
1514            "let me ",
1515            "i need to ",
1516            "i should ",
1517            "next, i",
1518            "first, i",
1519            "now i",
1520            "i'll start",
1521            "i'll begin",
1522            "i'll now",
1523            "let's start",
1524            "let's begin",
1525            "to do this",
1526            "i'm going to",
1527        ];
1528        let lower = t.to_lowercase();
1529        for phrase in &incomplete_phrases {
1530            if lower.contains(phrase) {
1531                return true;
1532            }
1533        }
1534        false
1535    }
1536
1537    /// Get the assembled system prompt from slots.
1538    #[allow(dead_code)]
1539    fn system_prompt(&self) -> String {
1540        self.config.prompt_slots.build()
1541    }
1542
1543    /// Get the assembled system prompt from slots with an explicit style.
1544    fn system_prompt_for_style(&self, style: AgentStyle) -> String {
1545        let mut slots = self.config.prompt_slots.clone();
1546        slots.style = Some(style);
1547        slots.build()
1548    }
1549
1550    async fn resolve_effective_style(&self, prompt: &str) -> AgentStyle {
1551        if let Some(style) = self.config.prompt_slots.style {
1552            return style;
1553        }
1554
1555        let (style, confidence) = AgentStyle::detect_with_confidence(prompt);
1556        if confidence != DetectionConfidence::Low {
1557            return style;
1558        }
1559
1560        match AgentStyle::detect_with_llm(self.llm_client.as_ref(), prompt).await {
1561            Ok(classified_style) => {
1562                tracing::debug!(
1563                    intent.classification = ?classified_style,
1564                    intent.source = "llm",
1565                    "Intent classified via LLM"
1566                );
1567                classified_style
1568            }
1569            Err(e) => {
1570                tracing::warn!(error = %e, "LLM intent classification failed, using keyword detection");
1571                style
1572            }
1573        }
1574    }
1575
1576    /// Detect if a subagent should be launched for this task.
1577    ///
1578    /// Checks for explicit `[subagent:name]` syntax first, then falls back
1579    /// to style-based auto-detection if a registry is configured.
1580    ///
1581    /// Returns `Some((AgentDefinition, cleaned_prompt))` if subagent should be launched,
1582    /// or `None` if normal execution should continue.
1583    fn should_launch_subagent(
1584        &self,
1585        prompt: &str,
1586        style: AgentStyle,
1587    ) -> Option<(Arc<crate::subagent::AgentDefinition>, String)> {
1588        let registry = self.config.subagent_registry.as_ref()?;
1589
1590        // Step 1: Check for explicit [subagent:name] syntax
1591        if let Some(caps) = prompt.find("[subagent:") {
1592            // `end_offset` is offset from `caps` to ']'
1593            if let Some(end_offset) = prompt[caps..].find(']') {
1594                // `end_abs` is the absolute index of ']'
1595                let end_abs = caps + end_offset;
1596                // name is from after "[subagent:" to ']'
1597                let name = &prompt[caps + 10..end_abs];
1598                if let Some(agent_def) = registry.get(name) {
1599                    // Remove the [subagent:name] tag from the prompt
1600                    let after_tag = prompt[end_abs + 1..].trim();
1601                    let cleaned = if caps > 0 {
1602                        format!("{} {}", &prompt[..caps].trim(), after_tag)
1603                    } else {
1604                        after_tag.to_string()
1605                    };
1606                    tracing::info!(subagent = %name, "Explicit subagent request detected");
1607                    return Some((Arc::new(agent_def), cleaned.trim().to_string()));
1608                }
1609            }
1610        }
1611
1612        // Step 2: Style-based auto-detection
1613        let agent_name = match style {
1614            AgentStyle::Explore => "explore",
1615            AgentStyle::Plan => "plan",
1616            AgentStyle::Verification => "verification",
1617            AgentStyle::CodeReview => "review",
1618            AgentStyle::GeneralPurpose => return None,
1619        };
1620
1621        if let Some(agent_def) = registry.get(agent_name) {
1622            tracing::info!(
1623                subagent = %agent_name,
1624                style = ?style,
1625                "Auto-detected subagent launch based on style"
1626            );
1627            return Some((Arc::new(agent_def), prompt.to_string()));
1628        }
1629
1630        None
1631    }
1632
    /// Detect if context perception is needed based on user prompt.
    ///
    /// Returns `Some(PreContextPerceptionEvent)` if the prompt suggests the model
    /// needs workspace knowledge (finding files, understanding code, etc.),
    /// or `None` when no keyword pattern matches.
    ///
    /// Matching is purely keyword-based on the lowercased prompt. The FIRST
    /// intent group containing a matching pattern wins, so the order of the
    /// table below is significant (e.g. the very broad "search" pattern sits
    /// in the first group and will shadow later groups for any prompt that
    /// contains it).
    pub fn detect_context_perception_intent(
        &self,
        prompt: &str,
        session_id: &str,
        workspace: &str,
    ) -> Option<PreContextPerceptionEvent> {
        // All pattern matching is case-insensitive via this lowered copy.
        let lower = prompt.to_lowercase();

        // Pattern matching for different intents that suggest context perception is needed.
        // Each entry is (trigger substrings, intent label emitted in the event).
        let intents: &[(&[&str], &str)] = &[
            // Locate: finding files, functions, resources
            (
                &[
                    "where is",
                    "where are",
                    "find the file",
                    "find all",
                    "find files",
                    "who wrote",
                    "locate",
                    "search for",
                    "look for",
                    "search",
                ],
                "locate",
            ),
            // Understand: explaining how something works
            (
                &[
                    "how does",
                    "what does",
                    "explain",
                    "understand",
                    "what is this",
                    "how does this work",
                ],
                "understand",
            ),
            // Retrieve: recalling from memory/past
            (
                &[
                    "remember",
                    "earlier",
                    "before",
                    "previously",
                    "last time",
                    "past",
                    "previous",
                ],
                "retrieve",
            ),
            // Explore: understanding structure
            (
                &[
                    "how is organized",
                    "project structure",
                    "what files",
                    "show me the structure",
                    "explore",
                ],
                "explore",
            ),
            // Reason: asking why/causality
            (
                &[
                    "why did",
                    "why is",
                    "cause",
                    "reason",
                    "what happened",
                    "why does",
                ],
                "reason",
            ),
            // Validate: checking correctness
            (
                &["is this correct", "verify", "validate", "check if", "debug"],
                "validate",
            ),
            // Compare: comparing things
            (
                &[
                    "difference between",
                    "compare",
                    "versus",
                    " vs ",
                    "different from",
                ],
                "compare",
            ),
            // Track: status/history
            (
                &[
                    "status",
                    "progress",
                    "how far",
                    "history",
                    "what's the current",
                ],
                "track",
            ),
        ];

        // Detect target type from keywords; first matching branch wins, so
        // "function" outranks "file", which outranks "class", etc.
        let target_type = if lower.contains("function") || lower.contains("method") {
            "function"
        } else if lower.contains("file") || lower.contains("config") {
            "file"
        } else if lower.contains("class") {
            "entity"
        } else if lower.contains("module") || lower.contains("package") {
            "module"
        } else if lower.contains("test") {
            "test"
        } else {
            "unknown"
        };

        // Find matching intent (first group with any matching substring).
        let matched_intent = intents
            .iter()
            .find(|(patterns, _)| patterns.iter().any(|p| lower.contains(p)));

        matched_intent.map(|(patterns, intent)| {
            // Extract target name if possible (simplified extraction).
            // NOTE(review): extract_target_name_from_prompt and
            // detect_domain_from_prompt are free functions defined elsewhere
            // in this module.
            let target_name = extract_target_name_from_prompt(prompt, patterns);

            PreContextPerceptionEvent {
                session_id: session_id.to_string(),
                intent: intent.to_string(),
                target_type: target_type.to_string(),
                target_name,
                domain: detect_domain_from_prompt(prompt),
                query: Some(prompt.to_string()),
                working_directory: workspace.to_string(),
                // Urgency is currently fixed; no caller-visible knob exists here.
                urgency: "normal".to_string(),
            }
        })
    }
1776
1777    /// Fire PreContextPerception hook and wait for harness decision.
1778    async fn fire_pre_context_perception(&self, event: &PreContextPerceptionEvent) -> HookResult {
1779        if let Some(he) = &self.config.hook_engine {
1780            let hook_event = HookEvent::PreContextPerception(event.clone());
1781            he.fire(&hook_event).await
1782        } else {
1783            HookResult::continue_()
1784        }
1785    }
1786
1787    /// Fire IntentDetection hook and wait for harness decision.
1788    ///
1789    /// This is called on every prompt to detect user intent via the AHP harness.
1790    /// Returns the detected intent if the harness provides one, or None if blocked/failed.
1791    async fn fire_intent_detection(
1792        &self,
1793        prompt: &str,
1794        session_id: &str,
1795        workspace: &str,
1796    ) -> Option<IntentDetectionResult> {
1797        let event = IntentDetectionEvent {
1798            session_id: session_id.to_string(),
1799            prompt: prompt.to_string(),
1800            workspace: workspace.to_string(),
1801            language_hint: detect_language_hint(prompt),
1802        };
1803
1804        let hook_result = if let Some(he) = &self.config.hook_engine {
1805            let hook_event = HookEvent::IntentDetection(event);
1806            he.fire(&hook_event).await
1807        } else {
1808            return None;
1809        };
1810
1811        match hook_result {
1812            HookResult::Continue(Some(modified)) => {
1813                // Parse the intent detection result
1814                serde_json::from_value::<IntentDetectionResult>(modified).ok()
1815            }
1816            HookResult::Block(_) => {
1817                // Harness blocked intent detection - use fallback
1818                tracing::info!("AHP harness blocked intent detection");
1819                None
1820            }
1821            _ => None,
1822        }
1823    }
1824
1825    /// Apply injected context from AHP harness decision.
1826    #[cfg(feature = "ahp")]
1827    fn apply_injected_context(&self, injected: InjectedContext) -> Vec<ContextResult> {
1828        let mut results = Vec::new();
1829
1830        // Convert facts to ContextResult
1831        if !injected.facts.is_empty() {
1832            let items: Vec<ContextItem> = injected
1833                .facts
1834                .into_iter()
1835                .map(|f| {
1836                    let token_count = estimate_tokens(&f.content);
1837                    ContextItem {
1838                        id: uuid::Uuid::new_v4().to_string(),
1839                        context_type: ContextType::Resource,
1840                        content: f.content,
1841                        token_count,
1842                        relevance: f.confidence,
1843                        source: Some(f.source),
1844                        metadata: std::collections::HashMap::new(),
1845                    }
1846                })
1847                .collect();
1848
1849            let total_tokens: usize = items.iter().map(|i| i.token_count).sum();
1850
1851            results.push(ContextResult {
1852                items,
1853                total_tokens,
1854                provider: "ahp_harness".to_string(),
1855                truncated: false,
1856            });
1857        }
1858
1859        // Handle file_contents
1860        if let Some(file_contents) = injected.file_contents {
1861            let items: Vec<ContextItem> = file_contents
1862                .into_iter()
1863                .map(|f| {
1864                    let token_count = estimate_tokens(&f.snippet);
1865                    ContextItem {
1866                        id: uuid::Uuid::new_v4().to_string(),
1867                        context_type: ContextType::Resource,
1868                        content: f.snippet,
1869                        token_count,
1870                        relevance: f.relevance_score,
1871                        source: Some(f.path),
1872                        metadata: std::collections::HashMap::new(),
1873                    }
1874                })
1875                .collect();
1876
1877            let total_tokens: usize = items.iter().map(|i| i.token_count).sum();
1878
1879            results.push(ContextResult {
1880                items,
1881                total_tokens,
1882                provider: "ahp_harness".to_string(),
1883                truncated: false,
1884            });
1885        }
1886
1887        // Handle project_summary
1888        if let Some(summary) = injected.project_summary {
1889            let content = format!(
1890                "Project: {}\n{}",
1891                summary.project_name, summary.structure_description
1892            );
1893            let token_count = estimate_tokens(&content);
1894
1895            results.push(ContextResult {
1896                items: vec![ContextItem {
1897                    id: uuid::Uuid::new_v4().to_string(),
1898                    context_type: ContextType::Resource,
1899                    content,
1900                    token_count,
1901                    relevance: 0.9,
1902                    source: Some("ahp://project-summary".to_string()),
1903                    metadata: std::collections::HashMap::new(),
1904                }],
1905                total_tokens: token_count,
1906                provider: "ahp_harness".to_string(),
1907                truncated: false,
1908            });
1909        }
1910
1911        // Handle knowledge
1912        if let Some(knowledge) = injected.knowledge {
1913            let items: Vec<ContextItem> = knowledge
1914                .into_iter()
1915                .map(|k| {
1916                    let token_count = estimate_tokens(&k);
1917                    ContextItem {
1918                        id: uuid::Uuid::new_v4().to_string(),
1919                        context_type: ContextType::Resource,
1920                        content: k,
1921                        token_count,
1922                        relevance: 0.8,
1923                        source: Some("ahp://knowledge".to_string()),
1924                        metadata: std::collections::HashMap::new(),
1925                    }
1926                })
1927                .collect();
1928
1929            let total_tokens: usize = items.iter().map(|i| i.token_count).sum();
1930
1931            results.push(ContextResult {
1932                items,
1933                total_tokens,
1934                provider: "ahp_harness".to_string(),
1935                truncated: false,
1936            });
1937        }
1938
1939        results
1940    }
1941
1942    /// Build augmented system prompt with context
1943    #[allow(dead_code)]
1944    fn build_augmented_system_prompt(&self, context_results: &[ContextResult]) -> Option<String> {
1945        let base = self.system_prompt();
1946        self.build_augmented_system_prompt_with_base(&base, context_results)
1947    }
1948
1949    fn build_augmented_system_prompt_with_base(
1950        &self,
1951        base: &str,
1952        context_results: &[ContextResult],
1953    ) -> Option<String> {
1954        let base = base.to_string();
1955
1956        // Use live tool executor definitions so tools added via add_mcp_server() are included
1957        let live_tools = self.tool_executor.definitions();
1958        let mcp_tools: Vec<&ToolDefinition> = live_tools
1959            .iter()
1960            .filter(|t| t.name.starts_with("mcp__"))
1961            .collect();
1962
1963        let mcp_section = if mcp_tools.is_empty() {
1964            String::new()
1965        } else {
1966            let mut lines = vec![
1967                "## MCP Tools".to_string(),
1968                String::new(),
1969                "The following MCP (Model Context Protocol) tools are available. Use them when the task requires external capabilities beyond the built-in tools:".to_string(),
1970                String::new(),
1971            ];
1972            for tool in &mcp_tools {
1973                let display = format!("- `{}` — {}", tool.name, tool.description);
1974                lines.push(display);
1975            }
1976            lines.join("\n")
1977        };
1978
1979        let parts: Vec<&str> = [base.as_str(), mcp_section.as_str()]
1980            .iter()
1981            .filter(|s| !s.is_empty())
1982            .copied()
1983            .collect();
1984
1985        // Auto-detect project type from workspace and inject language-specific guidelines,
1986        // but only when the user hasn't already set a custom `guidelines` slot.
1987        let project_hint = if self.config.prompt_slots.guidelines.is_none() {
1988            Self::detect_project_hint(&self.tool_context.workspace)
1989        } else {
1990            String::new()
1991        };
1992
1993        if context_results.is_empty() {
1994            if project_hint.is_empty() {
1995                return Some(parts.join("\n\n"));
1996            }
1997            return Some(format!("{}\n\n{}", parts.join("\n\n"), project_hint));
1998        }
1999
2000        // Build context XML block
2001        let context_xml: String = context_results
2002            .iter()
2003            .map(|r| r.to_xml())
2004            .collect::<Vec<_>>()
2005            .join("\n\n");
2006
2007        if project_hint.is_empty() {
2008            Some(format!("{}\n\n{}", parts.join("\n\n"), context_xml))
2009        } else {
2010            Some(format!(
2011                "{}\n\n{}\n\n{}",
2012                parts.join("\n\n"),
2013                project_hint,
2014                context_xml
2015            ))
2016        }
2017    }
2018
2019    /// Notify providers of turn completion for memory extraction
2020    async fn notify_turn_complete(&self, session_id: &str, prompt: &str, response: &str) {
2021        let futures = self
2022            .config
2023            .context_providers
2024            .iter()
2025            .map(|p| p.on_turn_complete(session_id, prompt, response));
2026        let outcomes = join_all(futures).await;
2027
2028        for (i, result) in outcomes.into_iter().enumerate() {
2029            if let Err(e) = result {
2030                tracing::warn!(
2031                    "Context provider '{}' on_turn_complete failed: {}",
2032                    self.config.context_providers[i].name(),
2033                    e
2034                );
2035            }
2036        }
2037    }
2038
2039    /// Fire PreToolUse hook event before tool execution.
2040    /// Returns the HookResult which may block the tool call.
2041    async fn fire_pre_tool_use(
2042        &self,
2043        session_id: &str,
2044        tool_name: &str,
2045        args: &serde_json::Value,
2046        recent_tools: Vec<String>,
2047    ) -> Option<HookResult> {
2048        if let Some(he) = &self.config.hook_engine {
2049            // Convert null args to empty object so JS callbacks don't get null.input errors
2050            let safe_args = if args.is_null() {
2051                serde_json::Value::Object(Default::default())
2052            } else {
2053                args.clone()
2054            };
2055            let event = HookEvent::PreToolUse(PreToolUseEvent {
2056                session_id: session_id.to_string(),
2057                tool: tool_name.to_string(),
2058                args: safe_args,
2059                working_directory: self.tool_context.workspace.to_string_lossy().to_string(),
2060                recent_tools,
2061            });
2062            let result = he.fire(&event).await;
2063            if result.is_block() {
2064                return Some(result);
2065            }
2066        }
2067        None
2068    }
2069
2070    /// Fire PostToolUse hook event after tool execution (fire-and-forget).
2071    async fn fire_post_tool_use(
2072        &self,
2073        session_id: &str,
2074        tool_name: &str,
2075        args: &serde_json::Value,
2076        output: &str,
2077        success: bool,
2078        duration_ms: u64,
2079    ) {
2080        if let Some(he) = &self.config.hook_engine {
2081            // Convert null args to empty object so JS callbacks don't get null.input errors
2082            let safe_args = if args.is_null() {
2083                serde_json::Value::Object(Default::default())
2084            } else {
2085                args.clone()
2086            };
2087            let event = HookEvent::PostToolUse(PostToolUseEvent {
2088                session_id: session_id.to_string(),
2089                tool: tool_name.to_string(),
2090                args: safe_args,
2091                result: ToolResultData {
2092                    success,
2093                    output: output.to_string(),
2094                    exit_code: if success { Some(0) } else { Some(1) },
2095                    duration_ms,
2096                },
2097            });
2098            let he = Arc::clone(he);
2099            tokio::spawn(async move {
2100                let _ = he.fire(&event).await;
2101            });
2102        }
2103    }
2104
2105    /// Fire GenerateStart hook event before an LLM call.
2106    async fn fire_generate_start(
2107        &self,
2108        session_id: &str,
2109        prompt: &str,
2110        system_prompt: &Option<String>,
2111    ) {
2112        if let Some(he) = &self.config.hook_engine {
2113            let event = HookEvent::GenerateStart(GenerateStartEvent {
2114                session_id: session_id.to_string(),
2115                prompt: prompt.to_string(),
2116                system_prompt: system_prompt.clone(),
2117                model_provider: String::new(),
2118                model_name: String::new(),
2119                available_tools: self.config.tools.iter().map(|t| t.name.clone()).collect(),
2120            });
2121            let _ = he.fire(&event).await;
2122        }
2123    }
2124
2125    /// Fire GenerateEnd hook event after an LLM call.
2126    async fn fire_generate_end(
2127        &self,
2128        session_id: &str,
2129        prompt: &str,
2130        response: &LlmResponse,
2131        duration_ms: u64,
2132    ) {
2133        if let Some(he) = &self.config.hook_engine {
2134            let tool_calls: Vec<ToolCallInfo> = response
2135                .tool_calls()
2136                .iter()
2137                .map(|tc| {
2138                    let args = if tc.args.is_null() {
2139                        serde_json::Value::Object(Default::default())
2140                    } else {
2141                        tc.args.clone()
2142                    };
2143                    ToolCallInfo {
2144                        name: tc.name.clone(),
2145                        args,
2146                    }
2147                })
2148                .collect();
2149
2150            let event = HookEvent::GenerateEnd(GenerateEndEvent {
2151                session_id: session_id.to_string(),
2152                prompt: prompt.to_string(),
2153                response_text: response.text().to_string(),
2154                tool_calls,
2155                usage: TokenUsageInfo {
2156                    prompt_tokens: response.usage.prompt_tokens as i32,
2157                    completion_tokens: response.usage.completion_tokens as i32,
2158                    total_tokens: response.usage.total_tokens as i32,
2159                },
2160                duration_ms,
2161            });
2162            let _ = he.fire(&event).await;
2163        }
2164    }
2165
2166    /// Fire PrePrompt hook event before prompt augmentation.
2167    /// Returns optional modified prompt text from the hook.
2168    async fn fire_pre_prompt(
2169        &self,
2170        session_id: &str,
2171        prompt: &str,
2172        system_prompt: &Option<String>,
2173        message_count: usize,
2174    ) -> Option<String> {
2175        if let Some(he) = &self.config.hook_engine {
2176            let event = HookEvent::PrePrompt(PrePromptEvent {
2177                session_id: session_id.to_string(),
2178                prompt: prompt.to_string(),
2179                system_prompt: system_prompt.clone(),
2180                message_count,
2181            });
2182            let result = he.fire(&event).await;
2183            if let HookResult::Continue(Some(modified)) = result {
2184                // Extract modified prompt from hook response
2185                if let Some(new_prompt) = modified.get("prompt").and_then(|v| v.as_str()) {
2186                    return Some(new_prompt.to_string());
2187                }
2188            }
2189        }
2190        None
2191    }
2192
2193    /// Fire PostResponse hook event after the agent loop completes.
2194    async fn fire_post_response(
2195        &self,
2196        session_id: &str,
2197        response_text: &str,
2198        tool_calls_count: usize,
2199        usage: &TokenUsage,
2200        duration_ms: u64,
2201    ) {
2202        if let Some(he) = &self.config.hook_engine {
2203            let event = HookEvent::PostResponse(PostResponseEvent {
2204                session_id: session_id.to_string(),
2205                response_text: response_text.to_string(),
2206                tool_calls_count,
2207                usage: TokenUsageInfo {
2208                    prompt_tokens: usage.prompt_tokens as i32,
2209                    completion_tokens: usage.completion_tokens as i32,
2210                    total_tokens: usage.total_tokens as i32,
2211                },
2212                duration_ms,
2213            });
2214            let he = Arc::clone(he);
2215            tokio::spawn(async move {
2216                let _ = he.fire(&event).await;
2217            });
2218        }
2219    }
2220
2221    /// Fire OnError hook event when an error occurs.
2222    async fn fire_on_error(
2223        &self,
2224        session_id: &str,
2225        error_type: ErrorType,
2226        error_message: &str,
2227        context: serde_json::Value,
2228    ) {
2229        if let Some(he) = &self.config.hook_engine {
2230            let event = HookEvent::OnError(OnErrorEvent {
2231                session_id: session_id.to_string(),
2232                error_type,
2233                error_message: error_message.to_string(),
2234                context,
2235            });
2236            let he = Arc::clone(he);
2237            tokio::spawn(async move {
2238                let _ = he.fire(&event).await;
2239            });
2240        }
2241    }
2242
    /// Execute the agent loop for a prompt
    ///
    /// Takes the conversation history and a new user prompt.
    /// Returns the agent result and updated message history.
    /// When event_tx is provided, uses streaming LLM API for real-time text output.
    ///
    /// Convenience wrapper: delegates to [`execute_with_session`] with no
    /// session id and no cancellation token.
    pub async fn execute(
        &self,
        history: &[Message],
        prompt: &str,
        event_tx: Option<mpsc::Sender<AgentEvent>>,
    ) -> Result<AgentResult> {
        self.execute_with_session(history, prompt, None, event_tx, None)
            .await
    }
2257
2258    /// Execute the agent loop with pre-built messages (user message already included).
2259    ///
2260    /// Used by `send_with_attachments` / `stream_with_attachments` where the
2261    /// user message contains multi-modal content and is already appended to
2262    /// the messages vec.
2263    pub async fn execute_from_messages(
2264        &self,
2265        messages: Vec<Message>,
2266        session_id: Option<&str>,
2267        event_tx: Option<mpsc::Sender<AgentEvent>>,
2268        cancel_token: Option<&tokio_util::sync::CancellationToken>,
2269    ) -> Result<AgentResult> {
2270        let default_token = tokio_util::sync::CancellationToken::new();
2271        let token = cancel_token.unwrap_or(&default_token);
2272        tracing::info!(
2273            a3s.session.id = session_id.unwrap_or("none"),
2274            a3s.agent.max_turns = self.config.max_tool_rounds,
2275            "a3s.agent.execute_from_messages started"
2276        );
2277
2278        // Extract the last user message text for hooks, memory recall, and events.
2279        // Pass empty prompt so execute_loop skips adding a duplicate user message,
2280        // but provide effective_prompt for hook/memory/event purposes.
2281        let effective_prompt = messages
2282            .iter()
2283            .rev()
2284            .find(|m| m.role == "user")
2285            .map(|m| m.text())
2286            .unwrap_or_default();
2287
2288        let result = self
2289            .execute_loop_inner(
2290                &messages,
2291                "",
2292                &effective_prompt,
2293                None, // no pre-computed style; resolve inside the loop
2294                session_id,
2295                event_tx,
2296                token,
2297                true, // emit_end: this is a standalone execution
2298            )
2299            .await;
2300
2301        match &result {
2302            Ok(r) => tracing::info!(
2303                a3s.agent.tool_calls_count = r.tool_calls_count,
2304                a3s.llm.total_tokens = r.usage.total_tokens,
2305                "a3s.agent.execute_from_messages completed"
2306            ),
2307            Err(e) => tracing::warn!(
2308                error = %e,
2309                "a3s.agent.execute_from_messages failed"
2310            ),
2311        }
2312
2313        result
2314    }
2315
2316    /// Execute the agent loop for a prompt with session context
2317    ///
2318    /// Takes the conversation history, user prompt, and optional session ID.
2319    /// When session_id is provided, context providers can use it for session-specific context.
2320    pub async fn execute_with_session(
2321        &self,
2322        history: &[Message],
2323        prompt: &str,
2324        session_id: Option<&str>,
2325        event_tx: Option<mpsc::Sender<AgentEvent>>,
2326        cancel_token: Option<&tokio_util::sync::CancellationToken>,
2327    ) -> Result<AgentResult> {
2328        let default_token = tokio_util::sync::CancellationToken::new();
2329        let token = cancel_token.unwrap_or(&default_token);
2330        tracing::info!(
2331            a3s.session.id = session_id.unwrap_or("none"),
2332            a3s.agent.max_turns = self.config.max_tool_rounds,
2333            "a3s.agent.execute started"
2334        );
2335
2336        // Step 1: keyword-based detection (always fast, no LLM).
2337        // Only call LLM for style classification when confidence is Low.
2338        let (keyword_style, confidence) = AgentStyle::detect_with_confidence(prompt);
2339        let effective_style = if confidence == DetectionConfidence::Low {
2340            match AgentStyle::detect_with_llm(self.llm_client.as_ref(), prompt).await {
2341                Ok(classified_style) => {
2342                    tracing::debug!(
2343                        intent.classification = ?classified_style,
2344                        intent.source = "llm",
2345                        "Intent classified via LLM"
2346                    );
2347                    classified_style
2348                }
2349                Err(e) => {
2350                    tracing::warn!(error = %e, "LLM intent classification failed, using keyword detection");
2351                    keyword_style
2352                }
2353            }
2354        } else {
2355            keyword_style
2356        };
2357
2358        // Step 2: pre-analysis — intent + goal + plan + input optimization in ONE LLM call.
2359        // Skipped for High-confidence simple queries (just keyword intent, no planning needed).
2360        let pre_analysis: Option<PreAnalysis> = {
2361            let needs_llm_prep = effective_style.requires_planning()
2362                || confidence == DetectionConfidence::Low
2363                || self.config.planning_mode == PlanningMode::Enabled;
2364
2365            if !needs_llm_prep {
2366                None
2367            } else {
2368                match LlmPlanner::pre_analyze(&self.llm_client.clone(), prompt).await {
2369                    Ok(analysis) => {
2370                        tracing::debug!(
2371                            intent = ?analysis.intent,
2372                            requires_planning = analysis.requires_planning,
2373                            plan_steps = analysis.execution_plan.steps.len(),
2374                            "Pre-analysis completed"
2375                        );
2376                        Some(analysis)
2377                    }
2378                    Err(e) => {
2379                        tracing::warn!(error = %e, "Pre-analysis failed, falling back to keyword intent");
2380                        None
2381                    }
2382                }
2383            }
2384        };
2385
2386        // Use pre-analysis style if available, otherwise use resolved style.
2387        let exec_style = pre_analysis
2388            .as_ref()
2389            .map(|a| &a.intent)
2390            .unwrap_or(&effective_style);
2391
2392        // Check if a subagent should be launched for this task
2393        if let Some((subagent_def, cleaned_prompt)) =
2394            self.should_launch_subagent(prompt, *exec_style)
2395        {
2396            tracing::info!(subagent = %subagent_def.name, "Subagent launch requested");
2397
2398            // If callback is configured, use it to handle subagent launch
2399            if let Some(ref callback) = self.config.on_subagent_launch {
2400                if let Some(result) = callback(&subagent_def, &cleaned_prompt) {
2401                    tracing::info!(subagent = %subagent_def.name, "Subagent executed successfully");
2402                    return result;
2403                }
2404            }
2405            // If callback not configured or returned None, fall through to normal execution
2406            tracing::debug!(subagent = %subagent_def.name, "No callback or callback returned None, continuing with normal execution");
2407        }
2408
2409        // Determine whether to use planning mode.
2410        // Prefer pre-analysis result if available (from the single LLM pre-analysis call).
2411        let use_planning = if let Some(ref analysis) = pre_analysis {
2412            analysis.requires_planning
2413        } else if self.config.planning_mode == PlanningMode::Auto {
2414            exec_style.requires_planning()
2415        } else {
2416            // Explicit mode: Enabled or Disabled
2417            self.config.planning_mode.should_plan(prompt)
2418        };
2419
2420        // Create agent task if task_manager is available
2421        let task_id = if let Some(ref tm) = self.task_manager {
2422            let workspace = self.tool_context.workspace.display().to_string();
2423            let task = crate::task::Task::agent("agent", &workspace, prompt);
2424            let id = task.id;
2425            tm.spawn(task);
2426            let _ = tm.start(id);
2427            Some(id)
2428        } else {
2429            None
2430        };
2431
2432        // Determine the effective prompt: use optimized input from pre-analysis if available.
2433        // Clone to avoid borrow conflict when pre_analysis is moved.
2434        let effective_prompt: String = match pre_analysis.as_ref() {
2435            Some(a) => a.optimized_input.clone(),
2436            None => prompt.to_string(),
2437        };
2438
2439        let result = if use_planning {
2440            self.execute_with_planning(history, &effective_prompt, event_tx, pre_analysis)
2441                .await
2442        } else {
2443            self.execute_loop(
2444                history,
2445                &effective_prompt,
2446                *exec_style,
2447                session_id,
2448                event_tx,
2449                token,
2450                true,
2451            )
2452            .await
2453        };
2454
2455        // Complete or fail agent task based on result
2456        if let Some(ref tm) = self.task_manager {
2457            if let Some(tid) = task_id {
2458                match &result {
2459                    Ok(r) => {
2460                        let output = serde_json::json!({
2461                            "text": r.text,
2462                            "tool_calls_count": r.tool_calls_count,
2463                            "usage": r.usage,
2464                        });
2465                        let _ = tm.complete(tid, Some(output));
2466                    }
2467                    Err(e) => {
2468                        let _ = tm.fail(tid, e.to_string());
2469                    }
2470                }
2471            }
2472        }
2473
2474        match &result {
2475            Ok(r) => {
2476                tracing::info!(
2477                    a3s.agent.tool_calls_count = r.tool_calls_count,
2478                    a3s.llm.total_tokens = r.usage.total_tokens,
2479                    "a3s.agent.execute completed"
2480                );
2481                // Fire PostResponse hook
2482                self.fire_post_response(
2483                    session_id.unwrap_or(""),
2484                    &r.text,
2485                    r.tool_calls_count,
2486                    &r.usage,
2487                    0, // duration tracked externally
2488                )
2489                .await;
2490            }
2491            Err(e) => {
2492                tracing::warn!(
2493                    error = %e,
2494                    "a3s.agent.execute failed"
2495                );
2496                // Fire OnError hook
2497                self.fire_on_error(
2498                    session_id.unwrap_or(""),
2499                    ErrorType::Other,
2500                    &e.to_string(),
2501                    serde_json::json!({"phase": "execute"}),
2502                )
2503                .await;
2504            }
2505        }
2506
2507        result
2508    }
2509
2510    /// Core execution loop (without planning routing).
2511    ///
2512    /// This is the inner loop that runs LLM calls and tool executions.
2513    /// Called directly by `execute_with_session` (after planning check)
2514    /// and by `execute_plan` (for individual steps, bypassing planning).
2515    #[allow(clippy::too_many_arguments)]
2516    async fn execute_loop(
2517        &self,
2518        history: &[Message],
2519        prompt: &str,
2520        effective_style: AgentStyle,
2521        session_id: Option<&str>,
2522        event_tx: Option<mpsc::Sender<AgentEvent>>,
2523        cancel_token: &tokio_util::sync::CancellationToken,
2524        emit_end: bool,
2525    ) -> Result<AgentResult> {
2526        // When called via execute_loop, the prompt is used for both
2527        // message-adding and hook/memory/event purposes.
2528        self.execute_loop_inner(
2529            history,
2530            prompt,
2531            prompt,
2532            Some(effective_style),
2533            session_id,
2534            event_tx,
2535            cancel_token,
2536            emit_end,
2537        )
2538        .await
2539    }
2540
2541    /// Inner execution loop.
2542    ///
2543    /// `msg_prompt` controls whether a user message is appended (empty = skip).
2544    /// `effective_prompt` is used for hooks, memory recall, taint tracking, and events.
2545    /// `effective_style` pre-computed style to skip redundant LLM-based intent detection.
2546    /// `emit_end` controls whether to send `AgentEvent::End` when the loop completes
2547    /// (should be false when called from `execute_plan` to avoid duplicate End events).
2548    #[allow(clippy::too_many_arguments)]
2549    async fn execute_loop_inner(
2550        &self,
2551        history: &[Message],
2552        msg_prompt: &str,
2553        effective_prompt: &str,
2554        effective_style: Option<AgentStyle>,
2555        session_id: Option<&str>,
2556        event_tx: Option<mpsc::Sender<AgentEvent>>,
2557        cancel_token: &tokio_util::sync::CancellationToken,
2558        emit_end: bool,
2559    ) -> Result<AgentResult> {
2560        let mut messages = history.to_vec();
2561        let mut total_usage = TokenUsage::default();
2562        let mut tool_calls_count = 0;
2563        let mut turn = 0;
2564        // Consecutive malformed-tool-args errors (4.1 parse error recovery)
2565        let mut parse_error_count: u32 = 0;
2566        // Continuation injection counter
2567        let mut continuation_count: u32 = 0;
2568        let mut recent_tool_signatures: Vec<String> = Vec::new();
2569        let style_prompt = if effective_prompt.is_empty() {
2570            msg_prompt
2571        } else {
2572            effective_prompt
2573        };
2574        let effective_style = match effective_style {
2575            Some(s) => s,
2576            None => self.resolve_effective_style(style_prompt).await,
2577        };
2578        let effective_system_prompt = self.system_prompt_for_style(effective_style);
2579        if let Some(tx) = &event_tx {
2580            tx.send(AgentEvent::AgentModeChanged {
2581                mode: effective_style.runtime_mode().to_string(),
2582                agent: effective_style.builtin_agent_name().to_string(),
2583                description: effective_style.description().to_string(),
2584            })
2585            .await
2586            .ok();
2587        }
2588
2589        // Send start event
2590        if let Some(tx) = &event_tx {
2591            tx.send(AgentEvent::Start {
2592                prompt: effective_prompt.to_string(),
2593            })
2594            .await
2595            .ok();
2596        }
2597
2598        // Forward queue events (CommandDeadLettered, CommandRetry, QueueAlert) to event stream
2599        let _queue_forward_handle =
2600            if let (Some(ref queue), Some(ref tx)) = (&self.command_queue, &event_tx) {
2601                let mut rx = queue.subscribe();
2602                let tx = tx.clone();
2603                Some(tokio::spawn(async move {
2604                    while let Ok(event) = rx.recv().await {
2605                        if tx.send(event).await.is_err() {
2606                            break;
2607                        }
2608                    }
2609                }))
2610            } else {
2611                None
2612            };
2613
2614        // Fire PrePrompt hook (may modify the prompt)
2615        let built_system_prompt = Some(effective_system_prompt.clone());
2616        let hooked_prompt = if let Some(modified) = self
2617            .fire_pre_prompt(
2618                session_id.unwrap_or(""),
2619                effective_prompt,
2620                &built_system_prompt,
2621                messages.len(),
2622            )
2623            .await
2624        {
2625            modified
2626        } else {
2627            effective_prompt.to_string()
2628        };
2629        let effective_prompt = hooked_prompt.as_str();
2630
2631        // Taint-track the incoming prompt for sensitive data detection
2632        if let Some(ref sp) = self.config.security_provider {
2633            sp.taint_input(effective_prompt);
2634        }
2635
2636        // Recall relevant memories and inject into system prompt
2637        let system_with_memory = if let Some(ref memory) = self.config.memory {
2638            match memory.recall_similar(effective_prompt, 5).await {
2639                Ok(items) if !items.is_empty() => {
2640                    if let Some(tx) = &event_tx {
2641                        for item in &items {
2642                            tx.send(AgentEvent::MemoryRecalled {
2643                                memory_id: item.id.clone(),
2644                                content: item.content.clone(),
2645                                relevance: item.relevance_score(),
2646                            })
2647                            .await
2648                            .ok();
2649                        }
2650                        tx.send(AgentEvent::MemoriesSearched {
2651                            query: Some(effective_prompt.to_string()),
2652                            tags: Vec::new(),
2653                            result_count: items.len(),
2654                        })
2655                        .await
2656                        .ok();
2657                    }
2658                    let memory_context = items
2659                        .iter()
2660                        .map(|i| format!("- {}", i.content))
2661                        .collect::<Vec<_>>()
2662                        .join(
2663                            "
2664",
2665                        );
2666                    let base = effective_system_prompt.clone();
2667                    Some(format!(
2668                        "{}
2669
2670## Relevant past experience
2671{}",
2672                        base, memory_context
2673                    ))
2674                }
2675                _ => Some(effective_system_prompt.clone()),
2676            }
2677        } else {
2678            Some(effective_system_prompt.clone())
2679        };
2680
2681        // Resolve context from providers on first turn (before adding user message)
2682        // Intent-driven: detect context perception need first via AHP harness, then fire hook
2683        let workspace = self.tool_context.workspace.display().to_string();
2684        let session_id_str = session_id.unwrap_or("");
2685        let context_results = if !self.config.context_providers.is_empty() {
2686            // Step 1: Fire IntentDetection harness point on EVERY prompt
2687            #[allow(clippy::needless_borrow)]
2688            let harness_intent = self
2689                .fire_intent_detection(effective_prompt, &session_id_str, &workspace)
2690                .await;
2691
2692            // Step 2: Build perception event from harness result, or fallback to local detection
2693            #[allow(clippy::needless_borrow)]
2694            let perception_event = if let Some(detected) = harness_intent {
2695                tracing::info!(
2696                    intent = %detected.detected_intent,
2697                    confidence = %detected.confidence,
2698                    "Intent detected from AHP harness"
2699                );
2700                Some(build_pre_context_perception_from_intent(
2701                    detected,
2702                    effective_prompt,
2703                    &session_id_str,
2704                    &workspace,
2705                ))
2706            } else {
2707                // Fallback to local keyword detection
2708                tracing::debug!("No intent from harness, using local keyword detection");
2709                self.detect_context_perception_intent(effective_prompt, &session_id_str, &workspace)
2710            };
2711
2712            if let Some(perception_event) = perception_event {
2713                // Step 3: Fire PreContextPerception hook to get harness decision
2714                tracing::info!(
2715                    intent = %perception_event.intent,
2716                    target_type = %perception_event.target_type,
2717                    "Context perception intent detected, firing AHP hook"
2718                );
2719
2720                let hook_result = self.fire_pre_context_perception(&perception_event).await;
2721
2722                match hook_result {
2723                    HookResult::Continue(Some(modified_context)) => {
2724                        // AHP harness returned injected context - parse and use it
2725                        #[cfg(feature = "ahp")]
2726                        {
2727                            if let Ok(injected) =
2728                                serde_json::from_value::<InjectedContext>(modified_context)
2729                            {
2730                                tracing::info!(
2731                                    facts = injected.facts.len(),
2732                                    "Using injected context from AHP harness"
2733                                );
2734                                self.apply_injected_context(injected)
2735                            } else {
2736                                // Fall back to normal providers if parsing fails
2737                                tracing::warn!(
2738                                    "Failed to parse injected context, falling back to providers"
2739                                );
2740                                self.resolve_context(effective_prompt, session_id).await
2741                            }
2742                        }
2743                        #[cfg(not(feature = "ahp"))]
2744                        {
2745                            // Without AHP, fall back to normal providers
2746                            let _ = modified_context; // suppress unused warning
2747                            self.resolve_context(effective_prompt, session_id).await
2748                        }
2749                    }
2750                    HookResult::Block(_) => {
2751                        // Harness blocked context injection - skip
2752                        tracing::info!("AHP harness blocked context injection");
2753                        Vec::new()
2754                    }
2755                    _ => {
2756                        // No modification or unknown result, proceed with normal providers
2757                        self.resolve_context(effective_prompt, session_id).await
2758                    }
2759                }
2760            } else {
2761                // No intent detected, proceed with normal providers
2762                self.resolve_context(effective_prompt, session_id).await
2763            }
2764        } else {
2765            Vec::new()
2766        };
2767
2768        // Send context resolved event
2769        if let Some(tx) = &event_tx {
2770            let total_items: usize = context_results.iter().map(|r| r.items.len()).sum();
2771            let total_tokens: usize = context_results.iter().map(|r| r.total_tokens).sum();
2772
2773            tracing::info!(
2774                context_items = total_items,
2775                context_tokens = total_tokens,
2776                "Context resolution completed"
2777            );
2778
2779            tx.send(AgentEvent::ContextResolved {
2780                total_items,
2781                total_tokens,
2782            })
2783            .await
2784            .ok();
2785        }
2786
2787        let augmented_system = self
2788            .build_augmented_system_prompt_with_base(&effective_system_prompt, &context_results);
2789
2790        // Merge memory context into system prompt
2791        let base_prompt = effective_system_prompt.clone();
2792        let augmented_system = match (augmented_system, system_with_memory) {
2793            (Some(ctx), Some(mem)) if ctx != mem => Some(ctx.replacen(&base_prompt, &mem, 1)),
2794            (Some(ctx), _) => Some(ctx),
2795            (None, mem) => mem,
2796        };
2797
2798        // Add user message
2799        if !msg_prompt.is_empty() {
2800            messages.push(Message::user(msg_prompt));
2801        }
2802
2803        loop {
2804            turn += 1;
2805
2806            if turn > self.config.max_tool_rounds {
2807                let error = format!("Max tool rounds ({}) exceeded", self.config.max_tool_rounds);
2808                if let Some(tx) = &event_tx {
2809                    tx.send(AgentEvent::Error {
2810                        message: error.clone(),
2811                    })
2812                    .await
2813                    .ok();
2814                }
2815                anyhow::bail!(error);
2816            }
2817
2818            // Send turn start event
2819            if let Some(tx) = &event_tx {
2820                tx.send(AgentEvent::TurnStart { turn }).await.ok();
2821            }
2822
2823            tracing::info!(
2824                turn = turn,
2825                max_turns = self.config.max_tool_rounds,
2826                "Agent turn started"
2827            );
2828
2829            // Call LLM - use streaming if we have an event channel
2830            tracing::info!(
2831                a3s.llm.streaming = event_tx.is_some(),
2832                "LLM completion started"
2833            );
2834
2835            // Fire GenerateStart hook
2836            self.fire_generate_start(
2837                session_id.unwrap_or(""),
2838                effective_prompt,
2839                &augmented_system,
2840            )
2841            .await;
2842
2843            let llm_start = std::time::Instant::now();
2844            // Circuit breaker (4.3): retry non-streaming LLM calls on transient failures.
2845            // Each failure increments `consecutive_llm_errors`; on success it resets to 0.
2846            // Streaming mode bails immediately on failure (events can't be replayed).
2847            let response = {
2848                let threshold = self.config.circuit_breaker_threshold.max(1);
2849                let mut attempt = 0u32;
2850                loop {
2851                    attempt += 1;
2852                    let result = self
2853                        .call_llm(
2854                            &messages,
2855                            augmented_system.as_deref(),
2856                            &event_tx,
2857                            cancel_token,
2858                        )
2859                        .await;
2860                    match result {
2861                        Ok(r) => {
2862                            break r;
2863                        }
2864                        // Never retry if cancelled
2865                        Err(e) if cancel_token.is_cancelled() => {
2866                            anyhow::bail!(e);
2867                        }
2868                        // Retry when: non-streaming under threshold, OR first streaming attempt
2869                        Err(e) if attempt < threshold && (event_tx.is_none() || attempt == 1) => {
2870                            tracing::warn!(
2871                                turn = turn,
2872                                attempt = attempt,
2873                                threshold = threshold,
2874                                error = %e,
2875                                "LLM call failed, will retry"
2876                            );
2877                            tokio::time::sleep(Duration::from_millis(100 * attempt as u64)).await;
2878                        }
2879                        // Threshold exceeded or streaming mid-stream: bail
2880                        Err(e) => {
2881                            let msg = if attempt > 1 {
2882                                format!(
2883                                    "LLM circuit breaker triggered: failed after {} attempt(s): {}",
2884                                    attempt, e
2885                                )
2886                            } else {
2887                                format!("LLM call failed: {}", e)
2888                            };
2889                            tracing::error!(turn = turn, attempt = attempt, "{}", msg);
2890                            // Fire OnError hook for LLM failure
2891                            self.fire_on_error(
2892                                session_id.unwrap_or(""),
2893                                ErrorType::LlmFailure,
2894                                &msg,
2895                                serde_json::json!({"turn": turn, "attempt": attempt}),
2896                            )
2897                            .await;
2898                            if let Some(tx) = &event_tx {
2899                                tx.send(AgentEvent::Error {
2900                                    message: msg.clone(),
2901                                })
2902                                .await
2903                                .ok();
2904                            }
2905                            anyhow::bail!(msg);
2906                        }
2907                    }
2908                }
2909            };
2910
2911            // Update usage
2912            total_usage.prompt_tokens += response.usage.prompt_tokens;
2913            total_usage.completion_tokens += response.usage.completion_tokens;
2914            total_usage.total_tokens += response.usage.total_tokens;
2915
2916            // Track token usage in progress tracker
2917            if let Some(ref tracker) = self.progress_tracker {
2918                let token_usage = crate::task::TaskTokenUsage {
2919                    input_tokens: response.usage.prompt_tokens as u64,
2920                    output_tokens: response.usage.completion_tokens as u64,
2921                    cache_read_tokens: response.usage.cache_read_tokens.unwrap_or(0) as u64,
2922                    cache_write_tokens: response.usage.cache_write_tokens.unwrap_or(0) as u64,
2923                };
2924                if let Ok(mut guard) = tracker.try_write() {
2925                    guard.track_tokens(token_usage);
2926                }
2927            }
2928
2929            // Record LLM completion telemetry
2930            let llm_duration = llm_start.elapsed();
2931            tracing::info!(
2932                turn = turn,
2933                streaming = event_tx.is_some(),
2934                prompt_tokens = response.usage.prompt_tokens,
2935                completion_tokens = response.usage.completion_tokens,
2936                total_tokens = response.usage.total_tokens,
2937                stop_reason = response.stop_reason.as_deref().unwrap_or("unknown"),
2938                duration_ms = llm_duration.as_millis() as u64,
2939                "LLM completion finished"
2940            );
2941
2942            // Fire GenerateEnd hook
2943            self.fire_generate_end(
2944                session_id.unwrap_or(""),
2945                effective_prompt,
2946                &response,
2947                llm_duration.as_millis() as u64,
2948            )
2949            .await;
2950
2951            // Record LLM usage on the llm span
2952            crate::telemetry::record_llm_usage(
2953                response.usage.prompt_tokens,
2954                response.usage.completion_tokens,
2955                response.usage.total_tokens,
2956                response.stop_reason.as_deref(),
2957            );
2958            // Log turn token usage
2959            tracing::info!(
2960                turn = turn,
2961                a3s.llm.total_tokens = response.usage.total_tokens,
2962                "Turn token usage"
2963            );
2964
2965            // Add assistant message to history
2966            messages.push(response.message.clone());
2967
2968            // Check for tool calls
2969            let tool_calls = response.tool_calls();
2970
2971            // Send turn end event
2972            if let Some(tx) = &event_tx {
2973                tx.send(AgentEvent::TurnEnd {
2974                    turn,
2975                    usage: response.usage.clone(),
2976                })
2977                .await
2978                .ok();
2979            }
2980
2981            // Auto-compact: check if context usage exceeds threshold
2982            if self.config.auto_compact {
2983                let used = response.usage.prompt_tokens;
2984                let max = self.config.max_context_tokens;
2985                let threshold = self.config.auto_compact_threshold;
2986
2987                if crate::session::compaction::should_auto_compact(used, max, threshold) {
2988                    let before_len = messages.len();
2989                    let percent_before = used as f32 / max as f32;
2990
2991                    tracing::info!(
2992                        used_tokens = used,
2993                        max_tokens = max,
2994                        percent = percent_before,
2995                        threshold = threshold,
2996                        "Auto-compact triggered"
2997                    );
2998
2999                    // Step 1: Prune large tool outputs first (cheap, no LLM call)
3000                    if let Some(pruned) = crate::session::compaction::prune_tool_outputs(&messages)
3001                    {
3002                        messages = pruned;
3003                        tracing::info!("Tool output pruning applied");
3004                    }
3005
3006                    // Step 2: Full summarization using the agent's LLM client
3007                    if let Ok(Some(compacted)) = crate::session::compaction::compact_messages(
3008                        session_id.unwrap_or(""),
3009                        &messages,
3010                        &self.llm_client,
3011                    )
3012                    .await
3013                    {
3014                        messages = compacted;
3015                    }
3016
3017                    // Emit compaction event
3018                    if let Some(tx) = &event_tx {
3019                        tx.send(AgentEvent::ContextCompacted {
3020                            session_id: session_id.unwrap_or("").to_string(),
3021                            before_messages: before_len,
3022                            after_messages: messages.len(),
3023                            percent_before,
3024                        })
3025                        .await
3026                        .ok();
3027                    }
3028                }
3029            }
3030
3031            if tool_calls.is_empty() {
3032                // No tool calls — check if we should inject a continuation message
3033                // before treating this as a final answer.
3034                let final_text = response.text();
3035
3036                if self.config.continuation_enabled
3037                    && continuation_count < self.config.max_continuation_turns
3038                    && turn < self.config.max_tool_rounds  // never inject past the turn limit
3039                    && Self::looks_incomplete(&final_text)
3040                {
3041                    continuation_count += 1;
3042                    tracing::info!(
3043                        turn = turn,
3044                        continuation = continuation_count,
3045                        max_continuation = self.config.max_continuation_turns,
3046                        "Injecting continuation message — response looks incomplete"
3047                    );
3048                    // Inject continuation as a user message and keep looping
3049                    messages.push(Message::user(crate::prompts::CONTINUATION));
3050                    continue;
3051                }
3052
3053                // Sanitize output to redact any sensitive data before returning
3054                let final_text = if let Some(ref sp) = self.config.security_provider {
3055                    sp.sanitize_output(&final_text)
3056                } else {
3057                    final_text
3058                };
3059
3060                // Record final totals
3061                tracing::info!(
3062                    tool_calls_count = tool_calls_count,
3063                    total_prompt_tokens = total_usage.prompt_tokens,
3064                    total_completion_tokens = total_usage.completion_tokens,
3065                    total_tokens = total_usage.total_tokens,
3066                    turns = turn,
3067                    "Agent execution completed"
3068                );
3069
3070                if emit_end {
3071                    if let Some(tx) = &event_tx {
3072                        tx.send(AgentEvent::End {
3073                            text: final_text.clone(),
3074                            usage: total_usage.clone(),
3075                            meta: response.meta.clone(),
3076                        })
3077                        .await
3078                        .ok();
3079                    }
3080                }
3081
3082                // Notify context providers of turn completion for memory extraction
3083                if let Some(sid) = session_id {
3084                    self.notify_turn_complete(sid, effective_prompt, &final_text)
3085                        .await;
3086                }
3087
3088                return Ok(AgentResult {
3089                    text: final_text,
3090                    messages,
3091                    usage: total_usage,
3092                    tool_calls_count,
3093                });
3094            }
3095
3096            // Execute tools sequentially
3097            // Fast path: when all tool calls are independent file writes and no hooks/HITL
3098            // are configured, execute them concurrently to avoid serial I/O bottleneck.
3099            let tool_calls = if self.config.hook_engine.is_none()
3100                && self.config.confirmation_manager.is_none()
3101                && tool_calls.len() > 1
3102                && tool_calls
3103                    .iter()
3104                    .all(|tc| Self::is_parallel_safe_write(&tc.name, &tc.args))
3105                && {
3106                    // All target paths must be distinct (no write-write conflicts)
3107                    let paths: Vec<_> = tool_calls
3108                        .iter()
3109                        .filter_map(|tc| Self::extract_write_path(&tc.args))
3110                        .collect();
3111                    paths.len() == tool_calls.len()
3112                        && paths.iter().collect::<std::collections::HashSet<_>>().len()
3113                            == paths.len()
3114                } {
3115                tracing::info!(
3116                    count = tool_calls.len(),
3117                    "Parallel write batch: executing {} independent file writes concurrently",
3118                    tool_calls.len()
3119                );
3120
3121                let futures: Vec<_> = tool_calls
3122                    .iter()
3123                    .map(|tc| {
3124                        let ctx = self.tool_context.clone();
3125                        let executor = Arc::clone(&self.tool_executor);
3126                        let name = tc.name.clone();
3127                        let args = tc.args.clone();
3128                        async move { executor.execute_with_context(&name, &args, &ctx).await }
3129                    })
3130                    .collect();
3131
3132                let results = join_all(futures).await;
3133
3134                // Post-process results in original order (sequential, preserves message ordering)
3135                for (tc, result) in tool_calls.iter().zip(results) {
3136                    tool_calls_count += 1;
3137                    let (output, exit_code, is_error, metadata, images) =
3138                        Self::tool_result_to_tuple(result);
3139
3140                    // Track tool call in progress tracker
3141                    self.track_tool_result(&tc.name, &tc.args, exit_code);
3142
3143                    let output = if let Some(ref sp) = self.config.security_provider {
3144                        sp.sanitize_output(&output)
3145                    } else {
3146                        output
3147                    };
3148
3149                    if let Some(tx) = &event_tx {
3150                        tx.send(AgentEvent::ToolEnd {
3151                            id: tc.id.clone(),
3152                            name: tc.name.clone(),
3153                            output: output.clone(),
3154                            exit_code,
3155                            metadata,
3156                        })
3157                        .await
3158                        .ok();
3159                    }
3160
3161                    if images.is_empty() {
3162                        messages.push(Message::tool_result(&tc.id, &output, is_error));
3163                    } else {
3164                        messages.push(Message::tool_result_with_images(
3165                            &tc.id, &output, &images, is_error,
3166                        ));
3167                    }
3168                }
3169
3170                // Skip the sequential loop below
3171                continue;
3172            } else {
3173                tool_calls
3174            };
3175
3176            for tool_call in tool_calls {
3177                tool_calls_count += 1;
3178
3179                let tool_start = std::time::Instant::now();
3180
3181                tracing::info!(
3182                    tool_name = tool_call.name.as_str(),
3183                    tool_id = tool_call.id.as_str(),
3184                    "Tool execution started"
3185                );
3186
3187                // Send tool start event (only if not already sent during streaming)
3188                // In streaming mode, ToolStart is sent when we receive ToolUseStart from LLM
3189                // But we still need to send ToolEnd after execution
3190
3191                // Check for malformed tool arguments from LLM (4.1 parse error recovery)
3192                if let Some(parse_error) =
3193                    tool_call.args.get("__parse_error").and_then(|v| v.as_str())
3194                {
3195                    parse_error_count += 1;
3196                    let error_msg = format!("Error: {}", parse_error);
3197                    tracing::warn!(
3198                        tool = tool_call.name.as_str(),
3199                        parse_error_count = parse_error_count,
3200                        max_parse_retries = self.config.max_parse_retries,
3201                        "Malformed tool arguments from LLM"
3202                    );
3203
3204                    if let Some(tx) = &event_tx {
3205                        tx.send(AgentEvent::ToolEnd {
3206                            id: tool_call.id.clone(),
3207                            name: tool_call.name.clone(),
3208                            output: error_msg.clone(),
3209                            exit_code: 1,
3210                            metadata: None,
3211                        })
3212                        .await
3213                        .ok();
3214                    }
3215
3216                    messages.push(Message::tool_result(&tool_call.id, &error_msg, true));
3217
3218                    if parse_error_count > self.config.max_parse_retries {
3219                        let msg = format!(
3220                            "LLM produced malformed tool arguments {} time(s) in a row \
3221                             (max_parse_retries={}); giving up",
3222                            parse_error_count, self.config.max_parse_retries
3223                        );
3224                        tracing::error!("{}", msg);
3225                        if let Some(tx) = &event_tx {
3226                            tx.send(AgentEvent::Error {
3227                                message: msg.clone(),
3228                            })
3229                            .await
3230                            .ok();
3231                        }
3232                        anyhow::bail!(msg);
3233                    }
3234                    continue;
3235                }
3236
3237                // Tool args are valid — reset parse error counter
3238                parse_error_count = 0;
3239
3240                // Check skill-based tool permissions
3241                if let Some(ref registry) = self.config.skill_registry {
3242                    let instruction_skills =
3243                        registry.by_kind(crate::skills::SkillKind::Instruction);
3244                    let has_restrictions =
3245                        instruction_skills.iter().any(|s| s.allowed_tools.is_some());
3246                    if has_restrictions {
3247                        let allowed = instruction_skills
3248                            .iter()
3249                            .any(|s| s.is_tool_allowed(&tool_call.name));
3250                        if !allowed {
3251                            let msg = format!(
3252                                "Tool '{}' is not allowed by any active skill.",
3253                                tool_call.name
3254                            );
3255                            tracing::info!(
3256                                tool_name = tool_call.name.as_str(),
3257                                "Tool blocked by skill registry"
3258                            );
3259                            if let Some(tx) = &event_tx {
3260                                tx.send(AgentEvent::PermissionDenied {
3261                                    tool_id: tool_call.id.clone(),
3262                                    tool_name: tool_call.name.clone(),
3263                                    args: tool_call.args.clone(),
3264                                    reason: msg.clone(),
3265                                })
3266                                .await
3267                                .ok();
3268                            }
3269                            messages.push(Message::tool_result(&tool_call.id, &msg, true));
3270                            continue;
3271                        }
3272                    }
3273                }
3274
3275                // Fire PreToolUse hook (may block the tool call)
3276                if let Some(HookResult::Block(reason)) = self
3277                    .fire_pre_tool_use(
3278                        session_id.unwrap_or(""),
3279                        &tool_call.name,
3280                        &tool_call.args,
3281                        recent_tool_signatures.clone(),
3282                    )
3283                    .await
3284                {
3285                    let msg = format!("Tool '{}' blocked by hook: {}", tool_call.name, reason);
3286                    tracing::info!(
3287                        tool_name = tool_call.name.as_str(),
3288                        "Tool blocked by PreToolUse hook"
3289                    );
3290
3291                    if let Some(tx) = &event_tx {
3292                        tx.send(AgentEvent::PermissionDenied {
3293                            tool_id: tool_call.id.clone(),
3294                            tool_name: tool_call.name.clone(),
3295                            args: tool_call.args.clone(),
3296                            reason: reason.clone(),
3297                        })
3298                        .await
3299                        .ok();
3300                    }
3301
3302                    messages.push(Message::tool_result(&tool_call.id, &msg, true));
3303                    continue;
3304                }
3305
3306                // Check permission before executing tool
3307                let permission_decision = if let Some(checker) = &self.config.permission_checker {
3308                    checker.check(&tool_call.name, &tool_call.args)
3309                } else {
3310                    // No policy configured — default to Ask so HITL can still intervene
3311                    PermissionDecision::Ask
3312                };
3313
3314                let (output, exit_code, is_error, metadata, images) = match permission_decision {
3315                    PermissionDecision::Deny => {
3316                        tracing::info!(
3317                            tool_name = tool_call.name.as_str(),
3318                            permission = "deny",
3319                            "Tool permission denied"
3320                        );
3321                        // Tool execution denied by permission policy
3322                        let denial_msg = format!(
3323                            "Permission denied: Tool '{}' is blocked by permission policy.",
3324                            tool_call.name
3325                        );
3326
3327                        // Send permission denied event
3328                        if let Some(tx) = &event_tx {
3329                            tx.send(AgentEvent::PermissionDenied {
3330                                tool_id: tool_call.id.clone(),
3331                                tool_name: tool_call.name.clone(),
3332                                args: tool_call.args.clone(),
3333                                reason: "Blocked by deny rule in permission policy".to_string(),
3334                            })
3335                            .await
3336                            .ok();
3337                        }
3338
3339                        (denial_msg, 1, true, None, Vec::new())
3340                    }
3341                    PermissionDecision::Allow => {
3342                        tracing::info!(
3343                            tool_name = tool_call.name.as_str(),
3344                            permission = "allow",
3345                            "Tool permission: allow"
3346                        );
3347                        // Permission explicitly allows — execute directly, no HITL
3348                        let stream_ctx =
3349                            self.streaming_tool_context(&event_tx, &tool_call.id, &tool_call.name);
3350                        let result = self
3351                            .execute_tool_queued_or_direct(
3352                                &tool_call.name,
3353                                &tool_call.args,
3354                                &stream_ctx,
3355                            )
3356                            .await;
3357
3358                        let tuple = Self::tool_result_to_tuple(result);
3359                        // Track tool call in progress tracker
3360                        let (_, exit_code, _, _, _) = tuple;
3361                        self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
3362                        tuple
3363                    }
3364                    PermissionDecision::Ask => {
3365                        tracing::info!(
3366                            tool_name = tool_call.name.as_str(),
3367                            permission = "ask",
3368                            "Tool permission: ask"
3369                        );
3370                        // Permission says Ask — delegate to HITL confirmation manager
3371                        if let Some(cm) = &self.config.confirmation_manager {
3372                            // Check YOLO lanes: if the tool's lane is in YOLO mode, skip confirmation
3373                            if !cm.requires_confirmation(&tool_call.name).await {
3374                                let stream_ctx = self.streaming_tool_context(
3375                                    &event_tx,
3376                                    &tool_call.id,
3377                                    &tool_call.name,
3378                                );
3379                                let result = self
3380                                    .execute_tool_queued_or_direct(
3381                                        &tool_call.name,
3382                                        &tool_call.args,
3383                                        &stream_ctx,
3384                                    )
3385                                    .await;
3386
3387                                let (output, exit_code, is_error, metadata, images) =
3388                                    Self::tool_result_to_tuple(result);
3389
3390                                // Track tool call in progress tracker
3391                                self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
3392
3393                                // Add tool result to messages
3394                                if images.is_empty() {
3395                                    messages.push(Message::tool_result(
3396                                        &tool_call.id,
3397                                        &output,
3398                                        is_error,
3399                                    ));
3400                                } else {
3401                                    messages.push(Message::tool_result_with_images(
3402                                        &tool_call.id,
3403                                        &output,
3404                                        &images,
3405                                        is_error,
3406                                    ));
3407                                }
3408
3409                                // Record tool result on the tool span for early exit
3410                                let tool_duration = tool_start.elapsed();
3411                                crate::telemetry::record_tool_result(exit_code, tool_duration);
3412
3413                                // Send ToolEnd event
3414                                if let Some(tx) = &event_tx {
3415                                    tx.send(AgentEvent::ToolEnd {
3416                                        id: tool_call.id.clone(),
3417                                        name: tool_call.name.clone(),
3418                                        output: output.clone(),
3419                                        exit_code,
3420                                        metadata,
3421                                    })
3422                                    .await
3423                                    .ok();
3424                                }
3425
3426                                // Fire PostToolUse hook (fire-and-forget)
3427                                self.fire_post_tool_use(
3428                                    session_id.unwrap_or(""),
3429                                    &tool_call.name,
3430                                    &tool_call.args,
3431                                    &output,
3432                                    exit_code == 0,
3433                                    tool_duration.as_millis() as u64,
3434                                )
3435                                .await;
3436
3437                                continue; // Skip the rest, move to next tool call
3438                            }
3439
3440                            // Get timeout from policy
3441                            let policy = cm.policy().await;
3442                            let timeout_ms = policy.default_timeout_ms;
3443                            let timeout_action = policy.timeout_action;
3444
3445                            // Request confirmation (this emits ConfirmationRequired event)
3446                            let rx = cm
3447                                .request_confirmation(
3448                                    &tool_call.id,
3449                                    &tool_call.name,
3450                                    &tool_call.args,
3451                                )
3452                                .await;
3453
3454                            // Forward ConfirmationRequired to the streaming event channel
3455                            // so external consumers (e.g. SafeClaw engine) can relay it
3456                            // to the browser UI.
3457                            if let Some(tx) = &event_tx {
3458                                tx.send(AgentEvent::ConfirmationRequired {
3459                                    tool_id: tool_call.id.clone(),
3460                                    tool_name: tool_call.name.clone(),
3461                                    args: tool_call.args.clone(),
3462                                    timeout_ms,
3463                                })
3464                                .await
3465                                .ok();
3466                            }
3467
3468                            // Wait for confirmation with timeout
3469                            let confirmation_result =
3470                                tokio::time::timeout(Duration::from_millis(timeout_ms), rx).await;
3471
3472                            match confirmation_result {
3473                                Ok(Ok(response)) => {
3474                                    // Forward ConfirmationReceived
3475                                    if let Some(tx) = &event_tx {
3476                                        tx.send(AgentEvent::ConfirmationReceived {
3477                                            tool_id: tool_call.id.clone(),
3478                                            approved: response.approved,
3479                                            reason: response.reason.clone(),
3480                                        })
3481                                        .await
3482                                        .ok();
3483                                    }
3484                                    if response.approved {
3485                                        let stream_ctx = self.streaming_tool_context(
3486                                            &event_tx,
3487                                            &tool_call.id,
3488                                            &tool_call.name,
3489                                        );
3490                                        let result = self
3491                                            .execute_tool_queued_or_direct(
3492                                                &tool_call.name,
3493                                                &tool_call.args,
3494                                                &stream_ctx,
3495                                            )
3496                                            .await;
3497
3498                                        let tuple = Self::tool_result_to_tuple(result);
3499                                        // Track tool call in progress tracker
3500                                        let (_, exit_code, _, _, _) = tuple;
3501                                        self.track_tool_result(
3502                                            &tool_call.name,
3503                                            &tool_call.args,
3504                                            exit_code,
3505                                        );
3506                                        tuple
3507                                    } else {
3508                                        let rejection_msg = format!(
3509                                            "Tool '{}' execution was REJECTED by the user. Reason: {}. \
3510                                             DO NOT retry this tool call unless the user explicitly asks you to.",
3511                                            tool_call.name,
3512                                            response.reason.unwrap_or_else(|| "No reason provided".to_string())
3513                                        );
3514                                        (rejection_msg, 1, true, None, Vec::new())
3515                                    }
3516                                }
3517                                Ok(Err(_)) => {
3518                                    // Forward ConfirmationTimeout (channel closed = effectively timed out)
3519                                    if let Some(tx) = &event_tx {
3520                                        tx.send(AgentEvent::ConfirmationTimeout {
3521                                            tool_id: tool_call.id.clone(),
3522                                            action_taken: "rejected".to_string(),
3523                                        })
3524                                        .await
3525                                        .ok();
3526                                    }
3527                                    let msg = format!(
3528                                        "Tool '{}' confirmation failed: confirmation channel closed",
3529                                        tool_call.name
3530                                    );
3531                                    (msg, 1, true, None, Vec::new())
3532                                }
3533                                Err(_) => {
3534                                    cm.check_timeouts().await;
3535
3536                                    // Forward ConfirmationTimeout
3537                                    if let Some(tx) = &event_tx {
3538                                        tx.send(AgentEvent::ConfirmationTimeout {
3539                                            tool_id: tool_call.id.clone(),
3540                                            action_taken: match timeout_action {
3541                                                crate::hitl::TimeoutAction::Reject => {
3542                                                    "rejected".to_string()
3543                                                }
3544                                                crate::hitl::TimeoutAction::AutoApprove => {
3545                                                    "auto_approved".to_string()
3546                                                }
3547                                            },
3548                                        })
3549                                        .await
3550                                        .ok();
3551                                    }
3552
3553                                    match timeout_action {
3554                                        crate::hitl::TimeoutAction::Reject => {
3555                                            let msg = format!(
3556                                                "Tool '{}' execution was REJECTED: user confirmation timed out after {}ms. \
3557                                                 DO NOT retry this tool call — the user did not approve it. \
3558                                                 Inform the user that the operation requires their approval and ask them to try again.",
3559                                                tool_call.name, timeout_ms
3560                                            );
3561                                            (msg, 1, true, None, Vec::new())
3562                                        }
3563                                        crate::hitl::TimeoutAction::AutoApprove => {
3564                                            let stream_ctx = self.streaming_tool_context(
3565                                                &event_tx,
3566                                                &tool_call.id,
3567                                                &tool_call.name,
3568                                            );
3569                                            let result = self
3570                                                .execute_tool_queued_or_direct(
3571                                                    &tool_call.name,
3572                                                    &tool_call.args,
3573                                                    &stream_ctx,
3574                                                )
3575                                                .await;
3576
3577                                            let tuple = Self::tool_result_to_tuple(result);
3578                                            // Track tool call in progress tracker
3579                                            let (_, exit_code, _, _, _) = tuple;
3580                                            self.track_tool_result(
3581                                                &tool_call.name,
3582                                                &tool_call.args,
3583                                                exit_code,
3584                                            );
3585                                            tuple
3586                                        }
3587                                    }
3588                                }
3589                            }
3590                        } else {
3591                            // Ask without confirmation manager — safe deny
3592                            let msg = format!(
3593                                "Tool '{}' requires confirmation but no HITL confirmation manager is configured. \
3594                                 Configure a confirmation policy to enable tool execution.",
3595                                tool_call.name
3596                            );
3597                            tracing::warn!(
3598                                tool_name = tool_call.name.as_str(),
3599                                "Tool requires confirmation but no HITL manager configured"
3600                            );
3601                            (msg, 1, true, None, Vec::new())
3602                        }
3603                    }
3604                };
3605
3606                let tool_duration = tool_start.elapsed();
3607                crate::telemetry::record_tool_result(exit_code, tool_duration);
3608
3609                // Sanitize tool output for sensitive data before it enters the message history
3610                let output = if let Some(ref sp) = self.config.security_provider {
3611                    sp.sanitize_output(&output)
3612                } else {
3613                    output
3614                };
3615
3616                recent_tool_signatures.push(format!(
3617                    "{}:{} => {}",
3618                    tool_call.name,
3619                    serde_json::to_string(&tool_call.args).unwrap_or_default(),
3620                    if is_error { "error" } else { "ok" }
3621                ));
3622                if recent_tool_signatures.len() > 8 {
3623                    let overflow = recent_tool_signatures.len() - 8;
3624                    recent_tool_signatures.drain(0..overflow);
3625                }
3626
3627                // Fire PostToolUse hook (fire-and-forget)
3628                self.fire_post_tool_use(
3629                    session_id.unwrap_or(""),
3630                    &tool_call.name,
3631                    &tool_call.args,
3632                    &output,
3633                    exit_code == 0,
3634                    tool_duration.as_millis() as u64,
3635                )
3636                .await;
3637
3638                // Auto-remember tool result in long-term memory
3639                if let Some(ref memory) = self.config.memory {
3640                    let tools_used = [tool_call.name.clone()];
3641                    let remember_result = if exit_code == 0 {
3642                        memory
3643                            .remember_success(effective_prompt, &tools_used, &output)
3644                            .await
3645                    } else {
3646                        memory
3647                            .remember_failure(effective_prompt, &output, &tools_used)
3648                            .await
3649                    };
3650                    match remember_result {
3651                        Ok(()) => {
3652                            if let Some(tx) = &event_tx {
3653                                let item_type = if exit_code == 0 { "success" } else { "failure" };
3654                                tx.send(AgentEvent::MemoryStored {
3655                                    memory_id: uuid::Uuid::new_v4().to_string(),
3656                                    memory_type: item_type.to_string(),
3657                                    importance: if exit_code == 0 { 0.8 } else { 0.9 },
3658                                    tags: vec![item_type.to_string(), tool_call.name.clone()],
3659                                })
3660                                .await
3661                                .ok();
3662                            }
3663                        }
3664                        Err(e) => {
3665                            tracing::warn!("Failed to store memory after tool execution: {}", e);
3666                        }
3667                    }
3668                }
3669
3670                // Send tool end event
3671                if let Some(tx) = &event_tx {
3672                    tx.send(AgentEvent::ToolEnd {
3673                        id: tool_call.id.clone(),
3674                        name: tool_call.name.clone(),
3675                        output: output.clone(),
3676                        exit_code,
3677                        metadata,
3678                    })
3679                    .await
3680                    .ok();
3681                }
3682
3683                // Add tool result to messages
3684                if images.is_empty() {
3685                    messages.push(Message::tool_result(&tool_call.id, &output, is_error));
3686                } else {
3687                    messages.push(Message::tool_result_with_images(
3688                        &tool_call.id,
3689                        &output,
3690                        &images,
3691                        is_error,
3692                    ));
3693                }
3694            }
3695        }
3696    }
3697
3698    /// Execute with streaming events
3699    pub async fn execute_streaming(
3700        &self,
3701        history: &[Message],
3702        prompt: &str,
3703    ) -> Result<(
3704        mpsc::Receiver<AgentEvent>,
3705        tokio::task::JoinHandle<Result<AgentResult>>,
3706        tokio_util::sync::CancellationToken,
3707    )> {
3708        let (tx, rx) = mpsc::channel(100);
3709        let cancel_token = tokio_util::sync::CancellationToken::new();
3710
3711        let llm_client = self.llm_client.clone();
3712        let tool_executor = self.tool_executor.clone();
3713        let tool_context = self.tool_context.clone();
3714        let config = self.config.clone();
3715        let tool_metrics = self.tool_metrics.clone();
3716        let command_queue = self.command_queue.clone();
3717        let history = history.to_vec();
3718        let prompt = prompt.to_string();
3719        let token_clone = cancel_token.clone();
3720
3721        let handle = tokio::spawn(async move {
3722            let mut agent = AgentLoop::new(llm_client, tool_executor, tool_context, config);
3723            if let Some(metrics) = tool_metrics {
3724                agent = agent.with_tool_metrics(metrics);
3725            }
3726            if let Some(queue) = command_queue {
3727                agent = agent.with_queue(queue);
3728            }
3729            agent
3730                .execute_with_session(&history, &prompt, None, Some(tx), Some(&token_clone))
3731                .await
3732        });
3733
3734        Ok((rx, handle, cancel_token))
3735    }
3736
3737    /// Create an execution plan for a prompt
3738    ///
3739    /// Delegates to [`LlmPlanner`] for structured JSON plan generation,
3740    /// falling back to heuristic planning if the LLM call fails.
3741    pub async fn plan(&self, prompt: &str, _context: Option<&str>) -> Result<ExecutionPlan> {
3742        use crate::planning::LlmPlanner;
3743
3744        match LlmPlanner::create_plan(&self.llm_client, prompt).await {
3745            Ok(plan) => Ok(plan),
3746            Err(e) => {
3747                tracing::warn!("LLM plan creation failed, using fallback: {}", e);
3748                Ok(LlmPlanner::fallback_plan(prompt))
3749            }
3750        }
3751    }
3752
3753    /// Execute with planning phase.
3754    ///
3755    /// If `pre_analysis` is provided (from a single pre-analysis LLM call in
3756    /// `execute_with_session`), the goal and plan are already available and no
3757    /// additional LLM calls are needed for planning. Otherwise, falls back to
3758    /// calling `extract_goal` and `plan` individually.
3759    pub async fn execute_with_planning(
3760        &self,
3761        history: &[Message],
3762        prompt: &str,
3763        event_tx: Option<mpsc::Sender<AgentEvent>>,
3764        pre_analysis: Option<PreAnalysis>,
3765    ) -> Result<AgentResult> {
3766        // Send planning start event
3767        if let Some(tx) = &event_tx {
3768            tx.send(AgentEvent::PlanningStart {
3769                prompt: prompt.to_string(),
3770            })
3771            .await
3772            .ok();
3773        }
3774
3775        // Use pre-analysis result if available (goal + plan already computed in one LLM call).
3776        let (goal, plan) = if let Some(analysis) = pre_analysis {
3777            (Some(analysis.goal.clone()), analysis.execution_plan.clone())
3778        } else {
3779            // Fall back: extract goal and create plan via separate LLM calls.
3780            let g = if self.config.goal_tracking {
3781                Some(self.extract_goal(prompt).await?)
3782            } else {
3783                None
3784            };
3785            let p = self.plan(prompt, None).await?;
3786            (g, p)
3787        };
3788
3789        // Send GoalExtracted event if goal_tracking is enabled.
3790        if self.config.goal_tracking {
3791            if let Some(ref g) = goal {
3792                if let Some(tx) = &event_tx {
3793                    tx.send(AgentEvent::GoalExtracted { goal: g.clone() })
3794                        .await
3795                        .ok();
3796                }
3797            }
3798        }
3799
3800        // Send planning end event
3801        if let Some(tx) = &event_tx {
3802            tx.send(AgentEvent::PlanningEnd {
3803                estimated_steps: plan.steps.len(),
3804                plan: plan.clone(),
3805            })
3806            .await
3807            .ok();
3808        }
3809
3810        let plan_start = std::time::Instant::now();
3811
3812        // Execute the plan step by step
3813        let result = self.execute_plan(history, &plan, event_tx.clone()).await?;
3814
3815        // Emit the final End event (execute_loop_inner does not emit End in planning mode)
3816        if let Some(tx) = &event_tx {
3817            tx.send(AgentEvent::End {
3818                text: result.text.clone(),
3819                usage: result.usage.clone(),
3820                meta: None,
3821            })
3822            .await
3823            .ok();
3824        }
3825
3826        // Check goal achievement when goal_tracking is enabled
3827        if self.config.goal_tracking {
3828            if let Some(ref g) = goal {
3829                let achieved = self.check_goal_achievement(g, &result.text).await?;
3830                if achieved {
3831                    if let Some(tx) = &event_tx {
3832                        tx.send(AgentEvent::GoalAchieved {
3833                            goal: g.description.clone(),
3834                            total_steps: result.messages.len(),
3835                            duration_ms: plan_start.elapsed().as_millis() as i64,
3836                        })
3837                        .await
3838                        .ok();
3839                    }
3840                }
3841            }
3842        }
3843
3844        Ok(result)
3845    }
3846
3847    /// Execute an execution plan using wave-based dependency-aware scheduling.
3848    ///
3849    /// Steps with no unmet dependencies are grouped into "waves". A wave with
3850    /// a single step executes sequentially (preserving the history chain). A
3851    /// wave with multiple independent steps executes them in parallel via
3852    /// `JoinSet`, then merges their results back into the shared history.
3853    async fn execute_plan(
3854        &self,
3855        history: &[Message],
3856        plan: &ExecutionPlan,
3857        event_tx: Option<mpsc::Sender<AgentEvent>>,
3858    ) -> Result<AgentResult> {
3859        let mut plan = plan.clone();
3860        let mut current_history = history.to_vec();
3861        let mut total_usage = TokenUsage::default();
3862        let mut tool_calls_count = 0;
3863        let total_steps = plan.steps.len();
3864
3865        // Add initial user message with the goal
3866        let steps_text = plan
3867            .steps
3868            .iter()
3869            .enumerate()
3870            .map(|(i, step)| format!("{}. {}", i + 1, step.content))
3871            .collect::<Vec<_>>()
3872            .join("\n");
3873        current_history.push(Message::user(&crate::prompts::render(
3874            crate::prompts::PLAN_EXECUTE_GOAL,
3875            &[("goal", &plan.goal), ("steps", &steps_text)],
3876        )));
3877
3878        loop {
3879            let ready: Vec<String> = plan
3880                .get_ready_steps()
3881                .iter()
3882                .map(|s| s.id.clone())
3883                .collect();
3884
3885            if ready.is_empty() {
3886                // All done or deadlock
3887                if plan.has_deadlock() {
3888                    tracing::warn!(
3889                        "Plan deadlock detected: {} pending steps with unresolvable dependencies",
3890                        plan.pending_count()
3891                    );
3892                }
3893                break;
3894            }
3895
3896            if ready.len() == 1 {
3897                // === Single step: sequential execution (preserves history chain) ===
3898                let step_id = &ready[0];
3899                let step = plan
3900                    .steps
3901                    .iter()
3902                    .find(|s| s.id == *step_id)
3903                    .ok_or_else(|| anyhow::anyhow!("step '{}' not found in plan", step_id))?
3904                    .clone();
3905                let step_number = plan
3906                    .steps
3907                    .iter()
3908                    .position(|s| s.id == *step_id)
3909                    .unwrap_or(0)
3910                    + 1;
3911
3912                // Send step start event
3913                if let Some(tx) = &event_tx {
3914                    tx.send(AgentEvent::StepStart {
3915                        step_id: step.id.clone(),
3916                        description: step.content.clone(),
3917                        step_number,
3918                        total_steps,
3919                    })
3920                    .await
3921                    .ok();
3922                }
3923
3924                plan.mark_status(&step.id, TaskStatus::InProgress);
3925
3926                let step_prompt = crate::prompts::render(
3927                    crate::prompts::PLAN_EXECUTE_STEP,
3928                    &[
3929                        ("step_num", &step_number.to_string()),
3930                        ("description", &step.content),
3931                    ],
3932                );
3933
3934                match self
3935                    .execute_loop(
3936                        &current_history,
3937                        &step_prompt,
3938                        AgentStyle::GeneralPurpose,
3939                        None,
3940                        event_tx.clone(),
3941                        &tokio_util::sync::CancellationToken::new(),
3942                        false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
3943                    )
3944                    .await
3945                {
3946                    Ok(result) => {
3947                        current_history = result.messages.clone();
3948                        total_usage.prompt_tokens += result.usage.prompt_tokens;
3949                        total_usage.completion_tokens += result.usage.completion_tokens;
3950                        total_usage.total_tokens += result.usage.total_tokens;
3951                        tool_calls_count += result.tool_calls_count;
3952                        plan.mark_status(&step.id, TaskStatus::Completed);
3953
3954                        if let Some(tx) = &event_tx {
3955                            tx.send(AgentEvent::StepEnd {
3956                                step_id: step.id.clone(),
3957                                status: TaskStatus::Completed,
3958                                step_number,
3959                                total_steps,
3960                            })
3961                            .await
3962                            .ok();
3963                        }
3964                    }
3965                    Err(e) => {
3966                        tracing::error!("Plan step '{}' failed: {}", step.id, e);
3967                        plan.mark_status(&step.id, TaskStatus::Failed);
3968
3969                        if let Some(tx) = &event_tx {
3970                            tx.send(AgentEvent::StepEnd {
3971                                step_id: step.id.clone(),
3972                                status: TaskStatus::Failed,
3973                                step_number,
3974                                total_steps,
3975                            })
3976                            .await
3977                            .ok();
3978                        }
3979                    }
3980                }
3981            } else {
3982                // === Multiple steps: parallel execution via JoinSet ===
3983                // NOTE: Each parallel branch gets a clone of the base history.
3984                // Individual branch histories (tool calls, LLM turns) are NOT merged
3985                // back — only a summary message is appended. This is a deliberate
3986                // trade-off: merging divergent histories in a deterministic order is
3987                // complex and the summary approach keeps the context window manageable.
3988                let ready_steps: Vec<_> = ready
3989                    .iter()
3990                    .filter_map(|id| {
3991                        let step = plan.steps.iter().find(|s| s.id == *id)?.clone();
3992                        let step_number =
3993                            plan.steps.iter().position(|s| s.id == *id).unwrap_or(0) + 1;
3994                        Some((step, step_number))
3995                    })
3996                    .collect();
3997
3998                // Mark all as InProgress and emit StepStart events
3999                for (step, step_number) in &ready_steps {
4000                    plan.mark_status(&step.id, TaskStatus::InProgress);
4001                    if let Some(tx) = &event_tx {
4002                        tx.send(AgentEvent::StepStart {
4003                            step_id: step.id.clone(),
4004                            description: step.content.clone(),
4005                            step_number: *step_number,
4006                            total_steps,
4007                        })
4008                        .await
4009                        .ok();
4010                    }
4011                }
4012
4013                // Spawn all into JoinSet, each with a clone of the base history
4014                let mut join_set = tokio::task::JoinSet::new();
4015                for (step, step_number) in &ready_steps {
4016                    let base_history = current_history.clone();
4017                    let agent_clone = self.clone();
4018                    let tx = event_tx.clone();
4019                    let step_clone = step.clone();
4020                    let sn = *step_number;
4021
4022                    join_set.spawn(async move {
4023                        let prompt = crate::prompts::render(
4024                            crate::prompts::PLAN_EXECUTE_STEP,
4025                            &[
4026                                ("step_num", &sn.to_string()),
4027                                ("description", &step_clone.content),
4028                            ],
4029                        );
4030                        let result = agent_clone
4031                            .execute_loop(
4032                                &base_history,
4033                                &prompt,
4034                                AgentStyle::GeneralPurpose,
4035                                None,
4036                                tx,
4037                                &tokio_util::sync::CancellationToken::new(),
4038                                false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
4039                            )
4040                            .await;
4041                        (step_clone.id, sn, result)
4042                    });
4043                }
4044
4045                // Collect results
4046                let mut parallel_results: Vec<ParallelStepResult> = Vec::new();
4047                while let Some(join_result) = join_set.join_next().await {
4048                    match join_result {
4049                        Ok((step_id, step_number, step_result)) => match step_result {
4050                            Ok(result) => {
4051                                total_usage.prompt_tokens += result.usage.prompt_tokens;
4052                                total_usage.completion_tokens += result.usage.completion_tokens;
4053                                total_usage.total_tokens += result.usage.total_tokens;
4054                                tool_calls_count += result.tool_calls_count;
4055                                plan.mark_status(&step_id, TaskStatus::Completed);
4056
4057                                parallel_results.push(ParallelStepResult {
4058                                    step_id: step_id.clone(),
4059                                    step_number: step_number as u32,
4060                                    status: "completed".to_string(),
4061                                    summary: result.text.trim().to_string(),
4062                                    key_findings: None,
4063                                    error: None,
4064                                    data: None,
4065                                });
4066
4067                                if let Some(tx) = &event_tx {
4068                                    tx.send(AgentEvent::StepEnd {
4069                                        step_id,
4070                                        status: TaskStatus::Completed,
4071                                        step_number,
4072                                        total_steps,
4073                                    })
4074                                    .await
4075                                    .ok();
4076                                }
4077                            }
4078                            Err(e) => {
4079                                tracing::error!("Plan step '{}' failed: {}", step_id, e);
4080                                plan.mark_status(&step_id, TaskStatus::Failed);
4081
4082                                parallel_results.push(ParallelStepResult {
4083                                    step_id: step_id.clone(),
4084                                    step_number: step_number as u32,
4085                                    status: "failed".to_string(),
4086                                    summary: String::new(),
4087                                    key_findings: None,
4088                                    error: Some(e.to_string()),
4089                                    data: None,
4090                                });
4091
4092                                if let Some(tx) = &event_tx {
4093                                    tx.send(AgentEvent::StepEnd {
4094                                        step_id,
4095                                        status: TaskStatus::Failed,
4096                                        step_number,
4097                                        total_steps,
4098                                    })
4099                                    .await
4100                                    .ok();
4101                                }
4102                            }
4103                        },
4104                        Err(e) => {
4105                            tracing::error!("JoinSet task panicked: {}", e);
4106                        }
4107                    }
4108                }
4109
4110                // Merge parallel results into history for subsequent steps.
4111                // Emit as a structured JSON USER message so the frontend can
4112                // parse and render it in the user's language.
4113                if !parallel_results.is_empty() {
4114                    parallel_results.sort_by_key(|r| r.step_number);
4115                    let envelope = ParallelStepResult::build_envelope(parallel_results);
4116                    current_history.push(Message::user(
4117                        &serde_json::to_string(&envelope).unwrap_or_default(),
4118                    ));
4119                }
4120            }
4121
4122            // Emit GoalProgress after each wave
4123            if self.config.goal_tracking {
4124                let completed = plan
4125                    .steps
4126                    .iter()
4127                    .filter(|s| s.status == TaskStatus::Completed)
4128                    .count();
4129                if let Some(tx) = &event_tx {
4130                    tx.send(AgentEvent::GoalProgress {
4131                        goal: plan.goal.clone(),
4132                        progress: plan.progress(),
4133                        completed_steps: completed,
4134                        total_steps,
4135                    })
4136                    .await
4137                    .ok();
4138                }
4139            }
4140        }
4141
4142        // Get final response — find the last assistant message (not the last history
4143        // entry, which may be a user-summary injected after parallel execution)
4144        let final_text = current_history
4145            .iter()
4146            .rev()
4147            .find(|m| m.role == "assistant")
4148            .map(|m| {
4149                m.content
4150                    .iter()
4151                    .filter_map(|block| {
4152                        if let crate::llm::ContentBlock::Text { text } = block {
4153                            Some(text.as_str())
4154                        } else {
4155                            None
4156                        }
4157                    })
4158                    .collect::<Vec<_>>()
4159                    .join("\n")
4160            })
4161            .unwrap_or_default();
4162
4163        Ok(AgentResult {
4164            text: final_text,
4165            messages: current_history,
4166            usage: total_usage,
4167            tool_calls_count,
4168        })
4169    }
4170
4171    /// Extract goal from prompt
4172    ///
4173    /// Delegates to [`LlmPlanner`] for structured JSON goal extraction,
4174    /// falling back to heuristic logic if the LLM call fails.
4175    pub async fn extract_goal(&self, prompt: &str) -> Result<AgentGoal> {
4176        use crate::planning::LlmPlanner;
4177
4178        match LlmPlanner::extract_goal(&self.llm_client, prompt).await {
4179            Ok(goal) => Ok(goal),
4180            Err(e) => {
4181                tracing::warn!("LLM goal extraction failed, using fallback: {}", e);
4182                Ok(LlmPlanner::fallback_goal(prompt))
4183            }
4184        }
4185    }
4186
4187    /// Check if goal is achieved
4188    ///
4189    /// Delegates to [`LlmPlanner`] for structured JSON achievement check,
4190    /// falling back to heuristic logic if the LLM call fails.
4191    pub async fn check_goal_achievement(
4192        &self,
4193        goal: &AgentGoal,
4194        current_state: &str,
4195    ) -> Result<bool> {
4196        use crate::planning::LlmPlanner;
4197
4198        match LlmPlanner::check_achievement(&self.llm_client, goal, current_state).await {
4199            Ok(result) => Ok(result.achieved),
4200            Err(e) => {
4201                tracing::warn!("LLM achievement check failed, using fallback: {}", e);
4202                let result = LlmPlanner::fallback_check_achievement(goal, current_state);
4203                Ok(result.achieved)
4204            }
4205        }
4206    }
4207}
4208
4209#[cfg(test)]
4210mod tests {
4211    use super::*;
4212    use crate::llm::{ContentBlock, StreamEvent};
4213    use crate::permissions::PermissionPolicy;
4214    use crate::tools::ToolExecutor;
4215    use std::path::PathBuf;
4216    use std::sync::atomic::{AtomicUsize, Ordering};
4217
4218    /// Create a default ToolContext for tests
4219    fn test_tool_context() -> ToolContext {
4220        ToolContext::new(PathBuf::from("/tmp"))
4221    }
4222
4223    #[test]
4224    fn test_agent_config_default() {
4225        let config = AgentConfig::default();
4226        assert!(config.prompt_slots.is_empty());
4227        assert!(config.tools.is_empty()); // Tools are provided externally
4228        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
4229        assert!(config.permission_checker.is_none());
4230        assert!(config.context_providers.is_empty());
4231        // Built-in skills are always present by default
4232        let registry = config
4233            .skill_registry
4234            .expect("skill_registry must be Some by default");
4235        assert!(registry.len() >= 7, "expected at least 7 built-in skills");
4236        assert!(registry.get("code-search").is_some());
4237        assert!(registry.get("find-bugs").is_some());
4238    }
4239
4240    // ========================================================================
4241    // Mock LLM Client for Testing
4242    // ========================================================================
4243
    /// Mock LLM client that returns predefined responses.
    ///
    /// Each completion call removes the next response from the front of
    /// `responses` and increments `call_count`, letting tests script
    /// multi-round conversations and assert how many LLM calls were made.
    pub(crate) struct MockLlmClient {
        /// Responses to return (consumed in order, front first)
        responses: std::sync::Mutex<Vec<LlmResponse>>,
        /// Number of calls made (bumped by both completion methods)
        pub(crate) call_count: AtomicUsize,
    }
4251
4252    impl MockLlmClient {
4253        pub(crate) fn new(responses: Vec<LlmResponse>) -> Self {
4254            Self {
4255                responses: std::sync::Mutex::new(responses),
4256                call_count: AtomicUsize::new(0),
4257            }
4258        }
4259
4260        /// Create a response with text only (no tool calls)
4261        pub(crate) fn text_response(text: &str) -> LlmResponse {
4262            LlmResponse {
4263                message: Message {
4264                    role: "assistant".to_string(),
4265                    content: vec![ContentBlock::Text {
4266                        text: text.to_string(),
4267                    }],
4268                    reasoning_content: None,
4269                },
4270                usage: TokenUsage {
4271                    prompt_tokens: 10,
4272                    completion_tokens: 5,
4273                    total_tokens: 15,
4274                    cache_read_tokens: None,
4275                    cache_write_tokens: None,
4276                },
4277                stop_reason: Some("end_turn".to_string()),
4278                meta: None,
4279            }
4280        }
4281
4282        /// Create a response with a tool call
4283        pub(crate) fn tool_call_response(
4284            tool_id: &str,
4285            tool_name: &str,
4286            args: serde_json::Value,
4287        ) -> LlmResponse {
4288            LlmResponse {
4289                message: Message {
4290                    role: "assistant".to_string(),
4291                    content: vec![ContentBlock::ToolUse {
4292                        id: tool_id.to_string(),
4293                        name: tool_name.to_string(),
4294                        input: args,
4295                    }],
4296                    reasoning_content: None,
4297                },
4298                usage: TokenUsage {
4299                    prompt_tokens: 10,
4300                    completion_tokens: 5,
4301                    total_tokens: 15,
4302                    cache_read_tokens: None,
4303                    cache_write_tokens: None,
4304                },
4305                stop_reason: Some("tool_use".to_string()),
4306                meta: None,
4307            }
4308        }
4309    }
4310
4311    #[async_trait::async_trait]
4312    impl LlmClient for MockLlmClient {
4313        async fn complete(
4314            &self,
4315            _messages: &[Message],
4316            _system: Option<&str>,
4317            _tools: &[ToolDefinition],
4318        ) -> Result<LlmResponse> {
4319            self.call_count.fetch_add(1, Ordering::SeqCst);
4320            let mut responses = self.responses.lock().unwrap();
4321            if responses.is_empty() {
4322                anyhow::bail!("No more mock responses available");
4323            }
4324            Ok(responses.remove(0))
4325        }
4326
4327        async fn complete_streaming(
4328            &self,
4329            _messages: &[Message],
4330            _system: Option<&str>,
4331            _tools: &[ToolDefinition],
4332        ) -> Result<mpsc::Receiver<StreamEvent>> {
4333            self.call_count.fetch_add(1, Ordering::SeqCst);
4334            let mut responses = self.responses.lock().unwrap();
4335            if responses.is_empty() {
4336                anyhow::bail!("No more mock responses available");
4337            }
4338            let response = responses.remove(0);
4339
4340            let (tx, rx) = mpsc::channel(10);
4341            tokio::spawn(async move {
4342                // Send text deltas if any
4343                for block in &response.message.content {
4344                    if let ContentBlock::Text { text } = block {
4345                        tx.send(StreamEvent::TextDelta(text.clone())).await.ok();
4346                    }
4347                }
4348                tx.send(StreamEvent::Done(response)).await.ok();
4349            });
4350
4351            Ok(rx)
4352        }
4353    }
4354
4355    // ========================================================================
4356    // Agent Loop Tests
4357    // ========================================================================
4358
4359    #[tokio::test]
4360    async fn test_agent_simple_response() {
4361        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4362            "Hello, I'm an AI assistant.",
4363        )]));
4364
4365        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4366        let config = AgentConfig::default();
4367
4368        let agent = AgentLoop::new(
4369            mock_client.clone(),
4370            tool_executor,
4371            test_tool_context(),
4372            config,
4373        );
4374        let result = agent.execute(&[], "Hello", None).await.unwrap();
4375
4376        assert_eq!(result.text, "Hello, I'm an AI assistant.");
4377        assert_eq!(result.tool_calls_count, 0);
4378        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
4379    }
4380
4381    #[tokio::test]
4382    async fn test_agent_with_tool_call() {
4383        let mock_client = Arc::new(MockLlmClient::new(vec![
4384            // First response: tool call
4385            MockLlmClient::tool_call_response(
4386                "tool-1",
4387                "bash",
4388                serde_json::json!({"command": "echo hello"}),
4389            ),
4390            // Second response: final text
4391            MockLlmClient::text_response("The command output was: hello"),
4392        ]));
4393
4394        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4395        let config = AgentConfig::default();
4396
4397        let agent = AgentLoop::new(
4398            mock_client.clone(),
4399            tool_executor,
4400            test_tool_context(),
4401            config,
4402        );
4403        let result = agent.execute(&[], "Run echo hello", None).await.unwrap();
4404
4405        assert_eq!(result.text, "The command output was: hello");
4406        assert_eq!(result.tool_calls_count, 1);
4407        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
4408    }
4409
4410    #[tokio::test]
4411    async fn test_agent_permission_deny() {
4412        let mock_client = Arc::new(MockLlmClient::new(vec![
4413            // First response: tool call that will be denied
4414            MockLlmClient::tool_call_response(
4415                "tool-1",
4416                "bash",
4417                serde_json::json!({"command": "rm -rf /tmp/test"}),
4418            ),
4419            // Second response: LLM responds to the denial
4420            MockLlmClient::text_response(
4421                "I cannot execute that command due to permission restrictions.",
4422            ),
4423        ]));
4424
4425        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4426
4427        // Create permission policy that denies rm commands
4428        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4429
4430        let config = AgentConfig {
4431            permission_checker: Some(Arc::new(permission_policy)),
4432            ..Default::default()
4433        };
4434
4435        let (tx, mut rx) = mpsc::channel(100);
4436        let agent = AgentLoop::new(
4437            mock_client.clone(),
4438            tool_executor,
4439            test_tool_context(),
4440            config,
4441        );
4442        let result = agent.execute(&[], "Delete files", Some(tx)).await.unwrap();
4443
4444        // Check that we received a PermissionDenied event
4445        let mut found_permission_denied = false;
4446        while let Ok(event) = rx.try_recv() {
4447            if let AgentEvent::PermissionDenied { tool_name, .. } = event {
4448                assert_eq!(tool_name, "bash");
4449                found_permission_denied = true;
4450            }
4451        }
4452        assert!(
4453            found_permission_denied,
4454            "Should have received PermissionDenied event"
4455        );
4456
4457        assert_eq!(result.tool_calls_count, 1);
4458    }
4459
4460    #[tokio::test]
4461    async fn test_agent_permission_allow() {
4462        let mock_client = Arc::new(MockLlmClient::new(vec![
4463            // First response: tool call that will be allowed
4464            MockLlmClient::tool_call_response(
4465                "tool-1",
4466                "bash",
4467                serde_json::json!({"command": "echo hello"}),
4468            ),
4469            // Second response: final text
4470            MockLlmClient::text_response("Done!"),
4471        ]));
4472
4473        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4474
4475        // Create permission policy that allows echo commands
4476        let permission_policy = PermissionPolicy::new()
4477            .allow("bash(echo:*)")
4478            .deny("bash(rm:*)");
4479
4480        let config = AgentConfig {
4481            permission_checker: Some(Arc::new(permission_policy)),
4482            ..Default::default()
4483        };
4484
4485        let agent = AgentLoop::new(
4486            mock_client.clone(),
4487            tool_executor,
4488            test_tool_context(),
4489            config,
4490        );
4491        let result = agent.execute(&[], "Echo hello", None).await.unwrap();
4492
4493        assert_eq!(result.text, "Done!");
4494        assert_eq!(result.tool_calls_count, 1);
4495    }
4496
4497    #[tokio::test]
4498    async fn test_agent_streaming_events() {
4499        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4500            "Hello!",
4501        )]));
4502
4503        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4504        let config = AgentConfig::default();
4505
4506        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4507        let (mut rx, handle, _cancel_token) = agent.execute_streaming(&[], "Hi").await.unwrap();
4508
4509        // Collect events
4510        let mut events = Vec::new();
4511        while let Some(event) = rx.recv().await {
4512            events.push(event);
4513        }
4514
4515        let result = handle.await.unwrap().unwrap();
4516        assert_eq!(result.text, "Hello!");
4517
4518        // Check we received Start and End events
4519        assert!(events.iter().any(|e| matches!(e, AgentEvent::Start { .. })));
4520        assert!(events.iter().any(|e| matches!(e, AgentEvent::End { .. })));
4521    }
4522
4523    #[tokio::test]
4524    async fn test_agent_max_tool_rounds() {
4525        // Create a mock that always returns tool calls (infinite loop)
4526        let responses: Vec<LlmResponse> = (0..100)
4527            .map(|i| {
4528                MockLlmClient::tool_call_response(
4529                    &format!("tool-{}", i),
4530                    "bash",
4531                    serde_json::json!({"command": "echo loop"}),
4532                )
4533            })
4534            .collect();
4535
4536        let mock_client = Arc::new(MockLlmClient::new(responses));
4537        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4538
4539        let config = AgentConfig {
4540            max_tool_rounds: 3,
4541            ..Default::default()
4542        };
4543
4544        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4545        let result = agent.execute(&[], "Loop forever", None).await;
4546
4547        // Should fail due to max tool rounds exceeded
4548        assert!(result.is_err());
4549        assert!(result.unwrap_err().to_string().contains("Max tool rounds"));
4550    }
4551
4552    #[tokio::test]
4553    async fn test_agent_no_permission_policy_defaults_to_ask() {
4554        // When no permission policy is set, tools default to Ask.
4555        // Without a confirmation manager, Ask = safe deny.
4556        let mock_client = Arc::new(MockLlmClient::new(vec![
4557            MockLlmClient::tool_call_response(
4558                "tool-1",
4559                "bash",
4560                serde_json::json!({"command": "rm -rf /tmp/test"}),
4561            ),
4562            MockLlmClient::text_response("Denied!"),
4563        ]));
4564
4565        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4566        let config = AgentConfig {
4567            permission_checker: None, // No policy → defaults to Ask
4568            // No confirmation_manager → safe deny
4569            ..Default::default()
4570        };
4571
4572        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4573        let result = agent.execute(&[], "Delete", None).await.unwrap();
4574
4575        // Should be denied (no policy + no CM = safe deny)
4576        assert_eq!(result.text, "Denied!");
4577        assert_eq!(result.tool_calls_count, 1);
4578    }
4579
4580    #[tokio::test]
4581    async fn test_agent_permission_ask_without_cm_denies() {
4582        // When permission is Ask and no confirmation manager configured,
4583        // tool execution should be denied (safe default).
4584        let mock_client = Arc::new(MockLlmClient::new(vec![
4585            MockLlmClient::tool_call_response(
4586                "tool-1",
4587                "bash",
4588                serde_json::json!({"command": "echo test"}),
4589            ),
4590            MockLlmClient::text_response("Denied!"),
4591        ]));
4592
4593        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4594
4595        // Create policy where bash falls through to Ask (default)
4596        let permission_policy = PermissionPolicy::new(); // Default decision is Ask
4597
4598        let config = AgentConfig {
4599            permission_checker: Some(Arc::new(permission_policy)),
4600            // No confirmation_manager — safe deny
4601            ..Default::default()
4602        };
4603
4604        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4605        let result = agent.execute(&[], "Echo", None).await.unwrap();
4606
4607        // Should deny (Ask without CM = safe deny)
4608        assert_eq!(result.text, "Denied!");
4609        // The tool result should contain the denial message
4610        assert!(result.tool_calls_count >= 1);
4611    }
4612
4613    // ========================================================================
4614    // HITL (Human-in-the-Loop) Tests
4615    // ========================================================================
4616
4617    #[tokio::test]
4618    async fn test_agent_hitl_approved() {
4619        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4620        use tokio::sync::broadcast;
4621
4622        let mock_client = Arc::new(MockLlmClient::new(vec![
4623            MockLlmClient::tool_call_response(
4624                "tool-1",
4625                "bash",
4626                serde_json::json!({"command": "echo hello"}),
4627            ),
4628            MockLlmClient::text_response("Command executed!"),
4629        ]));
4630
4631        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4632
4633        // Create HITL confirmation manager with policy enabled
4634        let (event_tx, _event_rx) = broadcast::channel(100);
4635        let hitl_policy = ConfirmationPolicy {
4636            enabled: true,
4637            ..Default::default()
4638        };
4639        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4640
4641        // Create permission policy that returns Ask for bash
4642        let permission_policy = PermissionPolicy::new(); // Default is Ask
4643
4644        let config = AgentConfig {
4645            permission_checker: Some(Arc::new(permission_policy)),
4646            confirmation_manager: Some(confirmation_manager.clone()),
4647            ..Default::default()
4648        };
4649
4650        // Spawn a task to approve the confirmation
4651        let cm_clone = confirmation_manager.clone();
4652        tokio::spawn(async move {
4653            // Wait a bit for the confirmation request to be created
4654            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
4655            // Approve it
4656            cm_clone.confirm("tool-1", true, None).await.ok();
4657        });
4658
4659        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4660        let result = agent.execute(&[], "Run echo", None).await.unwrap();
4661
4662        assert_eq!(result.text, "Command executed!");
4663        assert_eq!(result.tool_calls_count, 1);
4664    }
4665
4666    #[tokio::test]
4667    async fn test_agent_hitl_rejected() {
4668        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4669        use tokio::sync::broadcast;
4670
4671        let mock_client = Arc::new(MockLlmClient::new(vec![
4672            MockLlmClient::tool_call_response(
4673                "tool-1",
4674                "bash",
4675                serde_json::json!({"command": "rm -rf /"}),
4676            ),
4677            MockLlmClient::text_response("Understood, I won't do that."),
4678        ]));
4679
4680        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4681
4682        // Create HITL confirmation manager
4683        let (event_tx, _event_rx) = broadcast::channel(100);
4684        let hitl_policy = ConfirmationPolicy {
4685            enabled: true,
4686            ..Default::default()
4687        };
4688        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4689
4690        // Permission policy returns Ask
4691        let permission_policy = PermissionPolicy::new();
4692
4693        let config = AgentConfig {
4694            permission_checker: Some(Arc::new(permission_policy)),
4695            confirmation_manager: Some(confirmation_manager.clone()),
4696            ..Default::default()
4697        };
4698
4699        // Spawn a task to reject the confirmation
4700        let cm_clone = confirmation_manager.clone();
4701        tokio::spawn(async move {
4702            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
4703            cm_clone
4704                .confirm("tool-1", false, Some("Too dangerous".to_string()))
4705                .await
4706                .ok();
4707        });
4708
4709        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4710        let result = agent.execute(&[], "Delete everything", None).await.unwrap();
4711
4712        // LLM should respond to the rejection
4713        assert_eq!(result.text, "Understood, I won't do that.");
4714    }
4715
4716    #[tokio::test]
4717    async fn test_agent_hitl_timeout_reject() {
4718        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
4719        use tokio::sync::broadcast;
4720
4721        let mock_client = Arc::new(MockLlmClient::new(vec![
4722            MockLlmClient::tool_call_response(
4723                "tool-1",
4724                "bash",
4725                serde_json::json!({"command": "echo test"}),
4726            ),
4727            MockLlmClient::text_response("Timed out, I understand."),
4728        ]));
4729
4730        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4731
4732        // Create HITL with very short timeout and Reject action
4733        let (event_tx, _event_rx) = broadcast::channel(100);
4734        let hitl_policy = ConfirmationPolicy {
4735            enabled: true,
4736            default_timeout_ms: 50, // Very short timeout
4737            timeout_action: TimeoutAction::Reject,
4738            ..Default::default()
4739        };
4740        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4741
4742        let permission_policy = PermissionPolicy::new();
4743
4744        let config = AgentConfig {
4745            permission_checker: Some(Arc::new(permission_policy)),
4746            confirmation_manager: Some(confirmation_manager),
4747            ..Default::default()
4748        };
4749
4750        // Don't approve - let it timeout
4751        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4752        let result = agent.execute(&[], "Echo", None).await.unwrap();
4753
4754        // Should get timeout rejection response from LLM
4755        assert_eq!(result.text, "Timed out, I understand.");
4756    }
4757
4758    #[tokio::test]
4759    async fn test_agent_hitl_timeout_auto_approve() {
4760        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
4761        use tokio::sync::broadcast;
4762
4763        let mock_client = Arc::new(MockLlmClient::new(vec![
4764            MockLlmClient::tool_call_response(
4765                "tool-1",
4766                "bash",
4767                serde_json::json!({"command": "echo hello"}),
4768            ),
4769            MockLlmClient::text_response("Auto-approved and executed!"),
4770        ]));
4771
4772        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4773
4774        // Create HITL with very short timeout and AutoApprove action
4775        let (event_tx, _event_rx) = broadcast::channel(100);
4776        let hitl_policy = ConfirmationPolicy {
4777            enabled: true,
4778            default_timeout_ms: 50, // Very short timeout
4779            timeout_action: TimeoutAction::AutoApprove,
4780            ..Default::default()
4781        };
4782        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4783
4784        let permission_policy = PermissionPolicy::new();
4785
4786        let config = AgentConfig {
4787            permission_checker: Some(Arc::new(permission_policy)),
4788            confirmation_manager: Some(confirmation_manager),
4789            ..Default::default()
4790        };
4791
4792        // Don't approve - let it timeout and auto-approve
4793        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4794        let result = agent.execute(&[], "Echo", None).await.unwrap();
4795
4796        // Should auto-approve on timeout and execute
4797        assert_eq!(result.text, "Auto-approved and executed!");
4798        assert_eq!(result.tool_calls_count, 1);
4799    }
4800
    #[tokio::test]
    async fn test_agent_hitl_confirmation_events() {
        // Verifies the HITL event sequence on the broadcast channel:
        // a ConfirmationRequired event is emitted when the tool needs
        // approval, and a ConfirmationReceived { approved: true } event
        // follows once the confirmation is granted.
        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
        use tokio::sync::broadcast;

        // Scripted LLM: one bash call, then a closing text reply.
        let mock_client = Arc::new(MockLlmClient::new(vec![
            MockLlmClient::tool_call_response(
                "tool-1",
                "bash",
                serde_json::json!({"command": "echo test"}),
            ),
            MockLlmClient::text_response("Done!"),
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));

        // Create HITL confirmation manager
        let (event_tx, mut event_rx) = broadcast::channel(100);
        let hitl_policy = ConfirmationPolicy {
            enabled: true,
            default_timeout_ms: 5000, // Long enough timeout
            ..Default::default()
        };
        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));

        // Empty policy: bash falls through to the Ask default, which
        // routes the call through the confirmation manager above.
        let permission_policy = PermissionPolicy::new();

        let config = AgentConfig {
            permission_checker: Some(Arc::new(permission_policy)),
            confirmation_manager: Some(confirmation_manager.clone()),
            ..Default::default()
        };

        // Spawn task to approve and collect events
        let cm_clone = confirmation_manager.clone();
        let event_handle = tokio::spawn(async move {
            let mut events = Vec::new();
            // Wait for ConfirmationRequired event
            while let Ok(event) = event_rx.recv().await {
                events.push(event.clone());
                if let AgentEvent::ConfirmationRequired { tool_id, .. } = event {
                    // Approve it
                    cm_clone.confirm(&tool_id, true, None).await.ok();
                    // Wait for ConfirmationReceived
                    if let Ok(recv_event) = event_rx.recv().await {
                        events.push(recv_event);
                    }
                    break;
                }
            }
            // Return everything observed so the main task can assert on it.
            events
        });

        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
        let _result = agent.execute(&[], "Echo", None).await.unwrap();

        // Check events
        let events = event_handle.await.unwrap();
        assert!(
            events
                .iter()
                .any(|e| matches!(e, AgentEvent::ConfirmationRequired { .. })),
            "Should have ConfirmationRequired event"
        );
        assert!(
            events
                .iter()
                .any(|e| matches!(e, AgentEvent::ConfirmationReceived { approved: true, .. })),
            "Should have ConfirmationReceived event with approved=true"
        );
    }
4872
4873    #[tokio::test]
4874    async fn test_agent_hitl_disabled_auto_executes() {
4875        // When HITL is disabled, tools should execute automatically even with Ask permission
4876        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4877        use tokio::sync::broadcast;
4878
4879        let mock_client = Arc::new(MockLlmClient::new(vec![
4880            MockLlmClient::tool_call_response(
4881                "tool-1",
4882                "bash",
4883                serde_json::json!({"command": "echo auto"}),
4884            ),
4885            MockLlmClient::text_response("Auto executed!"),
4886        ]));
4887
4888        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4889
4890        // Create HITL with enabled=false
4891        let (event_tx, _event_rx) = broadcast::channel(100);
4892        let hitl_policy = ConfirmationPolicy {
4893            enabled: false, // HITL disabled
4894            ..Default::default()
4895        };
4896        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4897
4898        let permission_policy = PermissionPolicy::new(); // Default is Ask
4899
4900        let config = AgentConfig {
4901            permission_checker: Some(Arc::new(permission_policy)),
4902            confirmation_manager: Some(confirmation_manager),
4903            ..Default::default()
4904        };
4905
4906        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4907        let result = agent.execute(&[], "Echo", None).await.unwrap();
4908
4909        // Should execute without waiting for confirmation
4910        assert_eq!(result.text, "Auto executed!");
4911        assert_eq!(result.tool_calls_count, 1);
4912    }
4913
4914    #[tokio::test]
4915    async fn test_agent_hitl_with_permission_deny_skips_hitl() {
4916        // When permission is Deny, HITL should not be triggered
4917        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4918        use tokio::sync::broadcast;
4919
4920        let mock_client = Arc::new(MockLlmClient::new(vec![
4921            MockLlmClient::tool_call_response(
4922                "tool-1",
4923                "bash",
4924                serde_json::json!({"command": "rm -rf /"}),
4925            ),
4926            MockLlmClient::text_response("Blocked by permission."),
4927        ]));
4928
4929        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4930
4931        // Create HITL enabled
4932        let (event_tx, mut event_rx) = broadcast::channel(100);
4933        let hitl_policy = ConfirmationPolicy {
4934            enabled: true,
4935            ..Default::default()
4936        };
4937        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4938
4939        // Permission policy denies rm commands
4940        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4941
4942        let config = AgentConfig {
4943            permission_checker: Some(Arc::new(permission_policy)),
4944            confirmation_manager: Some(confirmation_manager),
4945            ..Default::default()
4946        };
4947
4948        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4949        let result = agent.execute(&[], "Delete", None).await.unwrap();
4950
4951        // Should be denied without HITL
4952        assert_eq!(result.text, "Blocked by permission.");
4953
4954        // Should NOT have any ConfirmationRequired events
4955        let mut found_confirmation = false;
4956        while let Ok(event) = event_rx.try_recv() {
4957            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4958                found_confirmation = true;
4959            }
4960        }
4961        assert!(
4962            !found_confirmation,
4963            "HITL should not be triggered when permission is Deny"
4964        );
4965    }
4966
4967    #[tokio::test]
4968    async fn test_agent_hitl_with_permission_allow_skips_hitl() {
4969        // When permission is Allow, HITL confirmation is skipped entirely.
4970        // PermissionPolicy is the declarative rule engine; Allow = execute directly.
4971        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4972        use tokio::sync::broadcast;
4973
4974        let mock_client = Arc::new(MockLlmClient::new(vec![
4975            MockLlmClient::tool_call_response(
4976                "tool-1",
4977                "bash",
4978                serde_json::json!({"command": "echo hello"}),
4979            ),
4980            MockLlmClient::text_response("Allowed!"),
4981        ]));
4982
4983        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4984
4985        // Create HITL enabled
4986        let (event_tx, mut event_rx) = broadcast::channel(100);
4987        let hitl_policy = ConfirmationPolicy {
4988            enabled: true,
4989            ..Default::default()
4990        };
4991        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4992
4993        // Permission policy allows echo commands
4994        let permission_policy = PermissionPolicy::new().allow("bash(echo:*)");
4995
4996        let config = AgentConfig {
4997            permission_checker: Some(Arc::new(permission_policy)),
4998            confirmation_manager: Some(confirmation_manager.clone()),
4999            ..Default::default()
5000        };
5001
5002        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5003        let result = agent.execute(&[], "Echo", None).await.unwrap();
5004
5005        // Should execute directly without HITL (permission Allow skips confirmation)
5006        assert_eq!(result.text, "Allowed!");
5007
5008        // Should NOT have ConfirmationRequired event (Allow bypasses HITL)
5009        let mut found_confirmation = false;
5010        while let Ok(event) = event_rx.try_recv() {
5011            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
5012                found_confirmation = true;
5013            }
5014        }
5015        assert!(
5016            !found_confirmation,
5017            "Permission Allow should skip HITL confirmation"
5018        );
5019    }
5020
5021    #[tokio::test]
5022    async fn test_agent_hitl_multiple_tool_calls() {
5023        // Test multiple tool calls in sequence with HITL
5024        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5025        use tokio::sync::broadcast;
5026
5027        let mock_client = Arc::new(MockLlmClient::new(vec![
5028            // First response: two tool calls
5029            LlmResponse {
5030                message: Message {
5031                    role: "assistant".to_string(),
5032                    content: vec![
5033                        ContentBlock::ToolUse {
5034                            id: "tool-1".to_string(),
5035                            name: "bash".to_string(),
5036                            input: serde_json::json!({"command": "echo first"}),
5037                        },
5038                        ContentBlock::ToolUse {
5039                            id: "tool-2".to_string(),
5040                            name: "bash".to_string(),
5041                            input: serde_json::json!({"command": "echo second"}),
5042                        },
5043                    ],
5044                    reasoning_content: None,
5045                },
5046                usage: TokenUsage {
5047                    prompt_tokens: 10,
5048                    completion_tokens: 5,
5049                    total_tokens: 15,
5050                    cache_read_tokens: None,
5051                    cache_write_tokens: None,
5052                },
5053                stop_reason: Some("tool_use".to_string()),
5054                meta: None,
5055            },
5056            MockLlmClient::text_response("Both executed!"),
5057        ]));
5058
5059        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5060
5061        // Create HITL
5062        let (event_tx, _event_rx) = broadcast::channel(100);
5063        let hitl_policy = ConfirmationPolicy {
5064            enabled: true,
5065            default_timeout_ms: 5000,
5066            ..Default::default()
5067        };
5068        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5069
5070        let permission_policy = PermissionPolicy::new(); // Default Ask
5071
5072        let config = AgentConfig {
5073            permission_checker: Some(Arc::new(permission_policy)),
5074            confirmation_manager: Some(confirmation_manager.clone()),
5075            ..Default::default()
5076        };
5077
5078        // Spawn task to approve both tools
5079        let cm_clone = confirmation_manager.clone();
5080        tokio::spawn(async move {
5081            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
5082            cm_clone.confirm("tool-1", true, None).await.ok();
5083            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
5084            cm_clone.confirm("tool-2", true, None).await.ok();
5085        });
5086
5087        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5088        let result = agent.execute(&[], "Run both", None).await.unwrap();
5089
5090        assert_eq!(result.text, "Both executed!");
5091        assert_eq!(result.tool_calls_count, 2);
5092    }
5093
5094    #[tokio::test]
5095    async fn test_agent_hitl_partial_approval() {
5096        // Test: first tool approved, second rejected
5097        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5098        use tokio::sync::broadcast;
5099
5100        let mock_client = Arc::new(MockLlmClient::new(vec![
5101            // First response: two tool calls
5102            LlmResponse {
5103                message: Message {
5104                    role: "assistant".to_string(),
5105                    content: vec![
5106                        ContentBlock::ToolUse {
5107                            id: "tool-1".to_string(),
5108                            name: "bash".to_string(),
5109                            input: serde_json::json!({"command": "echo safe"}),
5110                        },
5111                        ContentBlock::ToolUse {
5112                            id: "tool-2".to_string(),
5113                            name: "bash".to_string(),
5114                            input: serde_json::json!({"command": "rm -rf /"}),
5115                        },
5116                    ],
5117                    reasoning_content: None,
5118                },
5119                usage: TokenUsage {
5120                    prompt_tokens: 10,
5121                    completion_tokens: 5,
5122                    total_tokens: 15,
5123                    cache_read_tokens: None,
5124                    cache_write_tokens: None,
5125                },
5126                stop_reason: Some("tool_use".to_string()),
5127                meta: None,
5128            },
5129            MockLlmClient::text_response("First worked, second rejected."),
5130        ]));
5131
5132        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5133
5134        let (event_tx, _event_rx) = broadcast::channel(100);
5135        let hitl_policy = ConfirmationPolicy {
5136            enabled: true,
5137            default_timeout_ms: 5000,
5138            ..Default::default()
5139        };
5140        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5141
5142        let permission_policy = PermissionPolicy::new();
5143
5144        let config = AgentConfig {
5145            permission_checker: Some(Arc::new(permission_policy)),
5146            confirmation_manager: Some(confirmation_manager.clone()),
5147            ..Default::default()
5148        };
5149
5150        // Approve first, reject second
5151        let cm_clone = confirmation_manager.clone();
5152        tokio::spawn(async move {
5153            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
5154            cm_clone.confirm("tool-1", true, None).await.ok();
5155            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
5156            cm_clone
5157                .confirm("tool-2", false, Some("Dangerous".to_string()))
5158                .await
5159                .ok();
5160        });
5161
5162        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5163        let result = agent.execute(&[], "Run both", None).await.unwrap();
5164
5165        assert_eq!(result.text, "First worked, second rejected.");
5166        assert_eq!(result.tool_calls_count, 2);
5167    }
5168
5169    #[tokio::test]
5170    async fn test_agent_hitl_yolo_mode_auto_approves() {
5171        // YOLO mode: specific lanes auto-approve without confirmation
5172        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, SessionLane};
5173        use tokio::sync::broadcast;
5174
5175        let mock_client = Arc::new(MockLlmClient::new(vec![
5176            MockLlmClient::tool_call_response(
5177                "tool-1",
5178                "read", // Query lane tool
5179                serde_json::json!({"path": "/tmp/test.txt"}),
5180            ),
5181            MockLlmClient::text_response("File read!"),
5182        ]));
5183
5184        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5185
5186        // YOLO mode for Query lane (read, glob, ls, grep)
5187        let (event_tx, mut event_rx) = broadcast::channel(100);
5188        let mut yolo_lanes = std::collections::HashSet::new();
5189        yolo_lanes.insert(SessionLane::Query);
5190        let hitl_policy = ConfirmationPolicy {
5191            enabled: true,
5192            yolo_lanes, // Auto-approve query operations
5193            ..Default::default()
5194        };
5195        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5196
5197        let permission_policy = PermissionPolicy::new();
5198
5199        let config = AgentConfig {
5200            permission_checker: Some(Arc::new(permission_policy)),
5201            confirmation_manager: Some(confirmation_manager),
5202            ..Default::default()
5203        };
5204
5205        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5206        let result = agent.execute(&[], "Read file", None).await.unwrap();
5207
5208        // Should auto-execute without confirmation (YOLO mode)
5209        assert_eq!(result.text, "File read!");
5210
5211        // Should NOT have ConfirmationRequired for yolo lane
5212        let mut found_confirmation = false;
5213        while let Ok(event) = event_rx.try_recv() {
5214            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
5215                found_confirmation = true;
5216            }
5217        }
5218        assert!(
5219            !found_confirmation,
5220            "YOLO mode should not trigger confirmation"
5221        );
5222    }
5223
5224    #[tokio::test]
5225    async fn test_agent_config_with_all_options() {
5226        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5227        use tokio::sync::broadcast;
5228
5229        let (event_tx, _) = broadcast::channel(100);
5230        let hitl_policy = ConfirmationPolicy::default();
5231        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5232
5233        let permission_policy = PermissionPolicy::new().allow("bash(*)");
5234
5235        let config = AgentConfig {
5236            prompt_slots: SystemPromptSlots {
5237                extra: Some("Test system prompt".to_string()),
5238                ..Default::default()
5239            },
5240            tools: vec![],
5241            max_tool_rounds: 10,
5242            permission_checker: Some(Arc::new(permission_policy)),
5243            confirmation_manager: Some(confirmation_manager),
5244            context_providers: vec![],
5245            planning_mode: PlanningMode::default(),
5246            goal_tracking: false,
5247            hook_engine: None,
5248            skill_registry: None,
5249            ..AgentConfig::default()
5250        };
5251
5252        assert!(config.prompt_slots.build().contains("Test system prompt"));
5253        assert_eq!(config.max_tool_rounds, 10);
5254        assert!(config.permission_checker.is_some());
5255        assert!(config.confirmation_manager.is_some());
5256        assert!(config.context_providers.is_empty());
5257
5258        // Test Debug trait
5259        let debug_str = format!("{:?}", config);
5260        assert!(debug_str.contains("AgentConfig"));
5261        assert!(debug_str.contains("permission_checker: true"));
5262        assert!(debug_str.contains("confirmation_manager: true"));
5263        assert!(debug_str.contains("context_providers: 0"));
5264    }
5265
5266    // ========================================================================
5267    // Context Provider Tests
5268    // ========================================================================
5269
5270    use crate::context::{ContextItem, ContextType};
5271
    /// Mock context provider for testing.
    ///
    /// Returns a fixed list of items from `query()` and records every
    /// `on_turn_complete` invocation so tests can assert on the
    /// (session_id, prompt, response) triples the agent reported.
    struct MockContextProvider {
        name: String,             // provider name returned by `name()`
        items: Vec<ContextItem>,  // items cloned into every `query()` result
        // Shared so tests can hold a handle and inspect calls after the
        // provider itself has been moved into the agent config.
        on_turn_calls: std::sync::Arc<tokio::sync::RwLock<Vec<(String, String, String)>>>,
    }
5278
5279    impl MockContextProvider {
5280        fn new(name: &str) -> Self {
5281            Self {
5282                name: name.to_string(),
5283                items: Vec::new(),
5284                on_turn_calls: std::sync::Arc::new(tokio::sync::RwLock::new(Vec::new())),
5285            }
5286        }
5287
5288        fn with_items(mut self, items: Vec<ContextItem>) -> Self {
5289            self.items = items;
5290            self
5291        }
5292    }
5293
5294    #[async_trait::async_trait]
5295    impl ContextProvider for MockContextProvider {
5296        fn name(&self) -> &str {
5297            &self.name
5298        }
5299
5300        async fn query(&self, _query: &ContextQuery) -> anyhow::Result<ContextResult> {
5301            let mut result = ContextResult::new(&self.name);
5302            for item in &self.items {
5303                result.add_item(item.clone());
5304            }
5305            Ok(result)
5306        }
5307
5308        async fn on_turn_complete(
5309            &self,
5310            session_id: &str,
5311            prompt: &str,
5312            response: &str,
5313        ) -> anyhow::Result<()> {
5314            let mut calls = self.on_turn_calls.write().await;
5315            calls.push((
5316                session_id.to_string(),
5317                prompt.to_string(),
5318                response.to_string(),
5319            ));
5320            Ok(())
5321        }
5322    }
5323
5324    #[tokio::test]
5325    async fn test_agent_with_context_provider() {
5326        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5327            "Response using context",
5328        )]));
5329
5330        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5331
5332        let provider =
5333            MockContextProvider::new("test-provider").with_items(vec![ContextItem::new(
5334                "ctx-1",
5335                ContextType::Resource,
5336                "Relevant context here",
5337            )
5338            .with_source("test://docs/example")]);
5339
5340        let config = AgentConfig {
5341            prompt_slots: SystemPromptSlots {
5342                extra: Some("You are helpful.".to_string()),
5343                ..Default::default()
5344            },
5345            context_providers: vec![Arc::new(provider)],
5346            ..Default::default()
5347        };
5348
5349        let agent = AgentLoop::new(
5350            mock_client.clone(),
5351            tool_executor,
5352            test_tool_context(),
5353            config,
5354        );
5355        let result = agent.execute(&[], "What is X?", None).await.unwrap();
5356
5357        assert_eq!(result.text, "Response using context");
5358        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
5359    }
5360
5361    #[tokio::test]
5362    async fn test_agent_context_provider_events() {
5363        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5364            "Answer",
5365        )]));
5366
5367        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5368
5369        let provider =
5370            MockContextProvider::new("event-provider").with_items(vec![ContextItem::new(
5371                "item-1",
5372                ContextType::Memory,
5373                "Memory content",
5374            )
5375            .with_token_count(50)]);
5376
5377        let config = AgentConfig {
5378            context_providers: vec![Arc::new(provider)],
5379            ..Default::default()
5380        };
5381
5382        let (tx, mut rx) = mpsc::channel(100);
5383        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5384        let _result = agent.execute(&[], "Test prompt", Some(tx)).await.unwrap();
5385
5386        // Collect events
5387        let mut events = Vec::new();
5388        while let Ok(event) = rx.try_recv() {
5389            events.push(event);
5390        }
5391
5392        // Should have ContextResolving and ContextResolved events
5393        assert!(
5394            events
5395                .iter()
5396                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
5397            "Should have ContextResolving event"
5398        );
5399        assert!(
5400            events
5401                .iter()
5402                .any(|e| matches!(e, AgentEvent::ContextResolved { .. })),
5403            "Should have ContextResolved event"
5404        );
5405
5406        // Check context resolved values
5407        for event in &events {
5408            if let AgentEvent::ContextResolved {
5409                total_items,
5410                total_tokens,
5411            } = event
5412            {
5413                assert_eq!(*total_items, 1);
5414                assert_eq!(*total_tokens, 50);
5415            }
5416        }
5417    }
5418
5419    #[tokio::test]
5420    async fn test_agent_multiple_context_providers() {
5421        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5422            "Combined response",
5423        )]));
5424
5425        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5426
5427        let provider1 = MockContextProvider::new("provider-1").with_items(vec![ContextItem::new(
5428            "p1-1",
5429            ContextType::Resource,
5430            "Resource from P1",
5431        )
5432        .with_token_count(100)]);
5433
5434        let provider2 = MockContextProvider::new("provider-2").with_items(vec![
5435            ContextItem::new("p2-1", ContextType::Memory, "Memory from P2").with_token_count(50),
5436            ContextItem::new("p2-2", ContextType::Skill, "Skill from P2").with_token_count(75),
5437        ]);
5438
5439        let config = AgentConfig {
5440            prompt_slots: SystemPromptSlots {
5441                extra: Some("Base system prompt.".to_string()),
5442                ..Default::default()
5443            },
5444            context_providers: vec![Arc::new(provider1), Arc::new(provider2)],
5445            ..Default::default()
5446        };
5447
5448        let (tx, mut rx) = mpsc::channel(100);
5449        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5450        let result = agent.execute(&[], "Query", Some(tx)).await.unwrap();
5451
5452        assert_eq!(result.text, "Combined response");
5453
5454        // Check context resolved event has combined totals
5455        while let Ok(event) = rx.try_recv() {
5456            if let AgentEvent::ContextResolved {
5457                total_items,
5458                total_tokens,
5459            } = event
5460            {
5461                assert_eq!(total_items, 3); // 1 + 2
5462                assert_eq!(total_tokens, 225); // 100 + 50 + 75
5463            }
5464        }
5465    }
5466
5467    #[tokio::test]
5468    async fn test_agent_no_context_providers() {
5469        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5470            "No context",
5471        )]));
5472
5473        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5474
5475        // No context providers
5476        let config = AgentConfig::default();
5477
5478        let (tx, mut rx) = mpsc::channel(100);
5479        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5480        let result = agent.execute(&[], "Simple prompt", Some(tx)).await.unwrap();
5481
5482        assert_eq!(result.text, "No context");
5483
5484        // Should NOT have context events when no providers
5485        let mut events = Vec::new();
5486        while let Ok(event) = rx.try_recv() {
5487            events.push(event);
5488        }
5489
5490        assert!(
5491            !events
5492                .iter()
5493                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
5494            "Should NOT have ContextResolving event"
5495        );
5496    }
5497
5498    #[tokio::test]
5499    async fn test_agent_context_on_turn_complete() {
5500        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5501            "Final response",
5502        )]));
5503
5504        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5505
5506        let provider = Arc::new(MockContextProvider::new("memory-provider"));
5507        let on_turn_calls = provider.on_turn_calls.clone();
5508
5509        let config = AgentConfig {
5510            context_providers: vec![provider],
5511            ..Default::default()
5512        };
5513
5514        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5515
5516        // Execute with session ID
5517        let result = agent
5518            .execute_with_session(&[], "User prompt", Some("sess-123"), None, None)
5519            .await
5520            .unwrap();
5521
5522        assert_eq!(result.text, "Final response");
5523
5524        // Check on_turn_complete was called
5525        let calls = on_turn_calls.read().await;
5526        assert_eq!(calls.len(), 1);
5527        assert_eq!(calls[0].0, "sess-123");
5528        assert_eq!(calls[0].1, "User prompt");
5529        assert_eq!(calls[0].2, "Final response");
5530    }
5531
5532    #[tokio::test]
5533    async fn test_agent_context_on_turn_complete_no_session() {
5534        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5535            "Response",
5536        )]));
5537
5538        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5539
5540        let provider = Arc::new(MockContextProvider::new("memory-provider"));
5541        let on_turn_calls = provider.on_turn_calls.clone();
5542
5543        let config = AgentConfig {
5544            context_providers: vec![provider],
5545            ..Default::default()
5546        };
5547
5548        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5549
5550        // Execute without session ID (uses execute() which passes None)
5551        let _result = agent.execute(&[], "Prompt", None).await.unwrap();
5552
5553        // on_turn_complete should NOT be called when session_id is None
5554        let calls = on_turn_calls.read().await;
5555        assert!(calls.is_empty());
5556    }
5557
5558    #[tokio::test]
5559    async fn test_agent_build_augmented_system_prompt() {
5560        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response("OK")]));
5561
5562        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5563
5564        let provider = MockContextProvider::new("test").with_items(vec![ContextItem::new(
5565            "doc-1",
5566            ContextType::Resource,
5567            "Auth uses JWT tokens.",
5568        )
5569        .with_source("viking://docs/auth")]);
5570
5571        let config = AgentConfig {
5572            prompt_slots: SystemPromptSlots {
5573                extra: Some("You are helpful.".to_string()),
5574                ..Default::default()
5575            },
5576            context_providers: vec![Arc::new(provider)],
5577            ..Default::default()
5578        };
5579
5580        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5581
5582        // Test building augmented prompt
5583        let context_results = agent.resolve_context("test", None).await;
5584        let augmented = agent.build_augmented_system_prompt(&context_results);
5585
5586        let augmented_str = augmented.unwrap();
5587        assert!(augmented_str.contains("You are helpful."));
5588        assert!(augmented_str.contains("<context source=\"viking://docs/auth\" type=\"Resource\">"));
5589        assert!(augmented_str.contains("Auth uses JWT tokens."));
5590    }
5591
5592    // ========================================================================
5593    // Agentic Loop Integration Tests
5594    // ========================================================================
5595
5596    /// Helper: collect all events from a channel
5597    async fn collect_events(mut rx: mpsc::Receiver<AgentEvent>) -> Vec<AgentEvent> {
5598        let mut events = Vec::new();
5599        while let Ok(event) = rx.try_recv() {
5600            events.push(event);
5601        }
5602        // Drain remaining
5603        while let Some(event) = rx.recv().await {
5604            events.push(event);
5605        }
5606        events
5607    }
5608
5609    #[tokio::test]
5610    async fn test_agent_multi_turn_tool_chain() {
5611        // LLM calls tool A → sees result → calls tool B → sees result → final answer
5612        let mock_client = Arc::new(MockLlmClient::new(vec![
5613            // Turn 1: call ls
5614            MockLlmClient::tool_call_response(
5615                "t1",
5616                "bash",
5617                serde_json::json!({"command": "echo step1"}),
5618            ),
5619            // Turn 2: call another tool based on first result
5620            MockLlmClient::tool_call_response(
5621                "t2",
5622                "bash",
5623                serde_json::json!({"command": "echo step2"}),
5624            ),
5625            // Turn 3: final answer
5626            MockLlmClient::text_response("Completed both steps: step1 then step2"),
5627        ]));
5628
5629        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5630        let config = AgentConfig::default();
5631
5632        let agent = AgentLoop::new(
5633            mock_client.clone(),
5634            tool_executor,
5635            test_tool_context(),
5636            config,
5637        );
5638        let result = agent.execute(&[], "Run two steps", None).await.unwrap();
5639
5640        assert_eq!(result.text, "Completed both steps: step1 then step2");
5641        assert_eq!(result.tool_calls_count, 2);
5642        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 3);
5643
5644        // Verify message history: user → assistant(tool_use) → user(tool_result) → assistant(tool_use) → user(tool_result) → assistant(text)
5645        assert_eq!(result.messages[0].role, "user");
5646        assert_eq!(result.messages[1].role, "assistant"); // tool call 1
5647        assert_eq!(result.messages[2].role, "user"); // tool result 1 (Anthropic convention)
5648        assert_eq!(result.messages[3].role, "assistant"); // tool call 2
5649        assert_eq!(result.messages[4].role, "user"); // tool result 2
5650        assert_eq!(result.messages[5].role, "assistant"); // final text
5651        assert_eq!(result.messages.len(), 6);
5652    }
5653
5654    #[tokio::test]
5655    async fn test_agent_conversation_history_preserved() {
5656        // Pass existing history, verify it's preserved in output
5657        let existing_history = vec![
5658            Message::user("What is Rust?"),
5659            Message {
5660                role: "assistant".to_string(),
5661                content: vec![ContentBlock::Text {
5662                    text: "Rust is a systems programming language.".to_string(),
5663                }],
5664                reasoning_content: None,
5665            },
5666        ];
5667
5668        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5669            "Rust was created by Graydon Hoare at Mozilla.",
5670        )]));
5671
5672        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5673        let agent = AgentLoop::new(
5674            mock_client.clone(),
5675            tool_executor,
5676            test_tool_context(),
5677            AgentConfig::default(),
5678        );
5679
5680        let result = agent
5681            .execute(&existing_history, "Who created it?", None)
5682            .await
5683            .unwrap();
5684
5685        // History should contain: old user + old assistant + new user + new assistant
5686        assert_eq!(result.messages.len(), 4);
5687        assert_eq!(result.messages[0].text(), "What is Rust?");
5688        assert_eq!(
5689            result.messages[1].text(),
5690            "Rust is a systems programming language."
5691        );
5692        assert_eq!(result.messages[2].text(), "Who created it?");
5693        assert_eq!(
5694            result.messages[3].text(),
5695            "Rust was created by Graydon Hoare at Mozilla."
5696        );
5697    }
5698
5699    #[tokio::test]
5700    async fn test_agent_event_stream_completeness() {
5701        // Verify full event sequence for a single tool call loop
5702        let mock_client = Arc::new(MockLlmClient::new(vec![
5703            MockLlmClient::tool_call_response(
5704                "t1",
5705                "bash",
5706                serde_json::json!({"command": "echo hi"}),
5707            ),
5708            MockLlmClient::text_response("Done"),
5709        ]));
5710
5711        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5712        let agent = AgentLoop::new(
5713            mock_client,
5714            tool_executor,
5715            test_tool_context(),
5716            AgentConfig::default(),
5717        );
5718
5719        let (tx, rx) = mpsc::channel(100);
5720        let result = agent.execute(&[], "Say hi", Some(tx)).await.unwrap();
5721        assert_eq!(result.text, "Done");
5722
5723        let events = collect_events(rx).await;
5724
5725        // Verify event sequence
5726        let event_types: Vec<&str> = events
5727            .iter()
5728            .map(|e| match e {
5729                AgentEvent::Start { .. } => "Start",
5730                AgentEvent::TurnStart { .. } => "TurnStart",
5731                AgentEvent::TurnEnd { .. } => "TurnEnd",
5732                AgentEvent::ToolEnd { .. } => "ToolEnd",
5733                AgentEvent::End { .. } => "End",
5734                _ => "Other",
5735            })
5736            .collect();
5737
5738        // Must start with Start, end with End
5739        assert_eq!(event_types.first(), Some(&"Start"));
5740        assert_eq!(event_types.last(), Some(&"End"));
5741
5742        // Must have 2 TurnStarts (tool call turn + final answer turn)
5743        let turn_starts = event_types.iter().filter(|&&t| t == "TurnStart").count();
5744        assert_eq!(turn_starts, 2);
5745
5746        // Must have 1 ToolEnd
5747        let tool_ends = event_types.iter().filter(|&&t| t == "ToolEnd").count();
5748        assert_eq!(tool_ends, 1);
5749    }
5750
5751    #[tokio::test]
5752    async fn test_agent_multiple_tools_single_turn() {
5753        // LLM returns 2 tool calls in one response
5754        let mock_client = Arc::new(MockLlmClient::new(vec![
5755            LlmResponse {
5756                message: Message {
5757                    role: "assistant".to_string(),
5758                    content: vec![
5759                        ContentBlock::ToolUse {
5760                            id: "t1".to_string(),
5761                            name: "bash".to_string(),
5762                            input: serde_json::json!({"command": "echo first"}),
5763                        },
5764                        ContentBlock::ToolUse {
5765                            id: "t2".to_string(),
5766                            name: "bash".to_string(),
5767                            input: serde_json::json!({"command": "echo second"}),
5768                        },
5769                    ],
5770                    reasoning_content: None,
5771                },
5772                usage: TokenUsage {
5773                    prompt_tokens: 10,
5774                    completion_tokens: 5,
5775                    total_tokens: 15,
5776                    cache_read_tokens: None,
5777                    cache_write_tokens: None,
5778                },
5779                stop_reason: Some("tool_use".to_string()),
5780                meta: None,
5781            },
5782            MockLlmClient::text_response("Both commands ran"),
5783        ]));
5784
5785        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5786        let agent = AgentLoop::new(
5787            mock_client.clone(),
5788            tool_executor,
5789            test_tool_context(),
5790            AgentConfig::default(),
5791        );
5792
5793        let result = agent.execute(&[], "Run both", None).await.unwrap();
5794
5795        assert_eq!(result.text, "Both commands ran");
5796        assert_eq!(result.tool_calls_count, 2);
5797        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2); // Only 2 LLM calls
5798
5799        // Messages: user → assistant(2 tools) → user(tool_result) → user(tool_result) → assistant(text)
5800        assert_eq!(result.messages[0].role, "user");
5801        assert_eq!(result.messages[1].role, "assistant");
5802        assert_eq!(result.messages[2].role, "user"); // tool result 1
5803        assert_eq!(result.messages[3].role, "user"); // tool result 2
5804        assert_eq!(result.messages[4].role, "assistant");
5805    }
5806
5807    #[tokio::test]
5808    async fn test_agent_token_usage_accumulation() {
5809        // Verify usage sums across multiple turns
5810        let mock_client = Arc::new(MockLlmClient::new(vec![
5811            MockLlmClient::tool_call_response(
5812                "t1",
5813                "bash",
5814                serde_json::json!({"command": "echo x"}),
5815            ),
5816            MockLlmClient::text_response("Done"),
5817        ]));
5818
5819        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5820        let agent = AgentLoop::new(
5821            mock_client,
5822            tool_executor,
5823            test_tool_context(),
5824            AgentConfig::default(),
5825        );
5826
5827        let result = agent.execute(&[], "test", None).await.unwrap();
5828
5829        // Each mock response has prompt=10, completion=5, total=15
5830        // 2 LLM calls → 20 prompt, 10 completion, 30 total
5831        assert_eq!(result.usage.prompt_tokens, 20);
5832        assert_eq!(result.usage.completion_tokens, 10);
5833        assert_eq!(result.usage.total_tokens, 30);
5834    }
5835
5836    #[tokio::test]
5837    async fn test_agent_system_prompt_passed() {
5838        // Verify system prompt is used (MockLlmClient captures calls)
5839        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5840            "I am a coding assistant.",
5841        )]));
5842
5843        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5844        let config = AgentConfig {
5845            prompt_slots: SystemPromptSlots {
5846                extra: Some("You are a coding assistant.".to_string()),
5847                ..Default::default()
5848            },
5849            ..Default::default()
5850        };
5851
5852        let agent = AgentLoop::new(
5853            mock_client.clone(),
5854            tool_executor,
5855            test_tool_context(),
5856            config,
5857        );
5858        let result = agent.execute(&[], "What are you?", None).await.unwrap();
5859
5860        assert_eq!(result.text, "I am a coding assistant.");
5861        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
5862    }
5863
5864    #[tokio::test]
5865    async fn test_agent_max_rounds_with_persistent_tool_calls() {
5866        // LLM keeps calling tools forever — should hit max_tool_rounds
5867        let mut responses = Vec::new();
5868        for i in 0..15 {
5869            responses.push(MockLlmClient::tool_call_response(
5870                &format!("t{}", i),
5871                "bash",
5872                serde_json::json!({"command": format!("echo round{}", i)}),
5873            ));
5874        }
5875
5876        let mock_client = Arc::new(MockLlmClient::new(responses));
5877        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5878        let config = AgentConfig {
5879            max_tool_rounds: 5,
5880            ..Default::default()
5881        };
5882
5883        let agent = AgentLoop::new(
5884            mock_client.clone(),
5885            tool_executor,
5886            test_tool_context(),
5887            config,
5888        );
5889        let result = agent.execute(&[], "Loop forever", None).await;
5890
5891        assert!(result.is_err());
5892        let err = result.unwrap_err().to_string();
5893        assert!(err.contains("Max tool rounds (5) exceeded"));
5894    }
5895
5896    #[tokio::test]
5897    async fn test_agent_end_event_contains_final_text() {
5898        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5899            "Final answer here",
5900        )]));
5901
5902        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5903        let agent = AgentLoop::new(
5904            mock_client,
5905            tool_executor,
5906            test_tool_context(),
5907            AgentConfig::default(),
5908        );
5909
5910        let (tx, rx) = mpsc::channel(100);
5911        agent.execute(&[], "test", Some(tx)).await.unwrap();
5912
5913        let events = collect_events(rx).await;
5914        let end_event = events.iter().find(|e| matches!(e, AgentEvent::End { .. }));
5915        assert!(end_event.is_some());
5916
5917        if let AgentEvent::End { text, usage, .. } = end_event.unwrap() {
5918            assert_eq!(text, "Final answer here");
5919            assert_eq!(usage.total_tokens, 15);
5920        }
5921    }
5922}
5923
5924#[cfg(test)]
5925mod extra_agent_tests {
5926    use super::*;
5927    use crate::agent::tests::MockLlmClient;
5928    use crate::queue::SessionQueueConfig;
5929    use crate::tools::ToolExecutor;
5930    use std::path::PathBuf;
5931    use std::sync::atomic::{AtomicUsize, Ordering};
5932
5933    fn test_tool_context() -> ToolContext {
5934        ToolContext::new(PathBuf::from("/tmp"))
5935    }
5936
5937    // ========================================================================
5938    // AgentConfig
5939    // ========================================================================
5940
5941    #[test]
5942    fn test_agent_config_debug() {
5943        let config = AgentConfig {
5944            prompt_slots: SystemPromptSlots {
5945                extra: Some("You are helpful".to_string()),
5946                ..Default::default()
5947            },
5948            tools: vec![],
5949            max_tool_rounds: 10,
5950            permission_checker: None,
5951            confirmation_manager: None,
5952            context_providers: vec![],
5953            planning_mode: PlanningMode::Enabled,
5954            goal_tracking: false,
5955            hook_engine: None,
5956            skill_registry: None,
5957            ..AgentConfig::default()
5958        };
5959        let debug = format!("{:?}", config);
5960        assert!(debug.contains("AgentConfig"));
5961        assert!(debug.contains("planning_mode"));
5962    }
5963
5964    #[test]
5965    fn test_agent_config_default_values() {
5966        let config = AgentConfig::default();
5967        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
5968        assert_eq!(config.planning_mode, PlanningMode::Auto);
5969        assert!(!config.goal_tracking);
5970        assert!(config.context_providers.is_empty());
5971    }
5972
5973    // ========================================================================
5974    // AgentEvent serialization
5975    // ========================================================================
5976
5977    #[test]
5978    fn test_agent_event_serialize_start() {
5979        let event = AgentEvent::Start {
5980            prompt: "Hello".to_string(),
5981        };
5982        let json = serde_json::to_string(&event).unwrap();
5983        assert!(json.contains("agent_start"));
5984        assert!(json.contains("Hello"));
5985    }
5986
5987    #[test]
5988    fn test_agent_event_serialize_text_delta() {
5989        let event = AgentEvent::TextDelta {
5990            text: "chunk".to_string(),
5991        };
5992        let json = serde_json::to_string(&event).unwrap();
5993        assert!(json.contains("text_delta"));
5994    }
5995
5996    #[test]
5997    fn test_agent_event_serialize_tool_start() {
5998        let event = AgentEvent::ToolStart {
5999            id: "t1".to_string(),
6000            name: "bash".to_string(),
6001        };
6002        let json = serde_json::to_string(&event).unwrap();
6003        assert!(json.contains("tool_start"));
6004        assert!(json.contains("bash"));
6005    }
6006
6007    #[test]
6008    fn test_agent_event_serialize_tool_end() {
6009        let event = AgentEvent::ToolEnd {
6010            id: "t1".to_string(),
6011            name: "bash".to_string(),
6012            output: "hello".to_string(),
6013            exit_code: 0,
6014            metadata: None,
6015        };
6016        let json = serde_json::to_string(&event).unwrap();
6017        assert!(json.contains("tool_end"));
6018    }
6019
6020    #[test]
6021    fn test_agent_event_tool_end_has_metadata_field() {
6022        let event = AgentEvent::ToolEnd {
6023            id: "t1".to_string(),
6024            name: "write".to_string(),
6025            output: "Wrote 5 bytes".to_string(),
6026            exit_code: 0,
6027            metadata: Some(
6028                serde_json::json!({ "before": "old", "after": "new", "file_path": "f.txt" }),
6029            ),
6030        };
6031        let json = serde_json::to_string(&event).unwrap();
6032        assert!(json.contains("\"before\""));
6033    }
6034
6035    #[test]
6036    fn test_agent_event_serialize_error() {
6037        let event = AgentEvent::Error {
6038            message: "oops".to_string(),
6039        };
6040        let json = serde_json::to_string(&event).unwrap();
6041        assert!(json.contains("error"));
6042        assert!(json.contains("oops"));
6043    }
6044
6045    #[test]
6046    fn test_agent_event_serialize_confirmation_required() {
6047        let event = AgentEvent::ConfirmationRequired {
6048            tool_id: "t1".to_string(),
6049            tool_name: "bash".to_string(),
6050            args: serde_json::json!({"cmd": "rm"}),
6051            timeout_ms: 30000,
6052        };
6053        let json = serde_json::to_string(&event).unwrap();
6054        assert!(json.contains("confirmation_required"));
6055    }
6056
6057    #[test]
6058    fn test_agent_event_serialize_confirmation_received() {
6059        let event = AgentEvent::ConfirmationReceived {
6060            tool_id: "t1".to_string(),
6061            approved: true,
6062            reason: Some("safe".to_string()),
6063        };
6064        let json = serde_json::to_string(&event).unwrap();
6065        assert!(json.contains("confirmation_received"));
6066    }
6067
6068    #[test]
6069    fn test_agent_event_serialize_confirmation_timeout() {
6070        let event = AgentEvent::ConfirmationTimeout {
6071            tool_id: "t1".to_string(),
6072            action_taken: "rejected".to_string(),
6073        };
6074        let json = serde_json::to_string(&event).unwrap();
6075        assert!(json.contains("confirmation_timeout"));
6076    }
6077
6078    #[test]
6079    fn test_agent_event_serialize_external_task_pending() {
6080        let event = AgentEvent::ExternalTaskPending {
6081            task_id: "task-1".to_string(),
6082            session_id: "sess-1".to_string(),
6083            lane: crate::hitl::SessionLane::Execute,
6084            command_type: "bash".to_string(),
6085            payload: serde_json::json!({}),
6086            timeout_ms: 60000,
6087        };
6088        let json = serde_json::to_string(&event).unwrap();
6089        assert!(json.contains("external_task_pending"));
6090    }
6091
6092    #[test]
6093    fn test_agent_event_serialize_external_task_completed() {
6094        let event = AgentEvent::ExternalTaskCompleted {
6095            task_id: "task-1".to_string(),
6096            session_id: "sess-1".to_string(),
6097            success: false,
6098        };
6099        let json = serde_json::to_string(&event).unwrap();
6100        assert!(json.contains("external_task_completed"));
6101    }
6102
6103    #[test]
6104    fn test_agent_event_serialize_permission_denied() {
6105        let event = AgentEvent::PermissionDenied {
6106            tool_id: "t1".to_string(),
6107            tool_name: "bash".to_string(),
6108            args: serde_json::json!({}),
6109            reason: "denied".to_string(),
6110        };
6111        let json = serde_json::to_string(&event).unwrap();
6112        assert!(json.contains("permission_denied"));
6113    }
6114
6115    #[test]
6116    fn test_agent_event_serialize_context_compacted() {
6117        let event = AgentEvent::ContextCompacted {
6118            session_id: "sess-1".to_string(),
6119            before_messages: 100,
6120            after_messages: 20,
6121            percent_before: 0.85,
6122        };
6123        let json = serde_json::to_string(&event).unwrap();
6124        assert!(json.contains("context_compacted"));
6125    }
6126
6127    #[test]
6128    fn test_agent_event_serialize_turn_start() {
6129        let event = AgentEvent::TurnStart { turn: 3 };
6130        let json = serde_json::to_string(&event).unwrap();
6131        assert!(json.contains("turn_start"));
6132    }
6133
6134    #[test]
6135    fn test_agent_event_serialize_turn_end() {
6136        let event = AgentEvent::TurnEnd {
6137            turn: 3,
6138            usage: TokenUsage::default(),
6139        };
6140        let json = serde_json::to_string(&event).unwrap();
6141        assert!(json.contains("turn_end"));
6142    }
6143
6144    #[test]
6145    fn test_agent_event_serialize_end() {
6146        let event = AgentEvent::End {
6147            text: "Done".to_string(),
6148            usage: TokenUsage {
6149                prompt_tokens: 100,
6150                completion_tokens: 50,
6151                total_tokens: 150,
6152                cache_read_tokens: None,
6153                cache_write_tokens: None,
6154            },
6155            meta: None,
6156        };
6157        let json = serde_json::to_string(&event).unwrap();
6158        assert!(json.contains("agent_end"));
6159    }
6160
6161    // ========================================================================
6162    // AgentResult
6163    // ========================================================================
6164
6165    #[test]
6166    fn test_agent_result_fields() {
6167        let result = AgentResult {
6168            text: "output".to_string(),
6169            messages: vec![Message::user("hello")],
6170            usage: TokenUsage::default(),
6171            tool_calls_count: 3,
6172        };
6173        assert_eq!(result.text, "output");
6174        assert_eq!(result.messages.len(), 1);
6175        assert_eq!(result.tool_calls_count, 3);
6176    }
6177
6178    // ========================================================================
6179    // Missing AgentEvent serialization tests
6180    // ========================================================================
6181
6182    #[test]
6183    fn test_agent_event_serialize_context_resolving() {
6184        let event = AgentEvent::ContextResolving {
6185            providers: vec!["provider1".to_string(), "provider2".to_string()],
6186        };
6187        let json = serde_json::to_string(&event).unwrap();
6188        assert!(json.contains("context_resolving"));
6189        assert!(json.contains("provider1"));
6190    }
6191
6192    #[test]
6193    fn test_agent_event_serialize_context_resolved() {
6194        let event = AgentEvent::ContextResolved {
6195            total_items: 5,
6196            total_tokens: 1000,
6197        };
6198        let json = serde_json::to_string(&event).unwrap();
6199        assert!(json.contains("context_resolved"));
6200        assert!(json.contains("1000"));
6201    }
6202
6203    #[test]
6204    fn test_agent_event_serialize_command_dead_lettered() {
6205        let event = AgentEvent::CommandDeadLettered {
6206            command_id: "cmd-1".to_string(),
6207            command_type: "bash".to_string(),
6208            lane: "execute".to_string(),
6209            error: "timeout".to_string(),
6210            attempts: 3,
6211        };
6212        let json = serde_json::to_string(&event).unwrap();
6213        assert!(json.contains("command_dead_lettered"));
6214        assert!(json.contains("cmd-1"));
6215    }
6216
6217    #[test]
6218    fn test_agent_event_serialize_command_retry() {
6219        let event = AgentEvent::CommandRetry {
6220            command_id: "cmd-2".to_string(),
6221            command_type: "read".to_string(),
6222            lane: "query".to_string(),
6223            attempt: 2,
6224            delay_ms: 1000,
6225        };
6226        let json = serde_json::to_string(&event).unwrap();
6227        assert!(json.contains("command_retry"));
6228        assert!(json.contains("cmd-2"));
6229    }
6230
6231    #[test]
6232    fn test_agent_event_serialize_queue_alert() {
6233        let event = AgentEvent::QueueAlert {
6234            level: "warning".to_string(),
6235            alert_type: "depth".to_string(),
6236            message: "Queue depth exceeded".to_string(),
6237        };
6238        let json = serde_json::to_string(&event).unwrap();
6239        assert!(json.contains("queue_alert"));
6240        assert!(json.contains("warning"));
6241    }
6242
6243    #[test]
6244    fn test_agent_event_serialize_task_updated() {
6245        let event = AgentEvent::TaskUpdated {
6246            session_id: "sess-1".to_string(),
6247            tasks: vec![],
6248        };
6249        let json = serde_json::to_string(&event).unwrap();
6250        assert!(json.contains("task_updated"));
6251        assert!(json.contains("sess-1"));
6252    }
6253
6254    #[test]
6255    fn test_agent_event_serialize_memory_stored() {
6256        let event = AgentEvent::MemoryStored {
6257            memory_id: "mem-1".to_string(),
6258            memory_type: "conversation".to_string(),
6259            importance: 0.8,
6260            tags: vec!["important".to_string()],
6261        };
6262        let json = serde_json::to_string(&event).unwrap();
6263        assert!(json.contains("memory_stored"));
6264        assert!(json.contains("mem-1"));
6265    }
6266
6267    #[test]
6268    fn test_agent_event_serialize_memory_recalled() {
6269        let event = AgentEvent::MemoryRecalled {
6270            memory_id: "mem-2".to_string(),
6271            content: "Previous conversation".to_string(),
6272            relevance: 0.9,
6273        };
6274        let json = serde_json::to_string(&event).unwrap();
6275        assert!(json.contains("memory_recalled"));
6276        assert!(json.contains("mem-2"));
6277    }
6278
6279    #[test]
6280    fn test_agent_event_serialize_memories_searched() {
6281        let event = AgentEvent::MemoriesSearched {
6282            query: Some("search term".to_string()),
6283            tags: vec!["tag1".to_string()],
6284            result_count: 5,
6285        };
6286        let json = serde_json::to_string(&event).unwrap();
6287        assert!(json.contains("memories_searched"));
6288        assert!(json.contains("search term"));
6289    }
6290
6291    #[test]
6292    fn test_agent_event_serialize_memory_cleared() {
6293        let event = AgentEvent::MemoryCleared {
6294            tier: "short_term".to_string(),
6295            count: 10,
6296        };
6297        let json = serde_json::to_string(&event).unwrap();
6298        assert!(json.contains("memory_cleared"));
6299        assert!(json.contains("short_term"));
6300    }
6301
6302    #[test]
6303    fn test_agent_event_serialize_subagent_start() {
6304        let event = AgentEvent::SubagentStart {
6305            task_id: "task-1".to_string(),
6306            session_id: "child-sess".to_string(),
6307            parent_session_id: "parent-sess".to_string(),
6308            agent: "explore".to_string(),
6309            description: "Explore codebase".to_string(),
6310        };
6311        let json = serde_json::to_string(&event).unwrap();
6312        assert!(json.contains("subagent_start"));
6313        assert!(json.contains("explore"));
6314    }
6315
6316    #[test]
6317    fn test_agent_event_serialize_subagent_progress() {
6318        let event = AgentEvent::SubagentProgress {
6319            task_id: "task-1".to_string(),
6320            session_id: "child-sess".to_string(),
6321            status: "processing".to_string(),
6322            metadata: serde_json::json!({"progress": 50}),
6323        };
6324        let json = serde_json::to_string(&event).unwrap();
6325        assert!(json.contains("subagent_progress"));
6326        assert!(json.contains("processing"));
6327    }
6328
6329    #[test]
6330    fn test_agent_event_serialize_subagent_end() {
6331        let event = AgentEvent::SubagentEnd {
6332            task_id: "task-1".to_string(),
6333            session_id: "child-sess".to_string(),
6334            agent: "explore".to_string(),
6335            output: "Found 10 files".to_string(),
6336            success: true,
6337        };
6338        let json = serde_json::to_string(&event).unwrap();
6339        assert!(json.contains("subagent_end"));
6340        assert!(json.contains("Found 10 files"));
6341    }
6342
6343    #[test]
6344    fn test_agent_event_serialize_planning_start() {
6345        let event = AgentEvent::PlanningStart {
6346            prompt: "Build a web app".to_string(),
6347        };
6348        let json = serde_json::to_string(&event).unwrap();
6349        assert!(json.contains("planning_start"));
6350        assert!(json.contains("Build a web app"));
6351    }
6352
6353    #[test]
6354    fn test_agent_event_serialize_planning_end() {
6355        use crate::planning::{Complexity, ExecutionPlan};
6356        let plan = ExecutionPlan::new("Test goal".to_string(), Complexity::Simple);
6357        let event = AgentEvent::PlanningEnd {
6358            plan,
6359            estimated_steps: 3,
6360        };
6361        let json = serde_json::to_string(&event).unwrap();
6362        assert!(json.contains("planning_end"));
6363        assert!(json.contains("estimated_steps"));
6364    }
6365
6366    #[test]
6367    fn test_agent_event_serialize_step_start() {
6368        let event = AgentEvent::StepStart {
6369            step_id: "step-1".to_string(),
6370            description: "Initialize project".to_string(),
6371            step_number: 1,
6372            total_steps: 5,
6373        };
6374        let json = serde_json::to_string(&event).unwrap();
6375        assert!(json.contains("step_start"));
6376        assert!(json.contains("Initialize project"));
6377    }
6378
6379    #[test]
6380    fn test_agent_event_serialize_step_end() {
6381        let event = AgentEvent::StepEnd {
6382            step_id: "step-1".to_string(),
6383            status: TaskStatus::Completed,
6384            step_number: 1,
6385            total_steps: 5,
6386        };
6387        let json = serde_json::to_string(&event).unwrap();
6388        assert!(json.contains("step_end"));
6389        assert!(json.contains("step-1"));
6390    }
6391
6392    #[test]
6393    fn test_agent_event_serialize_goal_extracted() {
6394        use crate::planning::AgentGoal;
6395        let goal = AgentGoal::new("Complete the task".to_string());
6396        let event = AgentEvent::GoalExtracted { goal };
6397        let json = serde_json::to_string(&event).unwrap();
6398        assert!(json.contains("goal_extracted"));
6399    }
6400
6401    #[test]
6402    fn test_agent_event_serialize_goal_progress() {
6403        let event = AgentEvent::GoalProgress {
6404            goal: "Build app".to_string(),
6405            progress: 0.5,
6406            completed_steps: 2,
6407            total_steps: 4,
6408        };
6409        let json = serde_json::to_string(&event).unwrap();
6410        assert!(json.contains("goal_progress"));
6411        assert!(json.contains("0.5"));
6412    }
6413
6414    #[test]
6415    fn test_agent_event_serialize_goal_achieved() {
6416        let event = AgentEvent::GoalAchieved {
6417            goal: "Build app".to_string(),
6418            total_steps: 4,
6419            duration_ms: 5000,
6420        };
6421        let json = serde_json::to_string(&event).unwrap();
6422        assert!(json.contains("goal_achieved"));
6423        assert!(json.contains("5000"));
6424    }
6425
6426    #[tokio::test]
6427    async fn test_extract_goal_with_json_response() {
6428        // LlmPlanner expects JSON with "description" and "success_criteria" fields
6429        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6430            r#"{"description": "Build web app", "success_criteria": ["App runs on port 3000", "Has login page"]}"#,
6431        )]));
6432        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6433        let agent = AgentLoop::new(
6434            mock_client,
6435            tool_executor,
6436            test_tool_context(),
6437            AgentConfig::default(),
6438        );
6439
6440        let goal = agent.extract_goal("Build a web app").await.unwrap();
6441        assert_eq!(goal.description, "Build web app");
6442        assert_eq!(goal.success_criteria.len(), 2);
6443        assert_eq!(goal.success_criteria[0], "App runs on port 3000");
6444    }
6445
6446    #[tokio::test]
6447    async fn test_extract_goal_fallback_on_non_json() {
6448        // Non-JSON response triggers fallback: returns the original prompt as goal
6449        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6450            "Some non-JSON response",
6451        )]));
6452        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6453        let agent = AgentLoop::new(
6454            mock_client,
6455            tool_executor,
6456            test_tool_context(),
6457            AgentConfig::default(),
6458        );
6459
6460        let goal = agent.extract_goal("Do something").await.unwrap();
6461        // Fallback uses the original prompt as description
6462        assert_eq!(goal.description, "Do something");
6463        // Fallback adds 2 generic criteria
6464        assert_eq!(goal.success_criteria.len(), 2);
6465    }
6466
6467    #[tokio::test]
6468    async fn test_check_goal_achievement_json_yes() {
6469        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6470            r#"{"achieved": true, "progress": 1.0, "remaining_criteria": []}"#,
6471        )]));
6472        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6473        let agent = AgentLoop::new(
6474            mock_client,
6475            tool_executor,
6476            test_tool_context(),
6477            AgentConfig::default(),
6478        );
6479
6480        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
6481        let achieved = agent
6482            .check_goal_achievement(&goal, "All done")
6483            .await
6484            .unwrap();
6485        assert!(achieved);
6486    }
6487
6488    #[tokio::test]
6489    async fn test_check_goal_achievement_fallback_not_done() {
6490        // Non-JSON response triggers heuristic fallback
6491        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6492            "invalid json",
6493        )]));
6494        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6495        let agent = AgentLoop::new(
6496            mock_client,
6497            tool_executor,
6498            test_tool_context(),
6499            AgentConfig::default(),
6500        );
6501
6502        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
6503        // "still working" doesn't contain "complete"/"done"/"finished"
6504        let achieved = agent
6505            .check_goal_achievement(&goal, "still working")
6506            .await
6507            .unwrap();
6508        assert!(!achieved);
6509    }
6510
6511    // ========================================================================
6512    // build_augmented_system_prompt Tests
6513    // ========================================================================
6514
6515    #[test]
6516    fn test_build_augmented_system_prompt_empty_context() {
6517        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6518        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6519        let config = AgentConfig {
6520            prompt_slots: SystemPromptSlots {
6521                extra: Some("Base prompt".to_string()),
6522                ..Default::default()
6523            },
6524            ..Default::default()
6525        };
6526        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6527
6528        let result = agent.build_augmented_system_prompt(&[]);
6529        assert!(result.unwrap().contains("Base prompt"));
6530    }
6531
6532    #[test]
6533    fn test_build_augmented_system_prompt_no_custom_slots() {
6534        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6535        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6536        let agent = AgentLoop::new(
6537            mock_client,
6538            tool_executor,
6539            test_tool_context(),
6540            AgentConfig::default(),
6541        );
6542
6543        let result = agent.build_augmented_system_prompt(&[]);
6544        // Default slots still produce the default agentic prompt
6545        assert!(result.is_some());
6546        assert!(result.unwrap().contains("Core Behaviour"));
6547    }
6548
6549    #[test]
6550    fn test_build_augmented_system_prompt_with_context_no_base() {
6551        use crate::context::{ContextItem, ContextResult, ContextType};
6552
6553        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6554        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6555        let agent = AgentLoop::new(
6556            mock_client,
6557            tool_executor,
6558            test_tool_context(),
6559            AgentConfig::default(),
6560        );
6561
6562        let context = vec![ContextResult {
6563            provider: "test".to_string(),
6564            items: vec![ContextItem::new("id1", ContextType::Resource, "Content")],
6565            total_tokens: 10,
6566            truncated: false,
6567        }];
6568
6569        let result = agent.build_augmented_system_prompt(&context);
6570        assert!(result.is_some());
6571        let text = result.unwrap();
6572        assert!(text.contains("<context"));
6573        assert!(text.contains("Content"));
6574    }
6575
6576    // ========================================================================
6577    // AgentResult Clone and Debug
6578    // ========================================================================
6579
6580    #[test]
6581    fn test_agent_result_clone() {
6582        let result = AgentResult {
6583            text: "output".to_string(),
6584            messages: vec![Message::user("hello")],
6585            usage: TokenUsage::default(),
6586            tool_calls_count: 3,
6587        };
6588        let cloned = result.clone();
6589        assert_eq!(cloned.text, result.text);
6590        assert_eq!(cloned.tool_calls_count, result.tool_calls_count);
6591    }
6592
6593    #[test]
6594    fn test_agent_result_debug() {
6595        let result = AgentResult {
6596            text: "output".to_string(),
6597            messages: vec![Message::user("hello")],
6598            usage: TokenUsage::default(),
6599            tool_calls_count: 3,
6600        };
6601        let debug = format!("{:?}", result);
6602        assert!(debug.contains("AgentResult"));
6603        assert!(debug.contains("output"));
6604    }
6605
6606    // ========================================================================
6607    // handle_post_execution_metadata Tests
6608    // ========================================================================
6609
6610    // ========================================================================
6611    // ToolCommand adapter tests
6612    // ========================================================================
6613
6614    #[tokio::test]
6615    async fn test_tool_command_command_type() {
6616        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6617        let cmd = ToolCommand {
6618            tool_executor: executor,
6619            tool_name: "read".to_string(),
6620            tool_args: serde_json::json!({"file": "test.rs"}),
6621            skill_registry: None,
6622            tool_context: test_tool_context(),
6623        };
6624        assert_eq!(cmd.command_type(), "read");
6625    }
6626
6627    #[tokio::test]
6628    async fn test_tool_command_payload() {
6629        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6630        let args = serde_json::json!({"file": "test.rs", "offset": 10});
6631        let cmd = ToolCommand {
6632            tool_executor: executor,
6633            tool_name: "read".to_string(),
6634            tool_args: args.clone(),
6635            skill_registry: None,
6636            tool_context: test_tool_context(),
6637        };
6638        assert_eq!(cmd.payload(), args);
6639    }
6640
6641    // ========================================================================
6642    // AgentLoop with queue builder tests
6643    // ========================================================================
6644
6645    #[tokio::test(flavor = "multi_thread")]
6646    async fn test_agent_loop_with_queue() {
6647        use tokio::sync::broadcast;
6648
6649        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6650            "Hello",
6651        )]));
6652        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6653        let config = AgentConfig::default();
6654
6655        let (event_tx, _) = broadcast::channel(100);
6656        let queue = SessionLaneQueue::new("test-session", SessionQueueConfig::default(), event_tx)
6657            .await
6658            .unwrap();
6659
6660        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config)
6661            .with_queue(Arc::new(queue));
6662
6663        assert!(agent.command_queue.is_some());
6664    }
6665
6666    #[tokio::test]
6667    async fn test_agent_loop_without_queue() {
6668        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6669            "Hello",
6670        )]));
6671        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6672        let config = AgentConfig::default();
6673
6674        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6675
6676        assert!(agent.command_queue.is_none());
6677    }
6678
6679    // ========================================================================
6680    // Parallel Plan Execution Tests
6681    // ========================================================================
6682
6683    #[tokio::test]
6684    async fn test_execute_plan_parallel_independent() {
6685        use crate::planning::{Complexity, ExecutionPlan, Task};
6686
6687        // 3 independent steps (no dependencies) — should all execute.
6688        // MockLlmClient needs one response per execute_loop call per step.
6689        let mock_client = Arc::new(MockLlmClient::new(vec![
6690            MockLlmClient::text_response("Step 1 done"),
6691            MockLlmClient::text_response("Step 2 done"),
6692            MockLlmClient::text_response("Step 3 done"),
6693        ]));
6694
6695        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6696        let config = AgentConfig::default();
6697        let agent = AgentLoop::new(
6698            mock_client.clone(),
6699            tool_executor,
6700            test_tool_context(),
6701            config,
6702        );
6703
6704        let mut plan = ExecutionPlan::new("Test parallel", Complexity::Simple);
6705        plan.add_step(Task::new("s1", "First step"));
6706        plan.add_step(Task::new("s2", "Second step"));
6707        plan.add_step(Task::new("s3", "Third step"));
6708
6709        let (tx, mut rx) = mpsc::channel(100);
6710        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
6711
6712        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
6713        assert_eq!(result.usage.total_tokens, 45);
6714
6715        // Verify we received StepStart and StepEnd events for all 3 steps
6716        let mut step_starts = Vec::new();
6717        let mut step_ends = Vec::new();
6718        rx.close();
6719        while let Some(event) = rx.recv().await {
6720            match event {
6721                AgentEvent::StepStart { step_id, .. } => step_starts.push(step_id),
6722                AgentEvent::StepEnd {
6723                    step_id, status, ..
6724                } => {
6725                    assert_eq!(status, TaskStatus::Completed);
6726                    step_ends.push(step_id);
6727                }
6728                _ => {}
6729            }
6730        }
6731        assert_eq!(step_starts.len(), 3);
6732        assert_eq!(step_ends.len(), 3);
6733    }
6734
6735    #[tokio::test]
6736    async fn test_execute_plan_respects_dependencies() {
6737        use crate::planning::{Complexity, ExecutionPlan, Task};
6738
6739        // s1 and s2 are independent (wave 1), s3 depends on both (wave 2).
6740        // This requires 3 responses total.
6741        let mock_client = Arc::new(MockLlmClient::new(vec![
6742            MockLlmClient::text_response("Step 1 done"),
6743            MockLlmClient::text_response("Step 2 done"),
6744            MockLlmClient::text_response("Step 3 done"),
6745        ]));
6746
6747        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6748        let config = AgentConfig::default();
6749        let agent = AgentLoop::new(
6750            mock_client.clone(),
6751            tool_executor,
6752            test_tool_context(),
6753            config,
6754        );
6755
6756        let mut plan = ExecutionPlan::new("Test deps", Complexity::Medium);
6757        plan.add_step(Task::new("s1", "Independent A"));
6758        plan.add_step(Task::new("s2", "Independent B"));
6759        plan.add_step(
6760            Task::new("s3", "Depends on A+B")
6761                .with_dependencies(vec!["s1".to_string(), "s2".to_string()]),
6762        );
6763
6764        let (tx, mut rx) = mpsc::channel(100);
6765        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
6766
6767        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
6768        assert_eq!(result.usage.total_tokens, 45);
6769
6770        // Verify ordering: s3's StepStart must come after s1 and s2's StepEnd
6771        let mut events = Vec::new();
6772        rx.close();
6773        while let Some(event) = rx.recv().await {
6774            match &event {
6775                AgentEvent::StepStart { step_id, .. } => {
6776                    events.push(format!("start:{}", step_id));
6777                }
6778                AgentEvent::StepEnd { step_id, .. } => {
6779                    events.push(format!("end:{}", step_id));
6780                }
6781                _ => {}
6782            }
6783        }
6784
6785        // s3 start must occur after both s1 end and s2 end
6786        let s1_end = events.iter().position(|e| e == "end:s1").unwrap();
6787        let s2_end = events.iter().position(|e| e == "end:s2").unwrap();
6788        let s3_start = events.iter().position(|e| e == "start:s3").unwrap();
6789        assert!(
6790            s3_start > s1_end,
6791            "s3 started before s1 ended: {:?}",
6792            events
6793        );
6794        assert!(
6795            s3_start > s2_end,
6796            "s3 started before s2 ended: {:?}",
6797            events
6798        );
6799
6800        // Final result should reflect step 3 (last sequential step)
6801        assert!(result.text.contains("Step 3 done") || !result.text.is_empty());
6802    }
6803
    #[tokio::test]
    async fn test_execute_plan_handles_step_failure() {
        use crate::planning::{Complexity, ExecutionPlan, Task};

        // Plan layout (by dependency wave):
        //   wave 1: s1, s3 — independent, run in parallel, both succeed
        //   wave 2: s2 — depends on s1; fails because the mock has no
        //           response left for it ("No more mock responses")
        //   wave 3: s4 — depends on s2; must never start, since its
        //           dependency FAILED rather than completed
        let mock_client = Arc::new(MockLlmClient::new(vec![
            // Wave 1: s1 and s3 execute in parallel
            MockLlmClient::text_response("s1 done"),
            MockLlmClient::text_response("s3 done"),
            // Wave 2: s2 executes — deliberately no response provided,
            // so its execute_loop errors out and the step fails.
        ]));

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let config = AgentConfig::default();
        let agent = AgentLoop::new(
            mock_client.clone(),
            tool_executor,
            test_tool_context(),
            config,
        );

        let mut plan = ExecutionPlan::new("Test failure", Complexity::Medium);
        plan.add_step(Task::new("s1", "Independent step"));
        plan.add_step(Task::new("s2", "Depends on s1").with_dependencies(vec!["s1".to_string()]));
        plan.add_step(Task::new("s3", "Another independent"));
        plan.add_step(Task::new("s4", "Depends on s2").with_dependencies(vec!["s2".to_string()]));

        let (tx, mut rx) = mpsc::channel(100);
        let _result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();

        // Collect per-step outcomes from the buffered event stream.
        // Closing the receiver first lets the recv loop drain what was
        // already sent and then terminate instead of waiting forever.
        let mut completed_steps = Vec::new();
        let mut failed_steps = Vec::new();
        rx.close();
        while let Some(event) = rx.recv().await {
            if let AgentEvent::StepEnd {
                step_id, status, ..
            } = event
            {
                match status {
                    TaskStatus::Completed => completed_steps.push(step_id),
                    TaskStatus::Failed => failed_steps.push(step_id),
                    _ => {}
                }
            }
        }

        assert!(
            completed_steps.contains(&"s1".to_string()),
            "s1 should complete"
        );
        assert!(
            completed_steps.contains(&"s3".to_string()),
            "s3 should complete"
        );
        assert!(failed_steps.contains(&"s2".to_string()), "s2 should fail");
        // s4 should NOT appear in either list — it was never started
        assert!(
            !completed_steps.contains(&"s4".to_string()),
            "s4 should not complete"
        );
        assert!(
            !failed_steps.contains(&"s4".to_string()),
            "s4 should not fail (never started)"
        );
    }
6880
6881    // ========================================================================
6882    // Phase 4: Error Recovery & Resilience Tests
6883    // ========================================================================
6884
6885    #[test]
6886    fn test_agent_config_resilience_defaults() {
6887        let config = AgentConfig::default();
6888        assert_eq!(config.max_parse_retries, 2);
6889        assert_eq!(config.tool_timeout_ms, None);
6890        assert_eq!(config.circuit_breaker_threshold, 3);
6891    }
6892
6893    /// 4.1 — Parse error recovery: bails after max_parse_retries exceeded
6894    #[tokio::test]
6895    async fn test_parse_error_recovery_bails_after_threshold() {
6896        // 3 parse errors with max_parse_retries=2: count reaches 3 > 2 → bail
6897        let mock_client = Arc::new(MockLlmClient::new(vec![
6898            MockLlmClient::tool_call_response(
6899                "c1",
6900                "bash",
6901                serde_json::json!({"__parse_error": "unexpected token at position 5"}),
6902            ),
6903            MockLlmClient::tool_call_response(
6904                "c2",
6905                "bash",
6906                serde_json::json!({"__parse_error": "missing closing brace"}),
6907            ),
6908            MockLlmClient::tool_call_response(
6909                "c3",
6910                "bash",
6911                serde_json::json!({"__parse_error": "still broken"}),
6912            ),
6913            MockLlmClient::text_response("Done"), // never reached
6914        ]));
6915
6916        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6917        let config = AgentConfig {
6918            max_parse_retries: 2,
6919            ..AgentConfig::default()
6920        };
6921        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6922        let result = agent.execute(&[], "Do something", None).await;
6923        assert!(result.is_err(), "should bail after parse error threshold");
6924        let err = result.unwrap_err().to_string();
6925        assert!(
6926            err.contains("malformed tool arguments"),
6927            "error should mention malformed tool arguments, got: {}",
6928            err
6929        );
6930    }
6931
6932    /// 4.1 — Parse error recovery: counter resets after a valid tool execution
6933    #[tokio::test]
6934    async fn test_parse_error_counter_resets_on_success() {
6935        // 2 parse errors (= max_parse_retries, not yet exceeded)
6936        // Then a valid tool call (resets counter)
6937        // Then final text — should NOT bail
6938        let mock_client = Arc::new(MockLlmClient::new(vec![
6939            MockLlmClient::tool_call_response(
6940                "c1",
6941                "bash",
6942                serde_json::json!({"__parse_error": "bad args"}),
6943            ),
6944            MockLlmClient::tool_call_response(
6945                "c2",
6946                "bash",
6947                serde_json::json!({"__parse_error": "bad args again"}),
6948            ),
6949            // Valid call — resets parse_error_count to 0
6950            MockLlmClient::tool_call_response(
6951                "c3",
6952                "bash",
6953                serde_json::json!({"command": "echo ok"}),
6954            ),
6955            MockLlmClient::text_response("All done"),
6956        ]));
6957
6958        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6959        let config = AgentConfig {
6960            max_parse_retries: 2,
6961            ..AgentConfig::default()
6962        };
6963        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6964        let result = agent.execute(&[], "Do something", None).await;
6965        assert!(
6966            result.is_ok(),
6967            "should not bail — counter reset after successful tool, got: {:?}",
6968            result.err()
6969        );
6970        assert_eq!(result.unwrap().text, "All done");
6971    }
6972
6973    /// 4.2 — Tool timeout: slow tool produces a timeout error result; session continues
6974    #[tokio::test]
6975    async fn test_tool_timeout_produces_error_result() {
6976        let mock_client = Arc::new(MockLlmClient::new(vec![
6977            MockLlmClient::tool_call_response(
6978                "t1",
6979                "bash",
6980                serde_json::json!({"command": "sleep 10"}),
6981            ),
6982            MockLlmClient::text_response("The command timed out."),
6983        ]));
6984
6985        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6986        let config = AgentConfig {
6987            // 50ms — sleep 10 will never finish
6988            tool_timeout_ms: Some(50),
6989            ..AgentConfig::default()
6990        };
6991        let agent = AgentLoop::new(
6992            mock_client.clone(),
6993            tool_executor,
6994            test_tool_context(),
6995            config,
6996        );
6997        let result = agent.execute(&[], "Run sleep", None).await;
6998        assert!(
6999            result.is_ok(),
7000            "session should continue after tool timeout: {:?}",
7001            result.err()
7002        );
7003        assert_eq!(result.unwrap().text, "The command timed out.");
7004        // LLM called twice: initial request + response after timeout error
7005        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
7006    }
7007
7008    /// 4.2 — Tool timeout: tool that finishes before the deadline succeeds normally
7009    #[tokio::test]
7010    async fn test_tool_within_timeout_succeeds() {
7011        let mock_client = Arc::new(MockLlmClient::new(vec![
7012            MockLlmClient::tool_call_response(
7013                "t1",
7014                "bash",
7015                serde_json::json!({"command": "echo fast"}),
7016            ),
7017            MockLlmClient::text_response("Command succeeded."),
7018        ]));
7019
7020        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7021        let config = AgentConfig {
7022            tool_timeout_ms: Some(5_000), // 5 s — echo completes in <100ms
7023            ..AgentConfig::default()
7024        };
7025        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
7026        let result = agent.execute(&[], "Run something fast", None).await;
7027        assert!(
7028            result.is_ok(),
7029            "fast tool should succeed: {:?}",
7030            result.err()
7031        );
7032        assert_eq!(result.unwrap().text, "Command succeeded.");
7033    }
7034
7035    /// 4.3 — Circuit breaker: retries non-streaming LLM failures up to threshold
7036    #[tokio::test]
7037    async fn test_circuit_breaker_retries_non_streaming() {
7038        // Empty response list → every call bails with "No more mock responses"
7039        // threshold=2 → tries twice, then bails with circuit-breaker message
7040        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7041
7042        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7043        let config = AgentConfig {
7044            circuit_breaker_threshold: 2,
7045            ..AgentConfig::default()
7046        };
7047        let agent = AgentLoop::new(
7048            mock_client.clone(),
7049            tool_executor,
7050            test_tool_context(),
7051            config,
7052        );
7053        let result = agent.execute(&[], "Hello", None).await;
7054        assert!(result.is_err(), "should fail when LLM always errors");
7055        let err = result.unwrap_err().to_string();
7056        assert!(
7057            err.contains("circuit breaker"),
7058            "error should mention circuit breaker, got: {}",
7059            err
7060        );
7061        assert_eq!(
7062            mock_client.call_count.load(Ordering::SeqCst),
7063            2,
7064            "should make exactly threshold=2 LLM calls"
7065        );
7066    }
7067
7068    /// 4.3 — Circuit breaker: threshold=1 bails on the very first failure
7069    #[tokio::test]
7070    async fn test_circuit_breaker_threshold_one_no_retry() {
7071        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7072
7073        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7074        let config = AgentConfig {
7075            circuit_breaker_threshold: 1,
7076            ..AgentConfig::default()
7077        };
7078        let agent = AgentLoop::new(
7079            mock_client.clone(),
7080            tool_executor,
7081            test_tool_context(),
7082            config,
7083        );
7084        let result = agent.execute(&[], "Hello", None).await;
7085        assert!(result.is_err());
7086        assert_eq!(
7087            mock_client.call_count.load(Ordering::SeqCst),
7088            1,
7089            "with threshold=1 exactly one attempt should be made"
7090        );
7091    }
7092
    /// 4.3 — Circuit breaker: succeeds when LLM recovers before hitting threshold
    #[tokio::test]
    async fn test_circuit_breaker_succeeds_if_llm_recovers() {
        // First call fails, second call succeeds; threshold=3 — recovery within threshold
        // Wrapper client: errors exactly once, then delegates to the inner mock.
        struct FailOnceThenSucceed {
            inner: MockLlmClient,
            // Flipped to true by the first `complete` call (via `swap`).
            failed_once: std::sync::atomic::AtomicBool,
            // Counts every `complete` invocation, failed or not.
            call_count: AtomicUsize,
        }

        #[async_trait::async_trait]
        impl LlmClient for FailOnceThenSucceed {
            async fn complete(
                &self,
                messages: &[Message],
                system: Option<&str>,
                tools: &[ToolDefinition],
            ) -> Result<LlmResponse> {
                self.call_count.fetch_add(1, Ordering::SeqCst);
                // `swap` returns the previous value: false only on the very
                // first call, so exactly one transient failure is produced.
                let already_failed = self
                    .failed_once
                    .swap(true, std::sync::atomic::Ordering::SeqCst);
                if !already_failed {
                    anyhow::bail!("transient network error");
                }
                self.inner.complete(messages, system, tools).await
            }

            // Streaming path is not exercised here; delegate unchanged.
            async fn complete_streaming(
                &self,
                messages: &[Message],
                system: Option<&str>,
                tools: &[ToolDefinition],
            ) -> Result<tokio::sync::mpsc::Receiver<crate::llm::StreamEvent>> {
                self.inner.complete_streaming(messages, system, tools).await
            }
        }

        let mock = Arc::new(FailOnceThenSucceed {
            inner: MockLlmClient::new(vec![MockLlmClient::text_response("Recovered!")]),
            failed_once: std::sync::atomic::AtomicBool::new(false),
            call_count: AtomicUsize::new(0),
        });

        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
        let config = AgentConfig {
            circuit_breaker_threshold: 3,
            ..AgentConfig::default()
        };
        let agent = AgentLoop::new(mock.clone(), tool_executor, test_tool_context(), config);
        let result = agent.execute(&[], "Hello", None).await;
        assert!(
            result.is_ok(),
            "should succeed when LLM recovers within threshold: {:?}",
            result.err()
        );
        assert_eq!(result.unwrap().text, "Recovered!");
        assert_eq!(
            mock.call_count.load(Ordering::SeqCst),
            2,
            "should have made exactly 2 calls (1 fail + 1 success)"
        );
    }
7156
7157    // ── Continuation detection tests ─────────────────────────────────────────
7158
7159    #[test]
7160    fn test_looks_incomplete_empty() {
7161        assert!(AgentLoop::looks_incomplete(""));
7162        assert!(AgentLoop::looks_incomplete("   "));
7163    }
7164
7165    #[test]
7166    fn test_looks_incomplete_trailing_colon() {
7167        assert!(AgentLoop::looks_incomplete("Let me check the file:"));
7168        assert!(AgentLoop::looks_incomplete("Next steps:"));
7169    }
7170
7171    #[test]
7172    fn test_looks_incomplete_ellipsis() {
7173        assert!(AgentLoop::looks_incomplete("Working on it..."));
7174        assert!(AgentLoop::looks_incomplete("Processing…"));
7175    }
7176
7177    #[test]
7178    fn test_looks_incomplete_intent_phrases() {
7179        assert!(AgentLoop::looks_incomplete(
7180            "I'll start by reading the file."
7181        ));
7182        assert!(AgentLoop::looks_incomplete(
7183            "Let me check the configuration."
7184        ));
7185        assert!(AgentLoop::looks_incomplete("I will now run the tests."));
7186        assert!(AgentLoop::looks_incomplete(
7187            "I need to update the Cargo.toml."
7188        ));
7189    }
7190
7191    #[test]
7192    fn test_looks_complete_final_answer() {
7193        // Clear final answers should NOT trigger continuation
7194        assert!(!AgentLoop::looks_incomplete(
7195            "The tests pass. All changes have been applied successfully."
7196        ));
7197        assert!(!AgentLoop::looks_incomplete(
7198            "Done. I've updated the three files and verified the build succeeds."
7199        ));
7200        assert!(!AgentLoop::looks_incomplete("42"));
7201        assert!(!AgentLoop::looks_incomplete("Yes."));
7202    }
7203
7204    #[test]
7205    fn test_looks_incomplete_multiline_complete() {
7206        let text = "Here is the summary:\n\n- Fixed the bug in agent.rs\n- All tests pass\n- Build succeeds";
7207        assert!(!AgentLoop::looks_incomplete(text));
7208    }
7209}