Skip to main content

a3s_code_core/
agent.rs

1//! Agent Loop Implementation
2//!
3//! The agent loop handles the core conversation cycle:
4//! 1. User sends a prompt
5//! 2. LLM generates a response (possibly with tool calls)
6//! 3. If tool calls present, execute them and send results back
7//! 4. Repeat until LLM returns without tool calls
8//!
//! This implements agentic behavior: the LLM can call tools on its own,
//! over as many rounds as needed, to accomplish a task.
11
12#[cfg(feature = "ahp")]
13use crate::ahp::InjectedContext;
14use crate::context::{
15    ContextAssembler, ContextAssembly, ContextItem, ContextProvider, ContextQuery, ContextResult,
16    ContextType,
17};
18use crate::hitl::ConfirmationProvider;
19use crate::hooks::{
20    ErrorType, GenerateEndEvent, GenerateStartEvent, HookEvent, HookExecutor, HookResult,
21    IntentDetectionEvent, OnErrorEvent, PostResponseEvent, PostToolUseEvent,
22    PreContextPerceptionEvent, PrePromptEvent, PreToolUseEvent, TokenUsageInfo, ToolCallInfo,
23    ToolResultData,
24};
25use crate::llm::{LlmClient, LlmResponse, Message, TokenUsage, ToolDefinition};
26use crate::permissions::{PermissionChecker, PermissionDecision};
27use crate::planning::{AgentGoal, ExecutionPlan, LlmPlanner, PreAnalysis, TaskStatus};
28use crate::prompts::{AgentStyle, PlanningMode, SystemPromptSlots};
29use crate::queue::SessionCommand;
30use crate::session_lane_queue::SessionLaneQueue;
31use crate::tools::{ToolContext, ToolExecutor, ToolStreamEvent};
32use anyhow::{Context, Result};
33use async_trait::async_trait;
34use futures::future::join_all;
35use serde::{Deserialize, Serialize};
36use serde_json::{json, Value};
37use std::sync::Arc;
38use std::time::Duration;
39use tokio::sync::mpsc;
40
/// Maximum number of tool execution rounds before stopping.
///
/// Used as the default value of `AgentConfig::max_tool_rounds`.
const MAX_TOOL_ROUNDS: usize = 50;
43
/// Result of a single parallel step execution, emitted as structured JSON
/// so the frontend can render it dynamically in the user's language.
///
/// The optional fields (`key_findings`, `error`, `data`) are left out of the
/// serialized JSON entirely when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParallelStepResult {
    /// Identifier of the step this result belongs to.
    pub step_id: String,
    /// Position of the step within its plan.
    /// NOTE(review): whether numbering starts at 0 or 1 is not visible here — confirm.
    pub step_number: u32,
    /// Outcome of the step, as a string: `"completed"` or `"failed"`.
    pub status: String, // "completed" | "failed"
    /// Brief summary of what this step did (in the LLM's language).
    pub summary: String,
    /// Optional list of key findings; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub key_findings: Option<Vec<String>>,
    /// Optional error text (presumably set for failed steps — confirm);
    /// omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// Optional free-form JSON payload; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub data: Option<Value>,
}
60
61impl ParallelStepResult {
62    /// Build the full parallel-results JSON envelope injected into history.
63    pub fn build_envelope(results: Vec<ParallelStepResult>) -> Value {
64        json!({
65            "type": "parallel_results",
66            "steps": results
67        })
68    }
69}
70
/// Internal agent loop configuration.
///
/// `Default` supplies the documented defaults below; `Debug` is implemented
/// by hand because several fields are trait objects.
#[derive(Clone)]
pub(crate) struct AgentConfig {
    /// Slot-based system prompt customization.
    ///
    /// Users can customize specific parts (role, guidelines, response style, extra)
    /// without overriding the core agentic capabilities. The default agentic core
    /// (tool usage, autonomous behavior, completion criteria) is always preserved.
    pub prompt_slots: SystemPromptSlots,
    /// Tool definitions carried by this configuration.
    ///
    /// Empty by default; per the note in `Default`, tools are provided by the
    /// `ToolExecutor`.
    pub tools: Vec<ToolDefinition>,
    /// Maximum number of tool execution rounds before stopping
    /// (default: `MAX_TOOL_ROUNDS`).
    pub max_tool_rounds: usize,
    /// Optional security provider for input taint tracking and output sanitization
    pub security_provider: Option<Arc<dyn crate::security::SecurityProvider>>,
    /// Optional permission checker for tool execution control
    pub permission_checker: Option<Arc<dyn PermissionChecker>>,
    /// Optional confirmation manager for HITL (Human-in-the-Loop)
    pub confirmation_manager: Option<Arc<dyn ConfirmationProvider>>,
    /// Context providers for augmenting prompts with external context
    pub context_providers: Vec<Arc<dyn ContextProvider>>,
    /// Planning mode — Auto (detect from message), Enabled, or Disabled.
    pub planning_mode: PlanningMode,
    /// Enable goal tracking (default: `false`).
    pub goal_tracking: bool,
    /// Optional hook engine for firing lifecycle events (PreToolUse, PostToolUse, etc.)
    pub hook_engine: Option<Arc<dyn HookExecutor>>,
    /// Optional skill registry for tool permission enforcement.
    ///
    /// Defaults to a registry created with `SkillRegistry::with_builtins()`.
    pub skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    /// Max consecutive malformed-tool-args errors before aborting (default: 2).
    ///
    /// When the LLM returns tool arguments with `__parse_error`, the error is
    /// fed back as a tool result. After this many consecutive parse errors the
    /// loop bails instead of retrying indefinitely.
    pub max_parse_retries: u32,
    /// Per-tool execution timeout in milliseconds (`None` = no timeout).
    ///
    /// When set, each tool execution is wrapped in `tokio::time::timeout`.
    /// A timeout produces an error result sent back to the LLM rather than
    /// crashing the session.
    pub tool_timeout_ms: Option<u64>,
    /// Circuit-breaker threshold: max consecutive LLM API failures before
    /// aborting (default: 3).
    ///
    /// In non-streaming mode, transient LLM failures are retried up to this
    /// many times (with short exponential backoff) before the loop bails.
    /// In streaming mode, any failure is fatal (events cannot be replayed).
    pub circuit_breaker_threshold: u32,
    /// Max consecutive identical tool signatures before aborting (default: 3).
    ///
    /// A tool signature is the exact combination of tool name + compact JSON
    /// arguments. This prevents the agent from getting stuck repeating the same
    /// tool call in a loop, for example repeatedly fetching the same URL.
    pub duplicate_tool_call_threshold: u32,
    /// Enable auto-compaction when context usage exceeds threshold
    /// (default: `false`).
    pub auto_compact: bool,
    /// Context usage percentage threshold to trigger auto-compaction (0.0 - 1.0).
    /// Default: 0.80 (80%).
    pub auto_compact_threshold: f32,
    /// Maximum context window size in tokens (used for auto-compact calculation).
    /// Default: 200_000.
    pub max_context_tokens: usize,
    /// Optional agent memory for auto-remember after tool execution and recall before prompts.
    pub memory: Option<Arc<crate::memory::AgentMemory>>,
    /// Inject a continuation message when the LLM stops calling tools before the
    /// task is complete. Enabled by default. Set to `false` to disable.
    ///
    /// When enabled, if the LLM produces a response with no tool calls but the
    /// response text looks like an intermediate step (not a final answer), the
    /// loop injects [`crate::prompts::CONTINUATION`] as a user message and
    /// continues for up to `max_continuation_turns` additional turns.
    pub continuation_enabled: bool,
    /// Maximum number of continuation injections per execution (default: 3).
    ///
    /// Prevents infinite loops when the LLM repeatedly stops without completing.
    pub max_continuation_turns: u32,
}
146
147impl std::fmt::Debug for AgentConfig {
148    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149        f.debug_struct("AgentConfig")
150            .field("prompt_slots", &self.prompt_slots)
151            .field("tools", &self.tools)
152            .field("max_tool_rounds", &self.max_tool_rounds)
153            .field("security_provider", &self.security_provider.is_some())
154            .field("permission_checker", &self.permission_checker.is_some())
155            .field("confirmation_manager", &self.confirmation_manager.is_some())
156            .field("context_providers", &self.context_providers.len())
157            .field("planning_mode", &self.planning_mode)
158            .field("goal_tracking", &self.goal_tracking)
159            .field("hook_engine", &self.hook_engine.is_some())
160            .field(
161                "skill_registry",
162                &self.skill_registry.as_ref().map(|r| r.len()),
163            )
164            .field("max_parse_retries", &self.max_parse_retries)
165            .field("tool_timeout_ms", &self.tool_timeout_ms)
166            .field("circuit_breaker_threshold", &self.circuit_breaker_threshold)
167            .field(
168                "duplicate_tool_call_threshold",
169                &self.duplicate_tool_call_threshold,
170            )
171            .field("auto_compact", &self.auto_compact)
172            .field("auto_compact_threshold", &self.auto_compact_threshold)
173            .field("max_context_tokens", &self.max_context_tokens)
174            .field("continuation_enabled", &self.continuation_enabled)
175            .field("max_continuation_turns", &self.max_continuation_turns)
176            .field("memory", &self.memory.is_some())
177            .finish()
178    }
179}
180
181impl Default for AgentConfig {
182    fn default() -> Self {
183        Self {
184            prompt_slots: SystemPromptSlots::default(),
185            tools: Vec::new(), // Tools are provided by ToolExecutor
186            max_tool_rounds: MAX_TOOL_ROUNDS,
187            security_provider: None,
188            permission_checker: None,
189            confirmation_manager: None,
190            context_providers: Vec::new(),
191            planning_mode: PlanningMode::default(),
192            goal_tracking: false,
193            hook_engine: None,
194            skill_registry: Some(Arc::new(crate::skills::SkillRegistry::with_builtins())),
195            max_parse_retries: 2,
196            tool_timeout_ms: None,
197            circuit_breaker_threshold: 3,
198            duplicate_tool_call_threshold: 3,
199            auto_compact: false,
200            auto_compact_threshold: 0.80,
201            max_context_tokens: 200_000,
202            memory: None,
203            continuation_enabled: true,
204            max_continuation_turns: 3,
205        }
206    }
207}
208
/// Events emitted during agent execution
///
/// Subscribe via [`crate::AgentSession::stream`].
/// New variants may be added in minor releases — always include a wildcard arm
/// (`_ => {}`) when matching.
///
/// Serialization: the enum is internally tagged (`#[serde(tag = "type")]`),
/// so each event is a single object whose `type` field holds the name given
/// by the variant's `#[serde(rename = "...")]` (for example `Start`
/// serializes with `"type": "agent_start"`), alongside the variant's fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum AgentEvent {
    /// Agent started processing
    #[serde(rename = "agent_start")]
    Start { prompt: String },

    /// Runtime agent style/mode selected for the current execution.
    #[serde(rename = "agent_mode_changed")]
    AgentModeChanged {
        /// Stable UI/runtime mode label, e.g. "general", "planning", "explore".
        mode: String,
        /// Canonical built-in agent name associated with this mode.
        agent: String,
        /// Human-readable explanation of the selected style.
        description: String,
    },

    /// LLM turn started
    #[serde(rename = "turn_start")]
    TurnStart { turn: usize },

    /// Text delta from streaming
    #[serde(rename = "text_delta")]
    TextDelta { text: String },

    /// Reasoning/thinking delta from streaming (for models like kimi, deepseek)
    #[serde(rename = "reasoning_delta")]
    ReasoningDelta { text: String },

    /// Tool execution started
    #[serde(rename = "tool_start")]
    ToolStart { id: String, name: String },

    /// Tool input delta from streaming (partial JSON arguments)
    #[serde(rename = "tool_input_delta")]
    ToolInputDelta { delta: String },

    /// Tool execution completed
    #[serde(rename = "tool_end")]
    ToolEnd {
        id: String,
        name: String,
        output: String,
        exit_code: i32,
        /// Optional tool metadata; omitted from the serialized event when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        metadata: Option<serde_json::Value>,
    },

    /// Intermediate tool output (streaming delta)
    #[serde(rename = "tool_output_delta")]
    ToolOutputDelta {
        id: String,
        name: String,
        delta: String,
    },

    /// LLM turn completed
    #[serde(rename = "turn_end")]
    TurnEnd { turn: usize, usage: TokenUsage },

    /// Agent completed
    #[serde(rename = "agent_end")]
    End {
        text: String,
        usage: TokenUsage,
        // NOTE(review): boxed, presumably to keep the enum's size down — confirm.
        verification_summary: Box<crate::verification::VerificationSummary>,
        /// Optional LLM response metadata; omitted from the serialized event when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        meta: Option<crate::llm::LlmResponseMeta>,
    },

    /// Error occurred
    #[serde(rename = "error")]
    Error { message: String },

    /// Tool execution requires confirmation (HITL)
    #[serde(rename = "confirmation_required")]
    ConfirmationRequired {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        timeout_ms: u64,
    },

    /// Confirmation received from user (HITL)
    #[serde(rename = "confirmation_received")]
    ConfirmationReceived {
        tool_id: String,
        approved: bool,
        reason: Option<String>,
    },

    /// Confirmation timed out (HITL)
    #[serde(rename = "confirmation_timeout")]
    ConfirmationTimeout {
        tool_id: String,
        action_taken: String, // "rejected" or "auto_approved"
    },

    /// External task pending (needs SDK processing)
    #[serde(rename = "external_task_pending")]
    ExternalTaskPending {
        task_id: String,
        session_id: String,
        lane: crate::queue::SessionLane,
        command_type: String,
        payload: serde_json::Value,
        timeout_ms: u64,
    },

    /// External task completed
    #[serde(rename = "external_task_completed")]
    ExternalTaskCompleted {
        task_id: String,
        session_id: String,
        success: bool,
    },

    /// Tool execution denied by permission policy
    #[serde(rename = "permission_denied")]
    PermissionDenied {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        reason: String,
    },

    /// Context resolution started
    #[serde(rename = "context_resolving")]
    ContextResolving { providers: Vec<String> },

    /// Context resolution completed
    #[serde(rename = "context_resolved")]
    ContextResolved {
        total_items: usize,
        total_tokens: usize,
    },

    // ========================================================================
    // a3s-lane integration events
    // ========================================================================
    /// Command moved to dead letter queue after exhausting retries
    #[serde(rename = "command_dead_lettered")]
    CommandDeadLettered {
        command_id: String,
        command_type: String,
        lane: String,
        error: String,
        attempts: u32,
    },

    /// Command retry attempt
    #[serde(rename = "command_retry")]
    CommandRetry {
        command_id: String,
        command_type: String,
        lane: String,
        attempt: u32,
        delay_ms: u64,
    },

    /// Queue alert (depth warning, latency alert, etc.)
    #[serde(rename = "queue_alert")]
    QueueAlert {
        level: String,
        alert_type: String,
        message: String,
    },

    // ========================================================================
    // Task tracking events
    // ========================================================================
    /// Task list updated
    #[serde(rename = "task_updated")]
    TaskUpdated {
        session_id: String,
        tasks: Vec<crate::planning::Task>,
    },

    // ========================================================================
    // Memory System events (Phase 3)
    // ========================================================================
    /// Memory stored
    #[serde(rename = "memory_stored")]
    MemoryStored {
        memory_id: String,
        memory_type: String,
        importance: f32,
        tags: Vec<String>,
    },

    /// Memory recalled
    #[serde(rename = "memory_recalled")]
    MemoryRecalled {
        memory_id: String,
        content: String,
        relevance: f32,
    },

    /// Memories searched
    #[serde(rename = "memories_searched")]
    MemoriesSearched {
        query: Option<String>,
        tags: Vec<String>,
        result_count: usize,
    },

    /// Memory cleared
    #[serde(rename = "memory_cleared")]
    MemoryCleared {
        tier: String, // "long_term", "short_term", "working"
        count: u64,
    },

    // ========================================================================
    // Subagent events
    // ========================================================================
    /// Subagent task started
    #[serde(rename = "subagent_start")]
    SubagentStart {
        /// Unique task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Parent session ID
        parent_session_id: String,
        /// Agent type (e.g., "explore", "general")
        agent: String,
        /// Short description of the task
        description: String,
    },

    /// Subagent task progress update
    #[serde(rename = "subagent_progress")]
    SubagentProgress {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Progress status message
        status: String,
        /// Additional metadata
        metadata: serde_json::Value,
    },

    /// Subagent task completed
    #[serde(rename = "subagent_end")]
    SubagentEnd {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Agent type
        agent: String,
        /// Task output/result
        output: String,
        /// Whether the task succeeded
        success: bool,
    },

    // ========================================================================
    // Planning and Goal Tracking Events (Phase 1)
    // ========================================================================
    /// Planning phase started
    #[serde(rename = "planning_start")]
    PlanningStart { prompt: String },

    /// Planning phase completed
    #[serde(rename = "planning_end")]
    PlanningEnd {
        plan: ExecutionPlan,
        estimated_steps: usize,
    },

    /// Step execution started
    #[serde(rename = "step_start")]
    StepStart {
        step_id: String,
        description: String,
        step_number: usize,
        total_steps: usize,
    },

    /// Step execution completed
    #[serde(rename = "step_end")]
    StepEnd {
        step_id: String,
        status: TaskStatus,
        step_number: usize,
        total_steps: usize,
    },

    /// Goal extracted from prompt
    #[serde(rename = "goal_extracted")]
    GoalExtracted { goal: AgentGoal },

    /// Goal progress update
    #[serde(rename = "goal_progress")]
    GoalProgress {
        goal: String,
        progress: f32,
        completed_steps: usize,
        total_steps: usize,
    },

    /// Goal achieved
    #[serde(rename = "goal_achieved")]
    GoalAchieved {
        goal: String,
        total_steps: usize,
        duration_ms: i64,
    },

    // ========================================================================
    // Context Compaction events
    // ========================================================================
    /// Context automatically compacted due to high usage
    #[serde(rename = "context_compacted")]
    ContextCompacted {
        session_id: String,
        before_messages: usize,
        after_messages: usize,
        percent_before: f32,
    },

    // ========================================================================
    // Persistence events
    // ========================================================================
    /// Session persistence failed — SDK clients should handle this
    #[serde(rename = "persistence_failed")]
    PersistenceFailed {
        session_id: String,
        operation: String,
        error: String,
    },

    // ========================================================================
    // Side question (btw)
    // ========================================================================
    /// Ephemeral side question answered.
    ///
    /// Emitted by [`crate::agent_api::AgentSession::btw()`] in streaming mode.
    /// The answer is never added to conversation history.
    #[serde(rename = "btw_answer")]
    BtwAnswer {
        question: String,
        answer: String,
        usage: TokenUsage,
    },
}
565
/// Result of agent execution
#[derive(Debug, Clone)]
pub struct AgentResult {
    /// Text produced by the run (presumably the final response — confirm).
    pub text: String,
    /// Messages associated with the run (presumably the conversation history — confirm).
    pub messages: Vec<Message>,
    /// Token usage reported for the run.
    pub usage: TokenUsage,
    /// Number of tool calls counted during the run.
    pub tool_calls_count: usize,
    /// Verification reports; aggregated by `verification_summary()`.
    pub verification_reports: Vec<crate::verification::VerificationReport>,
}
575
576impl AgentResult {
577    pub fn verification_summary(&self) -> crate::verification::VerificationSummary {
578        crate::verification::VerificationSummary::from_reports(&self.verification_reports)
579    }
580
581    pub fn verification_summary_text(&self) -> String {
582        crate::verification::format_verification_summary(&self.verification_summary())
583    }
584
585    pub fn has_pending_verification(&self) -> bool {
586        matches!(
587            self.verification_summary().status,
588            crate::verification::VerificationStatus::NeedsReview
589        )
590    }
591}
592
593// ============================================================================
594// ToolCommand — bridges ToolExecutor to SessionCommand for queue submission
595// ============================================================================
596
/// Adapter that implements `SessionCommand` for tool execution via the queue.
///
/// Wraps a `ToolExecutor` call so it can be submitted to `SessionLaneQueue`.
pub struct ToolCommand {
    /// Executor that actually runs the tool (via `execute_with_context`).
    tool_executor: Arc<ToolExecutor>,
    /// Name of the tool to run; also reported as the command type.
    tool_name: String,
    /// JSON arguments passed to the tool; also returned as the command payload.
    tool_args: Value,
    /// Context handed to the executor together with the name and arguments.
    tool_context: ToolContext,
    /// When set, `execute` consults the registry's instruction skills and
    /// rejects the tool if they restrict tools and none of them allows it.
    skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
}
607
608impl ToolCommand {
609    /// Create a new ToolCommand
610    pub fn new(
611        tool_executor: Arc<ToolExecutor>,
612        tool_name: String,
613        tool_args: Value,
614        tool_context: ToolContext,
615        skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
616    ) -> Self {
617        Self {
618            tool_executor,
619            tool_name,
620            tool_args,
621            tool_context,
622            skill_registry,
623        }
624    }
625}
626
627#[async_trait]
628impl SessionCommand for ToolCommand {
629    async fn execute(&self) -> Result<Value> {
630        // Check skill-based tool permissions
631        if let Some(registry) = &self.skill_registry {
632            let instruction_skills = registry.by_kind(crate::skills::SkillKind::Instruction);
633
634            // If there are instruction skills with tool restrictions, check permissions
635            let has_restrictions = instruction_skills.iter().any(|s| s.allowed_tools.is_some());
636
637            if has_restrictions {
638                let mut allowed = false;
639
640                for skill in &instruction_skills {
641                    if skill.is_tool_allowed(&self.tool_name) {
642                        allowed = true;
643                        break;
644                    }
645                }
646
647                if !allowed {
648                    return Err(anyhow::anyhow!(
649                        "Tool '{}' is not allowed by any active skill. Active skills restrict tools to their allowed-tools lists.",
650                        self.tool_name
651                    ));
652                }
653            }
654        }
655
656        // Execute the tool
657        let result = self
658            .tool_executor
659            .execute_with_context(&self.tool_name, &self.tool_args, &self.tool_context)
660            .await?;
661        Ok(serde_json::json!({
662            "output": result.output,
663            "exit_code": result.exit_code,
664            "metadata": result.metadata,
665        }))
666    }
667
668    fn command_type(&self) -> &str {
669        &self.tool_name
670    }
671
672    fn payload(&self) -> Value {
673        self.tool_args.clone()
674    }
675}
676
677// ============================================================================
678// AgentLoop
679// ============================================================================
680
/// Internal agent loop executor.
///
/// Cloning is cheap for the shared parts: the LLM client, tool executor and
/// optional queue are held behind `Arc`.
#[derive(Clone)]
pub(crate) struct AgentLoop {
    /// LLM client used by the loop.
    llm_client: Arc<dyn LlmClient>,
    /// Executor used to run tools.
    tool_executor: Arc<ToolExecutor>,
    /// Context passed along with tool executions.
    tool_context: ToolContext,
    /// Loop configuration (limits, providers, feature switches).
    config: AgentConfig,
    /// Optional lane queue for priority-based tool execution
    command_queue: Option<Arc<SessionLaneQueue>>,
}
691
692// ============================================================================
693// Intent Detection Helpers (for AHP Context Perception)
694// ============================================================================
695
/// Extract a target name from the prompt (e.g., function name, file path).
///
/// Heuristic, in priority order:
/// 1. the text between the first pair of double quotes,
/// 2. the text between the first pair of single quotes (apostrophes inside
///    words, as in `what's` or `user's`, are not treated as opening quotes),
/// 3. the text between the first pair of backticks,
/// 4. for prompts of more than two words, the first word longer than three
///    characters that is not a question/stop word (compared
///    case-insensitively, so a leading "Where" or "Find" is skipped too).
///
/// Returns an empty string when nothing matches. `_patterns` is accepted for
/// call-site compatibility and is currently unused.
fn extract_target_name_from_prompt(prompt: &str, _patterns: &[&str]) -> String {
    /// Text between the first usable `delimiter` and the next one, if both exist.
    fn first_quoted_span(text: &str, delimiter: char) -> Option<&str> {
        let mut search_from = 0;
        while let Some(offset) = text[search_from..].find(delimiter) {
            let open = search_from + offset;
            let after_open = open + delimiter.len_utf8();

            // A single quote directly after a letter/digit is an apostrophe
            // (contraction or possessive), not the start of a quoted name.
            let is_apostrophe = delimiter == '\''
                && text[..open]
                    .chars()
                    .next_back()
                    .map_or(false, char::is_alphanumeric);

            if !is_apostrophe {
                return text[after_open..]
                    .find(delimiter)
                    .map(|len| &text[after_open..after_open + len]);
            }
            search_from = after_open;
        }
        None
    }

    // Entries of three characters or fewer can never be reached because of
    // the length filter below; the list is kept as-is for readability.
    const STOP_WORDS: [&str; 7] = ["where", "what", "find", "the", "how", "is", "are"];

    for &delimiter in &['"', '\'', '`'] {
        if let Some(quoted) = first_quoted_span(prompt, delimiter) {
            return quoted.to_string();
        }
    }

    // Fall back to the first "interesting" word of a multi-word prompt.
    let words: Vec<&str> = prompt.split_whitespace().collect();
    if words.len() > 2 {
        let candidate = words.iter().find(|&&word| {
            word.len() > 3 && !STOP_WORDS.iter().any(|stop| stop.eq_ignore_ascii_case(word))
        });
        if let Some(word) = candidate {
            return word.to_string();
        }
    }

    String::new()
}
735
/// Detect the domain from prompt keywords.
///
/// The prompt is lower-cased and checked against keyword groups in a fixed
/// priority order (rust, javascript, python, go, java, docker, kubernetes,
/// database, api, security, testing); the first group that matches wins and
/// `"general"` is returned when none does.
///
/// `go` is matched as a whole word (which also covers file names such as
/// `main.go` and `go.mod`), not as a substring: a plain substring test fires
/// on unrelated words like "algorithm", "mongo" or "google" and would shadow
/// every lower-priority domain.
fn detect_domain_from_prompt(prompt: &str) -> String {
    /// `true` if any needle occurs anywhere in `haystack`.
    fn mentions_any(haystack: &str, needles: &[&str]) -> bool {
        needles.iter().any(|needle| haystack.contains(*needle))
    }

    /// `true` if `word` appears as a complete alphanumeric token.
    fn has_word(haystack: &str, word: &str) -> bool {
        haystack
            .split(|c: char| !c.is_alphanumeric())
            .any(|token| token == word)
    }

    let lower = prompt.to_lowercase();

    let domain = if mentions_any(&lower, &["rust", "cargo", ".rs"]) {
        "rust"
    } else if mentions_any(&lower, &["javascript", "typescript", "node", ".js", ".ts"]) {
        "javascript"
    } else if mentions_any(&lower, &["python", ".py"]) {
        "python"
    } else if has_word(&lower, "go") || mentions_any(&lower, &["golang", "goroutine", "gofmt"]) {
        "go"
    } else if mentions_any(&lower, &["java", ".java"]) {
        "java"
    } else if mentions_any(&lower, &["docker", "container"]) {
        "docker"
    } else if mentions_any(&lower, &["kubernetes", "k8s"]) {
        "kubernetes"
    } else if mentions_any(&lower, &["sql", "database", "postgres", "mysql"]) {
        "database"
    } else if mentions_any(&lower, &["api", "rest", "grpc"]) {
        "api"
    } else if mentions_any(&lower, &["auth", "login", "password", "token"]) {
        "security"
    } else if mentions_any(&lower, &["test", "spec", "mock"]) {
        "testing"
    } else {
        "general"
    };

    domain.to_string()
}
779
/// Result from IntentDetection harness
///
/// Consumed by `build_pre_context_perception_from_intent`, which copies
/// `detected_intent` into the event and uses `target_hints` as overrides.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntentDetectionResult {
    /// Detected intent: "locate" | "understand" | "retrieve" | "explore" | "reason" | "validate" | "compare" | "track"
    pub detected_intent: String,
    /// Confidence score 0.0 - 1.0
    pub confidence: f32,
    /// Optional target hints from the harness; omitted from the serialized
    /// output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_hints: Option<TargetHints>,
}
791
/// Target hints from IntentDetection harness
///
/// Every field is optional and is omitted from the serialized output when
/// `None`. When a hint is missing, `build_pre_context_perception_from_intent`
/// falls back to a value derived from the prompt (or `"unknown"` for the
/// target type).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TargetHints {
    /// Kind of target; falls back to `"unknown"` when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_type: Option<String>,
    /// Name of the target; falls back to a name extracted from the prompt.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_name: Option<String>,
    /// Domain of the request; falls back to keyword detection on the prompt.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub domain: Option<String>,
}
802
/// Detect language hint from prompt characters.
///
/// Scans the prompt for characters from a few script ranges and returns the
/// first language whose script is present, in this priority order:
///
/// 1. `"ja"` — Hiragana (U+3040–U+309F) or Katakana (U+30A0–U+30FF)
/// 2. `"zh"` — CJK Unified Ideographs (U+4E00–U+9FFF)
/// 3. `"ko"` — Hangul syllables (U+AC00–U+D7AF)
/// 4. `"ar"` — Arabic (U+0600–U+06FF)
/// 5. `"ru"` — Cyrillic (U+0400–U+04FF)
///
/// Kana is checked before the ideograph range because Japanese text
/// routinely mixes kanji (which live in the CJK ideograph block) with kana;
/// testing ideographs first would label such text as Chinese. Text made up
/// of ideographs only is still reported as `"zh"`.
///
/// Returns `None` when no character falls in any of these ranges.
fn detect_language_hint(prompt: &str) -> Option<String> {
    /// `true` if any character of `text` lies in one of the inclusive ranges.
    fn contains_any(text: &str, ranges: &[(char, char)]) -> bool {
        text.chars()
            .any(|c| ranges.iter().any(|&(lo, hi)| lo <= c && c <= hi))
    }

    // Ordered by priority: the first matching entry wins.
    const SCRIPTS: [(&str, &[(char, char)]); 5] = [
        ("ja", &[('\u{3040}', '\u{309f}'), ('\u{30a0}', '\u{30ff}')]),
        ("zh", &[('\u{4e00}', '\u{9fff}')]),
        ("ko", &[('\u{ac00}', '\u{d7af}')]),
        ("ar", &[('\u{0600}', '\u{06ff}')]),
        ("ru", &[('\u{0400}', '\u{04ff}')]),
    ];

    SCRIPTS
        .iter()
        .find(|(_, ranges)| contains_any(prompt, ranges))
        .map(|(code, _)| (*code).to_string())
}
842
843/// Build PreContextPerceptionEvent from IntentDetection result.
844fn build_pre_context_perception_from_intent(
845    result: IntentDetectionResult,
846    prompt: &str,
847    session_id: &str,
848    workspace: &str,
849) -> PreContextPerceptionEvent {
850    let target_hints = result.target_hints;
851    PreContextPerceptionEvent {
852        session_id: session_id.to_string(),
853        intent: result.detected_intent,
854        target_type: target_hints
855            .as_ref()
856            .and_then(|h| h.target_type.clone())
857            .unwrap_or_else(|| "unknown".to_string()),
858        target_name: target_hints
859            .as_ref()
860            .and_then(|h| h.target_name.clone())
861            .unwrap_or_else(|| extract_target_name_from_prompt(prompt, &[])),
862        domain: target_hints
863            .as_ref()
864            .and_then(|h| h.domain.clone())
865            .unwrap_or_else(|| detect_domain_from_prompt(prompt)),
866        query: Some(prompt.to_string()),
867        working_directory: workspace.to_string(),
868        urgency: "normal".to_string(),
869    }
870}
871
872/// Rough token estimation (~4 chars per token for English/code).
873#[cfg(feature = "ahp")]
fn estimate_tokens(text: &str) -> usize {
    // A quarter of the UTF-8 byte length, floored at one so even empty text costs a token.
    match text.len() / 4 {
        0 => 1,
        quarter => quarter,
    }
}
877
/// Wrap AHP-provided items in a [`ContextResult`] attributed to the `"ahp_harness"` provider.
///
/// Returns `None` for an empty list. Otherwise `total_tokens` is the sum of the items'
/// `token_count` values and `truncated` is `false`.
#[cfg(feature = "ahp")]
fn ahp_context_result(items: Vec<ContextItem>) -> Option<ContextResult> {
    (!items.is_empty()).then(|| {
        // Sum before `items` is moved into the result.
        let total_tokens = items.iter().map(|entry| entry.token_count).sum();
        ContextResult {
            items,
            total_tokens,
            provider: "ahp_harness".to_string(),
            truncated: false,
        }
    })
}
892
/// Flatten an AHP [`InjectedContext`] payload into per-category [`ContextResult`]s.
///
/// Categories are emitted in a fixed order: facts, file snippets, project summary,
/// knowledge, suggestions. A category that yields no items contributes nothing. Every item
/// is a `ContextType::Resource` with a freshly generated UUID and a token count taken from
/// `estimate_tokens`.
#[cfg(feature = "ahp")]
fn injected_context_to_results(injected: InjectedContext) -> Vec<ContextResult> {
    let mut collected: Vec<ContextResult> = Vec::new();

    // Facts: the reported confidence drives both trust and relevance.
    let facts: Vec<_> = injected
        .facts
        .into_iter()
        .map(|fact| {
            let tokens = estimate_tokens(&fact.content);
            ContextItem::new(
                uuid::Uuid::new_v4().to_string(),
                ContextType::Resource,
                fact.content,
            )
            .with_source(fact.source)
            .with_provenance("ahp_fact")
            .with_priority(0.75)
            .with_trust(fact.confidence)
            .with_freshness(0.85)
            .with_relevance(fact.confidence)
            .with_token_count(tokens)
        })
        .collect();
    // `Option` is iterable, so `extend` appends the result only when one was produced.
    collected.extend(ahp_context_result(facts));

    // File snippets: an absent list behaves like an empty one.
    let snippets: Vec<_> = injected
        .file_contents
        .into_iter()
        .flatten()
        .map(|file| {
            let tokens = estimate_tokens(&file.snippet);
            ContextItem::new(
                uuid::Uuid::new_v4().to_string(),
                ContextType::Resource,
                file.snippet,
            )
            .with_source(file.path)
            .with_provenance("ahp_file_snippet")
            .with_priority(0.8)
            .with_trust(0.8)
            .with_freshness(0.8)
            .with_relevance(file.relevance_score)
            .with_token_count(tokens)
        })
        .collect();
    collected.extend(ahp_context_result(snippets));

    // Project summary: one item whose text is a newline-separated description.
    if let Some(summary) = injected.project_summary {
        let mut text = format!(
            "Project: {}\n{}",
            summary.project_name, summary.structure_description
        );
        if let Some(language) = summary.language {
            text.push_str(&format!("\nLanguage: {language}"));
        }
        if let Some(key_files) = summary.key_files.filter(|files| !files.is_empty()) {
            text.push_str(&format!("\nKey files: {}", key_files.join(", ")));
        }

        let tokens = estimate_tokens(&text);
        let summary_item = ContextItem::new(
            uuid::Uuid::new_v4().to_string(),
            ContextType::Resource,
            text,
        )
        .with_source("ahp://project-summary")
        .with_provenance("ahp_project_summary")
        .with_priority(0.7)
        .with_trust(0.75)
        .with_freshness(0.8)
        .with_relevance(0.9)
        .with_token_count(tokens);
        collected.extend(ahp_context_result(vec![summary_item]));
    }

    // Knowledge entries: one item per entry.
    let knowledge: Vec<_> = injected
        .knowledge
        .into_iter()
        .flatten()
        .map(|entry| {
            let tokens = estimate_tokens(&entry);
            ContextItem::new(
                uuid::Uuid::new_v4().to_string(),
                ContextType::Resource,
                entry,
            )
            .with_source("ahp://knowledge")
            .with_provenance("ahp_knowledge")
            .with_priority(0.55)
            .with_trust(0.65)
            .with_freshness(0.6)
            .with_relevance(0.8)
            .with_token_count(tokens)
        })
        .collect();
    collected.extend(ahp_context_result(knowledge));

    // Suggestions: all of them folded into a single bulleted item.
    if let Some(suggestions) = injected.suggestions.filter(|items| !items.is_empty()) {
        let text = format!("Harness suggestions:\n- {}", suggestions.join("\n- "));
        let tokens = estimate_tokens(&text);
        let suggestion_item = ContextItem::new(
            uuid::Uuid::new_v4().to_string(),
            ContextType::Resource,
            text,
        )
        .with_source("ahp://suggestions")
        .with_provenance("ahp_suggestions")
        .with_priority(0.45)
        .with_trust(0.6)
        .with_freshness(0.8)
        .with_relevance(0.7)
        .with_token_count(tokens);
        collected.extend(ahp_context_result(vec![suggestion_item]));
    }

    collected
}
1020
1021impl AgentLoop {
1022    pub(crate) fn new(
1023        llm_client: Arc<dyn LlmClient>,
1024        tool_executor: Arc<ToolExecutor>,
1025        tool_context: ToolContext,
1026        config: AgentConfig,
1027    ) -> Self {
1028        Self {
1029            llm_client,
1030            tool_executor,
1031            tool_context,
1032            config,
1033            command_queue: None,
1034        }
1035    }
1036
1037    /// Set the lane queue for priority-based tool execution.
1038    ///
1039    /// When set, tools are routed through the lane queue which supports
1040    /// External task handling for multi-machine parallel processing.
1041    pub fn with_queue(mut self, queue: Arc<SessionLaneQueue>) -> Self {
1042        self.command_queue = Some(queue);
1043        self
1044    }
1045
1046    /// Track a tool call result in telemetry.
1047    fn track_tool_result(&self, tool_name: &str, args: &serde_json::Value, exit_code: i32) {
1048        let _ = (tool_name, args, exit_code);
1049    }
1050
1051    /// Execute a tool, applying the configured timeout if set.
1052    ///
1053    /// On timeout, returns an error describing which tool timed out and after
1054    /// how many milliseconds. The caller converts this to a tool-result error
1055    /// message that is fed back to the LLM.
1056    async fn execute_tool_timed(
1057        &self,
1058        name: &str,
1059        args: &serde_json::Value,
1060        ctx: &ToolContext,
1061    ) -> anyhow::Result<crate::tools::ToolResult> {
1062        let fut = self.tool_executor.execute_with_context(name, args, ctx);
1063        if let Some(timeout_ms) = self.config.tool_timeout_ms {
1064            match tokio::time::timeout(Duration::from_millis(timeout_ms), fut).await {
1065                Ok(result) => result,
1066                Err(_) => Err(anyhow::anyhow!(
1067                    "Tool '{}' timed out after {}ms",
1068                    name,
1069                    timeout_ms
1070                )),
1071            }
1072        } else {
1073            fut.await
1074        }
1075    }
1076
1077    /// Convert a tool execution result into the (output, exit_code, is_error, metadata, images) tuple.
1078    fn tool_result_to_tuple(
1079        result: anyhow::Result<crate::tools::ToolResult>,
1080    ) -> (
1081        String,
1082        i32,
1083        bool,
1084        Option<serde_json::Value>,
1085        Vec<crate::llm::Attachment>,
1086    ) {
1087        match result {
1088            Ok(r) => (
1089                r.output,
1090                r.exit_code,
1091                r.exit_code != 0,
1092                r.metadata,
1093                r.images,
1094            ),
1095            Err(e) => {
1096                let msg = e.to_string();
1097                // Classify the error so the LLM knows whether retrying makes sense.
1098                let hint = if Self::is_transient_error(&msg) {
1099                    " [transient — you may retry this tool call]"
1100                } else {
1101                    " [permanent — do not retry without changing the arguments]"
1102                };
1103                (
1104                    format!("Tool execution error: {}{}", msg, hint),
1105                    1,
1106                    true,
1107                    None,
1108                    Vec::new(),
1109                )
1110            }
1111        }
1112    }
1113
1114    fn collect_verification_report(
1115        reports: &mut Vec<crate::verification::VerificationReport>,
1116        metadata: &Option<serde_json::Value>,
1117    ) {
1118        let Some(metadata) = metadata else {
1119            return;
1120        };
1121        let Some(report) = metadata.get("verification_report") else {
1122            return;
1123        };
1124
1125        match serde_json::from_value::<crate::verification::VerificationReport>(report.clone()) {
1126            Ok(report) => reports.push(report),
1127            Err(err) => tracing::warn!(
1128                error = %err,
1129                "Ignoring malformed verification_report tool metadata"
1130            ),
1131        }
1132    }
1133
1134    /// Inspect the workspace for well-known project marker files and return a short
1135    /// `## Project Context` section that the agent can use without any manual configuration.
1136    /// Returns an empty string when the workspace type cannot be determined.
1137    fn detect_project_hint(workspace: &std::path::Path) -> String {
1138        struct Marker {
1139            file: &'static str,
1140            lang: &'static str,
1141            tip: &'static str,
1142        }
1143
1144        let markers = [
1145            Marker {
1146                file: "Cargo.toml",
1147                lang: "Rust",
1148                tip: "Use `cargo build`, `cargo test`, `cargo clippy`, and `cargo fmt`. \
1149                  Prefer `anyhow` / `thiserror` for error handling. \
1150                  Follow the Microsoft Rust Guidelines (no panics in library code, \
1151                  async-first with Tokio).",
1152            },
1153            Marker {
1154                file: "package.json",
1155                lang: "Node.js / TypeScript",
1156                tip: "Check `package.json` for the package manager (npm/yarn/pnpm/bun) \
1157                  and available scripts. Prefer TypeScript with strict mode. \
1158                  Use ESM imports unless the project is CommonJS.",
1159            },
1160            Marker {
1161                file: "pyproject.toml",
1162                lang: "Python",
1163                tip: "Use the package manager declared in `pyproject.toml` \
1164                  (uv, poetry, hatch, etc.). Prefer type hints and async/await for I/O.",
1165            },
1166            Marker {
1167                file: "setup.py",
1168                lang: "Python",
1169                tip: "Legacy Python project. Prefer type hints and async/await for I/O.",
1170            },
1171            Marker {
1172                file: "requirements.txt",
1173                lang: "Python",
1174                tip: "Python project with pip-style dependencies. \
1175                  Prefer type hints and async/await for I/O.",
1176            },
1177            Marker {
1178                file: "go.mod",
1179                lang: "Go",
1180                tip: "Use `go build ./...` and `go test ./...`. \
1181                  Follow standard Go project layout. Use `gofmt` for formatting.",
1182            },
1183            Marker {
1184                file: "pom.xml",
1185                lang: "Java / Maven",
1186                tip: "Use `mvn compile`, `mvn test`, `mvn package`. \
1187                  Follow standard Maven project structure.",
1188            },
1189            Marker {
1190                file: "build.gradle",
1191                lang: "Java / Gradle",
1192                tip: "Use `./gradlew build` and `./gradlew test`. \
1193                  Follow standard Gradle project structure.",
1194            },
1195            Marker {
1196                file: "build.gradle.kts",
1197                lang: "Kotlin / Gradle",
1198                tip: "Use `./gradlew build` and `./gradlew test`. \
1199                  Prefer Kotlin coroutines for async work.",
1200            },
1201            Marker {
1202                file: "CMakeLists.txt",
1203                lang: "C / C++",
1204                tip: "Use `cmake -B build && cmake --build build`. \
1205                  Check for `compile_commands.json` for IDE tooling.",
1206            },
1207            Marker {
1208                file: "Makefile",
1209                lang: "C / C++ (or generic)",
1210                tip: "Use `make` or `make <target>`. \
1211                  Check available targets with `make help` or by reading the Makefile.",
1212            },
1213        ];
1214
1215        // Check for C# / .NET — no single fixed filename, so glob for *.csproj / *.sln
1216        let is_dotnet = workspace.join("*.csproj").exists() || {
1217            // Fast check: look for any .csproj or .sln in the workspace root
1218            std::fs::read_dir(workspace)
1219                .map(|entries| {
1220                    entries.flatten().any(|e| {
1221                        let name = e.file_name();
1222                        let s = name.to_string_lossy();
1223                        s.ends_with(".csproj") || s.ends_with(".sln")
1224                    })
1225                })
1226                .unwrap_or(false)
1227        };
1228
1229        if is_dotnet {
1230            return "## Project Context\n\nThis is a **C# / .NET** project. \
1231             Use `dotnet build`, `dotnet test`, and `dotnet run`. \
1232             Follow C# coding conventions and async/await patterns."
1233                .to_string();
1234        }
1235
1236        for marker in &markers {
1237            if workspace.join(marker.file).exists() {
1238                return format!(
1239                    "## Project Context\n\nThis is a **{}** project. {}",
1240                    marker.lang, marker.tip
1241                );
1242            }
1243        }
1244
1245        String::new()
1246    }
1247
1248    /// Returns `true` for errors that are likely transient (network, timeout, I/O contention).
1249    /// Used to annotate tool error messages so the LLM knows whether retrying is safe.
1250    fn is_transient_error(msg: &str) -> bool {
1251        let lower = msg.to_lowercase();
1252        lower.contains("timeout")
1253        || lower.contains("timed out")
1254        || lower.contains("connection refused")
1255        || lower.contains("connection reset")
1256        || lower.contains("broken pipe")
1257        || lower.contains("temporarily unavailable")
1258        || lower.contains("resource temporarily unavailable")
1259        || lower.contains("os error 11")  // EAGAIN
1260        || lower.contains("os error 35")  // EAGAIN on macOS
1261        || lower.contains("rate limit")
1262        || lower.contains("too many requests")
1263        || lower.contains("service unavailable")
1264        || lower.contains("network unreachable")
1265    }
1266
1267    /// Returns `true` when a tool writes a file and is safe to run concurrently with other
1268    /// independent writes (no ordering dependencies, no side-channel output).
1269    fn is_parallel_safe_write(name: &str, _args: &serde_json::Value) -> bool {
1270        matches!(
1271            name,
1272            "write_file" | "edit_file" | "create_file" | "append_to_file" | "replace_in_file"
1273        )
1274    }
1275
1276    /// Extract the target file path from write-tool arguments so we can check for conflicts.
1277    fn extract_write_path(args: &serde_json::Value) -> Option<String> {
1278        // write_file / create_file / append_to_file / replace_in_file use "path"
1279        // edit_file uses "path" as well
1280        args.get("path")
1281            .and_then(|v| v.as_str())
1282            .map(|s| s.to_string())
1283    }
1284
1285    /// Execute a tool through the lane queue (if configured) or directly.
1286    async fn execute_tool_queued_or_direct(
1287        &self,
1288        name: &str,
1289        args: &serde_json::Value,
1290        ctx: &ToolContext,
1291    ) -> anyhow::Result<crate::tools::ToolResult> {
1292        self.execute_tool_queued_or_direct_inner(name, args, ctx)
1293            .await
1294    }
1295
1296    /// Inner execution without task lifecycle wrapping.
1297    async fn execute_tool_queued_or_direct_inner(
1298        &self,
1299        name: &str,
1300        args: &serde_json::Value,
1301        ctx: &ToolContext,
1302    ) -> anyhow::Result<crate::tools::ToolResult> {
1303        if let Some(ref queue) = self.command_queue {
1304            let command = ToolCommand::new(
1305                Arc::clone(&self.tool_executor),
1306                name.to_string(),
1307                args.clone(),
1308                ctx.clone(),
1309                self.config.skill_registry.clone(),
1310            );
1311            let rx = queue.submit_by_tool(name, Box::new(command)).await;
1312            match rx.await {
1313                Ok(Ok(value)) => {
1314                    let output = value["output"]
1315                        .as_str()
1316                        .ok_or_else(|| {
1317                            anyhow::anyhow!(
1318                                "Queue result missing 'output' field for tool '{}'",
1319                                name
1320                            )
1321                        })?
1322                        .to_string();
1323                    let exit_code = value["exit_code"].as_i64().unwrap_or(0) as i32;
1324                    return Ok(crate::tools::ToolResult {
1325                        name: name.to_string(),
1326                        output,
1327                        exit_code,
1328                        metadata: None,
1329                        images: Vec::new(),
1330                    });
1331                }
1332                Ok(Err(e)) => {
1333                    tracing::warn!(
1334                        "Queue execution failed for tool '{}', falling back to direct: {}",
1335                        name,
1336                        e
1337                    );
1338                }
1339                Err(_) => {
1340                    tracing::warn!(
1341                        "Queue channel closed for tool '{}', falling back to direct",
1342                        name
1343                    );
1344                }
1345            }
1346        }
1347        self.execute_tool_timed(name, args, ctx).await
1348    }
1349
    /// Call the LLM, handling streaming vs non-streaming internally.
    ///
    /// The mode is chosen by `event_tx`: when it is `Some`, the streaming API is used and
    /// text, reasoning, tool-start and tool-input deltas are forwarded to it as they
    /// arrive; when it is `None`, the complete response is simply awaited.
    ///
    /// Tool definitions are selected per turn by the centralized tool selector.
    ///
    /// # Errors
    ///
    /// Returns `Err` on any LLM API failure, when `cancel_token` fires during streaming
    /// (`"Operation cancelled by user"`), or when the stream closes without a final
    /// response. If streaming cannot be set up and the call was not cancelled, a
    /// non-streaming completion is attempted before giving up. The circuit breaker in
    /// `execute_loop` wraps this call with retry logic for non-streaming mode.
    async fn call_llm(
        &self,
        messages: &[Message],
        system: Option<&str>,
        event_tx: &Option<mpsc::Sender<AgentEvent>>,
        cancel_token: &tokio_util::sync::CancellationToken,
    ) -> anyhow::Result<LlmResponse> {
        let tools = crate::tools::select_tools_for_messages(&self.config.tools, messages);

        if event_tx.is_some() {
            let mut stream_rx = match self
                .llm_client
                .complete_streaming(messages, system, &tools, cancel_token.clone())
                .await
            {
                Ok(rx) => rx,
                Err(stream_error) => {
                    // Do not fall back to non-streaming if cancelled — propagate cancellation
                    if cancel_token.is_cancelled() {
                        anyhow::bail!("Operation cancelled by user");
                    }
                    tracing::warn!(
                        error = %stream_error,
                        "LLM streaming setup failed; falling back to non-streaming completion"
                    );
                    // The fallback error keeps the original streaming failure in its context.
                    return self
                        .llm_client
                        .complete(messages, system, &tools)
                        .await
                        .with_context(|| {
                            format!(
                                "LLM streaming call failed ({stream_error}); non-streaming fallback also failed"
                            )
                        });
                }
            };

            // Set once the stream delivers `Done`; stays `None` if the channel closes early.
            let mut final_response: Option<LlmResponse> = None;
            loop {
                // Race cancellation against the next stream event on every iteration.
                tokio::select! {
                    _ = cancel_token.cancelled() => {
                        tracing::info!("🛑 LLM streaming cancelled by CancellationToken");
                        anyhow::bail!("Operation cancelled by user");
                    }
                    event = stream_rx.recv() => {
                        // Send failures are ignored (`.ok()`), so forwarding is best-effort.
                        match event {
                            Some(crate::llm::StreamEvent::TextDelta(text)) => {
                                if let Some(tx) = event_tx {
                                    tx.send(AgentEvent::TextDelta { text }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::ReasoningDelta(text)) => {
                                if let Some(tx) = event_tx {
                                    tx.send(AgentEvent::ReasoningDelta { text }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::ToolUseStart { id, name }) => {
                                if let Some(tx) = event_tx {
                                    tx.send(AgentEvent::ToolStart { id, name }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::ToolUseInputDelta(delta)) => {
                                if let Some(tx) = event_tx {
                                    tx.send(AgentEvent::ToolInputDelta { delta }).await.ok();
                                }
                            }
                            Some(crate::llm::StreamEvent::Done(resp)) => {
                                final_response = Some(resp);
                                break;
                            }
                            // Channel closed without `Done`: reported as an error below.
                            None => break,
                        }
                    }
                }
            }
            final_response.context("Stream ended without final response")
        } else {
            self.llm_client
                .complete(messages, system, &tools)
                .await
                .context("LLM call failed")
        }
    }
1442
1443    /// Create a tool context with streaming support.
1444    ///
1445    /// When `event_tx` is Some, spawns a forwarder task that converts
1446    /// `ToolStreamEvent::OutputDelta` into `AgentEvent::ToolOutputDelta`
1447    /// and sends them to the agent event channel.
1448    ///
1449    /// Returns the augmented `ToolContext`. The forwarder task runs until
1450    /// the tool-side sender is dropped (i.e., tool execution finishes).
1451    fn streaming_tool_context(
1452        &self,
1453        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1454        tool_id: &str,
1455        tool_name: &str,
1456    ) -> ToolContext {
1457        let mut ctx = self.tool_context.clone();
1458        if let Some(agent_tx) = event_tx {
1459            let (tool_tx, mut tool_rx) = mpsc::channel::<ToolStreamEvent>(64);
1460            ctx.event_tx = Some(tool_tx);
1461
1462            let agent_tx = agent_tx.clone();
1463            let tool_id = tool_id.to_string();
1464            let tool_name = tool_name.to_string();
1465            tokio::spawn(async move {
1466                while let Some(event) = tool_rx.recv().await {
1467                    match event {
1468                        ToolStreamEvent::OutputDelta(delta) => {
1469                            agent_tx
1470                                .send(AgentEvent::ToolOutputDelta {
1471                                    id: tool_id.clone(),
1472                                    name: tool_name.clone(),
1473                                    delta,
1474                                })
1475                                .await
1476                                .ok();
1477                        }
1478                    }
1479                }
1480            });
1481        }
1482        ctx
1483    }
1484
1485    /// Resolve context from all providers for a given prompt
1486    ///
1487    /// Returns aggregated context results from all configured providers.
1488    async fn resolve_context(&self, prompt: &str, session_id: Option<&str>) -> Vec<ContextResult> {
1489        if self.config.context_providers.is_empty() {
1490            return Vec::new();
1491        }
1492
1493        let query = ContextQuery::new(prompt).with_session_id(session_id.unwrap_or(""));
1494
1495        let futures = self
1496            .config
1497            .context_providers
1498            .iter()
1499            .map(|p| p.query(&query));
1500        let outcomes = join_all(futures).await;
1501
1502        outcomes
1503            .into_iter()
1504            .enumerate()
1505            .filter_map(|(i, r)| match r {
1506                Ok(result) if !result.is_empty() => Some(result),
1507                Ok(_) => None,
1508                Err(e) => {
1509                    tracing::warn!(
1510                        "Context provider '{}' failed: {}",
1511                        self.config.context_providers[i].name(),
1512                        e
1513                    );
1514                    None
1515                }
1516            })
1517            .collect()
1518    }
1519
1520    /// Detect whether the LLM's no-tool-call response looks like an intermediate
1521    /// step rather than a genuine final answer.
1522    ///
1523    /// Returns `true` when continuation should be injected. Heuristics:
1524    /// - Response ends with a colon or ellipsis (mid-thought)
1525    /// - Response contains phrases that signal incomplete work
1526    /// - Response is very short (< 80 chars) and doesn't look like a summary
1527    fn looks_incomplete(text: &str) -> bool {
1528        let t = text.trim();
1529        if t.is_empty() {
1530            return true;
1531        }
1532        // Very short responses that aren't clearly a final answer
1533        if t.len() < 80 && !t.contains('\n') {
1534            // Short single-line — could be a genuine one-liner answer, keep going
1535            // only if it ends with punctuation that signals continuation
1536            let ends_continuation =
1537                t.ends_with(':') || t.ends_with("...") || t.ends_with('…') || t.ends_with(',');
1538            if ends_continuation {
1539                return true;
1540            }
1541        }
1542        // Phrases that signal the LLM is describing what it will do rather than doing it
1543        let incomplete_phrases = [
1544            "i'll ",
1545            "i will ",
1546            "let me ",
1547            "i need to ",
1548            "i should ",
1549            "next, i",
1550            "first, i",
1551            "now i",
1552            "i'll start",
1553            "i'll begin",
1554            "i'll now",
1555            "let's start",
1556            "let's begin",
1557            "to do this",
1558            "i'm going to",
1559        ];
1560        let lower = t.to_lowercase();
1561        for phrase in &incomplete_phrases {
1562            if lower.contains(phrase) {
1563                return true;
1564            }
1565        }
1566        false
1567    }
1568
1569    /// Get the assembled system prompt from slots.
1570    #[allow(dead_code)]
1571    fn system_prompt(&self) -> String {
1572        self.config.prompt_slots.build()
1573    }
1574
1575    /// Get the assembled system prompt from slots with an explicit style.
1576    fn system_prompt_for_style(&self, style: AgentStyle) -> String {
1577        let mut slots = self.config.prompt_slots.clone();
1578        slots.style = Some(style);
1579        slots.build()
1580    }
1581
1582    async fn resolve_effective_style(&self, prompt: &str) -> AgentStyle {
1583        if let Some(style) = self.config.prompt_slots.style {
1584            return style;
1585        }
1586
1587        let (style, confidence) = AgentStyle::detect_with_confidence(prompt);
1588        tracing::debug!(
1589            intent.classification = ?style,
1590            intent.confidence = ?confidence,
1591            intent.source = "local",
1592            "Intent classified locally"
1593        );
1594        style
1595    }
1596
1597    /// Detect if context perception is needed based on user prompt.
1598    ///
1599    /// Returns `Some(PreContextPerceptionEvent)` if the prompt suggests the model
1600    /// needs workspace knowledge (finding files, understanding code, etc.).
1601    pub fn detect_context_perception_intent(
1602        &self,
1603        prompt: &str,
1604        session_id: &str,
1605        workspace: &str,
1606    ) -> Option<PreContextPerceptionEvent> {
1607        let lower = prompt.to_lowercase();
1608
1609        // Pattern matching for different intents that suggest context perception is needed
1610        let intents: &[(&[&str], &str)] = &[
1611            // Locate: finding files, functions, resources
1612            (
1613                &[
1614                    "where is",
1615                    "where are",
1616                    "find the file",
1617                    "find all",
1618                    "find files",
1619                    "who wrote",
1620                    "locate",
1621                    "search for",
1622                    "look for",
1623                    "search",
1624                ],
1625                "locate",
1626            ),
1627            // Understand: explaining how something works
1628            (
1629                &[
1630                    "how does",
1631                    "what does",
1632                    "explain",
1633                    "understand",
1634                    "what is this",
1635                    "how does this work",
1636                ],
1637                "understand",
1638            ),
1639            // Retrieve: recalling from memory/past
1640            (
1641                &[
1642                    "remember",
1643                    "earlier",
1644                    "before",
1645                    "previously",
1646                    "last time",
1647                    "past",
1648                    "previous",
1649                ],
1650                "retrieve",
1651            ),
1652            // Explore: understanding structure
1653            (
1654                &[
1655                    "how is organized",
1656                    "project structure",
1657                    "what files",
1658                    "show me the structure",
1659                    "explore",
1660                ],
1661                "explore",
1662            ),
1663            // Reason: asking why/causality
1664            (
1665                &[
1666                    "why did",
1667                    "why is",
1668                    "cause",
1669                    "reason",
1670                    "what happened",
1671                    "why does",
1672                ],
1673                "reason",
1674            ),
1675            // Validate: checking correctness
1676            (
1677                &["is this correct", "verify", "validate", "check if", "debug"],
1678                "validate",
1679            ),
1680            // Compare: comparing things
1681            (
1682                &[
1683                    "difference between",
1684                    "compare",
1685                    "versus",
1686                    " vs ",
1687                    "different from",
1688                ],
1689                "compare",
1690            ),
1691            // Track: status/history
1692            (
1693                &[
1694                    "status",
1695                    "progress",
1696                    "how far",
1697                    "history",
1698                    "what's the current",
1699                ],
1700                "track",
1701            ),
1702        ];
1703
1704        // Detect target type from keywords
1705        let target_type = if lower.contains("function") || lower.contains("method") {
1706            "function"
1707        } else if lower.contains("file") || lower.contains("config") {
1708            "file"
1709        } else if lower.contains("class") {
1710            "entity"
1711        } else if lower.contains("module") || lower.contains("package") {
1712            "module"
1713        } else if lower.contains("test") {
1714            "test"
1715        } else {
1716            "unknown"
1717        };
1718
1719        // Find matching intent
1720        let matched_intent = intents
1721            .iter()
1722            .find(|(patterns, _)| patterns.iter().any(|p| lower.contains(p)));
1723
1724        matched_intent.map(|(patterns, intent)| {
1725            // Extract target name if possible (simplified extraction)
1726            let target_name = extract_target_name_from_prompt(prompt, patterns);
1727
1728            PreContextPerceptionEvent {
1729                session_id: session_id.to_string(),
1730                intent: intent.to_string(),
1731                target_type: target_type.to_string(),
1732                target_name,
1733                domain: detect_domain_from_prompt(prompt),
1734                query: Some(prompt.to_string()),
1735                working_directory: workspace.to_string(),
1736                urgency: "normal".to_string(),
1737            }
1738        })
1739    }
1740
1741    /// Fire PreContextPerception hook and wait for harness decision.
1742    async fn fire_pre_context_perception(&self, event: &PreContextPerceptionEvent) -> HookResult {
1743        if let Some(he) = &self.config.hook_engine {
1744            let hook_event = HookEvent::PreContextPerception(event.clone());
1745            he.fire(&hook_event).await
1746        } else {
1747            HookResult::continue_()
1748        }
1749    }
1750
1751    /// Fire IntentDetection hook and wait for harness decision.
1752    ///
1753    /// This is called on every prompt to detect user intent via the AHP harness.
1754    /// Returns the detected intent if the harness provides one, or None if blocked/failed.
1755    async fn fire_intent_detection(
1756        &self,
1757        prompt: &str,
1758        session_id: &str,
1759        workspace: &str,
1760    ) -> Option<IntentDetectionResult> {
1761        let event = IntentDetectionEvent {
1762            session_id: session_id.to_string(),
1763            prompt: prompt.to_string(),
1764            workspace: workspace.to_string(),
1765            language_hint: detect_language_hint(prompt),
1766        };
1767
1768        let hook_result = if let Some(he) = &self.config.hook_engine {
1769            let hook_event = HookEvent::IntentDetection(event);
1770            he.fire(&hook_event).await
1771        } else {
1772            return None;
1773        };
1774
1775        match hook_result {
1776            HookResult::Continue(Some(modified)) => {
1777                // Parse the intent detection result
1778                serde_json::from_value::<IntentDetectionResult>(modified).ok()
1779            }
1780            HookResult::Block(_) => {
1781                // Harness blocked intent detection - use fallback
1782                tracing::info!("AHP harness blocked intent detection");
1783                None
1784            }
1785            _ => None,
1786        }
1787    }
1788
/// Convert context injected by an AHP harness decision into context results.
///
/// Thin wrapper that forwards `injected` to `injected_context_to_results`.
/// Only compiled when the `ahp` feature is enabled.
#[cfg(feature = "ahp")]
fn apply_injected_context(&self, injected: InjectedContext) -> Vec<ContextResult> {
    injected_context_to_results(injected)
}
1794
1795    /// Build augmented system prompt with context
1796    #[allow(dead_code)]
1797    fn build_augmented_system_prompt(&self, context_results: &[ContextResult]) -> Option<String> {
1798        let base = self.system_prompt();
1799        let context_assembly = self.assemble_context_results(context_results);
1800        self.build_augmented_system_prompt_with_base(&base, &context_assembly)
1801    }
1802
1803    fn assemble_context_results(&self, context_results: &[ContextResult]) -> ContextAssembly {
1804        let mut results = context_results.to_vec();
1805
1806        if self.config.prompt_slots.guidelines.is_none() {
1807            let project_hint = Self::detect_project_hint(&self.tool_context.workspace);
1808            if !project_hint.is_empty() {
1809                let token_count = project_hint.split_whitespace().count().max(1);
1810                let mut result = ContextResult::new("project_hint");
1811                result.add_item(
1812                    ContextItem::new("project_hint", ContextType::Resource, project_hint)
1813                        .with_source("a3s://project-hint")
1814                        .with_provenance("workspace_marker")
1815                        .with_priority(0.65)
1816                        .with_trust(0.8)
1817                        .with_freshness(1.0)
1818                        .with_relevance(0.9)
1819                        .with_token_count(token_count),
1820                );
1821                results.push(result);
1822            }
1823        }
1824
1825        ContextAssembler::with_default_budget().assemble(&results)
1826    }
1827
1828    fn build_augmented_system_prompt_with_base(
1829        &self,
1830        base: &str,
1831        context_assembly: &ContextAssembly,
1832    ) -> Option<String> {
1833        let base = base.to_string();
1834
1835        // MCP tool definitions are selected per turn by ToolSelector. Keep the
1836        // system prompt small instead of listing every external tool here.
1837        let has_mcp_tools = self
1838            .tool_executor
1839            .definitions()
1840            .iter()
1841            .any(|t| t.name.starts_with("mcp__"));
1842
1843        let mcp_section = if has_mcp_tools {
1844            "## MCP Tools\n\nExternal MCP tools are available on demand when relevant to the current request.".to_string()
1845        } else {
1846            String::new()
1847        };
1848
1849        let parts: Vec<&str> = [base.as_str(), mcp_section.as_str()]
1850            .iter()
1851            .filter(|s| !s.is_empty())
1852            .copied()
1853            .collect();
1854
1855        if context_assembly.is_empty() {
1856            return Some(parts.join("\n\n"));
1857        }
1858
1859        let context_xml = context_assembly.to_xml();
1860        Some(format!("{}\n\n{}", parts.join("\n\n"), context_xml))
1861    }
1862
1863    /// Notify providers of turn completion for memory extraction
1864    async fn notify_turn_complete(&self, session_id: &str, prompt: &str, response: &str) {
1865        let futures = self
1866            .config
1867            .context_providers
1868            .iter()
1869            .map(|p| p.on_turn_complete(session_id, prompt, response));
1870        let outcomes = join_all(futures).await;
1871
1872        for (i, result) in outcomes.into_iter().enumerate() {
1873            if let Err(e) = result {
1874                tracing::warn!(
1875                    "Context provider '{}' on_turn_complete failed: {}",
1876                    self.config.context_providers[i].name(),
1877                    e
1878                );
1879            }
1880        }
1881    }
1882
1883    /// Fire PreToolUse hook event before tool execution.
1884    /// Returns the HookResult which may block the tool call.
1885    async fn fire_pre_tool_use(
1886        &self,
1887        session_id: &str,
1888        tool_name: &str,
1889        args: &serde_json::Value,
1890        recent_tools: Vec<String>,
1891    ) -> Option<HookResult> {
1892        if let Some(he) = &self.config.hook_engine {
1893            // Convert null args to empty object so JS callbacks don't get null.input errors
1894            let safe_args = if args.is_null() {
1895                serde_json::Value::Object(Default::default())
1896            } else {
1897                args.clone()
1898            };
1899            let event = HookEvent::PreToolUse(PreToolUseEvent {
1900                session_id: session_id.to_string(),
1901                tool: tool_name.to_string(),
1902                args: safe_args,
1903                working_directory: self.tool_context.workspace.to_string_lossy().to_string(),
1904                recent_tools,
1905            });
1906            let result = he.fire(&event).await;
1907            if result.is_block() {
1908                return Some(result);
1909            }
1910        }
1911        None
1912    }
1913
1914    /// Fire PostToolUse hook event after tool execution (fire-and-forget).
1915    async fn fire_post_tool_use(
1916        &self,
1917        session_id: &str,
1918        tool_name: &str,
1919        args: &serde_json::Value,
1920        output: &str,
1921        success: bool,
1922        duration_ms: u64,
1923    ) {
1924        if let Some(he) = &self.config.hook_engine {
1925            // Convert null args to empty object so JS callbacks don't get null.input errors
1926            let safe_args = if args.is_null() {
1927                serde_json::Value::Object(Default::default())
1928            } else {
1929                args.clone()
1930            };
1931            let event = HookEvent::PostToolUse(PostToolUseEvent {
1932                session_id: session_id.to_string(),
1933                tool: tool_name.to_string(),
1934                args: safe_args,
1935                result: ToolResultData {
1936                    success,
1937                    output: output.to_string(),
1938                    exit_code: if success { Some(0) } else { Some(1) },
1939                    duration_ms,
1940                },
1941            });
1942            let he = Arc::clone(he);
1943            tokio::spawn(async move {
1944                let _ = he.fire(&event).await;
1945            });
1946        }
1947    }
1948
1949    /// Fire GenerateStart hook event before an LLM call.
1950    async fn fire_generate_start(
1951        &self,
1952        session_id: &str,
1953        prompt: &str,
1954        system_prompt: &Option<String>,
1955    ) {
1956        if let Some(he) = &self.config.hook_engine {
1957            let event = HookEvent::GenerateStart(GenerateStartEvent {
1958                session_id: session_id.to_string(),
1959                prompt: prompt.to_string(),
1960                system_prompt: system_prompt.clone(),
1961                model_provider: String::new(),
1962                model_name: String::new(),
1963                available_tools: self.config.tools.iter().map(|t| t.name.clone()).collect(),
1964            });
1965            let _ = he.fire(&event).await;
1966        }
1967    }
1968
1969    /// Fire GenerateEnd hook event after an LLM call.
1970    async fn fire_generate_end(
1971        &self,
1972        session_id: &str,
1973        prompt: &str,
1974        response: &LlmResponse,
1975        duration_ms: u64,
1976    ) {
1977        if let Some(he) = &self.config.hook_engine {
1978            let tool_calls: Vec<ToolCallInfo> = response
1979                .tool_calls()
1980                .iter()
1981                .map(|tc| {
1982                    let args = if tc.args.is_null() {
1983                        serde_json::Value::Object(Default::default())
1984                    } else {
1985                        tc.args.clone()
1986                    };
1987                    ToolCallInfo {
1988                        name: tc.name.clone(),
1989                        args,
1990                    }
1991                })
1992                .collect();
1993
1994            let event = HookEvent::GenerateEnd(GenerateEndEvent {
1995                session_id: session_id.to_string(),
1996                prompt: prompt.to_string(),
1997                response_text: response.text().to_string(),
1998                tool_calls,
1999                usage: TokenUsageInfo {
2000                    prompt_tokens: response.usage.prompt_tokens as i32,
2001                    completion_tokens: response.usage.completion_tokens as i32,
2002                    total_tokens: response.usage.total_tokens as i32,
2003                },
2004                duration_ms,
2005            });
2006            let _ = he.fire(&event).await;
2007        }
2008    }
2009
2010    /// Fire PrePrompt hook event before prompt augmentation.
2011    /// Returns optional modified prompt text from the hook.
2012    async fn fire_pre_prompt(
2013        &self,
2014        session_id: &str,
2015        prompt: &str,
2016        system_prompt: &Option<String>,
2017        message_count: usize,
2018    ) -> Option<String> {
2019        if let Some(he) = &self.config.hook_engine {
2020            let event = HookEvent::PrePrompt(PrePromptEvent {
2021                session_id: session_id.to_string(),
2022                prompt: prompt.to_string(),
2023                system_prompt: system_prompt.clone(),
2024                message_count,
2025            });
2026            let result = he.fire(&event).await;
2027            if let HookResult::Continue(Some(modified)) = result {
2028                // Extract modified prompt from hook response
2029                if let Some(new_prompt) = modified.get("prompt").and_then(|v| v.as_str()) {
2030                    return Some(new_prompt.to_string());
2031                }
2032            }
2033        }
2034        None
2035    }
2036
2037    /// Fire PostResponse hook event after the agent loop completes.
2038    async fn fire_post_response(
2039        &self,
2040        session_id: &str,
2041        response_text: &str,
2042        tool_calls_count: usize,
2043        usage: &TokenUsage,
2044        duration_ms: u64,
2045    ) {
2046        if let Some(he) = &self.config.hook_engine {
2047            let event = HookEvent::PostResponse(PostResponseEvent {
2048                session_id: session_id.to_string(),
2049                response_text: response_text.to_string(),
2050                tool_calls_count,
2051                usage: TokenUsageInfo {
2052                    prompt_tokens: usage.prompt_tokens as i32,
2053                    completion_tokens: usage.completion_tokens as i32,
2054                    total_tokens: usage.total_tokens as i32,
2055                },
2056                duration_ms,
2057            });
2058            let he = Arc::clone(he);
2059            tokio::spawn(async move {
2060                let _ = he.fire(&event).await;
2061            });
2062        }
2063    }
2064
2065    /// Fire OnError hook event when an error occurs.
2066    async fn fire_on_error(
2067        &self,
2068        session_id: &str,
2069        error_type: ErrorType,
2070        error_message: &str,
2071        context: serde_json::Value,
2072    ) {
2073        if let Some(he) = &self.config.hook_engine {
2074            let event = HookEvent::OnError(OnErrorEvent {
2075                session_id: session_id.to_string(),
2076                error_type,
2077                error_message: error_message.to_string(),
2078                context,
2079            });
2080            let he = Arc::clone(he);
2081            tokio::spawn(async move {
2082                let _ = he.fire(&event).await;
2083            });
2084        }
2085    }
2086
2087    /// Execute the agent loop for a prompt
2088    ///
2089    /// Takes the conversation history and a new user prompt.
2090    /// Returns the agent result and updated message history.
2091    /// When event_tx is provided, uses streaming LLM API for real-time text output.
2092    pub async fn execute(
2093        &self,
2094        history: &[Message],
2095        prompt: &str,
2096        event_tx: Option<mpsc::Sender<AgentEvent>>,
2097    ) -> Result<AgentResult> {
2098        self.execute_with_session(history, prompt, None, event_tx, None)
2099            .await
2100    }
2101
2102    /// Execute the agent loop with pre-built messages (user message already included).
2103    ///
2104    /// Used by `send_with_attachments` / `stream_with_attachments` where the
2105    /// user message contains multi-modal content and is already appended to
2106    /// the messages vec.
2107    pub async fn execute_from_messages(
2108        &self,
2109        messages: Vec<Message>,
2110        session_id: Option<&str>,
2111        event_tx: Option<mpsc::Sender<AgentEvent>>,
2112        cancel_token: Option<&tokio_util::sync::CancellationToken>,
2113    ) -> Result<AgentResult> {
2114        let default_token = tokio_util::sync::CancellationToken::new();
2115        let token = cancel_token.unwrap_or(&default_token);
2116        tracing::info!(
2117            a3s.session.id = session_id.unwrap_or("none"),
2118            a3s.agent.max_turns = self.config.max_tool_rounds,
2119            "a3s.agent.execute_from_messages started"
2120        );
2121
2122        // Extract the last user message text for hooks, memory recall, and events.
2123        // Pass empty prompt so execute_loop skips adding a duplicate user message,
2124        // but provide effective_prompt for hook/memory/event purposes.
2125        let effective_prompt = messages
2126            .iter()
2127            .rev()
2128            .find(|m| m.role == "user")
2129            .map(|m| m.text())
2130            .unwrap_or_default();
2131
2132        let result = self
2133            .execute_loop_inner(
2134                &messages,
2135                "",
2136                &effective_prompt,
2137                None, // no pre-computed style; resolve inside the loop
2138                session_id,
2139                event_tx,
2140                token,
2141                true, // emit_end: this is a standalone execution
2142            )
2143            .await;
2144
2145        match &result {
2146            Ok(r) => tracing::info!(
2147                a3s.agent.tool_calls_count = r.tool_calls_count,
2148                a3s.llm.total_tokens = r.usage.total_tokens,
2149                "a3s.agent.execute_from_messages completed"
2150            ),
2151            Err(e) => tracing::warn!(
2152                error = %e,
2153                "a3s.agent.execute_from_messages failed"
2154            ),
2155        }
2156
2157        result
2158    }
2159
2160    /// Execute the agent loop for a prompt with session context
2161    ///
2162    /// Takes the conversation history, user prompt, and optional session ID.
2163    /// When session_id is provided, context providers can use it for session-specific context.
2164    pub async fn execute_with_session(
2165        &self,
2166        history: &[Message],
2167        prompt: &str,
2168        session_id: Option<&str>,
2169        event_tx: Option<mpsc::Sender<AgentEvent>>,
2170        cancel_token: Option<&tokio_util::sync::CancellationToken>,
2171    ) -> Result<AgentResult> {
2172        let default_token = tokio_util::sync::CancellationToken::new();
2173        let token = cancel_token.unwrap_or(&default_token);
2174        tracing::info!(
2175            a3s.session.id = session_id.unwrap_or("none"),
2176            a3s.agent.max_turns = self.config.max_tool_rounds,
2177            "a3s.agent.execute started"
2178        );
2179
2180        // Step 1: local deterministic style detection. The default runtime path
2181        // must not spend an extra model turn just to route the request.
2182        let (keyword_style, confidence) = AgentStyle::detect_with_confidence(prompt);
2183        let effective_style = keyword_style;
2184        tracing::debug!(
2185            intent.classification = ?effective_style,
2186            intent.confidence = ?confidence,
2187            intent.source = "local",
2188            "Intent classified locally"
2189        );
2190
2191        // Step 2: pre-analysis — intent + goal + plan + input optimization in ONE LLM call.
2192        // This is only used when planning is explicitly requested or locally detected.
2193        let pre_analysis: Option<PreAnalysis> = {
2194            let needs_llm_prep = effective_style.requires_planning()
2195                || self.config.planning_mode == PlanningMode::Enabled;
2196
2197            if !needs_llm_prep {
2198                None
2199            } else {
2200                match LlmPlanner::pre_analyze(&self.llm_client.clone(), prompt).await {
2201                    Ok(analysis) => {
2202                        tracing::debug!(
2203                            intent = ?analysis.intent,
2204                            requires_planning = analysis.requires_planning,
2205                            plan_steps = analysis.execution_plan.steps.len(),
2206                            "Pre-analysis completed"
2207                        );
2208                        Some(analysis)
2209                    }
2210                    Err(e) => {
2211                        tracing::warn!(error = %e, "Pre-analysis failed, falling back to keyword intent");
2212                        None
2213                    }
2214                }
2215            }
2216        };
2217
2218        // Use pre-analysis style if available, otherwise use resolved style.
2219        let exec_style = pre_analysis
2220            .as_ref()
2221            .map(|a| &a.intent)
2222            .unwrap_or(&effective_style);
2223
2224        // Determine whether to use planning mode.
2225        // Prefer pre-analysis result if available (from the single LLM pre-analysis call).
2226        let use_planning = if let Some(ref analysis) = pre_analysis {
2227            analysis.requires_planning
2228        } else if self.config.planning_mode == PlanningMode::Auto {
2229            exec_style.requires_planning()
2230        } else {
2231            // Explicit mode: Enabled or Disabled
2232            self.config.planning_mode.should_plan(prompt)
2233        };
2234
2235        // Determine the effective prompt: use optimized input from pre-analysis if available.
2236        // Clone to avoid borrow conflict when pre_analysis is moved.
2237        let effective_prompt: String = match pre_analysis.as_ref() {
2238            Some(a) => a.optimized_input.clone(),
2239            None => prompt.to_string(),
2240        };
2241
2242        let result = if use_planning {
2243            self.execute_with_planning(history, &effective_prompt, event_tx, pre_analysis)
2244                .await
2245        } else {
2246            self.execute_loop(
2247                history,
2248                &effective_prompt,
2249                *exec_style,
2250                session_id,
2251                event_tx,
2252                token,
2253                true,
2254            )
2255            .await
2256        };
2257
2258        match &result {
2259            Ok(r) => {
2260                tracing::info!(
2261                    a3s.agent.tool_calls_count = r.tool_calls_count,
2262                    a3s.llm.total_tokens = r.usage.total_tokens,
2263                    "a3s.agent.execute completed"
2264                );
2265                // Fire PostResponse hook
2266                self.fire_post_response(
2267                    session_id.unwrap_or(""),
2268                    &r.text,
2269                    r.tool_calls_count,
2270                    &r.usage,
2271                    0, // duration tracked externally
2272                )
2273                .await;
2274            }
2275            Err(e) => {
2276                tracing::warn!(
2277                    error = %e,
2278                    "a3s.agent.execute failed"
2279                );
2280                // Fire OnError hook
2281                self.fire_on_error(
2282                    session_id.unwrap_or(""),
2283                    ErrorType::Other,
2284                    &e.to_string(),
2285                    serde_json::json!({"phase": "execute"}),
2286                )
2287                .await;
2288            }
2289        }
2290
2291        result
2292    }
2293
2294    /// Core execution loop (without planning routing).
2295    ///
2296    /// This is the inner loop that runs LLM calls and tool executions.
2297    /// Called directly by `execute_with_session` (after planning check)
2298    /// and by `execute_plan` (for individual steps, bypassing planning).
2299    #[allow(clippy::too_many_arguments)]
2300    async fn execute_loop(
2301        &self,
2302        history: &[Message],
2303        prompt: &str,
2304        effective_style: AgentStyle,
2305        session_id: Option<&str>,
2306        event_tx: Option<mpsc::Sender<AgentEvent>>,
2307        cancel_token: &tokio_util::sync::CancellationToken,
2308        emit_end: bool,
2309    ) -> Result<AgentResult> {
2310        // When called via execute_loop, the prompt is used for both
2311        // message-adding and hook/memory/event purposes.
2312        self.execute_loop_inner(
2313            history,
2314            prompt,
2315            prompt,
2316            Some(effective_style),
2317            session_id,
2318            event_tx,
2319            cancel_token,
2320            emit_end,
2321        )
2322        .await
2323    }
2324
2325    /// Inner execution loop.
2326    ///
2327    /// `msg_prompt` controls whether a user message is appended (empty = skip).
2328    /// `effective_prompt` is used for hooks, memory recall, taint tracking, and events.
2329    /// `effective_style` pre-computed style to skip redundant LLM-based intent detection.
2330    /// `emit_end` controls whether to send `AgentEvent::End` when the loop completes
2331    /// (should be false when called from `execute_plan` to avoid duplicate End events).
2332    #[allow(clippy::too_many_arguments)]
2333    async fn execute_loop_inner(
2334        &self,
2335        history: &[Message],
2336        msg_prompt: &str,
2337        effective_prompt: &str,
2338        effective_style: Option<AgentStyle>,
2339        session_id: Option<&str>,
2340        event_tx: Option<mpsc::Sender<AgentEvent>>,
2341        cancel_token: &tokio_util::sync::CancellationToken,
2342        emit_end: bool,
2343    ) -> Result<AgentResult> {
2344        let mut messages = history.to_vec();
2345        let mut total_usage = TokenUsage::default();
2346        let mut tool_calls_count = 0;
2347        let mut verification_reports = Vec::new();
2348        let mut turn = 0;
2349        // Consecutive malformed-tool-args errors (4.1 parse error recovery)
2350        let mut parse_error_count: u32 = 0;
2351        // Continuation injection counter
2352        let mut continuation_count: u32 = 0;
2353        let mut recent_tool_signatures: Vec<String> = Vec::new();
2354        let style_prompt = if effective_prompt.is_empty() {
2355            msg_prompt
2356        } else {
2357            effective_prompt
2358        };
2359        let effective_style = match effective_style {
2360            Some(s) => s,
2361            None => self.resolve_effective_style(style_prompt).await,
2362        };
2363        let effective_system_prompt = self.system_prompt_for_style(effective_style);
2364        if let Some(tx) = &event_tx {
2365            tx.send(AgentEvent::AgentModeChanged {
2366                mode: effective_style.runtime_mode().to_string(),
2367                agent: effective_style.builtin_agent_name().to_string(),
2368                description: effective_style.description().to_string(),
2369            })
2370            .await
2371            .ok();
2372        }
2373
2374        // Send start event
2375        if let Some(tx) = &event_tx {
2376            tx.send(AgentEvent::Start {
2377                prompt: effective_prompt.to_string(),
2378            })
2379            .await
2380            .ok();
2381        }
2382
2383        // Forward queue events (CommandDeadLettered, CommandRetry, QueueAlert) to event stream
2384        let _queue_forward_handle =
2385            if let (Some(ref queue), Some(ref tx)) = (&self.command_queue, &event_tx) {
2386                let mut rx = queue.subscribe();
2387                let tx = tx.clone();
2388                Some(tokio::spawn(async move {
2389                    while let Ok(event) = rx.recv().await {
2390                        if tx.send(event).await.is_err() {
2391                            break;
2392                        }
2393                    }
2394                }))
2395            } else {
2396                None
2397            };
2398
2399        // Fire PrePrompt hook (may modify the prompt)
2400        let built_system_prompt = Some(effective_system_prompt.clone());
2401        let hooked_prompt = if let Some(modified) = self
2402            .fire_pre_prompt(
2403                session_id.unwrap_or(""),
2404                effective_prompt,
2405                &built_system_prompt,
2406                messages.len(),
2407            )
2408            .await
2409        {
2410            modified
2411        } else {
2412            effective_prompt.to_string()
2413        };
2414        let effective_prompt = hooked_prompt.as_str();
2415
2416        // Taint-track the incoming prompt for sensitive data detection
2417        if let Some(ref sp) = self.config.security_provider {
2418            sp.taint_input(effective_prompt);
2419        }
2420
2421        // Resolve context from providers on first turn (before adding user message)
2422        // Intent-driven: detect context perception need first via AHP harness, then fire hook
2423        let workspace = self.tool_context.workspace.display().to_string();
2424        let session_id_str = session_id.unwrap_or("");
2425        let mut context_results = if !self.config.context_providers.is_empty() {
2426            if let Some(tx) = &event_tx {
2427                tx.send(AgentEvent::ContextResolving {
2428                    providers: self
2429                        .config
2430                        .context_providers
2431                        .iter()
2432                        .map(|p| p.name().to_string())
2433                        .collect(),
2434                })
2435                .await
2436                .ok();
2437            }
2438
2439            // Step 1: Fire IntentDetection harness point on EVERY prompt
2440            #[allow(clippy::needless_borrow)]
2441            let harness_intent = self
2442                .fire_intent_detection(effective_prompt, &session_id_str, &workspace)
2443                .await;
2444
2445            // Step 2: Build perception event from harness result, or fallback to local detection
2446            #[allow(clippy::needless_borrow)]
2447            let perception_event = if let Some(detected) = harness_intent {
2448                tracing::info!(
2449                    intent = %detected.detected_intent,
2450                    confidence = %detected.confidence,
2451                    "Intent detected from AHP harness"
2452                );
2453                Some(build_pre_context_perception_from_intent(
2454                    detected,
2455                    effective_prompt,
2456                    &session_id_str,
2457                    &workspace,
2458                ))
2459            } else {
2460                // Fallback to local keyword detection
2461                tracing::debug!("No intent from harness, using local keyword detection");
2462                self.detect_context_perception_intent(effective_prompt, &session_id_str, &workspace)
2463            };
2464
2465            if let Some(perception_event) = perception_event {
2466                // Step 3: Fire PreContextPerception hook to get harness decision
2467                tracing::info!(
2468                    intent = %perception_event.intent,
2469                    target_type = %perception_event.target_type,
2470                    "Context perception intent detected, firing AHP hook"
2471                );
2472
2473                let hook_result = self.fire_pre_context_perception(&perception_event).await;
2474
2475                match hook_result {
2476                    HookResult::Continue(Some(modified_context)) => {
2477                        // AHP harness returned injected context - parse and use it
2478                        #[cfg(feature = "ahp")]
2479                        {
2480                            if let Ok(injected) =
2481                                serde_json::from_value::<InjectedContext>(modified_context)
2482                            {
2483                                tracing::info!(
2484                                    facts = injected.facts.len(),
2485                                    "Using injected context from AHP harness"
2486                                );
2487                                self.apply_injected_context(injected)
2488                            } else {
2489                                // Fall back to normal providers if parsing fails
2490                                tracing::warn!(
2491                                    "Failed to parse injected context, falling back to providers"
2492                                );
2493                                self.resolve_context(effective_prompt, session_id).await
2494                            }
2495                        }
2496                        #[cfg(not(feature = "ahp"))]
2497                        {
2498                            // Without AHP, fall back to normal providers
2499                            let _ = modified_context; // suppress unused warning
2500                            self.resolve_context(effective_prompt, session_id).await
2501                        }
2502                    }
2503                    HookResult::Block(_) => {
2504                        // Harness blocked context injection - skip
2505                        tracing::info!("AHP harness blocked context injection");
2506                        Vec::new()
2507                    }
2508                    _ => {
2509                        // No modification or unknown result, proceed with normal providers
2510                        self.resolve_context(effective_prompt, session_id).await
2511                    }
2512                }
2513            } else {
2514                // No intent detected, proceed with normal providers
2515                self.resolve_context(effective_prompt, session_id).await
2516            }
2517        } else {
2518            Vec::new()
2519        };
2520
2521        // Recall relevant memories as structured context instead of directly
2522        // rewriting the system prompt.
2523        if let Some(ref memory) = self.config.memory {
2524            match memory.recall_similar(effective_prompt, 5).await {
2525                Ok(items) if !items.is_empty() => {
2526                    if let Some(tx) = &event_tx {
2527                        for item in &items {
2528                            tx.send(AgentEvent::MemoryRecalled {
2529                                memory_id: item.id.clone(),
2530                                content: item.content.clone(),
2531                                relevance: item.relevance_score(),
2532                            })
2533                            .await
2534                            .ok();
2535                        }
2536                        tx.send(AgentEvent::MemoriesSearched {
2537                            query: Some(effective_prompt.to_string()),
2538                            tags: Vec::new(),
2539                            result_count: items.len(),
2540                        })
2541                        .await
2542                        .ok();
2543                    }
2544                    context_results.push(crate::memory::memory_items_to_context_result(
2545                        "memory", items,
2546                    ));
2547                }
2548                Ok(_) => {}
2549                Err(e) => {
2550                    tracing::warn!(error = %e, "Failed to recall memory context");
2551                }
2552            }
2553        }
2554
2555        let context_assembly = self.assemble_context_results(&context_results);
2556
2557        // Send context resolved event
2558        if let Some(tx) = &event_tx {
2559            let total_items = context_assembly.items.len();
2560            let total_tokens = context_assembly.total_tokens;
2561
2562            tracing::info!(
2563                context_items = total_items,
2564                context_tokens = total_tokens,
2565                context_truncated = context_assembly.truncated,
2566                "Context resolution completed"
2567            );
2568
2569            tx.send(AgentEvent::ContextResolved {
2570                total_items,
2571                total_tokens,
2572            })
2573            .await
2574            .ok();
2575        }
2576
2577        let augmented_system = self
2578            .build_augmented_system_prompt_with_base(&effective_system_prompt, &context_assembly);
2579
2580        // Add user message
2581        if !msg_prompt.is_empty() {
2582            messages.push(Message::user(msg_prompt));
2583        }
2584
2585        loop {
2586            turn += 1;
2587
2588            if turn > self.config.max_tool_rounds {
2589                let error = format!("Max tool rounds ({}) exceeded", self.config.max_tool_rounds);
2590                if let Some(tx) = &event_tx {
2591                    tx.send(AgentEvent::Error {
2592                        message: error.clone(),
2593                    })
2594                    .await
2595                    .ok();
2596                }
2597                anyhow::bail!(error);
2598            }
2599
2600            // Send turn start event
2601            if let Some(tx) = &event_tx {
2602                tx.send(AgentEvent::TurnStart { turn }).await.ok();
2603            }
2604
2605            tracing::info!(
2606                turn = turn,
2607                max_turns = self.config.max_tool_rounds,
2608                "Agent turn started"
2609            );
2610
2611            // Call LLM - use streaming if we have an event channel
2612            tracing::info!(
2613                a3s.llm.streaming = event_tx.is_some(),
2614                "LLM completion started"
2615            );
2616
2617            // Fire GenerateStart hook
2618            self.fire_generate_start(
2619                session_id.unwrap_or(""),
2620                effective_prompt,
2621                &augmented_system,
2622            )
2623            .await;
2624
2625            let llm_start = std::time::Instant::now();
2626            // Circuit breaker (4.3): retry LLM calls on transient failures, up to the threshold.
2627            // Each failure increments the local `attempt` counter, which restarts at 0 every turn.
2628            // Streaming mode retries only after its first failure (events can't be replayed).
2629            let response = {
2630                let threshold = self.config.circuit_breaker_threshold.max(1);
2631                let mut attempt = 0u32;
2632                loop {
2633                    attempt += 1;
2634                    let result = self
2635                        .call_llm(
2636                            &messages,
2637                            augmented_system.as_deref(),
2638                            &event_tx,
2639                            cancel_token,
2640                        )
2641                        .await;
2642                    match result {
2643                        Ok(r) => {
2644                            break r;
2645                        }
2646                        // Never retry if cancelled
2647                        Err(e) if cancel_token.is_cancelled() => {
2648                            anyhow::bail!(e);
2649                        }
2650                        // Retry when: non-streaming under threshold, OR first streaming attempt
2651                        Err(e) if attempt < threshold && (event_tx.is_none() || attempt == 1) => {
2652                            tracing::warn!(
2653                                turn = turn,
2654                                attempt = attempt,
2655                                threshold = threshold,
2656                                error = %e,
2657                                "LLM call failed, will retry"
2658                            );
2659                            tokio::time::sleep(Duration::from_millis(100 * attempt as u64)).await;
2660                        }
2661                        // Threshold reached, or streaming call already retried once: bail
2662                        Err(e) => {
2663                            let msg = if attempt > 1 {
2664                                format!(
2665                                    "LLM circuit breaker triggered: failed after {} attempt(s): {}",
2666                                    attempt, e
2667                                )
2668                            } else {
2669                                format!("LLM call failed: {}", e)
2670                            };
2671                            tracing::error!(turn = turn, attempt = attempt, "{}", msg);
2672                            // Fire OnError hook for LLM failure
2673                            self.fire_on_error(
2674                                session_id.unwrap_or(""),
2675                                ErrorType::LlmFailure,
2676                                &msg,
2677                                serde_json::json!({"turn": turn, "attempt": attempt}),
2678                            )
2679                            .await;
2680                            if let Some(tx) = &event_tx {
2681                                tx.send(AgentEvent::Error {
2682                                    message: msg.clone(),
2683                                })
2684                                .await
2685                                .ok();
2686                            }
2687                            anyhow::bail!(msg);
2688                        }
2689                    }
2690                }
2691            };
2692
2693            // Update usage
2694            total_usage.prompt_tokens += response.usage.prompt_tokens;
2695            total_usage.completion_tokens += response.usage.completion_tokens;
2696            total_usage.total_tokens += response.usage.total_tokens;
2697
2698            // Record LLM completion telemetry
2699            let llm_duration = llm_start.elapsed();
2700            tracing::info!(
2701                turn = turn,
2702                streaming = event_tx.is_some(),
2703                prompt_tokens = response.usage.prompt_tokens,
2704                completion_tokens = response.usage.completion_tokens,
2705                total_tokens = response.usage.total_tokens,
2706                stop_reason = response.stop_reason.as_deref().unwrap_or("unknown"),
2707                duration_ms = llm_duration.as_millis() as u64,
2708                "LLM completion finished"
2709            );
2710
2711            // Fire GenerateEnd hook
2712            self.fire_generate_end(
2713                session_id.unwrap_or(""),
2714                effective_prompt,
2715                &response,
2716                llm_duration.as_millis() as u64,
2717            )
2718            .await;
2719
2720            // Record LLM usage on the llm span
2721            crate::telemetry::record_llm_usage(
2722                response.usage.prompt_tokens,
2723                response.usage.completion_tokens,
2724                response.usage.total_tokens,
2725                response.stop_reason.as_deref(),
2726            );
2727            // Log turn token usage
2728            tracing::info!(
2729                turn = turn,
2730                a3s.llm.total_tokens = response.usage.total_tokens,
2731                "Turn token usage"
2732            );
2733
2734            // Add assistant message to history
2735            messages.push(response.message.clone());
2736
2737            // Check for tool calls
2738            let tool_calls = response.tool_calls();
2739
2740            // Send turn end event
2741            if let Some(tx) = &event_tx {
2742                tx.send(AgentEvent::TurnEnd {
2743                    turn,
2744                    usage: response.usage.clone(),
2745                })
2746                .await
2747                .ok();
2748            }
2749
2750            // Auto-compact: check if context usage exceeds threshold
2751            if self.config.auto_compact {
2752                let used = response.usage.prompt_tokens;
2753                let max = self.config.max_context_tokens;
2754                let threshold = self.config.auto_compact_threshold;
2755
2756                if crate::compaction::should_auto_compact(used, max, threshold) {
2757                    let before_len = messages.len();
2758                    let percent_before = used as f32 / max as f32;
2759
2760                    tracing::info!(
2761                        used_tokens = used,
2762                        max_tokens = max,
2763                        percent = percent_before,
2764                        threshold = threshold,
2765                        "Auto-compact triggered"
2766                    );
2767
2768                    // Step 1: Prune large tool outputs first (cheap, no LLM call)
2769                    if let Some(pruned) = crate::compaction::prune_tool_outputs(&messages) {
2770                        messages = pruned;
2771                        tracing::info!("Tool output pruning applied");
2772                    }
2773
2774                    // Step 2: Full summarization using the agent's LLM client
2775                    if let Ok(Some(compacted)) = crate::compaction::compact_messages(
2776                        session_id.unwrap_or(""),
2777                        &messages,
2778                        &self.llm_client,
2779                    )
2780                    .await
2781                    {
2782                        messages = compacted;
2783                    }
2784
2785                    // Emit compaction event
2786                    if let Some(tx) = &event_tx {
2787                        tx.send(AgentEvent::ContextCompacted {
2788                            session_id: session_id.unwrap_or("").to_string(),
2789                            before_messages: before_len,
2790                            after_messages: messages.len(),
2791                            percent_before,
2792                        })
2793                        .await
2794                        .ok();
2795                    }
2796                }
2797            }
2798
2799            if tool_calls.is_empty() {
2800                // No tool calls — check if we should inject a continuation message
2801                // before treating this as a final answer.
2802                let final_text = response.text();
2803
2804                if self.config.continuation_enabled
2805                    && continuation_count < self.config.max_continuation_turns
2806                    && turn < self.config.max_tool_rounds  // never inject past the turn limit
2807                    && Self::looks_incomplete(&final_text)
2808                {
2809                    continuation_count += 1;
2810                    tracing::info!(
2811                        turn = turn,
2812                        continuation = continuation_count,
2813                        max_continuation = self.config.max_continuation_turns,
2814                        "Injecting continuation message — response looks incomplete"
2815                    );
2816                    // Inject continuation as a user message and keep looping
2817                    messages.push(Message::user(crate::prompts::CONTINUATION));
2818                    continue;
2819                }
2820
2821                // Sanitize output to redact any sensitive data before returning
2822                let final_text = if let Some(ref sp) = self.config.security_provider {
2823                    sp.sanitize_output(&final_text)
2824                } else {
2825                    final_text
2826                };
2827
2828                // Record final totals
2829                tracing::info!(
2830                    tool_calls_count = tool_calls_count,
2831                    total_prompt_tokens = total_usage.prompt_tokens,
2832                    total_completion_tokens = total_usage.completion_tokens,
2833                    total_tokens = total_usage.total_tokens,
2834                    turns = turn,
2835                    "Agent execution completed"
2836                );
2837
2838                if emit_end {
2839                    if let Some(tx) = &event_tx {
2840                        let verification_summary =
2841                            crate::verification::VerificationSummary::from_reports(
2842                                &verification_reports,
2843                            );
2844                        tx.send(AgentEvent::End {
2845                            text: final_text.clone(),
2846                            usage: total_usage.clone(),
2847                            verification_summary: Box::new(verification_summary),
2848                            meta: response.meta.clone(),
2849                        })
2850                        .await
2851                        .ok();
2852                    }
2853                }
2854
2855                // Notify context providers of turn completion for memory extraction
2856                if let Some(sid) = session_id {
2857                    self.notify_turn_complete(sid, effective_prompt, &final_text)
2858                        .await;
2859                }
2860
2861                return Ok(AgentResult {
2862                    text: final_text,
2863                    messages,
2864                    usage: total_usage,
2865                    tool_calls_count,
2866                    verification_reports,
2867                });
2868            }
2869
2870            // Execute tools: sequentially by default, concurrently on the fast path below.
2871            // Fast path: when all tool calls are independent file writes and no hooks/HITL
2872            // are configured, execute them concurrently to avoid serial I/O bottleneck.
2873            let tool_calls = if self.config.hook_engine.is_none()
2874                && self.config.confirmation_manager.is_none()
2875                && tool_calls.len() > 1
2876                && tool_calls
2877                    .iter()
2878                    .all(|tc| Self::is_parallel_safe_write(&tc.name, &tc.args))
2879                && {
2880                    // All target paths must be distinct (no write-write conflicts)
2881                    let paths: Vec<_> = tool_calls
2882                        .iter()
2883                        .filter_map(|tc| Self::extract_write_path(&tc.args))
2884                        .collect();
2885                    paths.len() == tool_calls.len()
2886                        && paths.iter().collect::<std::collections::HashSet<_>>().len()
2887                            == paths.len()
2888                } {
2889                tracing::info!(
2890                    count = tool_calls.len(),
2891                    "Parallel write batch: executing {} independent file writes concurrently",
2892                    tool_calls.len()
2893                );
2894
2895                let futures: Vec<_> = tool_calls
2896                    .iter()
2897                    .map(|tc| {
2898                        let ctx = self.tool_context.clone();
2899                        let executor = Arc::clone(&self.tool_executor);
2900                        let name = tc.name.clone();
2901                        let args = tc.args.clone();
2902                        async move { executor.execute_with_context(&name, &args, &ctx).await }
2903                    })
2904                    .collect();
2905
2906                let results = join_all(futures).await;
2907
2908                // Post-process results in original order (sequential, preserves message ordering)
2909                for (tc, result) in tool_calls.iter().zip(results) {
2910                    tool_calls_count += 1;
2911                    let (output, exit_code, is_error, metadata, images) =
2912                        Self::tool_result_to_tuple(result);
2913                    Self::collect_verification_report(&mut verification_reports, &metadata);
2914
2915                    // Track tool call in progress tracker
2916                    self.track_tool_result(&tc.name, &tc.args, exit_code);
2917
2918                    let output = if let Some(ref sp) = self.config.security_provider {
2919                        sp.sanitize_output(&output)
2920                    } else {
2921                        output
2922                    };
2923
2924                    if let Some(tx) = &event_tx {
2925                        tx.send(AgentEvent::ToolEnd {
2926                            id: tc.id.clone(),
2927                            name: tc.name.clone(),
2928                            output: output.clone(),
2929                            exit_code,
2930                            metadata,
2931                        })
2932                        .await
2933                        .ok();
2934                    }
2935
2936                    if images.is_empty() {
2937                        messages.push(Message::tool_result(&tc.id, &output, is_error));
2938                    } else {
2939                        messages.push(Message::tool_result_with_images(
2940                            &tc.id, &output, &images, is_error,
2941                        ));
2942                    }
2943                }
2944
2945                // Skip the sequential loop below
2946                continue;
2947            } else {
2948                tool_calls
2949            };
2950
2951            for tool_call in tool_calls {
2952                tool_calls_count += 1;
2953
2954                let tool_start = std::time::Instant::now();
2955
2956                tracing::info!(
2957                    tool_name = tool_call.name.as_str(),
2958                    tool_id = tool_call.id.as_str(),
2959                    "Tool execution started"
2960                );
2961
2962                // Send tool start event (only if not already sent during streaming)
2963                // In streaming mode, ToolStart is sent when we receive ToolUseStart from LLM
2964                // But we still need to send ToolEnd after execution
2965
2966                // Check for malformed tool arguments from LLM (4.1 parse error recovery)
2967                if let Some(parse_error) =
2968                    tool_call.args.get("__parse_error").and_then(|v| v.as_str())
2969                {
2970                    parse_error_count += 1;
2971                    let error_msg = format!("Error: {}", parse_error);
2972                    tracing::warn!(
2973                        tool = tool_call.name.as_str(),
2974                        parse_error_count = parse_error_count,
2975                        max_parse_retries = self.config.max_parse_retries,
2976                        "Malformed tool arguments from LLM"
2977                    );
2978
2979                    if let Some(tx) = &event_tx {
2980                        tx.send(AgentEvent::ToolEnd {
2981                            id: tool_call.id.clone(),
2982                            name: tool_call.name.clone(),
2983                            output: error_msg.clone(),
2984                            exit_code: 1,
2985                            metadata: None,
2986                        })
2987                        .await
2988                        .ok();
2989                    }
2990
2991                    messages.push(Message::tool_result(&tool_call.id, &error_msg, true));
2992
2993                    if parse_error_count > self.config.max_parse_retries {
2994                        let msg = format!(
2995                            "LLM produced malformed tool arguments {} time(s) in a row \
2996                             (max_parse_retries={}); giving up",
2997                            parse_error_count, self.config.max_parse_retries
2998                        );
2999                        tracing::error!("{}", msg);
3000                        if let Some(tx) = &event_tx {
3001                            tx.send(AgentEvent::Error {
3002                                message: msg.clone(),
3003                            })
3004                            .await
3005                            .ok();
3006                        }
3007                        anyhow::bail!(msg);
3008                    }
3009                    continue;
3010                }
3011
3012                // Tool args are valid — reset parse error counter
3013                parse_error_count = 0;
3014
3015                // Check skill-based tool permissions
3016                if let Some(ref registry) = self.config.skill_registry {
3017                    let instruction_skills =
3018                        registry.by_kind(crate::skills::SkillKind::Instruction);
3019                    let has_restrictions =
3020                        instruction_skills.iter().any(|s| s.allowed_tools.is_some());
3021                    if has_restrictions {
3022                        let allowed = instruction_skills
3023                            .iter()
3024                            .any(|s| s.is_tool_allowed(&tool_call.name));
3025                        if !allowed {
3026                            let msg = format!(
3027                                "Tool '{}' is not allowed by any active skill.",
3028                                tool_call.name
3029                            );
3030                            tracing::info!(
3031                                tool_name = tool_call.name.as_str(),
3032                                "Tool blocked by skill registry"
3033                            );
3034                            if let Some(tx) = &event_tx {
3035                                tx.send(AgentEvent::PermissionDenied {
3036                                    tool_id: tool_call.id.clone(),
3037                                    tool_name: tool_call.name.clone(),
3038                                    args: tool_call.args.clone(),
3039                                    reason: msg.clone(),
3040                                })
3041                                .await
3042                                .ok();
3043                            }
3044                            messages.push(Message::tool_result(&tool_call.id, &msg, true));
3045                            continue;
3046                        }
3047                    }
3048                }
3049
3050                // Fire PreToolUse hook (may block the tool call)
3051                if let Some(HookResult::Block(reason)) = self
3052                    .fire_pre_tool_use(
3053                        session_id.unwrap_or(""),
3054                        &tool_call.name,
3055                        &tool_call.args,
3056                        recent_tool_signatures.clone(),
3057                    )
3058                    .await
3059                {
3060                    let msg = format!("Tool '{}' blocked by hook: {}", tool_call.name, reason);
3061                    tracing::info!(
3062                        tool_name = tool_call.name.as_str(),
3063                        "Tool blocked by PreToolUse hook"
3064                    );
3065
3066                    if let Some(tx) = &event_tx {
3067                        tx.send(AgentEvent::PermissionDenied {
3068                            tool_id: tool_call.id.clone(),
3069                            tool_name: tool_call.name.clone(),
3070                            args: tool_call.args.clone(),
3071                            reason: reason.clone(),
3072                        })
3073                        .await
3074                        .ok();
3075                    }
3076
3077                    messages.push(Message::tool_result(&tool_call.id, &msg, true));
3078                    continue;
3079                }
3080
3081                // Check permission before executing tool
3082                let permission_decision = if let Some(checker) = &self.config.permission_checker {
3083                    checker.check(&tool_call.name, &tool_call.args)
3084                } else {
3085                    // No policy configured — default to Ask so HITL can still intervene
3086                    PermissionDecision::Ask
3087                };
3088
3089                let (output, exit_code, is_error, metadata, images) = match permission_decision {
3090                    PermissionDecision::Deny => {
3091                        tracing::info!(
3092                            tool_name = tool_call.name.as_str(),
3093                            permission = "deny",
3094                            "Tool permission denied"
3095                        );
3096                        // Tool execution denied by permission policy
3097                        let denial_msg = format!(
3098                            "Permission denied: Tool '{}' is blocked by permission policy.",
3099                            tool_call.name
3100                        );
3101
3102                        // Send permission denied event
3103                        if let Some(tx) = &event_tx {
3104                            tx.send(AgentEvent::PermissionDenied {
3105                                tool_id: tool_call.id.clone(),
3106                                tool_name: tool_call.name.clone(),
3107                                args: tool_call.args.clone(),
3108                                reason: "Blocked by deny rule in permission policy".to_string(),
3109                            })
3110                            .await
3111                            .ok();
3112                        }
3113
3114                        (denial_msg, 1, true, None, Vec::new())
3115                    }
3116                    PermissionDecision::Allow => {
3117                        tracing::info!(
3118                            tool_name = tool_call.name.as_str(),
3119                            permission = "allow",
3120                            "Tool permission: allow"
3121                        );
3122                        // Permission explicitly allows — execute directly, no HITL
3123                        let stream_ctx =
3124                            self.streaming_tool_context(&event_tx, &tool_call.id, &tool_call.name);
3125                        let result = self
3126                            .execute_tool_queued_or_direct(
3127                                &tool_call.name,
3128                                &tool_call.args,
3129                                &stream_ctx,
3130                            )
3131                            .await;
3132
3133                        let tuple = Self::tool_result_to_tuple(result);
3134                        // Track tool call in progress tracker
3135                        let (_, exit_code, _, _, _) = tuple;
3136                        self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
3137                        tuple
3138                    }
3139                    PermissionDecision::Ask => {
3140                        tracing::info!(
3141                            tool_name = tool_call.name.as_str(),
3142                            permission = "ask",
3143                            "Tool permission: ask"
3144                        );
3145                        // Permission says Ask — delegate to HITL confirmation manager
3146                        if let Some(cm) = &self.config.confirmation_manager {
3147                            // Check YOLO lanes: if the tool's lane is in YOLO mode, skip confirmation
3148                            if !cm.requires_confirmation(&tool_call.name).await {
3149                                let stream_ctx = self.streaming_tool_context(
3150                                    &event_tx,
3151                                    &tool_call.id,
3152                                    &tool_call.name,
3153                                );
3154                                let result = self
3155                                    .execute_tool_queued_or_direct(
3156                                        &tool_call.name,
3157                                        &tool_call.args,
3158                                        &stream_ctx,
3159                                    )
3160                                    .await;
3161
3162                                let (output, exit_code, is_error, metadata, images) =
3163                                    Self::tool_result_to_tuple(result);
3164                                Self::collect_verification_report(
3165                                    &mut verification_reports,
3166                                    &metadata,
3167                                );
3168
3169                                // Track tool call in progress tracker
3170                                self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
3171
3172                                // Add tool result to messages
3173                                if images.is_empty() {
3174                                    messages.push(Message::tool_result(
3175                                        &tool_call.id,
3176                                        &output,
3177                                        is_error,
3178                                    ));
3179                                } else {
3180                                    messages.push(Message::tool_result_with_images(
3181                                        &tool_call.id,
3182                                        &output,
3183                                        &images,
3184                                        is_error,
3185                                    ));
3186                                }
3187
3188                                // Record tool result on the tool span for early exit
3189                                let tool_duration = tool_start.elapsed();
3190                                crate::telemetry::record_tool_result(exit_code, tool_duration);
3191
3192                                // Send ToolEnd event
3193                                if let Some(tx) = &event_tx {
3194                                    tx.send(AgentEvent::ToolEnd {
3195                                        id: tool_call.id.clone(),
3196                                        name: tool_call.name.clone(),
3197                                        output: output.clone(),
3198                                        exit_code,
3199                                        metadata,
3200                                    })
3201                                    .await
3202                                    .ok();
3203                                }
3204
3205                                // Fire PostToolUse hook (fire-and-forget)
3206                                self.fire_post_tool_use(
3207                                    session_id.unwrap_or(""),
3208                                    &tool_call.name,
3209                                    &tool_call.args,
3210                                    &output,
3211                                    exit_code == 0,
3212                                    tool_duration.as_millis() as u64,
3213                                )
3214                                .await;
3215
3216                                continue; // Skip the rest, move to next tool call
3217                            }
3218
3219                            // Get timeout from policy
3220                            let policy = cm.policy().await;
3221                            let timeout_ms = policy.default_timeout_ms;
3222                            let timeout_action = policy.timeout_action;
3223
3224                            // Request confirmation (this emits ConfirmationRequired event)
3225                            let rx = cm
3226                                .request_confirmation(
3227                                    &tool_call.id,
3228                                    &tool_call.name,
3229                                    &tool_call.args,
3230                                )
3231                                .await;
3232
3233                            // Forward ConfirmationRequired to the streaming event channel
3234                            // so external consumers (e.g. SafeClaw engine) can relay it
3235                            // to the browser UI.
3236                            if let Some(tx) = &event_tx {
3237                                tx.send(AgentEvent::ConfirmationRequired {
3238                                    tool_id: tool_call.id.clone(),
3239                                    tool_name: tool_call.name.clone(),
3240                                    args: tool_call.args.clone(),
3241                                    timeout_ms,
3242                                })
3243                                .await
3244                                .ok();
3245                            }
3246
3247                            // Wait for confirmation with timeout
3248                            let confirmation_result =
3249                                tokio::time::timeout(Duration::from_millis(timeout_ms), rx).await;
3250
3251                            match confirmation_result {
3252                                Ok(Ok(response)) => {
3253                                    // Forward ConfirmationReceived
3254                                    if let Some(tx) = &event_tx {
3255                                        tx.send(AgentEvent::ConfirmationReceived {
3256                                            tool_id: tool_call.id.clone(),
3257                                            approved: response.approved,
3258                                            reason: response.reason.clone(),
3259                                        })
3260                                        .await
3261                                        .ok();
3262                                    }
3263                                    if response.approved {
3264                                        let stream_ctx = self.streaming_tool_context(
3265                                            &event_tx,
3266                                            &tool_call.id,
3267                                            &tool_call.name,
3268                                        );
3269                                        let result = self
3270                                            .execute_tool_queued_or_direct(
3271                                                &tool_call.name,
3272                                                &tool_call.args,
3273                                                &stream_ctx,
3274                                            )
3275                                            .await;
3276
3277                                        let tuple = Self::tool_result_to_tuple(result);
3278                                        // Track tool call in progress tracker
3279                                        let (_, exit_code, _, _, _) = tuple;
3280                                        self.track_tool_result(
3281                                            &tool_call.name,
3282                                            &tool_call.args,
3283                                            exit_code,
3284                                        );
3285                                        tuple
3286                                    } else {
3287                                        let rejection_msg = format!(
3288                                            "Tool '{}' execution was REJECTED by the user. Reason: {}. \
3289                                             DO NOT retry this tool call unless the user explicitly asks you to.",
3290                                            tool_call.name,
3291                                            response.reason.unwrap_or_else(|| "No reason provided".to_string())
3292                                        );
3293                                        (rejection_msg, 1, true, None, Vec::new())
3294                                    }
3295                                }
3296                                Ok(Err(_)) => {
3297                                    // Forward ConfirmationTimeout (channel closed = effectively timed out)
3298                                    if let Some(tx) = &event_tx {
3299                                        tx.send(AgentEvent::ConfirmationTimeout {
3300                                            tool_id: tool_call.id.clone(),
3301                                            action_taken: "rejected".to_string(),
3302                                        })
3303                                        .await
3304                                        .ok();
3305                                    }
3306                                    let msg = format!(
3307                                        "Tool '{}' confirmation failed: confirmation channel closed",
3308                                        tool_call.name
3309                                    );
3310                                    (msg, 1, true, None, Vec::new())
3311                                }
3312                                Err(_) => {
3313                                    cm.check_timeouts().await;
3314
3315                                    // Forward ConfirmationTimeout
3316                                    if let Some(tx) = &event_tx {
3317                                        tx.send(AgentEvent::ConfirmationTimeout {
3318                                            tool_id: tool_call.id.clone(),
3319                                            action_taken: match timeout_action {
3320                                                crate::hitl::TimeoutAction::Reject => {
3321                                                    "rejected".to_string()
3322                                                }
3323                                                crate::hitl::TimeoutAction::AutoApprove => {
3324                                                    "auto_approved".to_string()
3325                                                }
3326                                            },
3327                                        })
3328                                        .await
3329                                        .ok();
3330                                    }
3331
3332                                    match timeout_action {
3333                                        crate::hitl::TimeoutAction::Reject => {
3334                                            let msg = format!(
3335                                                "Tool '{}' execution was REJECTED: user confirmation timed out after {}ms. \
3336                                                 DO NOT retry this tool call — the user did not approve it. \
3337                                                 Inform the user that the operation requires their approval and ask them to try again.",
3338                                                tool_call.name, timeout_ms
3339                                            );
3340                                            (msg, 1, true, None, Vec::new())
3341                                        }
3342                                        crate::hitl::TimeoutAction::AutoApprove => {
3343                                            let stream_ctx = self.streaming_tool_context(
3344                                                &event_tx,
3345                                                &tool_call.id,
3346                                                &tool_call.name,
3347                                            );
3348                                            let result = self
3349                                                .execute_tool_queued_or_direct(
3350                                                    &tool_call.name,
3351                                                    &tool_call.args,
3352                                                    &stream_ctx,
3353                                                )
3354                                                .await;
3355
3356                                            let tuple = Self::tool_result_to_tuple(result);
3357                                            // Track tool call in progress tracker
3358                                            let (_, exit_code, _, _, _) = tuple;
3359                                            self.track_tool_result(
3360                                                &tool_call.name,
3361                                                &tool_call.args,
3362                                                exit_code,
3363                                            );
3364                                            tuple
3365                                        }
3366                                    }
3367                                }
3368                            }
3369                        } else {
3370                            // Ask without confirmation manager — safe deny
3371                            let msg = format!(
3372                                "Tool '{}' requires confirmation but no HITL confirmation manager is configured. \
3373                                 Configure a confirmation policy to enable tool execution.",
3374                                tool_call.name
3375                            );
3376                            tracing::warn!(
3377                                tool_name = tool_call.name.as_str(),
3378                                "Tool requires confirmation but no HITL manager configured"
3379                            );
3380                            (msg, 1, true, None, Vec::new())
3381                        }
3382                    }
3383                };
3384
3385                let tool_duration = tool_start.elapsed();
3386                crate::telemetry::record_tool_result(exit_code, tool_duration);
3387                Self::collect_verification_report(&mut verification_reports, &metadata);
3388
3389                // Sanitize tool output for sensitive data before it enters the message history
3390                let output = if let Some(ref sp) = self.config.security_provider {
3391                    sp.sanitize_output(&output)
3392                } else {
3393                    output
3394                };
3395
3396                recent_tool_signatures.push(format!(
3397                    "{}:{} => {}",
3398                    tool_call.name,
3399                    serde_json::to_string(&tool_call.args).unwrap_or_default(),
3400                    if is_error { "error" } else { "ok" }
3401                ));
3402                if recent_tool_signatures.len() > 8 {
3403                    let overflow = recent_tool_signatures.len() - 8;
3404                    recent_tool_signatures.drain(0..overflow);
3405                }
3406
3407                // Fire PostToolUse hook (fire-and-forget)
3408                self.fire_post_tool_use(
3409                    session_id.unwrap_or(""),
3410                    &tool_call.name,
3411                    &tool_call.args,
3412                    &output,
3413                    exit_code == 0,
3414                    tool_duration.as_millis() as u64,
3415                )
3416                .await;
3417
3418                // Auto-remember tool result in long-term memory
3419                if let Some(ref memory) = self.config.memory {
3420                    let tools_used = [tool_call.name.clone()];
3421                    let remember_result = if exit_code == 0 {
3422                        memory
3423                            .remember_success(effective_prompt, &tools_used, &output)
3424                            .await
3425                    } else {
3426                        memory
3427                            .remember_failure(effective_prompt, &output, &tools_used)
3428                            .await
3429                    };
3430                    match remember_result {
3431                        Ok(()) => {
3432                            if let Some(tx) = &event_tx {
3433                                let item_type = if exit_code == 0 { "success" } else { "failure" };
3434                                tx.send(AgentEvent::MemoryStored {
3435                                    memory_id: uuid::Uuid::new_v4().to_string(),
3436                                    memory_type: item_type.to_string(),
3437                                    importance: if exit_code == 0 { 0.8 } else { 0.9 },
3438                                    tags: vec![item_type.to_string(), tool_call.name.clone()],
3439                                })
3440                                .await
3441                                .ok();
3442                            }
3443                        }
3444                        Err(e) => {
3445                            tracing::warn!("Failed to store memory after tool execution: {}", e);
3446                        }
3447                    }
3448                }
3449
3450                // Send tool end event
3451                if let Some(tx) = &event_tx {
3452                    tx.send(AgentEvent::ToolEnd {
3453                        id: tool_call.id.clone(),
3454                        name: tool_call.name.clone(),
3455                        output: output.clone(),
3456                        exit_code,
3457                        metadata,
3458                    })
3459                    .await
3460                    .ok();
3461                }
3462
3463                // Add tool result to messages
3464                if images.is_empty() {
3465                    messages.push(Message::tool_result(&tool_call.id, &output, is_error));
3466                } else {
3467                    messages.push(Message::tool_result_with_images(
3468                        &tool_call.id,
3469                        &output,
3470                        &images,
3471                        is_error,
3472                    ));
3473                }
3474            }
3475        }
3476    }
3477
3478    /// Create an execution plan for a prompt
3479    ///
3480    /// Delegates to [`LlmPlanner`] for structured JSON plan generation,
3481    /// falling back to heuristic planning if the LLM call fails.
3482    pub async fn plan(&self, prompt: &str, _context: Option<&str>) -> Result<ExecutionPlan> {
3483        use crate::planning::LlmPlanner;
3484
3485        match LlmPlanner::create_plan(&self.llm_client, prompt).await {
3486            Ok(plan) => Ok(plan),
3487            Err(e) => {
3488                tracing::warn!("LLM plan creation failed, using fallback: {}", e);
3489                Ok(LlmPlanner::fallback_plan(prompt))
3490            }
3491        }
3492    }
3493
3494    /// Execute with planning phase.
3495    ///
3496    /// If `pre_analysis` is provided (from a single pre-analysis LLM call in
3497    /// `execute_with_session`), the goal and plan are already available and no
3498    /// additional LLM calls are needed for planning. Otherwise, falls back to
3499    /// calling `extract_goal` and `plan` individually.
3500    pub async fn execute_with_planning(
3501        &self,
3502        history: &[Message],
3503        prompt: &str,
3504        event_tx: Option<mpsc::Sender<AgentEvent>>,
3505        pre_analysis: Option<PreAnalysis>,
3506    ) -> Result<AgentResult> {
3507        // Send planning start event
3508        if let Some(tx) = &event_tx {
3509            tx.send(AgentEvent::PlanningStart {
3510                prompt: prompt.to_string(),
3511            })
3512            .await
3513            .ok();
3514        }
3515
3516        // Use pre-analysis result if available (goal + plan already computed in one LLM call).
3517        let (goal, plan) = if let Some(analysis) = pre_analysis {
3518            (Some(analysis.goal.clone()), analysis.execution_plan.clone())
3519        } else {
3520            // Fall back: extract goal and create plan via separate LLM calls.
3521            let g = if self.config.goal_tracking {
3522                Some(self.extract_goal(prompt).await?)
3523            } else {
3524                None
3525            };
3526            let p = self.plan(prompt, None).await?;
3527            (g, p)
3528        };
3529
3530        // Send GoalExtracted event if goal_tracking is enabled.
3531        if self.config.goal_tracking {
3532            if let Some(ref g) = goal {
3533                if let Some(tx) = &event_tx {
3534                    tx.send(AgentEvent::GoalExtracted { goal: g.clone() })
3535                        .await
3536                        .ok();
3537                }
3538            }
3539        }
3540
3541        // Send planning end event
3542        if let Some(tx) = &event_tx {
3543            tx.send(AgentEvent::PlanningEnd {
3544                estimated_steps: plan.steps.len(),
3545                plan: plan.clone(),
3546            })
3547            .await
3548            .ok();
3549        }
3550
3551        let plan_start = std::time::Instant::now();
3552
3553        // Execute the plan step by step
3554        let result = self.execute_plan(history, &plan, event_tx.clone()).await?;
3555
3556        // Emit the final End event (execute_loop_inner does not emit End in planning mode)
3557        if let Some(tx) = &event_tx {
3558            tx.send(AgentEvent::End {
3559                text: result.text.clone(),
3560                usage: result.usage.clone(),
3561                verification_summary: Box::new(result.verification_summary()),
3562                meta: None,
3563            })
3564            .await
3565            .ok();
3566        }
3567
3568        // Check goal achievement when goal_tracking is enabled
3569        if self.config.goal_tracking {
3570            if let Some(ref g) = goal {
3571                let achieved = self.check_goal_achievement(g, &result.text).await?;
3572                if achieved {
3573                    if let Some(tx) = &event_tx {
3574                        tx.send(AgentEvent::GoalAchieved {
3575                            goal: g.description.clone(),
3576                            total_steps: result.messages.len(),
3577                            duration_ms: plan_start.elapsed().as_millis() as i64,
3578                        })
3579                        .await
3580                        .ok();
3581                    }
3582                }
3583            }
3584        }
3585
3586        Ok(result)
3587    }
3588
3589    /// Execute an execution plan using wave-based dependency-aware scheduling.
3590    ///
3591    /// Steps with no unmet dependencies are grouped into "waves". A wave with
3592    /// a single step executes sequentially (preserving the history chain). A
3593    /// wave with multiple independent steps executes them in parallel via
3594    /// `JoinSet`, then merges their results back into the shared history.
3595    async fn execute_plan(
3596        &self,
3597        history: &[Message],
3598        plan: &ExecutionPlan,
3599        event_tx: Option<mpsc::Sender<AgentEvent>>,
3600    ) -> Result<AgentResult> {
3601        let mut plan = plan.clone();
3602        let mut current_history = history.to_vec();
3603        let mut total_usage = TokenUsage::default();
3604        let mut tool_calls_count = 0;
3605        let total_steps = plan.steps.len();
3606
3607        // Add initial user message with the goal
3608        let steps_text = plan
3609            .steps
3610            .iter()
3611            .enumerate()
3612            .map(|(i, step)| format!("{}. {}", i + 1, step.content))
3613            .collect::<Vec<_>>()
3614            .join("\n");
3615        current_history.push(Message::user(&crate::prompts::render(
3616            crate::prompts::PLAN_EXECUTE_GOAL,
3617            &[("goal", &plan.goal), ("steps", &steps_text)],
3618        )));
3619
3620        loop {
3621            let ready: Vec<String> = plan
3622                .get_ready_steps()
3623                .iter()
3624                .map(|s| s.id.clone())
3625                .collect();
3626
3627            if ready.is_empty() {
3628                // All done or deadlock
3629                if plan.has_deadlock() {
3630                    tracing::warn!(
3631                        "Plan deadlock detected: {} pending steps with unresolvable dependencies",
3632                        plan.pending_count()
3633                    );
3634                }
3635                break;
3636            }
3637
3638            if ready.len() == 1 {
3639                // === Single step: sequential execution (preserves history chain) ===
3640                let step_id = &ready[0];
3641                let step = plan
3642                    .steps
3643                    .iter()
3644                    .find(|s| s.id == *step_id)
3645                    .ok_or_else(|| anyhow::anyhow!("step '{}' not found in plan", step_id))?
3646                    .clone();
3647                let step_number = plan
3648                    .steps
3649                    .iter()
3650                    .position(|s| s.id == *step_id)
3651                    .unwrap_or(0)
3652                    + 1;
3653
3654                // Send step start event
3655                if let Some(tx) = &event_tx {
3656                    tx.send(AgentEvent::StepStart {
3657                        step_id: step.id.clone(),
3658                        description: step.content.clone(),
3659                        step_number,
3660                        total_steps,
3661                    })
3662                    .await
3663                    .ok();
3664                }
3665
3666                plan.mark_status(&step.id, TaskStatus::InProgress);
3667
3668                let step_prompt = crate::prompts::render(
3669                    crate::prompts::PLAN_EXECUTE_STEP,
3670                    &[
3671                        ("step_num", &step_number.to_string()),
3672                        ("description", &step.content),
3673                    ],
3674                );
3675
3676                match self
3677                    .execute_loop(
3678                        &current_history,
3679                        &step_prompt,
3680                        AgentStyle::GeneralPurpose,
3681                        None,
3682                        event_tx.clone(),
3683                        &tokio_util::sync::CancellationToken::new(),
3684                        false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
3685                    )
3686                    .await
3687                {
3688                    Ok(result) => {
3689                        current_history = result.messages.clone();
3690                        total_usage.prompt_tokens += result.usage.prompt_tokens;
3691                        total_usage.completion_tokens += result.usage.completion_tokens;
3692                        total_usage.total_tokens += result.usage.total_tokens;
3693                        tool_calls_count += result.tool_calls_count;
3694                        plan.mark_status(&step.id, TaskStatus::Completed);
3695
3696                        if let Some(tx) = &event_tx {
3697                            tx.send(AgentEvent::StepEnd {
3698                                step_id: step.id.clone(),
3699                                status: TaskStatus::Completed,
3700                                step_number,
3701                                total_steps,
3702                            })
3703                            .await
3704                            .ok();
3705                        }
3706                    }
3707                    Err(e) => {
3708                        tracing::error!("Plan step '{}' failed: {}", step.id, e);
3709                        plan.mark_status(&step.id, TaskStatus::Failed);
3710
3711                        if let Some(tx) = &event_tx {
3712                            tx.send(AgentEvent::StepEnd {
3713                                step_id: step.id.clone(),
3714                                status: TaskStatus::Failed,
3715                                step_number,
3716                                total_steps,
3717                            })
3718                            .await
3719                            .ok();
3720                        }
3721                    }
3722                }
3723            } else {
3724                // === Multiple steps: parallel execution via JoinSet ===
3725                // NOTE: Each parallel branch gets a clone of the base history.
3726                // Individual branch histories (tool calls, LLM turns) are NOT merged
3727                // back — only a summary message is appended. This is a deliberate
3728                // trade-off: merging divergent histories in a deterministic order is
3729                // complex and the summary approach keeps the context window manageable.
3730                let ready_steps: Vec<_> = ready
3731                    .iter()
3732                    .filter_map(|id| {
3733                        let step = plan.steps.iter().find(|s| s.id == *id)?.clone();
3734                        let step_number =
3735                            plan.steps.iter().position(|s| s.id == *id).unwrap_or(0) + 1;
3736                        Some((step, step_number))
3737                    })
3738                    .collect();
3739
3740                // Mark all as InProgress and emit StepStart events
3741                for (step, step_number) in &ready_steps {
3742                    plan.mark_status(&step.id, TaskStatus::InProgress);
3743                    if let Some(tx) = &event_tx {
3744                        tx.send(AgentEvent::StepStart {
3745                            step_id: step.id.clone(),
3746                            description: step.content.clone(),
3747                            step_number: *step_number,
3748                            total_steps,
3749                        })
3750                        .await
3751                        .ok();
3752                    }
3753                }
3754
3755                // Spawn all into JoinSet, each with a clone of the base history
3756                let mut join_set = tokio::task::JoinSet::new();
3757                for (step, step_number) in &ready_steps {
3758                    let base_history = current_history.clone();
3759                    let agent_clone = self.clone();
3760                    let tx = event_tx.clone();
3761                    let step_clone = step.clone();
3762                    let sn = *step_number;
3763
3764                    join_set.spawn(async move {
3765                        let prompt = crate::prompts::render(
3766                            crate::prompts::PLAN_EXECUTE_STEP,
3767                            &[
3768                                ("step_num", &sn.to_string()),
3769                                ("description", &step_clone.content),
3770                            ],
3771                        );
3772                        let result = agent_clone
3773                            .execute_loop(
3774                                &base_history,
3775                                &prompt,
3776                                AgentStyle::GeneralPurpose,
3777                                None,
3778                                tx,
3779                                &tokio_util::sync::CancellationToken::new(),
3780                                false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
3781                            )
3782                            .await;
3783                        (step_clone.id, sn, result)
3784                    });
3785                }
3786
3787                // Collect results
3788                let mut parallel_results: Vec<ParallelStepResult> = Vec::new();
3789                while let Some(join_result) = join_set.join_next().await {
3790                    match join_result {
3791                        Ok((step_id, step_number, step_result)) => match step_result {
3792                            Ok(result) => {
3793                                total_usage.prompt_tokens += result.usage.prompt_tokens;
3794                                total_usage.completion_tokens += result.usage.completion_tokens;
3795                                total_usage.total_tokens += result.usage.total_tokens;
3796                                tool_calls_count += result.tool_calls_count;
3797                                plan.mark_status(&step_id, TaskStatus::Completed);
3798
3799                                parallel_results.push(ParallelStepResult {
3800                                    step_id: step_id.clone(),
3801                                    step_number: step_number as u32,
3802                                    status: "completed".to_string(),
3803                                    summary: result.text.trim().to_string(),
3804                                    key_findings: None,
3805                                    error: None,
3806                                    data: None,
3807                                });
3808
3809                                if let Some(tx) = &event_tx {
3810                                    tx.send(AgentEvent::StepEnd {
3811                                        step_id,
3812                                        status: TaskStatus::Completed,
3813                                        step_number,
3814                                        total_steps,
3815                                    })
3816                                    .await
3817                                    .ok();
3818                                }
3819                            }
3820                            Err(e) => {
3821                                tracing::error!("Plan step '{}' failed: {}", step_id, e);
3822                                plan.mark_status(&step_id, TaskStatus::Failed);
3823
3824                                parallel_results.push(ParallelStepResult {
3825                                    step_id: step_id.clone(),
3826                                    step_number: step_number as u32,
3827                                    status: "failed".to_string(),
3828                                    summary: String::new(),
3829                                    key_findings: None,
3830                                    error: Some(e.to_string()),
3831                                    data: None,
3832                                });
3833
3834                                if let Some(tx) = &event_tx {
3835                                    tx.send(AgentEvent::StepEnd {
3836                                        step_id,
3837                                        status: TaskStatus::Failed,
3838                                        step_number,
3839                                        total_steps,
3840                                    })
3841                                    .await
3842                                    .ok();
3843                                }
3844                            }
3845                        },
3846                        Err(e) => {
3847                            tracing::error!("JoinSet task panicked: {}", e);
3848                        }
3849                    }
3850                }
3851
3852                // Merge parallel results into history for subsequent steps.
3853                // Emit as a structured JSON USER message so the frontend can
3854                // parse and render it in the user's language.
3855                if !parallel_results.is_empty() {
3856                    parallel_results.sort_by_key(|r| r.step_number);
3857                    let envelope = ParallelStepResult::build_envelope(parallel_results);
3858                    current_history.push(Message::user(
3859                        &serde_json::to_string(&envelope).unwrap_or_default(),
3860                    ));
3861                }
3862            }
3863
3864            // Emit GoalProgress after each wave
3865            if self.config.goal_tracking {
3866                let completed = plan
3867                    .steps
3868                    .iter()
3869                    .filter(|s| s.status == TaskStatus::Completed)
3870                    .count();
3871                if let Some(tx) = &event_tx {
3872                    tx.send(AgentEvent::GoalProgress {
3873                        goal: plan.goal.clone(),
3874                        progress: plan.progress(),
3875                        completed_steps: completed,
3876                        total_steps,
3877                    })
3878                    .await
3879                    .ok();
3880                }
3881            }
3882        }
3883
3884        // Get final response — find the last assistant message (not the last history
3885        // entry, which may be a user-summary injected after parallel execution)
3886        let final_text = current_history
3887            .iter()
3888            .rev()
3889            .find(|m| m.role == "assistant")
3890            .map(|m| {
3891                m.content
3892                    .iter()
3893                    .filter_map(|block| {
3894                        if let crate::llm::ContentBlock::Text { text } = block {
3895                            Some(text.as_str())
3896                        } else {
3897                            None
3898                        }
3899                    })
3900                    .collect::<Vec<_>>()
3901                    .join("\n")
3902            })
3903            .unwrap_or_default();
3904
3905        Ok(AgentResult {
3906            text: final_text,
3907            messages: current_history,
3908            usage: total_usage,
3909            tool_calls_count,
3910            verification_reports: Vec::new(),
3911        })
3912    }
3913
3914    /// Extract goal from prompt
3915    ///
3916    /// Delegates to [`LlmPlanner`] for structured JSON goal extraction,
3917    /// falling back to heuristic logic if the LLM call fails.
3918    pub async fn extract_goal(&self, prompt: &str) -> Result<AgentGoal> {
3919        use crate::planning::LlmPlanner;
3920
3921        match LlmPlanner::extract_goal(&self.llm_client, prompt).await {
3922            Ok(goal) => Ok(goal),
3923            Err(e) => {
3924                tracing::warn!("LLM goal extraction failed, using fallback: {}", e);
3925                Ok(LlmPlanner::fallback_goal(prompt))
3926            }
3927        }
3928    }
3929
3930    /// Check if goal is achieved
3931    ///
3932    /// Delegates to [`LlmPlanner`] for structured JSON achievement check,
3933    /// falling back to heuristic logic if the LLM call fails.
3934    pub async fn check_goal_achievement(
3935        &self,
3936        goal: &AgentGoal,
3937        current_state: &str,
3938    ) -> Result<bool> {
3939        use crate::planning::LlmPlanner;
3940
3941        match LlmPlanner::check_achievement(&self.llm_client, goal, current_state).await {
3942            Ok(result) => Ok(result.achieved),
3943            Err(e) => {
3944                tracing::warn!("LLM achievement check failed, using fallback: {}", e);
3945                let result = LlmPlanner::fallback_check_achievement(goal, current_state);
3946                Ok(result.achieved)
3947            }
3948        }
3949    }
3950}
3951
3952#[cfg(test)]
3953mod tests {
3954    use super::*;
3955    use crate::llm::{ContentBlock, StreamEvent};
3956    use crate::permissions::PermissionPolicy;
3957    use crate::tools::ToolExecutor;
3958    use std::path::PathBuf;
3959    use std::sync::atomic::{AtomicUsize, Ordering};
3960
3961    /// Create a default ToolContext for tests
3962    fn test_tool_context() -> ToolContext {
3963        ToolContext::new(PathBuf::from("/tmp"))
3964    }
3965
/// One memory item converts into a one-item context result that keeps the
/// item's id and content, is typed as memory context, and carries a
/// `memory://<id>` source.
#[test]
fn test_memory_items_become_context_result() {
    let memory_item =
        a3s_memory::MemoryItem::new("Use focused regression tests for context changes.")
            .with_importance(0.8);

    let converted =
        crate::memory::memory_items_to_context_result("memory", vec![memory_item.clone()]);

    assert_eq!(converted.provider, "memory");
    assert_eq!(converted.items.len(), 1);

    // Exactly one item is present (asserted above), so inspect it once.
    let entry = &converted.items[0];
    assert_eq!(entry.id, memory_item.id.as_str());
    assert_eq!(entry.context_type, ContextType::Memory);

    let expected_source = format!("memory://{}", memory_item.id);
    assert_eq!(entry.source.as_deref(), Some(expected_source.as_str()));
    assert!(entry.content.contains("focused regression tests"));
    assert!(entry.token_count > 0);
}
3985
/// An injected AHP context with facts, file snippets, a project summary,
/// knowledge and suggestions yields five results, all attributed to the
/// `ahp_harness` provider, whose items carry the expected content and sources.
#[cfg(feature = "ahp")]
#[test]
fn test_injected_context_to_results_includes_all_context_shapes() {
    let fact = a3s_ahp::Fact {
        content: "Fact from harness".to_string(),
        source: "ahp://fact/source".to_string(),
        confidence: 0.92,
    };
    let snippet = a3s_ahp::FileContentSnippet {
        path: "src/lib.rs".to_string(),
        snippet: "pub fn important() {}".to_string(),
        relevance_score: 0.88,
    };
    let summary = a3s_ahp::ProjectSummary {
        project_name: "demo".to_string(),
        language: Some("Rust".to_string()),
        key_files: Some(vec!["Cargo.toml".to_string(), "src/lib.rs".to_string()]),
        structure_description: "Small Rust crate".to_string(),
    };
    let injected = a3s_ahp::InjectedContext {
        facts: vec![fact],
        file_contents: Some(vec![snippet]),
        project_summary: Some(summary),
        knowledge: Some(vec!["Use context budgets".to_string()]),
        suggestions: Some(vec!["Prefer focused verification".to_string()]),
    };

    let results = injected_context_to_results(injected);
    // Flatten every result's items so each expectation can search them all.
    let items: Vec<_> = results.iter().flat_map(|r| r.items.iter()).collect();

    assert_eq!(results.len(), 5);

    // Fact: content and source are carried over verbatim.
    assert!(items.iter().any(|it| {
        it.content == "Fact from harness" && it.source.as_deref() == Some("ahp://fact/source")
    }));
    // File snippet: the snippet is the content, the path is the source.
    assert!(items.iter().any(|it| {
        it.content == "pub fn important() {}" && it.source.as_deref() == Some("src/lib.rs")
    }));
    // Project summary: key files are listed in the rendered content.
    assert!(items
        .iter()
        .any(|it| it.content.contains("Key files: Cargo.toml, src/lib.rs")));
    // Suggestions: filed under a dedicated source.
    assert!(items.iter().any(|it| {
        it.source.as_deref() == Some("ahp://suggestions")
            && it.content.contains("Prefer focused verification")
    }));

    assert!(results.iter().all(|r| r.provider == "ahp_harness"));
}
4034
/// `AgentConfig::default()` starts with no prompt slots, tools, permission
/// checker or context providers, uses `MAX_TOOL_ROUNDS`, and ships a skill
/// registry holding the built-in skills.
#[test]
fn test_agent_config_default() {
    let defaults = AgentConfig::default();

    assert!(defaults.prompt_slots.is_empty());
    // Tools are supplied by the caller, never baked into the default config.
    assert!(defaults.tools.is_empty());
    assert_eq!(defaults.max_tool_rounds, MAX_TOOL_ROUNDS);
    assert!(defaults.permission_checker.is_none());
    assert!(defaults.context_providers.is_empty());

    // The built-in skills must be registered out of the box.
    let skills = defaults
        .skill_registry
        .expect("skill_registry must be Some by default");
    assert!(skills.len() >= 7, "expected at least 7 built-in skills");
    assert!(skills.get("code-search").is_some());
    assert!(skills.get("find-bugs").is_some());
}
4051
4052    // ========================================================================
4053    // Mock LLM Client for Testing
4054    // ========================================================================
4055
4056    /// Mock LLM client that returns predefined responses
4057    pub(crate) struct MockLlmClient {
4058        /// Responses to return (consumed in order)
4059        responses: std::sync::Mutex<Vec<LlmResponse>>,
4060        /// Number of calls made
4061        pub(crate) call_count: AtomicUsize,
4062    }
4063
4064    impl MockLlmClient {
4065        pub(crate) fn new(responses: Vec<LlmResponse>) -> Self {
4066            Self {
4067                responses: std::sync::Mutex::new(responses),
4068                call_count: AtomicUsize::new(0),
4069            }
4070        }
4071
4072        /// Create a response with text only (no tool calls)
4073        pub(crate) fn text_response(text: &str) -> LlmResponse {
4074            LlmResponse {
4075                message: Message {
4076                    role: "assistant".to_string(),
4077                    content: vec![ContentBlock::Text {
4078                        text: text.to_string(),
4079                    }],
4080                    reasoning_content: None,
4081                },
4082                usage: TokenUsage {
4083                    prompt_tokens: 10,
4084                    completion_tokens: 5,
4085                    total_tokens: 15,
4086                    cache_read_tokens: None,
4087                    cache_write_tokens: None,
4088                },
4089                stop_reason: Some("end_turn".to_string()),
4090                meta: None,
4091            }
4092        }
4093
4094        /// Create a response with a tool call
4095        pub(crate) fn tool_call_response(
4096            tool_id: &str,
4097            tool_name: &str,
4098            args: serde_json::Value,
4099        ) -> LlmResponse {
4100            LlmResponse {
4101                message: Message {
4102                    role: "assistant".to_string(),
4103                    content: vec![ContentBlock::ToolUse {
4104                        id: tool_id.to_string(),
4105                        name: tool_name.to_string(),
4106                        input: args,
4107                    }],
4108                    reasoning_content: None,
4109                },
4110                usage: TokenUsage {
4111                    prompt_tokens: 10,
4112                    completion_tokens: 5,
4113                    total_tokens: 15,
4114                    cache_read_tokens: None,
4115                    cache_write_tokens: None,
4116                },
4117                stop_reason: Some("tool_use".to_string()),
4118                meta: None,
4119            }
4120        }
4121    }
4122
4123    #[async_trait::async_trait]
4124    impl LlmClient for MockLlmClient {
4125        async fn complete(
4126            &self,
4127            _messages: &[Message],
4128            _system: Option<&str>,
4129            _tools: &[ToolDefinition],
4130        ) -> Result<LlmResponse> {
4131            self.call_count.fetch_add(1, Ordering::SeqCst);
4132            let mut responses = self.responses.lock().unwrap();
4133            if responses.is_empty() {
4134                anyhow::bail!("No more mock responses available");
4135            }
4136            Ok(responses.remove(0))
4137        }
4138
4139        async fn complete_streaming(
4140            &self,
4141            _messages: &[Message],
4142            _system: Option<&str>,
4143            _tools: &[ToolDefinition],
4144            _cancel_token: tokio_util::sync::CancellationToken,
4145        ) -> Result<mpsc::Receiver<StreamEvent>> {
4146            self.call_count.fetch_add(1, Ordering::SeqCst);
4147            let mut responses = self.responses.lock().unwrap();
4148            if responses.is_empty() {
4149                anyhow::bail!("No more mock responses available");
4150            }
4151            let response = responses.remove(0);
4152
4153            let (tx, rx) = mpsc::channel(10);
4154            tokio::spawn(async move {
4155                // Send text deltas if any
4156                for block in &response.message.content {
4157                    if let ContentBlock::Text { text } = block {
4158                        tx.send(StreamEvent::TextDelta(text.clone())).await.ok();
4159                    }
4160                }
4161                tx.send(StreamEvent::Done(response)).await.ok();
4162            });
4163
4164            Ok(rx)
4165        }
4166    }
4167
4168    // ========================================================================
4169    // Agent Loop Tests
4170    // ========================================================================
4171
4172    #[tokio::test]
4173    async fn test_agent_simple_response() {
4174        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4175            "Hello, I'm an AI assistant.",
4176        )]));
4177
4178        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4179        let config = AgentConfig::default();
4180
4181        let agent = AgentLoop::new(
4182            mock_client.clone(),
4183            tool_executor,
4184            test_tool_context(),
4185            config,
4186        );
4187        let result = agent.execute(&[], "Hello", None).await.unwrap();
4188
4189        assert_eq!(result.text, "Hello, I'm an AI assistant.");
4190        assert_eq!(result.tool_calls_count, 0);
4191        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
4192    }
4193
4194    #[tokio::test]
4195    async fn test_agent_with_tool_call() {
4196        let mock_client = Arc::new(MockLlmClient::new(vec![
4197            // First response: tool call
4198            MockLlmClient::tool_call_response(
4199                "tool-1",
4200                "bash",
4201                serde_json::json!({"command": "echo hello"}),
4202            ),
4203            // Second response: final text
4204            MockLlmClient::text_response("The command output was: hello"),
4205        ]));
4206
4207        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4208        let config = AgentConfig::default();
4209
4210        let agent = AgentLoop::new(
4211            mock_client.clone(),
4212            tool_executor,
4213            test_tool_context(),
4214            config,
4215        );
4216        let result = agent.execute(&[], "Run echo hello", None).await.unwrap();
4217
4218        assert_eq!(result.text, "The command output was: hello");
4219        assert_eq!(result.tool_calls_count, 1);
4220        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
4221    }
4222
4223    #[tokio::test]
4224    async fn test_agent_permission_deny() {
4225        let mock_client = Arc::new(MockLlmClient::new(vec![
4226            // First response: tool call that will be denied
4227            MockLlmClient::tool_call_response(
4228                "tool-1",
4229                "bash",
4230                serde_json::json!({"command": "rm -rf /tmp/test"}),
4231            ),
4232            // Second response: LLM responds to the denial
4233            MockLlmClient::text_response(
4234                "I cannot execute that command due to permission restrictions.",
4235            ),
4236        ]));
4237
4238        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4239
4240        // Create permission policy that denies rm commands
4241        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4242
4243        let config = AgentConfig {
4244            permission_checker: Some(Arc::new(permission_policy)),
4245            ..Default::default()
4246        };
4247
4248        let (tx, mut rx) = mpsc::channel(100);
4249        let agent = AgentLoop::new(
4250            mock_client.clone(),
4251            tool_executor,
4252            test_tool_context(),
4253            config,
4254        );
4255        let result = agent.execute(&[], "Delete files", Some(tx)).await.unwrap();
4256
4257        // Check that we received a PermissionDenied event
4258        let mut found_permission_denied = false;
4259        while let Ok(event) = rx.try_recv() {
4260            if let AgentEvent::PermissionDenied { tool_name, .. } = event {
4261                assert_eq!(tool_name, "bash");
4262                found_permission_denied = true;
4263            }
4264        }
4265        assert!(
4266            found_permission_denied,
4267            "Should have received PermissionDenied event"
4268        );
4269
4270        assert_eq!(result.tool_calls_count, 1);
4271    }
4272
4273    #[tokio::test]
4274    async fn test_agent_permission_allow() {
4275        let mock_client = Arc::new(MockLlmClient::new(vec![
4276            // First response: tool call that will be allowed
4277            MockLlmClient::tool_call_response(
4278                "tool-1",
4279                "bash",
4280                serde_json::json!({"command": "echo hello"}),
4281            ),
4282            // Second response: final text
4283            MockLlmClient::text_response("Done!"),
4284        ]));
4285
4286        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4287
4288        // Create permission policy that allows echo commands
4289        let permission_policy = PermissionPolicy::new()
4290            .allow("bash(echo:*)")
4291            .deny("bash(rm:*)");
4292
4293        let config = AgentConfig {
4294            permission_checker: Some(Arc::new(permission_policy)),
4295            ..Default::default()
4296        };
4297
4298        let agent = AgentLoop::new(
4299            mock_client.clone(),
4300            tool_executor,
4301            test_tool_context(),
4302            config,
4303        );
4304        let result = agent.execute(&[], "Echo hello", None).await.unwrap();
4305
4306        assert_eq!(result.text, "Done!");
4307        assert_eq!(result.tool_calls_count, 1);
4308    }
4309
4310    #[tokio::test]
4311    async fn test_agent_streaming_events() {
4312        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4313            "Hello!",
4314        )]));
4315
4316        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4317        let config = AgentConfig::default();
4318
4319        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4320        let (tx, mut rx) = mpsc::channel(100);
4321        let cancel_token = tokio_util::sync::CancellationToken::new();
4322
4323        let result = agent
4324            .execute_with_session(&[], "Hi", None, Some(tx), Some(&cancel_token))
4325            .await
4326            .unwrap();
4327        let mut events = Vec::new();
4328        while let Some(event) = rx.recv().await {
4329            events.push(event);
4330        }
4331
4332        assert_eq!(result.text, "Hello!");
4333
4334        // Check we received Start and End events
4335        assert!(events.iter().any(|e| matches!(e, AgentEvent::Start { .. })));
4336        assert!(events.iter().any(|e| matches!(e, AgentEvent::End { .. })));
4337    }
4338
4339    #[tokio::test]
4340    async fn test_agent_max_tool_rounds() {
4341        // Create a mock that always returns tool calls (infinite loop)
4342        let responses: Vec<LlmResponse> = (0..100)
4343            .map(|i| {
4344                MockLlmClient::tool_call_response(
4345                    &format!("tool-{}", i),
4346                    "bash",
4347                    serde_json::json!({"command": "echo loop"}),
4348                )
4349            })
4350            .collect();
4351
4352        let mock_client = Arc::new(MockLlmClient::new(responses));
4353        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4354
4355        let config = AgentConfig {
4356            max_tool_rounds: 3,
4357            ..Default::default()
4358        };
4359
4360        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4361        let result = agent.execute(&[], "Loop forever", None).await;
4362
4363        // Should fail due to max tool rounds exceeded
4364        assert!(result.is_err());
4365        assert!(result.unwrap_err().to_string().contains("Max tool rounds"));
4366    }
4367
4368    #[tokio::test]
4369    async fn test_agent_no_permission_policy_defaults_to_ask() {
4370        // When no permission policy is set, tools default to Ask.
4371        // Without a confirmation manager, Ask = safe deny.
4372        let mock_client = Arc::new(MockLlmClient::new(vec![
4373            MockLlmClient::tool_call_response(
4374                "tool-1",
4375                "bash",
4376                serde_json::json!({"command": "rm -rf /tmp/test"}),
4377            ),
4378            MockLlmClient::text_response("Denied!"),
4379        ]));
4380
4381        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4382        let config = AgentConfig {
4383            permission_checker: None, // No policy → defaults to Ask
4384            // No confirmation_manager → safe deny
4385            ..Default::default()
4386        };
4387
4388        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4389        let result = agent.execute(&[], "Delete", None).await.unwrap();
4390
4391        // Should be denied (no policy + no CM = safe deny)
4392        assert_eq!(result.text, "Denied!");
4393        assert_eq!(result.tool_calls_count, 1);
4394    }
4395
4396    #[tokio::test]
4397    async fn test_agent_permission_ask_without_cm_denies() {
4398        // When permission is Ask and no confirmation manager configured,
4399        // tool execution should be denied (safe default).
4400        let mock_client = Arc::new(MockLlmClient::new(vec![
4401            MockLlmClient::tool_call_response(
4402                "tool-1",
4403                "bash",
4404                serde_json::json!({"command": "echo test"}),
4405            ),
4406            MockLlmClient::text_response("Denied!"),
4407        ]));
4408
4409        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4410
4411        // Create policy where bash falls through to Ask (default)
4412        let permission_policy = PermissionPolicy::new(); // Default decision is Ask
4413
4414        let config = AgentConfig {
4415            permission_checker: Some(Arc::new(permission_policy)),
4416            // No confirmation_manager — safe deny
4417            ..Default::default()
4418        };
4419
4420        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4421        let result = agent.execute(&[], "Echo", None).await.unwrap();
4422
4423        // Should deny (Ask without CM = safe deny)
4424        assert_eq!(result.text, "Denied!");
4425        // The tool result should contain the denial message
4426        assert!(result.tool_calls_count >= 1);
4427    }
4428
4429    // ========================================================================
4430    // HITL (Human-in-the-Loop) Tests
4431    // ========================================================================
4432
4433    #[tokio::test]
4434    async fn test_agent_hitl_approved() {
4435        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4436        use tokio::sync::broadcast;
4437
4438        let mock_client = Arc::new(MockLlmClient::new(vec![
4439            MockLlmClient::tool_call_response(
4440                "tool-1",
4441                "bash",
4442                serde_json::json!({"command": "echo hello"}),
4443            ),
4444            MockLlmClient::text_response("Command executed!"),
4445        ]));
4446
4447        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4448
4449        // Create HITL confirmation manager with policy enabled
4450        let (event_tx, _event_rx) = broadcast::channel(100);
4451        let hitl_policy = ConfirmationPolicy {
4452            enabled: true,
4453            ..Default::default()
4454        };
4455        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4456
4457        // Create permission policy that returns Ask for bash
4458        let permission_policy = PermissionPolicy::new(); // Default is Ask
4459
4460        let config = AgentConfig {
4461            permission_checker: Some(Arc::new(permission_policy)),
4462            confirmation_manager: Some(confirmation_manager.clone()),
4463            ..Default::default()
4464        };
4465
4466        // Spawn a task to approve the confirmation
4467        let cm_clone = confirmation_manager.clone();
4468        tokio::spawn(async move {
4469            // Wait a bit for the confirmation request to be created
4470            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
4471            // Approve it
4472            cm_clone.confirm("tool-1", true, None).await.ok();
4473        });
4474
4475        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4476        let result = agent.execute(&[], "Run echo", None).await.unwrap();
4477
4478        assert_eq!(result.text, "Command executed!");
4479        assert_eq!(result.tool_calls_count, 1);
4480    }
4481
4482    #[tokio::test]
4483    async fn test_agent_hitl_rejected() {
4484        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4485        use tokio::sync::broadcast;
4486
4487        let mock_client = Arc::new(MockLlmClient::new(vec![
4488            MockLlmClient::tool_call_response(
4489                "tool-1",
4490                "bash",
4491                serde_json::json!({"command": "rm -rf /"}),
4492            ),
4493            MockLlmClient::text_response("Understood, I won't do that."),
4494        ]));
4495
4496        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4497
4498        // Create HITL confirmation manager
4499        let (event_tx, _event_rx) = broadcast::channel(100);
4500        let hitl_policy = ConfirmationPolicy {
4501            enabled: true,
4502            ..Default::default()
4503        };
4504        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4505
4506        // Permission policy returns Ask
4507        let permission_policy = PermissionPolicy::new();
4508
4509        let config = AgentConfig {
4510            permission_checker: Some(Arc::new(permission_policy)),
4511            confirmation_manager: Some(confirmation_manager.clone()),
4512            ..Default::default()
4513        };
4514
4515        // Spawn a task to reject the confirmation
4516        let cm_clone = confirmation_manager.clone();
4517        tokio::spawn(async move {
4518            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
4519            cm_clone
4520                .confirm("tool-1", false, Some("Too dangerous".to_string()))
4521                .await
4522                .ok();
4523        });
4524
4525        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4526        let result = agent.execute(&[], "Delete everything", None).await.unwrap();
4527
4528        // LLM should respond to the rejection
4529        assert_eq!(result.text, "Understood, I won't do that.");
4530    }
4531
4532    #[tokio::test]
4533    async fn test_agent_hitl_timeout_reject() {
4534        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
4535        use tokio::sync::broadcast;
4536
4537        let mock_client = Arc::new(MockLlmClient::new(vec![
4538            MockLlmClient::tool_call_response(
4539                "tool-1",
4540                "bash",
4541                serde_json::json!({"command": "echo test"}),
4542            ),
4543            MockLlmClient::text_response("Timed out, I understand."),
4544        ]));
4545
4546        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4547
4548        // Create HITL with very short timeout and Reject action
4549        let (event_tx, _event_rx) = broadcast::channel(100);
4550        let hitl_policy = ConfirmationPolicy {
4551            enabled: true,
4552            default_timeout_ms: 50, // Very short timeout
4553            timeout_action: TimeoutAction::Reject,
4554            ..Default::default()
4555        };
4556        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4557
4558        let permission_policy = PermissionPolicy::new();
4559
4560        let config = AgentConfig {
4561            permission_checker: Some(Arc::new(permission_policy)),
4562            confirmation_manager: Some(confirmation_manager),
4563            ..Default::default()
4564        };
4565
4566        // Don't approve - let it timeout
4567        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4568        let result = agent.execute(&[], "Echo", None).await.unwrap();
4569
4570        // Should get timeout rejection response from LLM
4571        assert_eq!(result.text, "Timed out, I understand.");
4572    }
4573
4574    #[tokio::test]
4575    async fn test_agent_hitl_timeout_auto_approve() {
4576        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
4577        use tokio::sync::broadcast;
4578
4579        let mock_client = Arc::new(MockLlmClient::new(vec![
4580            MockLlmClient::tool_call_response(
4581                "tool-1",
4582                "bash",
4583                serde_json::json!({"command": "echo hello"}),
4584            ),
4585            MockLlmClient::text_response("Auto-approved and executed!"),
4586        ]));
4587
4588        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4589
4590        // Create HITL with very short timeout and AutoApprove action
4591        let (event_tx, _event_rx) = broadcast::channel(100);
4592        let hitl_policy = ConfirmationPolicy {
4593            enabled: true,
4594            default_timeout_ms: 50, // Very short timeout
4595            timeout_action: TimeoutAction::AutoApprove,
4596            ..Default::default()
4597        };
4598        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4599
4600        let permission_policy = PermissionPolicy::new();
4601
4602        let config = AgentConfig {
4603            permission_checker: Some(Arc::new(permission_policy)),
4604            confirmation_manager: Some(confirmation_manager),
4605            ..Default::default()
4606        };
4607
4608        // Don't approve - let it timeout and auto-approve
4609        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4610        let result = agent.execute(&[], "Echo", None).await.unwrap();
4611
4612        // Should auto-approve on timeout and execute
4613        assert_eq!(result.text, "Auto-approved and executed!");
4614        assert_eq!(result.tool_calls_count, 1);
4615    }
4616
4617    #[tokio::test]
4618    async fn test_agent_hitl_confirmation_events() {
4619        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4620        use tokio::sync::broadcast;
4621
4622        let mock_client = Arc::new(MockLlmClient::new(vec![
4623            MockLlmClient::tool_call_response(
4624                "tool-1",
4625                "bash",
4626                serde_json::json!({"command": "echo test"}),
4627            ),
4628            MockLlmClient::text_response("Done!"),
4629        ]));
4630
4631        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4632
4633        // Create HITL confirmation manager
4634        let (event_tx, mut event_rx) = broadcast::channel(100);
4635        let hitl_policy = ConfirmationPolicy {
4636            enabled: true,
4637            default_timeout_ms: 5000, // Long enough timeout
4638            ..Default::default()
4639        };
4640        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4641
4642        let permission_policy = PermissionPolicy::new();
4643
4644        let config = AgentConfig {
4645            permission_checker: Some(Arc::new(permission_policy)),
4646            confirmation_manager: Some(confirmation_manager.clone()),
4647            ..Default::default()
4648        };
4649
4650        // Spawn task to approve and collect events
4651        let cm_clone = confirmation_manager.clone();
4652        let event_handle = tokio::spawn(async move {
4653            let mut events = Vec::new();
4654            // Wait for ConfirmationRequired event
4655            while let Ok(event) = event_rx.recv().await {
4656                events.push(event.clone());
4657                if let AgentEvent::ConfirmationRequired { tool_id, .. } = event {
4658                    // Approve it
4659                    cm_clone.confirm(&tool_id, true, None).await.ok();
4660                    // Wait for ConfirmationReceived
4661                    if let Ok(recv_event) = event_rx.recv().await {
4662                        events.push(recv_event);
4663                    }
4664                    break;
4665                }
4666            }
4667            events
4668        });
4669
4670        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4671        let _result = agent.execute(&[], "Echo", None).await.unwrap();
4672
4673        // Check events
4674        let events = event_handle.await.unwrap();
4675        assert!(
4676            events
4677                .iter()
4678                .any(|e| matches!(e, AgentEvent::ConfirmationRequired { .. })),
4679            "Should have ConfirmationRequired event"
4680        );
4681        assert!(
4682            events
4683                .iter()
4684                .any(|e| matches!(e, AgentEvent::ConfirmationReceived { approved: true, .. })),
4685            "Should have ConfirmationReceived event with approved=true"
4686        );
4687    }
4688
4689    #[tokio::test]
4690    async fn test_agent_hitl_disabled_auto_executes() {
4691        // When HITL is disabled, tools should execute automatically even with Ask permission
4692        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4693        use tokio::sync::broadcast;
4694
4695        let mock_client = Arc::new(MockLlmClient::new(vec![
4696            MockLlmClient::tool_call_response(
4697                "tool-1",
4698                "bash",
4699                serde_json::json!({"command": "echo auto"}),
4700            ),
4701            MockLlmClient::text_response("Auto executed!"),
4702        ]));
4703
4704        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4705
4706        // Create HITL with enabled=false
4707        let (event_tx, _event_rx) = broadcast::channel(100);
4708        let hitl_policy = ConfirmationPolicy {
4709            enabled: false, // HITL disabled
4710            ..Default::default()
4711        };
4712        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4713
4714        let permission_policy = PermissionPolicy::new(); // Default is Ask
4715
4716        let config = AgentConfig {
4717            permission_checker: Some(Arc::new(permission_policy)),
4718            confirmation_manager: Some(confirmation_manager),
4719            ..Default::default()
4720        };
4721
4722        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4723        let result = agent.execute(&[], "Echo", None).await.unwrap();
4724
4725        // Should execute without waiting for confirmation
4726        assert_eq!(result.text, "Auto executed!");
4727        assert_eq!(result.tool_calls_count, 1);
4728    }
4729
4730    #[tokio::test]
4731    async fn test_agent_hitl_with_permission_deny_skips_hitl() {
4732        // When permission is Deny, HITL should not be triggered
4733        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4734        use tokio::sync::broadcast;
4735
4736        let mock_client = Arc::new(MockLlmClient::new(vec![
4737            MockLlmClient::tool_call_response(
4738                "tool-1",
4739                "bash",
4740                serde_json::json!({"command": "rm -rf /"}),
4741            ),
4742            MockLlmClient::text_response("Blocked by permission."),
4743        ]));
4744
4745        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4746
4747        // Create HITL enabled
4748        let (event_tx, mut event_rx) = broadcast::channel(100);
4749        let hitl_policy = ConfirmationPolicy {
4750            enabled: true,
4751            ..Default::default()
4752        };
4753        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4754
4755        // Permission policy denies rm commands
4756        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4757
4758        let config = AgentConfig {
4759            permission_checker: Some(Arc::new(permission_policy)),
4760            confirmation_manager: Some(confirmation_manager),
4761            ..Default::default()
4762        };
4763
4764        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4765        let result = agent.execute(&[], "Delete", None).await.unwrap();
4766
4767        // Should be denied without HITL
4768        assert_eq!(result.text, "Blocked by permission.");
4769
4770        // Should NOT have any ConfirmationRequired events
4771        let mut found_confirmation = false;
4772        while let Ok(event) = event_rx.try_recv() {
4773            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4774                found_confirmation = true;
4775            }
4776        }
4777        assert!(
4778            !found_confirmation,
4779            "HITL should not be triggered when permission is Deny"
4780        );
4781    }
4782
4783    #[tokio::test]
4784    async fn test_agent_hitl_with_permission_allow_skips_hitl() {
4785        // When permission is Allow, HITL confirmation is skipped entirely.
4786        // PermissionPolicy is the declarative rule engine; Allow = execute directly.
4787        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4788        use tokio::sync::broadcast;
4789
4790        let mock_client = Arc::new(MockLlmClient::new(vec![
4791            MockLlmClient::tool_call_response(
4792                "tool-1",
4793                "bash",
4794                serde_json::json!({"command": "echo hello"}),
4795            ),
4796            MockLlmClient::text_response("Allowed!"),
4797        ]));
4798
4799        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4800
4801        // Create HITL enabled
4802        let (event_tx, mut event_rx) = broadcast::channel(100);
4803        let hitl_policy = ConfirmationPolicy {
4804            enabled: true,
4805            ..Default::default()
4806        };
4807        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4808
4809        // Permission policy allows echo commands
4810        let permission_policy = PermissionPolicy::new().allow("bash(echo:*)");
4811
4812        let config = AgentConfig {
4813            permission_checker: Some(Arc::new(permission_policy)),
4814            confirmation_manager: Some(confirmation_manager.clone()),
4815            ..Default::default()
4816        };
4817
4818        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4819        let result = agent.execute(&[], "Echo", None).await.unwrap();
4820
4821        // Should execute directly without HITL (permission Allow skips confirmation)
4822        assert_eq!(result.text, "Allowed!");
4823
4824        // Should NOT have ConfirmationRequired event (Allow bypasses HITL)
4825        let mut found_confirmation = false;
4826        while let Ok(event) = event_rx.try_recv() {
4827            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
4828                found_confirmation = true;
4829            }
4830        }
4831        assert!(
4832            !found_confirmation,
4833            "Permission Allow should skip HITL confirmation"
4834        );
4835    }
4836
4837    #[tokio::test]
4838    async fn test_agent_hitl_multiple_tool_calls() {
4839        // Test multiple tool calls in sequence with HITL
4840        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4841        use tokio::sync::broadcast;
4842
4843        let mock_client = Arc::new(MockLlmClient::new(vec![
4844            // First response: two tool calls
4845            LlmResponse {
4846                message: Message {
4847                    role: "assistant".to_string(),
4848                    content: vec![
4849                        ContentBlock::ToolUse {
4850                            id: "tool-1".to_string(),
4851                            name: "bash".to_string(),
4852                            input: serde_json::json!({"command": "echo first"}),
4853                        },
4854                        ContentBlock::ToolUse {
4855                            id: "tool-2".to_string(),
4856                            name: "bash".to_string(),
4857                            input: serde_json::json!({"command": "echo second"}),
4858                        },
4859                    ],
4860                    reasoning_content: None,
4861                },
4862                usage: TokenUsage {
4863                    prompt_tokens: 10,
4864                    completion_tokens: 5,
4865                    total_tokens: 15,
4866                    cache_read_tokens: None,
4867                    cache_write_tokens: None,
4868                },
4869                stop_reason: Some("tool_use".to_string()),
4870                meta: None,
4871            },
4872            MockLlmClient::text_response("Both executed!"),
4873        ]));
4874
4875        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4876
4877        // Create HITL
4878        let (event_tx, _event_rx) = broadcast::channel(100);
4879        let hitl_policy = ConfirmationPolicy {
4880            enabled: true,
4881            default_timeout_ms: 5000,
4882            ..Default::default()
4883        };
4884        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4885
4886        let permission_policy = PermissionPolicy::new(); // Default Ask
4887
4888        let config = AgentConfig {
4889            permission_checker: Some(Arc::new(permission_policy)),
4890            confirmation_manager: Some(confirmation_manager.clone()),
4891            ..Default::default()
4892        };
4893
4894        // Spawn task to approve both tools
4895        let cm_clone = confirmation_manager.clone();
4896        tokio::spawn(async move {
4897            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4898            cm_clone.confirm("tool-1", true, None).await.ok();
4899            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4900            cm_clone.confirm("tool-2", true, None).await.ok();
4901        });
4902
4903        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4904        let result = agent
4905            .execute_loop(
4906                &[],
4907                "run both commands now",
4908                AgentStyle::GeneralPurpose,
4909                None,
4910                None,
4911                &tokio_util::sync::CancellationToken::new(),
4912                true,
4913            )
4914            .await
4915            .unwrap();
4916
4917        assert_eq!(result.text, "Both executed!");
4918        assert_eq!(result.tool_calls_count, 2);
4919    }
4920
4921    #[tokio::test]
4922    async fn test_agent_hitl_partial_approval() {
4923        // Test: first tool approved, second rejected
4924        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4925        use tokio::sync::broadcast;
4926
4927        let mock_client = Arc::new(MockLlmClient::new(vec![
4928            // First response: two tool calls
4929            LlmResponse {
4930                message: Message {
4931                    role: "assistant".to_string(),
4932                    content: vec![
4933                        ContentBlock::ToolUse {
4934                            id: "tool-1".to_string(),
4935                            name: "bash".to_string(),
4936                            input: serde_json::json!({"command": "echo safe"}),
4937                        },
4938                        ContentBlock::ToolUse {
4939                            id: "tool-2".to_string(),
4940                            name: "bash".to_string(),
4941                            input: serde_json::json!({"command": "rm -rf /"}),
4942                        },
4943                    ],
4944                    reasoning_content: None,
4945                },
4946                usage: TokenUsage {
4947                    prompt_tokens: 10,
4948                    completion_tokens: 5,
4949                    total_tokens: 15,
4950                    cache_read_tokens: None,
4951                    cache_write_tokens: None,
4952                },
4953                stop_reason: Some("tool_use".to_string()),
4954                meta: None,
4955            },
4956            MockLlmClient::text_response("First worked, second rejected."),
4957        ]));
4958
4959        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4960
4961        let (event_tx, _event_rx) = broadcast::channel(100);
4962        let hitl_policy = ConfirmationPolicy {
4963            enabled: true,
4964            default_timeout_ms: 5000,
4965            ..Default::default()
4966        };
4967        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4968
4969        let permission_policy = PermissionPolicy::new();
4970
4971        let config = AgentConfig {
4972            permission_checker: Some(Arc::new(permission_policy)),
4973            confirmation_manager: Some(confirmation_manager.clone()),
4974            ..Default::default()
4975        };
4976
4977        // Approve first, reject second
4978        let cm_clone = confirmation_manager.clone();
4979        tokio::spawn(async move {
4980            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4981            cm_clone.confirm("tool-1", true, None).await.ok();
4982            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
4983            cm_clone
4984                .confirm("tool-2", false, Some("Dangerous".to_string()))
4985                .await
4986                .ok();
4987        });
4988
4989        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4990        let result = agent.execute(&[], "Run both", None).await.unwrap();
4991
4992        assert_eq!(result.text, "First worked, second rejected.");
4993        assert_eq!(result.tool_calls_count, 2);
4994    }
4995
4996    #[tokio::test]
4997    async fn test_agent_hitl_yolo_mode_auto_approves() {
4998        // YOLO mode: specific lanes auto-approve without confirmation
4999        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5000        use crate::queue::SessionLane;
5001        use tokio::sync::broadcast;
5002
5003        let mock_client = Arc::new(MockLlmClient::new(vec![
5004            MockLlmClient::tool_call_response(
5005                "tool-1",
5006                "read", // Query lane tool
5007                serde_json::json!({"path": "/tmp/test.txt"}),
5008            ),
5009            MockLlmClient::text_response("File read!"),
5010        ]));
5011
5012        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5013
5014        // YOLO mode for Query lane (read, glob, ls, grep)
5015        let (event_tx, mut event_rx) = broadcast::channel(100);
5016        let mut yolo_lanes = std::collections::HashSet::new();
5017        yolo_lanes.insert(SessionLane::Query);
5018        let hitl_policy = ConfirmationPolicy {
5019            enabled: true,
5020            yolo_lanes, // Auto-approve query operations
5021            ..Default::default()
5022        };
5023        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5024
5025        let permission_policy = PermissionPolicy::new();
5026
5027        let config = AgentConfig {
5028            permission_checker: Some(Arc::new(permission_policy)),
5029            confirmation_manager: Some(confirmation_manager),
5030            ..Default::default()
5031        };
5032
5033        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5034        let result = agent.execute(&[], "Read file", None).await.unwrap();
5035
5036        // Should auto-execute without confirmation (YOLO mode)
5037        assert_eq!(result.text, "File read!");
5038
5039        // Should NOT have ConfirmationRequired for yolo lane
5040        let mut found_confirmation = false;
5041        while let Ok(event) = event_rx.try_recv() {
5042            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
5043                found_confirmation = true;
5044            }
5045        }
5046        assert!(
5047            !found_confirmation,
5048            "YOLO mode should not trigger confirmation"
5049        );
5050    }
5051
5052    #[tokio::test]
5053    async fn test_agent_config_with_all_options() {
5054        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5055        use tokio::sync::broadcast;
5056
5057        let (event_tx, _) = broadcast::channel(100);
5058        let hitl_policy = ConfirmationPolicy::default();
5059        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5060
5061        let permission_policy = PermissionPolicy::new().allow("bash(*)");
5062
5063        let config = AgentConfig {
5064            prompt_slots: SystemPromptSlots {
5065                extra: Some("Test system prompt".to_string()),
5066                ..Default::default()
5067            },
5068            tools: vec![],
5069            max_tool_rounds: 10,
5070            permission_checker: Some(Arc::new(permission_policy)),
5071            confirmation_manager: Some(confirmation_manager),
5072            context_providers: vec![],
5073            planning_mode: PlanningMode::default(),
5074            goal_tracking: false,
5075            hook_engine: None,
5076            skill_registry: None,
5077            ..AgentConfig::default()
5078        };
5079
5080        assert!(config.prompt_slots.build().contains("Test system prompt"));
5081        assert_eq!(config.max_tool_rounds, 10);
5082        assert!(config.permission_checker.is_some());
5083        assert!(config.confirmation_manager.is_some());
5084        assert!(config.context_providers.is_empty());
5085
5086        // Test Debug trait
5087        let debug_str = format!("{:?}", config);
5088        assert!(debug_str.contains("AgentConfig"));
5089        assert!(debug_str.contains("permission_checker: true"));
5090        assert!(debug_str.contains("confirmation_manager: true"));
5091        assert!(debug_str.contains("context_providers: 0"));
5092    }
5093
5094    // ========================================================================
5095    // Context Provider Tests
5096    // ========================================================================
5097
5098    use crate::context::{ContextItem, ContextType};
5099
5100    /// Mock context provider for testing
5101    struct MockContextProvider {
5102        name: String,
5103        items: Vec<ContextItem>,
5104        on_turn_calls: std::sync::Arc<tokio::sync::RwLock<Vec<(String, String, String)>>>,
5105    }
5106
5107    impl MockContextProvider {
5108        fn new(name: &str) -> Self {
5109            Self {
5110                name: name.to_string(),
5111                items: Vec::new(),
5112                on_turn_calls: std::sync::Arc::new(tokio::sync::RwLock::new(Vec::new())),
5113            }
5114        }
5115
5116        fn with_items(mut self, items: Vec<ContextItem>) -> Self {
5117            self.items = items;
5118            self
5119        }
5120    }
5121
5122    #[async_trait::async_trait]
5123    impl ContextProvider for MockContextProvider {
5124        fn name(&self) -> &str {
5125            &self.name
5126        }
5127
5128        async fn query(&self, _query: &ContextQuery) -> anyhow::Result<ContextResult> {
5129            let mut result = ContextResult::new(&self.name);
5130            for item in &self.items {
5131                result.add_item(item.clone());
5132            }
5133            Ok(result)
5134        }
5135
5136        async fn on_turn_complete(
5137            &self,
5138            session_id: &str,
5139            prompt: &str,
5140            response: &str,
5141        ) -> anyhow::Result<()> {
5142            let mut calls = self.on_turn_calls.write().await;
5143            calls.push((
5144                session_id.to_string(),
5145                prompt.to_string(),
5146                response.to_string(),
5147            ));
5148            Ok(())
5149        }
5150    }
5151
5152    #[tokio::test]
5153    async fn test_agent_with_context_provider() {
5154        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5155            "Response using context",
5156        )]));
5157
5158        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5159
5160        let provider =
5161            MockContextProvider::new("test-provider").with_items(vec![ContextItem::new(
5162                "ctx-1",
5163                ContextType::Resource,
5164                "Relevant context here",
5165            )
5166            .with_source("test://docs/example")]);
5167
5168        let config = AgentConfig {
5169            prompt_slots: SystemPromptSlots {
5170                extra: Some("You are helpful.".to_string()),
5171                ..Default::default()
5172            },
5173            context_providers: vec![Arc::new(provider)],
5174            ..Default::default()
5175        };
5176
5177        let agent = AgentLoop::new(
5178            mock_client.clone(),
5179            tool_executor,
5180            test_tool_context(),
5181            config,
5182        );
5183        let result = agent
5184            .execute(&[], "verify context provider output", None)
5185            .await
5186            .unwrap();
5187
5188        assert_eq!(result.text, "Response using context");
5189        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
5190    }
5191
5192    #[tokio::test]
5193    async fn test_agent_context_provider_events() {
5194        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5195            "Answer",
5196        )]));
5197
5198        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5199
5200        let provider =
5201            MockContextProvider::new("event-provider").with_items(vec![ContextItem::new(
5202                "item-1",
5203                ContextType::Memory,
5204                "Memory content",
5205            )
5206            .with_token_count(50)]);
5207
5208        let config = AgentConfig {
5209            context_providers: vec![Arc::new(provider)],
5210            ..Default::default()
5211        };
5212
5213        let (tx, mut rx) = mpsc::channel(100);
5214        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5215        let _result = agent.execute(&[], "Test prompt", Some(tx)).await.unwrap();
5216
5217        // Collect events
5218        let mut events = Vec::new();
5219        while let Ok(event) = rx.try_recv() {
5220            events.push(event);
5221        }
5222
5223        // Should have ContextResolving and ContextResolved events
5224        assert!(
5225            events
5226                .iter()
5227                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
5228            "Should have ContextResolving event"
5229        );
5230        assert!(
5231            events
5232                .iter()
5233                .any(|e| matches!(e, AgentEvent::ContextResolved { .. })),
5234            "Should have ContextResolved event"
5235        );
5236
5237        // Check context resolved values
5238        for event in &events {
5239            if let AgentEvent::ContextResolved {
5240                total_items,
5241                total_tokens,
5242            } = event
5243            {
5244                assert_eq!(*total_items, 1);
5245                assert_eq!(*total_tokens, 50);
5246            }
5247        }
5248    }
5249
5250    #[tokio::test]
5251    async fn test_agent_multiple_context_providers() {
5252        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5253            "Combined response",
5254        )]));
5255
5256        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5257
5258        let provider1 = MockContextProvider::new("provider-1").with_items(vec![ContextItem::new(
5259            "p1-1",
5260            ContextType::Resource,
5261            "Resource from P1",
5262        )
5263        .with_token_count(100)]);
5264
5265        let provider2 = MockContextProvider::new("provider-2").with_items(vec![
5266            ContextItem::new("p2-1", ContextType::Memory, "Memory from P2").with_token_count(50),
5267            ContextItem::new("p2-2", ContextType::Skill, "Skill from P2").with_token_count(75),
5268        ]);
5269
5270        let config = AgentConfig {
5271            prompt_slots: SystemPromptSlots {
5272                extra: Some("Base system prompt.".to_string()),
5273                ..Default::default()
5274            },
5275            context_providers: vec![Arc::new(provider1), Arc::new(provider2)],
5276            ..Default::default()
5277        };
5278
5279        let (tx, mut rx) = mpsc::channel(100);
5280        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5281        let result = agent
5282            .execute(&[], "verify combined context", Some(tx))
5283            .await
5284            .unwrap();
5285
5286        assert_eq!(result.text, "Combined response");
5287
5288        // Check context resolved event has combined totals
5289        while let Ok(event) = rx.try_recv() {
5290            if let AgentEvent::ContextResolved {
5291                total_items,
5292                total_tokens,
5293            } = event
5294            {
5295                assert_eq!(total_items, 3); // 1 + 2
5296                assert_eq!(total_tokens, 225); // 100 + 50 + 75
5297            }
5298        }
5299    }
5300
5301    #[tokio::test]
5302    async fn test_agent_no_context_providers() {
5303        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5304            "No context",
5305        )]));
5306
5307        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5308
5309        // No context providers
5310        let config = AgentConfig::default();
5311
5312        let (tx, mut rx) = mpsc::channel(100);
5313        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5314        let result = agent
5315            .execute(&[], "verify simple prompt", Some(tx))
5316            .await
5317            .unwrap();
5318
5319        assert_eq!(result.text, "No context");
5320
5321        // Should NOT have context events when no providers
5322        let mut events = Vec::new();
5323        while let Ok(event) = rx.try_recv() {
5324            events.push(event);
5325        }
5326
5327        assert!(
5328            !events
5329                .iter()
5330                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
5331            "Should NOT have ContextResolving event"
5332        );
5333    }
5334
5335    #[tokio::test]
5336    async fn test_agent_memory_recall_routes_through_context_assembly() {
5337        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5338            "Memory-aware response",
5339        )]));
5340
5341        let memory = crate::memory::AgentMemory::new(Arc::new(a3s_memory::InMemoryStore::new()));
5342        memory
5343            .remember(
5344                a3s_memory::MemoryItem::new(
5345                    "verify focused regression tests caught context regressions.",
5346                )
5347                .with_importance(0.9),
5348            )
5349            .await
5350            .unwrap();
5351
5352        let temp_dir = tempfile::tempdir().unwrap();
5353        let tool_executor = Arc::new(ToolExecutor::new(temp_dir.path().display().to_string()));
5354        let config = AgentConfig {
5355            memory: Some(Arc::new(memory)),
5356            ..Default::default()
5357        };
5358
5359        let (tx, mut rx) = mpsc::channel(100);
5360        let agent = AgentLoop::new(
5361            mock_client,
5362            tool_executor,
5363            ToolContext::new(temp_dir.path().to_path_buf()),
5364            config,
5365        );
5366        let result = agent
5367            .execute(&[], "verify focused regression tests", Some(tx))
5368            .await
5369            .unwrap();
5370
5371        assert_eq!(result.text, "Memory-aware response");
5372
5373        let mut recalled = false;
5374        let mut resolved_items = None;
5375        while let Ok(event) = rx.try_recv() {
5376            match event {
5377                AgentEvent::MemoryRecalled { content, .. } => {
5378                    recalled = content.contains("focused regression tests");
5379                }
5380                AgentEvent::ContextResolved { total_items, .. } => {
5381                    resolved_items = Some(total_items);
5382                }
5383                _ => {}
5384            }
5385        }
5386
5387        assert!(recalled);
5388        assert_eq!(resolved_items, Some(1));
5389    }
5390
5391    #[tokio::test]
5392    async fn test_agent_context_on_turn_complete() {
5393        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5394            "Final response",
5395        )]));
5396
5397        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5398
5399        let provider = Arc::new(MockContextProvider::new("memory-provider"));
5400        let on_turn_calls = provider.on_turn_calls.clone();
5401
5402        let config = AgentConfig {
5403            context_providers: vec![provider],
5404            ..Default::default()
5405        };
5406
5407        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5408
5409        // Execute with session ID
5410        let result = agent
5411            .execute_with_session(&[], "verify user prompt", Some("sess-123"), None, None)
5412            .await
5413            .unwrap();
5414
5415        assert_eq!(result.text, "Final response");
5416
5417        // Check on_turn_complete was called
5418        let calls = on_turn_calls.read().await;
5419        assert_eq!(calls.len(), 1);
5420        assert_eq!(calls[0].0, "sess-123");
5421        assert_eq!(calls[0].1, "verify user prompt");
5422        assert_eq!(calls[0].2, "Final response");
5423    }
5424
5425    #[tokio::test]
5426    async fn test_agent_context_on_turn_complete_no_session() {
5427        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5428            "Response",
5429        )]));
5430
5431        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5432
5433        let provider = Arc::new(MockContextProvider::new("memory-provider"));
5434        let on_turn_calls = provider.on_turn_calls.clone();
5435
5436        let config = AgentConfig {
5437            context_providers: vec![provider],
5438            ..Default::default()
5439        };
5440
5441        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5442
5443        // Execute without session ID (uses execute() which passes None)
5444        let _result = agent.execute(&[], "Prompt", None).await.unwrap();
5445
5446        // on_turn_complete should NOT be called when session_id is None
5447        let calls = on_turn_calls.read().await;
5448        assert!(calls.is_empty());
5449    }
5450
5451    #[tokio::test]
5452    async fn test_agent_build_augmented_system_prompt() {
5453        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response("OK")]));
5454
5455        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5456
5457        let provider = MockContextProvider::new("test").with_items(vec![ContextItem::new(
5458            "doc-1",
5459            ContextType::Resource,
5460            "Auth uses JWT tokens.",
5461        )
5462        .with_source("viking://docs/auth")]);
5463
5464        let config = AgentConfig {
5465            prompt_slots: SystemPromptSlots {
5466                extra: Some("You are helpful.".to_string()),
5467                ..Default::default()
5468            },
5469            context_providers: vec![Arc::new(provider)],
5470            ..Default::default()
5471        };
5472
5473        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5474
5475        // Test building augmented prompt
5476        let context_results = agent.resolve_context("test", None).await;
5477        let augmented = agent.build_augmented_system_prompt(&context_results);
5478
5479        let augmented_str = augmented.unwrap();
5480        assert!(augmented_str.contains("You are helpful."));
5481        assert!(augmented_str.contains("<context source=\"viking://docs/auth\" type=\"Resource\">"));
5482        assert!(augmented_str.contains("Auth uses JWT tokens."));
5483    }
5484
5485    // ========================================================================
5486    // Agentic Loop Integration Tests
5487    // ========================================================================
5488
5489    /// Helper: collect all events from a channel
5490    async fn collect_events(mut rx: mpsc::Receiver<AgentEvent>) -> Vec<AgentEvent> {
5491        let mut events = Vec::new();
5492        while let Ok(event) = rx.try_recv() {
5493            events.push(event);
5494        }
5495        // Drain remaining
5496        while let Some(event) = rx.recv().await {
5497            events.push(event);
5498        }
5499        events
5500    }
5501
5502    #[tokio::test]
5503    async fn test_agent_multi_turn_tool_chain() {
5504        // LLM calls tool A → sees result → calls tool B → sees result → final answer
5505        let mock_client = Arc::new(MockLlmClient::new(vec![
5506            // Turn 1: call ls
5507            MockLlmClient::tool_call_response(
5508                "t1",
5509                "bash",
5510                serde_json::json!({"command": "echo step1"}),
5511            ),
5512            // Turn 2: call another tool based on first result
5513            MockLlmClient::tool_call_response(
5514                "t2",
5515                "bash",
5516                serde_json::json!({"command": "echo step2"}),
5517            ),
5518            // Turn 3: final answer
5519            MockLlmClient::text_response("Completed both steps: step1 then step2"),
5520        ]));
5521
5522        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5523        let config = AgentConfig::default();
5524
5525        let agent = AgentLoop::new(
5526            mock_client.clone(),
5527            tool_executor,
5528            test_tool_context(),
5529            config,
5530        );
5531        let result = agent.execute(&[], "Run two steps", None).await.unwrap();
5532
5533        assert_eq!(result.text, "Completed both steps: step1 then step2");
5534        assert_eq!(result.tool_calls_count, 2);
5535        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 3);
5536
5537        // Verify message history: user → assistant(tool_use) → user(tool_result) → assistant(tool_use) → user(tool_result) → assistant(text)
5538        assert_eq!(result.messages[0].role, "user");
5539        assert_eq!(result.messages[1].role, "assistant"); // tool call 1
5540        assert_eq!(result.messages[2].role, "user"); // tool result 1 (Anthropic convention)
5541        assert_eq!(result.messages[3].role, "assistant"); // tool call 2
5542        assert_eq!(result.messages[4].role, "user"); // tool result 2
5543        assert_eq!(result.messages[5].role, "assistant"); // final text
5544        assert_eq!(result.messages.len(), 6);
5545    }
5546
5547    #[tokio::test]
5548    async fn test_agent_conversation_history_preserved() {
5549        // Pass existing history, verify it's preserved in output
5550        let existing_history = vec![
5551            Message::user("What is Rust?"),
5552            Message {
5553                role: "assistant".to_string(),
5554                content: vec![ContentBlock::Text {
5555                    text: "Rust is a systems programming language.".to_string(),
5556                }],
5557                reasoning_content: None,
5558            },
5559        ];
5560
5561        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5562            "Rust was created by Graydon Hoare at Mozilla.",
5563        )]));
5564
5565        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5566        let agent = AgentLoop::new(
5567            mock_client.clone(),
5568            tool_executor,
5569            test_tool_context(),
5570            AgentConfig {
5571                prompt_slots: SystemPromptSlots {
5572                    style: Some(AgentStyle::GeneralPurpose),
5573                    ..Default::default()
5574                },
5575                ..Default::default()
5576            },
5577        );
5578
5579        let result = agent
5580            .execute(&existing_history, "Who created it?", None)
5581            .await
5582            .unwrap();
5583
5584        // History should contain: old user + old assistant + new user + new assistant
5585        assert_eq!(result.messages.len(), 4);
5586        assert_eq!(result.messages[0].text(), "What is Rust?");
5587        assert_eq!(
5588            result.messages[1].text(),
5589            "Rust is a systems programming language."
5590        );
5591        assert_eq!(result.messages[2].text(), "Who created it?");
5592        assert_eq!(
5593            result.messages[3].text(),
5594            "Rust was created by Graydon Hoare at Mozilla."
5595        );
5596    }
5597
5598    #[tokio::test]
5599    async fn test_agent_event_stream_completeness() {
5600        // Verify full event sequence for a single tool call loop
5601        let mock_client = Arc::new(MockLlmClient::new(vec![
5602            MockLlmClient::tool_call_response(
5603                "t1",
5604                "bash",
5605                serde_json::json!({"command": "echo hi"}),
5606            ),
5607            MockLlmClient::text_response("Done"),
5608        ]));
5609
5610        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5611        let agent = AgentLoop::new(
5612            mock_client,
5613            tool_executor,
5614            test_tool_context(),
5615            AgentConfig::default(),
5616        );
5617
5618        let (tx, rx) = mpsc::channel(100);
5619        let result = agent.execute(&[], "Say hi", Some(tx)).await.unwrap();
5620        assert_eq!(result.text, "Done");
5621
5622        let events = collect_events(rx).await;
5623
5624        // Verify event sequence
5625        let event_types: Vec<&str> = events
5626            .iter()
5627            .map(|e| match e {
5628                AgentEvent::Start { .. } => "Start",
5629                AgentEvent::TurnStart { .. } => "TurnStart",
5630                AgentEvent::TurnEnd { .. } => "TurnEnd",
5631                AgentEvent::ToolEnd { .. } => "ToolEnd",
5632                AgentEvent::End { .. } => "End",
5633                _ => "Other",
5634            })
5635            .collect();
5636
5637        // Mode/context events may precede Start; the execution lifecycle still
5638        // needs a Start before turns and an End as the final event.
5639        let start_index = event_types
5640            .iter()
5641            .position(|t| *t == "Start")
5642            .expect("Start event should be present");
5643        let first_turn_index = event_types
5644            .iter()
5645            .position(|t| *t == "TurnStart")
5646            .expect("TurnStart event should be present");
5647        assert!(start_index < first_turn_index);
5648        assert_eq!(event_types.last(), Some(&"End"));
5649
5650        // Must have 2 TurnStarts (tool call turn + final answer turn)
5651        let turn_starts = event_types.iter().filter(|&&t| t == "TurnStart").count();
5652        assert_eq!(turn_starts, 2);
5653
5654        // Must have 1 ToolEnd
5655        let tool_ends = event_types.iter().filter(|&&t| t == "ToolEnd").count();
5656        assert_eq!(tool_ends, 1);
5657    }
5658
5659    #[tokio::test]
5660    async fn test_agent_multiple_tools_single_turn() {
5661        // LLM returns 2 tool calls in one response
5662        let mock_client = Arc::new(MockLlmClient::new(vec![
5663            LlmResponse {
5664                message: Message {
5665                    role: "assistant".to_string(),
5666                    content: vec![
5667                        ContentBlock::ToolUse {
5668                            id: "t1".to_string(),
5669                            name: "bash".to_string(),
5670                            input: serde_json::json!({"command": "echo first"}),
5671                        },
5672                        ContentBlock::ToolUse {
5673                            id: "t2".to_string(),
5674                            name: "bash".to_string(),
5675                            input: serde_json::json!({"command": "echo second"}),
5676                        },
5677                    ],
5678                    reasoning_content: None,
5679                },
5680                usage: TokenUsage {
5681                    prompt_tokens: 10,
5682                    completion_tokens: 5,
5683                    total_tokens: 15,
5684                    cache_read_tokens: None,
5685                    cache_write_tokens: None,
5686                },
5687                stop_reason: Some("tool_use".to_string()),
5688                meta: None,
5689            },
5690            MockLlmClient::text_response("Both commands ran"),
5691            MockLlmClient::text_response("Both commands ran"),
5692        ]));
5693
5694        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5695        let agent = AgentLoop::new(
5696            mock_client.clone(),
5697            tool_executor,
5698            test_tool_context(),
5699            AgentConfig {
5700                prompt_slots: SystemPromptSlots {
5701                    style: Some(AgentStyle::GeneralPurpose),
5702                    ..Default::default()
5703                },
5704                ..Default::default()
5705            },
5706        );
5707
5708        let result = agent
5709            .execute_loop(
5710                &[],
5711                "run both commands now",
5712                AgentStyle::GeneralPurpose,
5713                None,
5714                None,
5715                &tokio_util::sync::CancellationToken::new(),
5716                true,
5717            )
5718            .await
5719            .unwrap();
5720
5721        assert_eq!(result.text, "Both commands ran");
5722        assert_eq!(result.tool_calls_count, 2);
5723        assert!(
5724            mock_client.call_count.load(Ordering::SeqCst) >= 2,
5725            "expected at least the tool-call turn and final response turn"
5726        );
5727
5728        // Messages: user → assistant(2 tools) → user(tool_result) → user(tool_result) → assistant(text)
5729        assert_eq!(result.messages[0].role, "user");
5730        assert_eq!(result.messages[1].role, "assistant");
5731        assert_eq!(result.messages[2].role, "user"); // tool result 1
5732        assert_eq!(result.messages[3].role, "user"); // tool result 2
5733        assert_eq!(result.messages[4].role, "assistant");
5734    }
5735
5736    #[tokio::test]
5737    async fn test_agent_token_usage_accumulation() {
5738        // Verify usage sums across multiple turns
5739        let mock_client = Arc::new(MockLlmClient::new(vec![
5740            MockLlmClient::tool_call_response(
5741                "t1",
5742                "bash",
5743                serde_json::json!({"command": "echo x"}),
5744            ),
5745            MockLlmClient::text_response("Done"),
5746        ]));
5747
5748        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5749        let agent = AgentLoop::new(
5750            mock_client,
5751            tool_executor,
5752            test_tool_context(),
5753            AgentConfig::default(),
5754        );
5755
5756        let result = agent.execute(&[], "test", None).await.unwrap();
5757
5758        // Each mock response has prompt=10, completion=5, total=15
5759        // 2 LLM calls → 20 prompt, 10 completion, 30 total
5760        assert_eq!(result.usage.prompt_tokens, 20);
5761        assert_eq!(result.usage.completion_tokens, 10);
5762        assert_eq!(result.usage.total_tokens, 30);
5763    }
5764
5765    #[tokio::test]
5766    async fn test_agent_system_prompt_passed() {
5767        // Verify system prompt is used (MockLlmClient captures calls)
5768        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5769            "I am a coding assistant.",
5770        )]));
5771
5772        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5773        let config = AgentConfig {
5774            prompt_slots: SystemPromptSlots {
5775                extra: Some("You are a coding assistant.".to_string()),
5776                ..Default::default()
5777            },
5778            ..Default::default()
5779        };
5780
5781        let agent = AgentLoop::new(
5782            mock_client.clone(),
5783            tool_executor,
5784            test_tool_context(),
5785            config,
5786        );
5787        let result = agent.execute(&[], "What are you?", None).await.unwrap();
5788
5789        assert_eq!(result.text, "I am a coding assistant.");
5790        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
5791    }
5792
5793    #[tokio::test]
5794    async fn test_agent_max_rounds_with_persistent_tool_calls() {
5795        // LLM keeps calling tools forever — should hit max_tool_rounds
5796        let mut responses = Vec::new();
5797        for i in 0..15 {
5798            responses.push(MockLlmClient::tool_call_response(
5799                &format!("t{}", i),
5800                "bash",
5801                serde_json::json!({"command": format!("echo round{}", i)}),
5802            ));
5803        }
5804
5805        let mock_client = Arc::new(MockLlmClient::new(responses));
5806        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5807        let config = AgentConfig {
5808            max_tool_rounds: 5,
5809            ..Default::default()
5810        };
5811
5812        let agent = AgentLoop::new(
5813            mock_client.clone(),
5814            tool_executor,
5815            test_tool_context(),
5816            config,
5817        );
5818        let result = agent.execute(&[], "Loop forever", None).await;
5819
5820        assert!(result.is_err());
5821        let err = result.unwrap_err().to_string();
5822        assert!(err.contains("Max tool rounds (5) exceeded"));
5823    }
5824
5825    #[tokio::test]
5826    async fn test_agent_end_event_contains_final_text() {
5827        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5828            "Final answer here",
5829        )]));
5830
5831        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5832        let agent = AgentLoop::new(
5833            mock_client,
5834            tool_executor,
5835            test_tool_context(),
5836            AgentConfig::default(),
5837        );
5838
5839        let (tx, rx) = mpsc::channel(100);
5840        agent.execute(&[], "test", Some(tx)).await.unwrap();
5841
5842        let events = collect_events(rx).await;
5843        let end_event = events.iter().find(|e| matches!(e, AgentEvent::End { .. }));
5844        assert!(end_event.is_some());
5845
5846        if let AgentEvent::End { text, usage, .. } = end_event.unwrap() {
5847            assert_eq!(text, "Final answer here");
5848            assert_eq!(usage.total_tokens, 15);
5849        }
5850    }
5851}
5852
5853#[cfg(test)]
5854mod extra_agent_tests {
5855    use super::*;
5856    use crate::agent::tests::MockLlmClient;
5857    use crate::queue::SessionQueueConfig;
5858    use crate::tools::ToolExecutor;
5859    use std::path::PathBuf;
5860    use std::sync::atomic::{AtomicUsize, Ordering};
5861
5862    fn test_tool_context() -> ToolContext {
5863        ToolContext::new(PathBuf::from("/tmp"))
5864    }
5865
5866    // ========================================================================
5867    // AgentConfig
5868    // ========================================================================
5869
/// The `Debug` output of `AgentConfig` must name the type and include the
/// `planning_mode` field.
#[test]
fn test_agent_config_debug() {
    let prompt_slots = SystemPromptSlots {
        extra: Some("You are helpful".to_string()),
        ..Default::default()
    };
    let config = AgentConfig {
        prompt_slots,
        tools: vec![],
        max_tool_rounds: 10,
        planning_mode: PlanningMode::Enabled,
        goal_tracking: false,
        context_providers: vec![],
        permission_checker: None,
        confirmation_manager: None,
        hook_engine: None,
        skill_registry: None,
        ..AgentConfig::default()
    };

    let rendered = format!("{:?}", config);
    assert!(rendered.contains("AgentConfig"));
    assert!(rendered.contains("planning_mode"));
}
5892
/// Checks the defaults of `AgentConfig`: round limit, planning mode, goal
/// tracking and context providers.
#[test]
fn test_agent_config_default_values() {
    let defaults = AgentConfig::default();

    assert_eq!(defaults.max_tool_rounds, MAX_TOOL_ROUNDS);
    assert_eq!(defaults.planning_mode, PlanningMode::Auto);
    assert!(!defaults.goal_tracking);
    assert!(defaults.context_providers.is_empty());
}
5901
5902    // ========================================================================
5903    // AgentEvent serialization
5904    // ========================================================================
5905
/// `AgentEvent::Start` serializes with the `agent_start` tag and its prompt.
#[test]
fn test_agent_event_serialize_start() {
    let serialized = serde_json::to_string(&AgentEvent::Start {
        prompt: String::from("Hello"),
    })
    .unwrap();

    assert!(serialized.contains("agent_start"));
    assert!(serialized.contains("Hello"));
}
5915
/// `AgentEvent::TextDelta` serializes with the `text_delta` tag.
#[test]
fn test_agent_event_serialize_text_delta() {
    let serialized = serde_json::to_string(&AgentEvent::TextDelta {
        text: String::from("chunk"),
    })
    .unwrap();

    assert!(serialized.contains("text_delta"));
}
5924
/// `AgentEvent::ToolStart` serializes with the `tool_start` tag and the tool
/// name.
#[test]
fn test_agent_event_serialize_tool_start() {
    let serialized = serde_json::to_string(&AgentEvent::ToolStart {
        id: String::from("t1"),
        name: String::from("bash"),
    })
    .unwrap();

    assert!(serialized.contains("tool_start"));
    assert!(serialized.contains("bash"));
}
5935
/// `AgentEvent::ToolEnd` (without metadata) serializes with the `tool_end`
/// tag.
#[test]
fn test_agent_event_serialize_tool_end() {
    let serialized = serde_json::to_string(&AgentEvent::ToolEnd {
        id: String::from("t1"),
        name: String::from("bash"),
        output: String::from("hello"),
        exit_code: 0,
        metadata: None,
    })
    .unwrap();

    assert!(serialized.contains("tool_end"));
}
5948
/// Metadata attached to `AgentEvent::ToolEnd` must appear in the serialized
/// JSON (checked via its `"before"` key).
#[test]
fn test_agent_event_tool_end_has_metadata_field() {
    let diff_metadata =
        serde_json::json!({ "before": "old", "after": "new", "file_path": "f.txt" });

    let serialized = serde_json::to_string(&AgentEvent::ToolEnd {
        id: String::from("t1"),
        name: String::from("write"),
        output: String::from("Wrote 5 bytes"),
        exit_code: 0,
        metadata: Some(diff_metadata),
    })
    .unwrap();

    assert!(serialized.contains("\"before\""));
}
5963
/// `AgentEvent::Error` serializes with the text `error` and its message.
#[test]
fn test_agent_event_serialize_error() {
    let serialized = serde_json::to_string(&AgentEvent::Error {
        message: String::from("oops"),
    })
    .unwrap();

    assert!(serialized.contains("error"));
    assert!(serialized.contains("oops"));
}
5973
/// `AgentEvent::ConfirmationRequired` serializes with the
/// `confirmation_required` tag.
#[test]
fn test_agent_event_serialize_confirmation_required() {
    let serialized = serde_json::to_string(&AgentEvent::ConfirmationRequired {
        tool_id: String::from("t1"),
        tool_name: String::from("bash"),
        args: serde_json::json!({"cmd": "rm"}),
        timeout_ms: 30_000,
    })
    .unwrap();

    assert!(serialized.contains("confirmation_required"));
}
5985
/// `AgentEvent::ConfirmationReceived` serializes with the
/// `confirmation_received` tag.
#[test]
fn test_agent_event_serialize_confirmation_received() {
    let serialized = serde_json::to_string(&AgentEvent::ConfirmationReceived {
        tool_id: String::from("t1"),
        approved: true,
        reason: Some(String::from("safe")),
    })
    .unwrap();

    assert!(serialized.contains("confirmation_received"));
}
5996
/// `AgentEvent::ConfirmationTimeout` serializes with the
/// `confirmation_timeout` tag.
#[test]
fn test_agent_event_serialize_confirmation_timeout() {
    let serialized = serde_json::to_string(&AgentEvent::ConfirmationTimeout {
        tool_id: String::from("t1"),
        action_taken: String::from("rejected"),
    })
    .unwrap();

    assert!(serialized.contains("confirmation_timeout"));
}
6006
/// `AgentEvent::ExternalTaskPending` serializes with the
/// `external_task_pending` tag.
#[test]
fn test_agent_event_serialize_external_task_pending() {
    let serialized = serde_json::to_string(&AgentEvent::ExternalTaskPending {
        task_id: String::from("task-1"),
        session_id: String::from("sess-1"),
        lane: crate::queue::SessionLane::Execute,
        command_type: String::from("bash"),
        payload: serde_json::json!({}),
        timeout_ms: 60_000,
    })
    .unwrap();

    assert!(serialized.contains("external_task_pending"));
}
6020
/// `AgentEvent::ExternalTaskCompleted` serializes with the
/// `external_task_completed` tag.
#[test]
fn test_agent_event_serialize_external_task_completed() {
    let serialized = serde_json::to_string(&AgentEvent::ExternalTaskCompleted {
        task_id: String::from("task-1"),
        session_id: String::from("sess-1"),
        success: false,
    })
    .unwrap();

    assert!(serialized.contains("external_task_completed"));
}
6031
/// `AgentEvent::PermissionDenied` serializes with the `permission_denied`
/// tag.
#[test]
fn test_agent_event_serialize_permission_denied() {
    let serialized = serde_json::to_string(&AgentEvent::PermissionDenied {
        tool_id: String::from("t1"),
        tool_name: String::from("bash"),
        args: serde_json::json!({}),
        reason: String::from("denied"),
    })
    .unwrap();

    assert!(serialized.contains("permission_denied"));
}
6043
/// `AgentEvent::ContextCompacted` serializes with the `context_compacted`
/// tag.
#[test]
fn test_agent_event_serialize_context_compacted() {
    let serialized = serde_json::to_string(&AgentEvent::ContextCompacted {
        session_id: String::from("sess-1"),
        before_messages: 100,
        after_messages: 20,
        percent_before: 0.85,
    })
    .unwrap();

    assert!(serialized.contains("context_compacted"));
}
6055
/// `AgentEvent::TurnStart` serializes with the `turn_start` tag.
#[test]
fn test_agent_event_serialize_turn_start() {
    let serialized = serde_json::to_string(&AgentEvent::TurnStart { turn: 3 }).unwrap();

    assert!(serialized.contains("turn_start"));
}
6062
/// `AgentEvent::TurnEnd` serializes with the `turn_end` tag.
#[test]
fn test_agent_event_serialize_turn_end() {
    let turn_end = AgentEvent::TurnEnd {
        turn: 3,
        usage: TokenUsage::default(),
    };
    let serialized = serde_json::to_string(&turn_end).unwrap();

    assert!(serialized.contains("turn_end"));
}
6072
6073    #[test]
6074    fn test_agent_event_serialize_end() {
6075        let event = AgentEvent::End {
6076            text: "Done".to_string(),
6077            usage: TokenUsage {
6078                prompt_tokens: 100,
6079                completion_tokens: 50,
6080                total_tokens: 150,
6081                cache_read_tokens: None,
6082                cache_write_tokens: None,
6083            },
6084            verification_summary: Box::new(crate::verification::VerificationSummary::from_reports(
6085                &[],
6086            )),
6087            meta: None,
6088        };
6089        let json = serde_json::to_string(&event).unwrap();
6090        assert!(json.contains("agent_end"));
6091        assert!(json.contains("verification_summary"));
6092    }
6093
6094    // ========================================================================
6095    // AgentResult
6096    // ========================================================================
6097
6098    #[test]
6099    fn test_agent_result_fields() {
6100        let result = AgentResult {
6101            text: "output".to_string(),
6102            messages: vec![Message::user("hello")],
6103            usage: TokenUsage::default(),
6104            tool_calls_count: 3,
6105            verification_reports: Vec::new(),
6106        };
6107        assert_eq!(result.text, "output");
6108        assert_eq!(result.messages.len(), 1);
6109        assert_eq!(result.tool_calls_count, 3);
6110        assert!(result.verification_reports.is_empty());
6111        assert_eq!(
6112            result.verification_summary().status,
6113            crate::verification::VerificationStatus::Skipped
6114        );
6115        assert!(!result.has_pending_verification());
6116    }
6117
6118    #[test]
6119    fn test_collect_verification_report_from_tool_metadata() {
6120        let report = crate::verification::VerificationReport::new(
6121            "program:example",
6122            vec![crate::verification::VerificationCheck::required(
6123                "check:inspect",
6124                "inspect_artifacts",
6125                "Inspect artifacts",
6126            )],
6127        );
6128        let metadata = Some(serde_json::json!({
6129            "verification_report": report.to_value()
6130        }));
6131        let mut reports = Vec::new();
6132
6133        AgentLoop::collect_verification_report(&mut reports, &metadata);
6134
6135        assert_eq!(reports.len(), 1);
6136        assert_eq!(reports[0].subject, "program:example");
6137        assert_eq!(
6138            reports[0].status,
6139            crate::verification::VerificationStatus::NeedsReview
6140        );
6141    }
6142
6143    #[test]
6144    fn test_agent_result_verification_summary() {
6145        let report = crate::verification::VerificationReport::new(
6146            "program:example",
6147            vec![crate::verification::VerificationCheck::required(
6148                "check:inspect",
6149                "inspect_artifacts",
6150                "Inspect artifacts",
6151            )],
6152        );
6153        let result = AgentResult {
6154            text: "output".to_string(),
6155            messages: Vec::new(),
6156            usage: TokenUsage::default(),
6157            tool_calls_count: 1,
6158            verification_reports: vec![report],
6159        };
6160
6161        let summary = result.verification_summary();
6162
6163        assert_eq!(
6164            summary.status,
6165            crate::verification::VerificationStatus::NeedsReview
6166        );
6167        assert_eq!(summary.pending_required_check_count, 1);
6168        assert!(result
6169            .verification_summary_text()
6170            .contains("Verification needs review"));
6171        assert!(result.has_pending_verification());
6172    }
6173
6174    // ========================================================================
6175    // Missing AgentEvent serialization tests
6176    // ========================================================================
6177
6178    #[test]
6179    fn test_agent_event_serialize_context_resolving() {
6180        let event = AgentEvent::ContextResolving {
6181            providers: vec!["provider1".to_string(), "provider2".to_string()],
6182        };
6183        let json = serde_json::to_string(&event).unwrap();
6184        assert!(json.contains("context_resolving"));
6185        assert!(json.contains("provider1"));
6186    }
6187
6188    #[test]
6189    fn test_agent_event_serialize_context_resolved() {
6190        let event = AgentEvent::ContextResolved {
6191            total_items: 5,
6192            total_tokens: 1000,
6193        };
6194        let json = serde_json::to_string(&event).unwrap();
6195        assert!(json.contains("context_resolved"));
6196        assert!(json.contains("1000"));
6197    }
6198
6199    #[test]
6200    fn test_agent_event_serialize_command_dead_lettered() {
6201        let event = AgentEvent::CommandDeadLettered {
6202            command_id: "cmd-1".to_string(),
6203            command_type: "bash".to_string(),
6204            lane: "execute".to_string(),
6205            error: "timeout".to_string(),
6206            attempts: 3,
6207        };
6208        let json = serde_json::to_string(&event).unwrap();
6209        assert!(json.contains("command_dead_lettered"));
6210        assert!(json.contains("cmd-1"));
6211    }
6212
6213    #[test]
6214    fn test_agent_event_serialize_command_retry() {
6215        let event = AgentEvent::CommandRetry {
6216            command_id: "cmd-2".to_string(),
6217            command_type: "read".to_string(),
6218            lane: "query".to_string(),
6219            attempt: 2,
6220            delay_ms: 1000,
6221        };
6222        let json = serde_json::to_string(&event).unwrap();
6223        assert!(json.contains("command_retry"));
6224        assert!(json.contains("cmd-2"));
6225    }
6226
6227    #[test]
6228    fn test_agent_event_serialize_queue_alert() {
6229        let event = AgentEvent::QueueAlert {
6230            level: "warning".to_string(),
6231            alert_type: "depth".to_string(),
6232            message: "Queue depth exceeded".to_string(),
6233        };
6234        let json = serde_json::to_string(&event).unwrap();
6235        assert!(json.contains("queue_alert"));
6236        assert!(json.contains("warning"));
6237    }
6238
6239    #[test]
6240    fn test_agent_event_serialize_task_updated() {
6241        let event = AgentEvent::TaskUpdated {
6242            session_id: "sess-1".to_string(),
6243            tasks: vec![],
6244        };
6245        let json = serde_json::to_string(&event).unwrap();
6246        assert!(json.contains("task_updated"));
6247        assert!(json.contains("sess-1"));
6248    }
6249
6250    #[test]
6251    fn test_agent_event_serialize_memory_stored() {
6252        let event = AgentEvent::MemoryStored {
6253            memory_id: "mem-1".to_string(),
6254            memory_type: "conversation".to_string(),
6255            importance: 0.8,
6256            tags: vec!["important".to_string()],
6257        };
6258        let json = serde_json::to_string(&event).unwrap();
6259        assert!(json.contains("memory_stored"));
6260        assert!(json.contains("mem-1"));
6261    }
6262
6263    #[test]
6264    fn test_agent_event_serialize_memory_recalled() {
6265        let event = AgentEvent::MemoryRecalled {
6266            memory_id: "mem-2".to_string(),
6267            content: "Previous conversation".to_string(),
6268            relevance: 0.9,
6269        };
6270        let json = serde_json::to_string(&event).unwrap();
6271        assert!(json.contains("memory_recalled"));
6272        assert!(json.contains("mem-2"));
6273    }
6274
6275    #[test]
6276    fn test_agent_event_serialize_memories_searched() {
6277        let event = AgentEvent::MemoriesSearched {
6278            query: Some("search term".to_string()),
6279            tags: vec!["tag1".to_string()],
6280            result_count: 5,
6281        };
6282        let json = serde_json::to_string(&event).unwrap();
6283        assert!(json.contains("memories_searched"));
6284        assert!(json.contains("search term"));
6285    }
6286
6287    #[test]
6288    fn test_agent_event_serialize_memory_cleared() {
6289        let event = AgentEvent::MemoryCleared {
6290            tier: "short_term".to_string(),
6291            count: 10,
6292        };
6293        let json = serde_json::to_string(&event).unwrap();
6294        assert!(json.contains("memory_cleared"));
6295        assert!(json.contains("short_term"));
6296    }
6297
6298    #[test]
6299    fn test_agent_event_serialize_subagent_start() {
6300        let event = AgentEvent::SubagentStart {
6301            task_id: "task-1".to_string(),
6302            session_id: "child-sess".to_string(),
6303            parent_session_id: "parent-sess".to_string(),
6304            agent: "explore".to_string(),
6305            description: "Explore codebase".to_string(),
6306        };
6307        let json = serde_json::to_string(&event).unwrap();
6308        assert!(json.contains("subagent_start"));
6309        assert!(json.contains("explore"));
6310    }
6311
6312    #[test]
6313    fn test_agent_event_serialize_subagent_progress() {
6314        let event = AgentEvent::SubagentProgress {
6315            task_id: "task-1".to_string(),
6316            session_id: "child-sess".to_string(),
6317            status: "processing".to_string(),
6318            metadata: serde_json::json!({"progress": 50}),
6319        };
6320        let json = serde_json::to_string(&event).unwrap();
6321        assert!(json.contains("subagent_progress"));
6322        assert!(json.contains("processing"));
6323    }
6324
6325    #[test]
6326    fn test_agent_event_serialize_subagent_end() {
6327        let event = AgentEvent::SubagentEnd {
6328            task_id: "task-1".to_string(),
6329            session_id: "child-sess".to_string(),
6330            agent: "explore".to_string(),
6331            output: "Found 10 files".to_string(),
6332            success: true,
6333        };
6334        let json = serde_json::to_string(&event).unwrap();
6335        assert!(json.contains("subagent_end"));
6336        assert!(json.contains("Found 10 files"));
6337    }
6338
6339    #[test]
6340    fn test_agent_event_serialize_planning_start() {
6341        let event = AgentEvent::PlanningStart {
6342            prompt: "Build a web app".to_string(),
6343        };
6344        let json = serde_json::to_string(&event).unwrap();
6345        assert!(json.contains("planning_start"));
6346        assert!(json.contains("Build a web app"));
6347    }
6348
6349    #[test]
6350    fn test_agent_event_serialize_planning_end() {
6351        use crate::planning::{Complexity, ExecutionPlan};
6352        let plan = ExecutionPlan::new("Test goal".to_string(), Complexity::Simple);
6353        let event = AgentEvent::PlanningEnd {
6354            plan,
6355            estimated_steps: 3,
6356        };
6357        let json = serde_json::to_string(&event).unwrap();
6358        assert!(json.contains("planning_end"));
6359        assert!(json.contains("estimated_steps"));
6360    }
6361
6362    #[test]
6363    fn test_agent_event_serialize_step_start() {
6364        let event = AgentEvent::StepStart {
6365            step_id: "step-1".to_string(),
6366            description: "Initialize project".to_string(),
6367            step_number: 1,
6368            total_steps: 5,
6369        };
6370        let json = serde_json::to_string(&event).unwrap();
6371        assert!(json.contains("step_start"));
6372        assert!(json.contains("Initialize project"));
6373    }
6374
6375    #[test]
6376    fn test_agent_event_serialize_step_end() {
6377        let event = AgentEvent::StepEnd {
6378            step_id: "step-1".to_string(),
6379            status: TaskStatus::Completed,
6380            step_number: 1,
6381            total_steps: 5,
6382        };
6383        let json = serde_json::to_string(&event).unwrap();
6384        assert!(json.contains("step_end"));
6385        assert!(json.contains("step-1"));
6386    }
6387
6388    #[test]
6389    fn test_agent_event_serialize_goal_extracted() {
6390        use crate::planning::AgentGoal;
6391        let goal = AgentGoal::new("Complete the task".to_string());
6392        let event = AgentEvent::GoalExtracted { goal };
6393        let json = serde_json::to_string(&event).unwrap();
6394        assert!(json.contains("goal_extracted"));
6395    }
6396
6397    #[test]
6398    fn test_agent_event_serialize_goal_progress() {
6399        let event = AgentEvent::GoalProgress {
6400            goal: "Build app".to_string(),
6401            progress: 0.5,
6402            completed_steps: 2,
6403            total_steps: 4,
6404        };
6405        let json = serde_json::to_string(&event).unwrap();
6406        assert!(json.contains("goal_progress"));
6407        assert!(json.contains("0.5"));
6408    }
6409
6410    #[test]
6411    fn test_agent_event_serialize_goal_achieved() {
6412        let event = AgentEvent::GoalAchieved {
6413            goal: "Build app".to_string(),
6414            total_steps: 4,
6415            duration_ms: 5000,
6416        };
6417        let json = serde_json::to_string(&event).unwrap();
6418        assert!(json.contains("goal_achieved"));
6419        assert!(json.contains("5000"));
6420    }
6421
6422    #[tokio::test]
6423    async fn test_extract_goal_with_json_response() {
6424        // LlmPlanner expects JSON with "description" and "success_criteria" fields
6425        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6426            r#"{"description": "Build web app", "success_criteria": ["App runs on port 3000", "Has login page"]}"#,
6427        )]));
6428        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6429        let agent = AgentLoop::new(
6430            mock_client,
6431            tool_executor,
6432            test_tool_context(),
6433            AgentConfig::default(),
6434        );
6435
6436        let goal = agent.extract_goal("Build a web app").await.unwrap();
6437        assert_eq!(goal.description, "Build web app");
6438        assert_eq!(goal.success_criteria.len(), 2);
6439        assert_eq!(goal.success_criteria[0], "App runs on port 3000");
6440    }
6441
6442    #[tokio::test]
6443    async fn test_extract_goal_fallback_on_non_json() {
6444        // Non-JSON response triggers fallback: returns the original prompt as goal
6445        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6446            "Some non-JSON response",
6447        )]));
6448        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6449        let agent = AgentLoop::new(
6450            mock_client,
6451            tool_executor,
6452            test_tool_context(),
6453            AgentConfig::default(),
6454        );
6455
6456        let goal = agent.extract_goal("Do something").await.unwrap();
6457        // Fallback uses the original prompt as description
6458        assert_eq!(goal.description, "Do something");
6459        // Fallback adds 2 generic criteria
6460        assert_eq!(goal.success_criteria.len(), 2);
6461    }
6462
6463    #[tokio::test]
6464    async fn test_check_goal_achievement_json_yes() {
6465        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6466            r#"{"achieved": true, "progress": 1.0, "remaining_criteria": []}"#,
6467        )]));
6468        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6469        let agent = AgentLoop::new(
6470            mock_client,
6471            tool_executor,
6472            test_tool_context(),
6473            AgentConfig::default(),
6474        );
6475
6476        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
6477        let achieved = agent
6478            .check_goal_achievement(&goal, "All done")
6479            .await
6480            .unwrap();
6481        assert!(achieved);
6482    }
6483
6484    #[tokio::test]
6485    async fn test_check_goal_achievement_fallback_not_done() {
6486        // Non-JSON response triggers heuristic fallback
6487        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6488            "invalid json",
6489        )]));
6490        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6491        let agent = AgentLoop::new(
6492            mock_client,
6493            tool_executor,
6494            test_tool_context(),
6495            AgentConfig::default(),
6496        );
6497
6498        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
6499        // "still working" doesn't contain "complete"/"done"/"finished"
6500        let achieved = agent
6501            .check_goal_achievement(&goal, "still working")
6502            .await
6503            .unwrap();
6504        assert!(!achieved);
6505    }
6506
6507    // ========================================================================
6508    // build_augmented_system_prompt Tests
6509    // ========================================================================
6510
6511    #[test]
6512    fn test_build_augmented_system_prompt_empty_context() {
6513        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6514        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6515        let config = AgentConfig {
6516            prompt_slots: SystemPromptSlots {
6517                extra: Some("Base prompt".to_string()),
6518                ..Default::default()
6519            },
6520            ..Default::default()
6521        };
6522        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6523
6524        let result = agent.build_augmented_system_prompt(&[]);
6525        assert!(result.unwrap().contains("Base prompt"));
6526    }
6527
6528    #[test]
6529    fn test_build_augmented_system_prompt_no_custom_slots() {
6530        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6531        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6532        let agent = AgentLoop::new(
6533            mock_client,
6534            tool_executor,
6535            test_tool_context(),
6536            AgentConfig::default(),
6537        );
6538
6539        let result = agent.build_augmented_system_prompt(&[]);
6540        // Default slots still produce the default agentic prompt
6541        assert!(result.is_some());
6542        assert!(result.unwrap().contains("Core Behaviour"));
6543    }
6544
6545    #[test]
6546    fn test_project_hint_is_assembled_as_context_item() {
6547        let temp_dir = tempfile::tempdir().unwrap();
6548        std::fs::write(
6549            temp_dir.path().join("Cargo.toml"),
6550            "[package]\nname = \"demo\"\n",
6551        )
6552        .unwrap();
6553
6554        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6555        let tool_executor = Arc::new(ToolExecutor::new(temp_dir.path().display().to_string()));
6556        let agent = AgentLoop::new(
6557            mock_client,
6558            tool_executor,
6559            ToolContext::new(temp_dir.path().to_path_buf()),
6560            AgentConfig::default(),
6561        );
6562
6563        let assembly = agent.assemble_context_results(&[]);
6564        assert_eq!(assembly.items.len(), 1);
6565        assert_eq!(
6566            assembly.items[0].source.as_deref(),
6567            Some("a3s://project-hint")
6568        );
6569        assert!(assembly.items[0].content.contains("Rust"));
6570
6571        let text = agent.build_augmented_system_prompt(&[]).unwrap();
6572        assert!(text.contains("<context source=\"a3s://project-hint\" type=\"Resource\">"));
6573    }
6574
6575    #[test]
6576    fn test_build_augmented_system_prompt_with_context_no_base() {
6577        use crate::context::{ContextItem, ContextResult, ContextType};
6578
6579        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6580        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6581        let agent = AgentLoop::new(
6582            mock_client,
6583            tool_executor,
6584            test_tool_context(),
6585            AgentConfig::default(),
6586        );
6587
6588        let context = vec![ContextResult {
6589            provider: "test".to_string(),
6590            items: vec![ContextItem::new("id1", ContextType::Resource, "Content")],
6591            total_tokens: 10,
6592            truncated: false,
6593        }];
6594
6595        let result = agent.build_augmented_system_prompt(&context);
6596        assert!(result.is_some());
6597        let text = result.unwrap();
6598        assert!(text.contains("<context"));
6599        assert!(text.contains("Content"));
6600    }
6601
6602    // ========================================================================
6603    // AgentResult Clone and Debug
6604    // ========================================================================
6605
6606    #[test]
6607    fn test_agent_result_clone() {
6608        let result = AgentResult {
6609            text: "output".to_string(),
6610            messages: vec![Message::user("hello")],
6611            usage: TokenUsage::default(),
6612            tool_calls_count: 3,
6613            verification_reports: Vec::new(),
6614        };
6615        let cloned = result.clone();
6616        assert_eq!(cloned.text, result.text);
6617        assert_eq!(cloned.tool_calls_count, result.tool_calls_count);
6618    }
6619
6620    #[test]
6621    fn test_agent_result_debug() {
6622        let result = AgentResult {
6623            text: "output".to_string(),
6624            messages: vec![Message::user("hello")],
6625            usage: TokenUsage::default(),
6626            tool_calls_count: 3,
6627            verification_reports: Vec::new(),
6628        };
6629        let debug = format!("{:?}", result);
6630        assert!(debug.contains("AgentResult"));
6631        assert!(debug.contains("output"));
6632    }
6633
6634    // ========================================================================
6635    // handle_post_execution_metadata Tests
6636    // ========================================================================
6637
6638    // ========================================================================
6639    // ToolCommand adapter tests
6640    // ========================================================================
6641
6642    #[tokio::test]
6643    async fn test_tool_command_command_type() {
6644        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6645        let cmd = ToolCommand {
6646            tool_executor: executor,
6647            tool_name: "read".to_string(),
6648            tool_args: serde_json::json!({"file": "test.rs"}),
6649            skill_registry: None,
6650            tool_context: test_tool_context(),
6651        };
6652        assert_eq!(cmd.command_type(), "read");
6653    }
6654
6655    #[tokio::test]
6656    async fn test_tool_command_payload() {
6657        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6658        let args = serde_json::json!({"file": "test.rs", "offset": 10});
6659        let cmd = ToolCommand {
6660            tool_executor: executor,
6661            tool_name: "read".to_string(),
6662            tool_args: args.clone(),
6663            skill_registry: None,
6664            tool_context: test_tool_context(),
6665        };
6666        assert_eq!(cmd.payload(), args);
6667    }
6668
6669    // ========================================================================
6670    // AgentLoop with queue builder tests
6671    // ========================================================================
6672
6673    #[tokio::test(flavor = "multi_thread")]
6674    async fn test_agent_loop_with_queue() {
6675        use tokio::sync::broadcast;
6676
6677        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6678            "Hello",
6679        )]));
6680        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6681        let config = AgentConfig::default();
6682
6683        let (event_tx, _) = broadcast::channel(100);
6684        let queue = SessionLaneQueue::new("test-session", SessionQueueConfig::default(), event_tx)
6685            .await
6686            .unwrap();
6687
6688        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config)
6689            .with_queue(Arc::new(queue));
6690
6691        assert!(agent.command_queue.is_some());
6692    }
6693
6694    #[tokio::test]
6695    async fn test_agent_loop_without_queue() {
6696        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6697            "Hello",
6698        )]));
6699        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6700        let config = AgentConfig::default();
6701
6702        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6703
6704        assert!(agent.command_queue.is_none());
6705    }
6706
6707    // ========================================================================
6708    // Parallel Plan Execution Tests
6709    // ========================================================================
6710
6711    #[tokio::test]
6712    async fn test_execute_plan_parallel_independent() {
6713        use crate::planning::{Complexity, ExecutionPlan, Task};
6714
6715        // 3 independent steps (no dependencies) — should all execute.
6716        // MockLlmClient needs one response per execute_loop call per step.
6717        let mock_client = Arc::new(MockLlmClient::new(vec![
6718            MockLlmClient::text_response("Step 1 done"),
6719            MockLlmClient::text_response("Step 2 done"),
6720            MockLlmClient::text_response("Step 3 done"),
6721        ]));
6722
6723        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6724        let config = AgentConfig::default();
6725        let agent = AgentLoop::new(
6726            mock_client.clone(),
6727            tool_executor,
6728            test_tool_context(),
6729            config,
6730        );
6731
6732        let mut plan = ExecutionPlan::new("Test parallel", Complexity::Simple);
6733        plan.add_step(Task::new("s1", "First step"));
6734        plan.add_step(Task::new("s2", "Second step"));
6735        plan.add_step(Task::new("s3", "Third step"));
6736
6737        let (tx, mut rx) = mpsc::channel(100);
6738        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
6739
6740        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
6741        assert_eq!(result.usage.total_tokens, 45);
6742
6743        // Verify we received StepStart and StepEnd events for all 3 steps
6744        let mut step_starts = Vec::new();
6745        let mut step_ends = Vec::new();
6746        rx.close();
6747        while let Some(event) = rx.recv().await {
6748            match event {
6749                AgentEvent::StepStart { step_id, .. } => step_starts.push(step_id),
6750                AgentEvent::StepEnd {
6751                    step_id, status, ..
6752                } => {
6753                    assert_eq!(status, TaskStatus::Completed);
6754                    step_ends.push(step_id);
6755                }
6756                _ => {}
6757            }
6758        }
6759        assert_eq!(step_starts.len(), 3);
6760        assert_eq!(step_ends.len(), 3);
6761    }
6762
6763    #[tokio::test]
6764    async fn test_execute_plan_respects_dependencies() {
6765        use crate::planning::{Complexity, ExecutionPlan, Task};
6766
6767        // s1 and s2 are independent (wave 1), s3 depends on both (wave 2).
6768        // This requires 3 responses total.
6769        let mock_client = Arc::new(MockLlmClient::new(vec![
6770            MockLlmClient::text_response("Step 1 done"),
6771            MockLlmClient::text_response("Step 2 done"),
6772            MockLlmClient::text_response("Step 3 done"),
6773        ]));
6774
6775        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6776        let config = AgentConfig::default();
6777        let agent = AgentLoop::new(
6778            mock_client.clone(),
6779            tool_executor,
6780            test_tool_context(),
6781            config,
6782        );
6783
6784        let mut plan = ExecutionPlan::new("Test deps", Complexity::Medium);
6785        plan.add_step(Task::new("s1", "Independent A"));
6786        plan.add_step(Task::new("s2", "Independent B"));
6787        plan.add_step(
6788            Task::new("s3", "Depends on A+B")
6789                .with_dependencies(vec!["s1".to_string(), "s2".to_string()]),
6790        );
6791
6792        let (tx, mut rx) = mpsc::channel(100);
6793        let result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
6794
6795        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
6796        assert_eq!(result.usage.total_tokens, 45);
6797
6798        // Verify ordering: s3's StepStart must come after s1 and s2's StepEnd
6799        let mut events = Vec::new();
6800        rx.close();
6801        while let Some(event) = rx.recv().await {
6802            match &event {
6803                AgentEvent::StepStart { step_id, .. } => {
6804                    events.push(format!("start:{}", step_id));
6805                }
6806                AgentEvent::StepEnd { step_id, .. } => {
6807                    events.push(format!("end:{}", step_id));
6808                }
6809                _ => {}
6810            }
6811        }
6812
6813        // s3 start must occur after both s1 end and s2 end
6814        let s1_end = events.iter().position(|e| e == "end:s1").unwrap();
6815        let s2_end = events.iter().position(|e| e == "end:s2").unwrap();
6816        let s3_start = events.iter().position(|e| e == "start:s3").unwrap();
6817        assert!(
6818            s3_start > s1_end,
6819            "s3 started before s1 ended: {:?}",
6820            events
6821        );
6822        assert!(
6823            s3_start > s2_end,
6824            "s3 started before s2 ended: {:?}",
6825            events
6826        );
6827
6828        // Final result should reflect step 3 (last sequential step)
6829        assert!(result.text.contains("Step 3 done") || !result.text.is_empty());
6830    }
6831
6832    #[tokio::test]
6833    async fn test_execute_plan_handles_step_failure() {
6834        use crate::planning::{Complexity, ExecutionPlan, Task};
6835
6836        // s1 succeeds, s2 depends on s1 (succeeds), s3 depends on nothing (succeeds),
6837        // s4 depends on a step that will fail (s_fail).
6838        // We simulate failure by providing no responses for s_fail's execute_loop.
6839        //
6840        // Simpler approach: s1 succeeds, s2 depends on s1 (will fail because no
6841        // mock response left), s3 is independent.
6842        // Layout: s1 (independent), s3 (independent) → wave 1 parallel
6843        //         s2 depends on s1 → wave 2
6844        //         s4 depends on s2 → wave 3 (should deadlock since s2 fails)
6845        let mock_client = Arc::new(MockLlmClient::new(vec![
6846            // Wave 1: s1 and s3 execute in parallel
6847            MockLlmClient::text_response("s1 done"),
6848            MockLlmClient::text_response("s3 done"),
6849            // Wave 2: s2 executes — but we give it no response, causing failure
6850            // Actually the MockLlmClient will fail with "No more mock responses"
6851        ]));
6852
6853        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6854        let config = AgentConfig::default();
6855        let agent = AgentLoop::new(
6856            mock_client.clone(),
6857            tool_executor,
6858            test_tool_context(),
6859            config,
6860        );
6861
6862        let mut plan = ExecutionPlan::new("Test failure", Complexity::Medium);
6863        plan.add_step(Task::new("s1", "Independent step"));
6864        plan.add_step(Task::new("s2", "Depends on s1").with_dependencies(vec!["s1".to_string()]));
6865        plan.add_step(Task::new("s3", "Another independent"));
6866        plan.add_step(Task::new("s4", "Depends on s2").with_dependencies(vec!["s2".to_string()]));
6867
6868        let (tx, mut rx) = mpsc::channel(100);
6869        let _result = agent.execute_plan(&[], &plan, Some(tx)).await.unwrap();
6870
6871        // s1 and s3 should succeed (wave 1), s2 should fail (wave 2),
6872        // s4 should never execute (deadlock — dep s2 failed, not completed)
6873        let mut completed_steps = Vec::new();
6874        let mut failed_steps = Vec::new();
6875        rx.close();
6876        while let Some(event) = rx.recv().await {
6877            if let AgentEvent::StepEnd {
6878                step_id, status, ..
6879            } = event
6880            {
6881                match status {
6882                    TaskStatus::Completed => completed_steps.push(step_id),
6883                    TaskStatus::Failed => failed_steps.push(step_id),
6884                    _ => {}
6885                }
6886            }
6887        }
6888
6889        assert!(
6890            completed_steps.contains(&"s1".to_string()),
6891            "s1 should complete"
6892        );
6893        assert!(
6894            completed_steps.contains(&"s3".to_string()),
6895            "s3 should complete"
6896        );
6897        assert!(failed_steps.contains(&"s2".to_string()), "s2 should fail");
6898        // s4 should NOT appear in either list — it was never started
6899        assert!(
6900            !completed_steps.contains(&"s4".to_string()),
6901            "s4 should not complete"
6902        );
6903        assert!(
6904            !failed_steps.contains(&"s4".to_string()),
6905            "s4 should not fail (never started)"
6906        );
6907    }
6908
6909    // ========================================================================
6910    // Phase 4: Error Recovery & Resilience Tests
6911    // ========================================================================
6912
6913    #[test]
6914    fn test_agent_config_resilience_defaults() {
6915        let config = AgentConfig::default();
6916        assert_eq!(config.max_parse_retries, 2);
6917        assert_eq!(config.tool_timeout_ms, None);
6918        assert_eq!(config.circuit_breaker_threshold, 3);
6919    }
6920
6921    /// 4.1 — Parse error recovery: bails after max_parse_retries exceeded
6922    #[tokio::test]
6923    async fn test_parse_error_recovery_bails_after_threshold() {
6924        // 3 parse errors with max_parse_retries=2: count reaches 3 > 2 → bail
6925        let mock_client = Arc::new(MockLlmClient::new(vec![
6926            MockLlmClient::tool_call_response(
6927                "c1",
6928                "bash",
6929                serde_json::json!({"__parse_error": "unexpected token at position 5"}),
6930            ),
6931            MockLlmClient::tool_call_response(
6932                "c2",
6933                "bash",
6934                serde_json::json!({"__parse_error": "missing closing brace"}),
6935            ),
6936            MockLlmClient::tool_call_response(
6937                "c3",
6938                "bash",
6939                serde_json::json!({"__parse_error": "still broken"}),
6940            ),
6941            MockLlmClient::text_response("Done"), // never reached
6942        ]));
6943
6944        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6945        let config = AgentConfig {
6946            max_parse_retries: 2,
6947            ..AgentConfig::default()
6948        };
6949        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6950        let result = agent.execute(&[], "Do something", None).await;
6951        assert!(result.is_err(), "should bail after parse error threshold");
6952        let err = result.unwrap_err().to_string();
6953        assert!(
6954            err.contains("malformed tool arguments"),
6955            "error should mention malformed tool arguments, got: {}",
6956            err
6957        );
6958    }
6959
6960    /// 4.1 — Parse error recovery: counter resets after a valid tool execution
6961    #[tokio::test]
6962    async fn test_parse_error_counter_resets_on_success() {
6963        // 2 parse errors (= max_parse_retries, not yet exceeded)
6964        // Then a valid tool call (resets counter)
6965        // Then final text — should NOT bail
6966        let mock_client = Arc::new(MockLlmClient::new(vec![
6967            MockLlmClient::tool_call_response(
6968                "c1",
6969                "bash",
6970                serde_json::json!({"__parse_error": "bad args"}),
6971            ),
6972            MockLlmClient::tool_call_response(
6973                "c2",
6974                "bash",
6975                serde_json::json!({"__parse_error": "bad args again"}),
6976            ),
6977            // Valid call — resets parse_error_count to 0
6978            MockLlmClient::tool_call_response(
6979                "c3",
6980                "bash",
6981                serde_json::json!({"command": "echo ok"}),
6982            ),
6983            MockLlmClient::text_response("All done"),
6984        ]));
6985
6986        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6987        let config = AgentConfig {
6988            max_parse_retries: 2,
6989            ..AgentConfig::default()
6990        };
6991        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6992        let result = agent.execute(&[], "Do something", None).await;
6993        assert!(
6994            result.is_ok(),
6995            "should not bail — counter reset after successful tool, got: {:?}",
6996            result.err()
6997        );
6998        assert_eq!(result.unwrap().text, "All done");
6999    }
7000
7001    /// 4.2 — Tool timeout: slow tool produces a timeout error result; session continues
7002    #[tokio::test]
7003    async fn test_tool_timeout_produces_error_result() {
7004        let mock_client = Arc::new(MockLlmClient::new(vec![
7005            MockLlmClient::tool_call_response(
7006                "t1",
7007                "bash",
7008                serde_json::json!({"command": "sleep 10"}),
7009            ),
7010            MockLlmClient::text_response("The command timed out."),
7011        ]));
7012
7013        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7014        let config = AgentConfig {
7015            // 50ms — sleep 10 will never finish
7016            tool_timeout_ms: Some(50),
7017            ..AgentConfig::default()
7018        };
7019        let agent = AgentLoop::new(
7020            mock_client.clone(),
7021            tool_executor,
7022            test_tool_context(),
7023            config,
7024        );
7025        let result = agent.execute(&[], "Run sleep", None).await;
7026        assert!(
7027            result.is_ok(),
7028            "session should continue after tool timeout: {:?}",
7029            result.err()
7030        );
7031        assert_eq!(result.unwrap().text, "The command timed out.");
7032        // LLM called twice: initial request + response after timeout error
7033        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
7034    }
7035
7036    /// 4.2 — Tool timeout: tool that finishes before the deadline succeeds normally
7037    #[tokio::test]
7038    async fn test_tool_within_timeout_succeeds() {
7039        let mock_client = Arc::new(MockLlmClient::new(vec![
7040            MockLlmClient::tool_call_response(
7041                "t1",
7042                "bash",
7043                serde_json::json!({"command": "echo fast"}),
7044            ),
7045            MockLlmClient::text_response("Command succeeded."),
7046        ]));
7047
7048        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7049        let config = AgentConfig {
7050            tool_timeout_ms: Some(5_000), // 5 s — echo completes in <100ms
7051            ..AgentConfig::default()
7052        };
7053        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
7054        let result = agent.execute(&[], "Run something fast", None).await;
7055        assert!(
7056            result.is_ok(),
7057            "fast tool should succeed: {:?}",
7058            result.err()
7059        );
7060        assert_eq!(result.unwrap().text, "Command succeeded.");
7061    }
7062
7063    /// 4.3 — Circuit breaker: retries non-streaming LLM failures up to threshold
7064    #[tokio::test]
7065    async fn test_circuit_breaker_retries_non_streaming() {
7066        // Empty response list → every call bails with "No more mock responses"
7067        // threshold=2 → tries twice, then bails with circuit-breaker message
7068        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7069
7070        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7071        let config = AgentConfig {
7072            circuit_breaker_threshold: 2,
7073            ..AgentConfig::default()
7074        };
7075        let agent = AgentLoop::new(
7076            mock_client.clone(),
7077            tool_executor,
7078            test_tool_context(),
7079            config,
7080        );
7081        let result = agent.execute(&[], "Hello", None).await;
7082        assert!(result.is_err(), "should fail when LLM always errors");
7083        let err = result.unwrap_err().to_string();
7084        assert!(
7085            err.contains("circuit breaker"),
7086            "error should mention circuit breaker, got: {}",
7087            err
7088        );
7089        assert_eq!(
7090            mock_client.call_count.load(Ordering::SeqCst),
7091            2,
7092            "should make exactly threshold=2 LLM calls"
7093        );
7094    }
7095
7096    /// 4.3 — Circuit breaker: threshold=1 bails on the very first failure
7097    #[tokio::test]
7098    async fn test_circuit_breaker_threshold_one_no_retry() {
7099        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7100
7101        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7102        let config = AgentConfig {
7103            circuit_breaker_threshold: 1,
7104            ..AgentConfig::default()
7105        };
7106        let agent = AgentLoop::new(
7107            mock_client.clone(),
7108            tool_executor,
7109            test_tool_context(),
7110            config,
7111        );
7112        let result = agent.execute(&[], "Hello", None).await;
7113        assert!(result.is_err());
7114        assert_eq!(
7115            mock_client.call_count.load(Ordering::SeqCst),
7116            1,
7117            "with threshold=1 exactly one attempt should be made"
7118        );
7119    }
7120
7121    /// 4.3 — Circuit breaker: succeeds when LLM recovers before hitting threshold
7122    #[tokio::test]
7123    async fn test_circuit_breaker_succeeds_if_llm_recovers() {
7124        // First call fails, second call succeeds; threshold=3 — recovery within threshold
7125        struct FailOnceThenSucceed {
7126            inner: MockLlmClient,
7127            failed_once: std::sync::atomic::AtomicBool,
7128            call_count: AtomicUsize,
7129        }
7130
7131        #[async_trait::async_trait]
7132        impl LlmClient for FailOnceThenSucceed {
7133            async fn complete(
7134                &self,
7135                messages: &[Message],
7136                system: Option<&str>,
7137                tools: &[ToolDefinition],
7138            ) -> Result<LlmResponse> {
7139                self.call_count.fetch_add(1, Ordering::SeqCst);
7140                let already_failed = self
7141                    .failed_once
7142                    .swap(true, std::sync::atomic::Ordering::SeqCst);
7143                if !already_failed {
7144                    anyhow::bail!("transient network error");
7145                }
7146                self.inner.complete(messages, system, tools).await
7147            }
7148
7149            async fn complete_streaming(
7150                &self,
7151                messages: &[Message],
7152                system: Option<&str>,
7153                tools: &[ToolDefinition],
7154                cancel_token: tokio_util::sync::CancellationToken,
7155            ) -> Result<tokio::sync::mpsc::Receiver<crate::llm::StreamEvent>> {
7156                self.inner
7157                    .complete_streaming(messages, system, tools, cancel_token)
7158                    .await
7159            }
7160        }
7161
7162        let mock = Arc::new(FailOnceThenSucceed {
7163            inner: MockLlmClient::new(vec![MockLlmClient::text_response("Recovered!")]),
7164            failed_once: std::sync::atomic::AtomicBool::new(false),
7165            call_count: AtomicUsize::new(0),
7166        });
7167
7168        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7169        let config = AgentConfig {
7170            circuit_breaker_threshold: 3,
7171            ..AgentConfig::default()
7172        };
7173        let agent = AgentLoop::new(mock.clone(), tool_executor, test_tool_context(), config);
7174        let result = agent.execute(&[], "Hello", None).await;
7175        assert!(
7176            result.is_ok(),
7177            "should succeed when LLM recovers within threshold: {:?}",
7178            result.err()
7179        );
7180        assert_eq!(result.unwrap().text, "Recovered!");
7181        assert_eq!(
7182            mock.call_count.load(Ordering::SeqCst),
7183            2,
7184            "should have made exactly 2 calls (1 fail + 1 success)"
7185        );
7186    }
7187
7188    // ── Continuation detection tests ─────────────────────────────────────────
7189
7190    #[test]
7191    fn test_looks_incomplete_empty() {
7192        assert!(AgentLoop::looks_incomplete(""));
7193        assert!(AgentLoop::looks_incomplete("   "));
7194    }
7195
7196    #[test]
7197    fn test_looks_incomplete_trailing_colon() {
7198        assert!(AgentLoop::looks_incomplete("Let me check the file:"));
7199        assert!(AgentLoop::looks_incomplete("Next steps:"));
7200    }
7201
7202    #[test]
7203    fn test_looks_incomplete_ellipsis() {
7204        assert!(AgentLoop::looks_incomplete("Working on it..."));
7205        assert!(AgentLoop::looks_incomplete("Processing…"));
7206    }
7207
7208    #[test]
7209    fn test_looks_incomplete_intent_phrases() {
7210        assert!(AgentLoop::looks_incomplete(
7211            "I'll start by reading the file."
7212        ));
7213        assert!(AgentLoop::looks_incomplete(
7214            "Let me check the configuration."
7215        ));
7216        assert!(AgentLoop::looks_incomplete("I will now run the tests."));
7217        assert!(AgentLoop::looks_incomplete(
7218            "I need to update the Cargo.toml."
7219        ));
7220    }
7221
7222    #[test]
7223    fn test_looks_complete_final_answer() {
7224        // Clear final answers should NOT trigger continuation
7225        assert!(!AgentLoop::looks_incomplete(
7226            "The tests pass. All changes have been applied successfully."
7227        ));
7228        assert!(!AgentLoop::looks_incomplete(
7229            "Done. I've updated the three files and verified the build succeeds."
7230        ));
7231        assert!(!AgentLoop::looks_incomplete("42"));
7232        assert!(!AgentLoop::looks_incomplete("Yes."));
7233    }
7234
7235    #[test]
7236    fn test_looks_incomplete_multiline_complete() {
7237        let text = "Here is the summary:\n\n- Fixed the bug in agent.rs\n- All tests pass\n- Build succeeds";
7238        assert!(!AgentLoop::looks_incomplete(text));
7239    }
7240}