a3s_code_core/
agent.rs

1//! Agent Loop Implementation
2//!
3//! The agent loop handles the core conversation cycle:
4//! 1. User sends a prompt
5//! 2. LLM generates a response (possibly with tool calls)
6//! 3. If tool calls present, execute them and send results back
7//! 4. Repeat until LLM returns without tool calls
8//!
//! This gives the LLM agentic behavior: it can call tools repeatedly
//! until the task is accomplished.
11
12#[cfg(feature = "ahp")]
13use crate::ahp::InjectedContext;
14use crate::context::{
15    ContextAssembler, ContextAssembly, ContextItem, ContextProvider, ContextQuery, ContextResult,
16    ContextType,
17};
18use crate::hitl::ConfirmationProvider;
19use crate::hooks::{
20    ErrorType, GenerateEndEvent, GenerateStartEvent, HookEvent, HookExecutor, HookResult,
21    IntentDetectionEvent, OnErrorEvent, PostResponseEvent, PostToolUseEvent,
22    PreContextPerceptionEvent, PrePromptEvent, PreToolUseEvent, TokenUsageInfo, ToolCallInfo,
23    ToolResultData,
24};
25use crate::llm::{LlmClient, LlmResponse, Message, TokenUsage, ToolDefinition};
26use crate::permissions::{PermissionChecker, PermissionDecision};
27use crate::planning::{AgentGoal, ExecutionPlan, LlmPlanner, PreAnalysis, Task, TaskStatus};
28use crate::prompts::{AgentStyle, PlanningMode, SystemPromptSlots};
29use crate::queue::SessionCommand;
30use crate::session_lane_queue::SessionLaneQueue;
31use crate::tools::{ToolContext, ToolExecutor, ToolStreamEvent};
32use anyhow::{Context, Result};
33use async_trait::async_trait;
34use futures::future::join_all;
35use serde::{Deserialize, Serialize};
36use serde_json::{json, Value};
37use std::sync::Arc;
38use std::time::Duration;
39use tokio::sync::mpsc;
40
/// Maximum number of tool execution rounds before stopping.
///
/// Used as the default value of `AgentConfig::max_tool_rounds`.
const MAX_TOOL_ROUNDS: usize = 50;
43
/// Result of a single parallel step execution, emitted as structured JSON
/// so the frontend can render it dynamically in the user's language.
///
/// The optional fields (`key_findings`, `error`, `data`) are omitted from the
/// serialized output when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParallelStepResult {
    /// Identifier of the step this result belongs to.
    pub step_id: String,
    /// Number of the step (NOTE(review): presumably its position in the plan — confirm).
    pub step_number: u32,
    /// Outcome of the step, as a plain string.
    pub status: String, // "completed" | "failed"
    /// Brief summary of what this step did (in the LLM's language).
    pub summary: String,
    /// Optional list of key findings produced by the step.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub key_findings: Option<Vec<String>>,
    /// Optional error description (NOTE(review): presumably set when `status` is "failed" — confirm).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// Optional free-form JSON payload attached to the step.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub data: Option<Value>,
}
60
61impl ParallelStepResult {
62    /// Build the full parallel-results JSON envelope injected into history.
63    pub fn build_envelope(results: Vec<ParallelStepResult>) -> Value {
64        json!({
65            "type": "parallel_results",
66            "steps": results
67        })
68    }
69}
70
/// Internal agent loop configuration.
///
/// Default values for every field are provided by the `Default` impl.
#[derive(Clone)]
pub(crate) struct AgentConfig {
    /// Slot-based system prompt customization.
    ///
    /// Users can customize specific parts (role, guidelines, response style, extra)
    /// without overriding the core agentic capabilities. The default agentic core
    /// (tool usage, autonomous behavior, completion criteria) is always preserved.
    pub prompt_slots: SystemPromptSlots,
    /// Tool definitions carried by this configuration (empty by default).
    pub tools: Vec<ToolDefinition>,
    /// Maximum number of tool execution rounds (default: `MAX_TOOL_ROUNDS`).
    pub max_tool_rounds: usize,
    /// Optional security provider for input taint tracking and output sanitization
    pub security_provider: Option<Arc<dyn crate::security::SecurityProvider>>,
    /// Optional permission checker for tool execution control
    pub permission_checker: Option<Arc<dyn PermissionChecker>>,
    /// Optional confirmation manager for HITL (Human-in-the-Loop)
    pub confirmation_manager: Option<Arc<dyn ConfirmationProvider>>,
    /// Context providers for augmenting prompts with external context
    pub context_providers: Vec<Arc<dyn ContextProvider>>,
    /// Planning mode — Auto (detect from message), Enabled, or Disabled.
    pub planning_mode: PlanningMode,
    /// Enable goal tracking
    pub goal_tracking: bool,
    /// Optional hook engine for firing lifecycle events (PreToolUse, PostToolUse, etc.)
    pub hook_engine: Option<Arc<dyn HookExecutor>>,
    /// Optional skill registry for tool permission enforcement
    pub skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    /// Max consecutive malformed-tool-args errors before aborting (default: 2).
    ///
    /// When the LLM returns tool arguments with `__parse_error`, the error is
    /// fed back as a tool result. After this many consecutive parse errors the
    /// loop bails instead of retrying indefinitely.
    pub max_parse_retries: u32,
    /// Per-tool execution timeout in milliseconds (`None` = no timeout).
    ///
    /// When set, each tool execution is wrapped in `tokio::time::timeout`.
    /// A timeout produces an error result sent back to the LLM rather than
    /// crashing the session.
    pub tool_timeout_ms: Option<u64>,
    /// Circuit-breaker threshold: max consecutive LLM API failures before
    /// aborting (default: 3).
    ///
    /// In non-streaming mode, transient LLM failures are retried up to this
    /// many times (with short exponential backoff) before the loop bails.
    /// In streaming mode, any failure is fatal (events cannot be replayed).
    pub circuit_breaker_threshold: u32,
    /// Max consecutive identical tool signatures before aborting (default: 3).
    ///
    /// A tool signature is the exact combination of tool name + compact JSON
    /// arguments. This prevents the agent from getting stuck repeating the same
    /// tool call in a loop, for example repeatedly fetching the same URL.
    pub duplicate_tool_call_threshold: u32,
    /// Enable auto-compaction when context usage exceeds threshold
    /// (disabled by default).
    pub auto_compact: bool,
    /// Context usage percentage threshold to trigger auto-compaction (0.0 - 1.0).
    /// Default: 0.80 (80%).
    pub auto_compact_threshold: f32,
    /// Maximum context window size in tokens (used for auto-compact calculation).
    /// Default: 200_000.
    pub max_context_tokens: usize,
    /// Optional agent memory for auto-remember after tool execution and recall before prompts.
    pub memory: Option<Arc<crate::memory::AgentMemory>>,
    /// Inject a continuation message when the LLM stops calling tools before the
    /// task is complete. Enabled by default. Set to `false` to disable.
    ///
    /// When enabled, if the LLM produces a response with no tool calls but the
    /// response text looks like an intermediate step (not a final answer), the
    /// loop injects [`crate::prompts::CONTINUATION`] as a user message and
    /// continues for up to `max_continuation_turns` additional turns.
    pub continuation_enabled: bool,
    /// Maximum number of continuation injections per execution (default: 3).
    ///
    /// Prevents infinite loops when the LLM repeatedly stops without completing.
    pub max_continuation_turns: u32,
    /// Maximum execution time in milliseconds (`None` = no timeout).
    ///
    /// When set, the entire execution loop is wrapped in a timeout check.
    /// If execution exceeds this duration, the loop bails with an error.
    /// This prevents runaway executions that consume excessive API quota.
    pub max_execution_time_ms: Option<u64>,
}
152
153impl std::fmt::Debug for AgentConfig {
154    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
155        f.debug_struct("AgentConfig")
156            .field("prompt_slots", &self.prompt_slots)
157            .field("tools", &self.tools)
158            .field("max_tool_rounds", &self.max_tool_rounds)
159            .field("security_provider", &self.security_provider.is_some())
160            .field("permission_checker", &self.permission_checker.is_some())
161            .field("confirmation_manager", &self.confirmation_manager.is_some())
162            .field("context_providers", &self.context_providers.len())
163            .field("planning_mode", &self.planning_mode)
164            .field("goal_tracking", &self.goal_tracking)
165            .field("hook_engine", &self.hook_engine.is_some())
166            .field(
167                "skill_registry",
168                &self.skill_registry.as_ref().map(|r| r.len()),
169            )
170            .field("max_parse_retries", &self.max_parse_retries)
171            .field("tool_timeout_ms", &self.tool_timeout_ms)
172            .field("circuit_breaker_threshold", &self.circuit_breaker_threshold)
173            .field(
174                "duplicate_tool_call_threshold",
175                &self.duplicate_tool_call_threshold,
176            )
177            .field("auto_compact", &self.auto_compact)
178            .field("auto_compact_threshold", &self.auto_compact_threshold)
179            .field("max_context_tokens", &self.max_context_tokens)
180            .field("continuation_enabled", &self.continuation_enabled)
181            .field("max_continuation_turns", &self.max_continuation_turns)
182            .field("memory", &self.memory.is_some())
183            .finish()
184    }
185}
186
/// Default configuration: no security/permission/confirmation/hook providers,
/// no memory, no timeouts, auto-compaction off, continuation on, and a skill
/// registry created via `SkillRegistry::with_builtins()`.
impl Default for AgentConfig {
    fn default() -> Self {
        Self {
            prompt_slots: SystemPromptSlots::default(),
            tools: Vec::new(), // Tools are provided by ToolExecutor
            max_tool_rounds: MAX_TOOL_ROUNDS,
            security_provider: None,
            permission_checker: None,
            confirmation_manager: None,
            context_providers: Vec::new(),
            planning_mode: PlanningMode::default(),
            goal_tracking: false,
            hook_engine: None,
            skill_registry: Some(Arc::new(crate::skills::SkillRegistry::with_builtins())),
            max_parse_retries: 2,
            tool_timeout_ms: None,
            circuit_breaker_threshold: 3,
            duplicate_tool_call_threshold: 3,
            auto_compact: false,
            // 80% of `max_context_tokens`.
            auto_compact_threshold: 0.80,
            max_context_tokens: 200_000,
            memory: None,
            continuation_enabled: true,
            max_continuation_turns: 3,
            max_execution_time_ms: None,
        }
    }
}
215
/// Events emitted during agent execution
///
/// Subscribe via [`crate::AgentSession::stream`].
/// New variants may be added in minor releases — always include a wildcard arm
/// (`_ => {}`) when matching.
///
/// Serialization: the enum is internally tagged — each event serializes as an
/// object whose `type` field holds the variant's `rename` value (for example
/// `"agent_start"`), alongside the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum AgentEvent {
    /// Agent started processing
    #[serde(rename = "agent_start")]
    Start { prompt: String },

    /// Runtime agent style/mode selected for the current execution.
    #[serde(rename = "agent_mode_changed")]
    AgentModeChanged {
        /// Stable UI/runtime mode label, e.g. "general", "planning", "explore".
        mode: String,
        /// Canonical built-in agent name associated with this mode.
        agent: String,
        /// Human-readable explanation of the selected style.
        description: String,
    },

    /// LLM turn started
    #[serde(rename = "turn_start")]
    TurnStart { turn: usize },

    /// Text delta from streaming
    #[serde(rename = "text_delta")]
    TextDelta { text: String },

    /// Reasoning/thinking delta from streaming (for models like kimi, deepseek)
    #[serde(rename = "reasoning_delta")]
    ReasoningDelta { text: String },

    /// Tool execution started
    #[serde(rename = "tool_start")]
    ToolStart { id: String, name: String },

    /// Tool input delta from streaming (partial JSON arguments)
    #[serde(rename = "tool_input_delta")]
    ToolInputDelta { delta: String },

    /// Tool execution completed
    ///
    /// `metadata` is omitted from the serialized event when it is `None`.
    #[serde(rename = "tool_end")]
    ToolEnd {
        id: String,
        name: String,
        output: String,
        exit_code: i32,
        #[serde(skip_serializing_if = "Option::is_none")]
        metadata: Option<serde_json::Value>,
    },

    /// Intermediate tool output (streaming delta)
    #[serde(rename = "tool_output_delta")]
    ToolOutputDelta {
        id: String,
        name: String,
        delta: String,
    },

    /// LLM turn completed
    #[serde(rename = "turn_end")]
    TurnEnd { turn: usize, usage: TokenUsage },

    /// Agent completed
    ///
    /// `meta` is omitted from the serialized event when it is `None`.
    #[serde(rename = "agent_end")]
    End {
        text: String,
        usage: TokenUsage,
        verification_summary: Box<crate::verification::VerificationSummary>,
        #[serde(skip_serializing_if = "Option::is_none")]
        meta: Option<crate::llm::LlmResponseMeta>,
    },

    /// Error occurred
    #[serde(rename = "error")]
    Error { message: String },

    /// Tool execution requires confirmation (HITL)
    #[serde(rename = "confirmation_required")]
    ConfirmationRequired {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        timeout_ms: u64,
    },

    /// Confirmation received from user (HITL)
    #[serde(rename = "confirmation_received")]
    ConfirmationReceived {
        tool_id: String,
        approved: bool,
        reason: Option<String>,
    },

    /// Confirmation timed out (HITL)
    #[serde(rename = "confirmation_timeout")]
    ConfirmationTimeout {
        tool_id: String,
        action_taken: String, // "rejected" or "auto_approved"
    },

    /// External task pending (needs SDK processing)
    #[serde(rename = "external_task_pending")]
    ExternalTaskPending {
        task_id: String,
        session_id: String,
        lane: crate::queue::SessionLane,
        command_type: String,
        payload: serde_json::Value,
        timeout_ms: u64,
    },

    /// External task completed
    #[serde(rename = "external_task_completed")]
    ExternalTaskCompleted {
        task_id: String,
        session_id: String,
        success: bool,
    },

    /// Tool execution denied by permission policy
    #[serde(rename = "permission_denied")]
    PermissionDenied {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        reason: String,
    },

    /// Context resolution started
    #[serde(rename = "context_resolving")]
    ContextResolving { providers: Vec<String> },

    /// Context resolution completed
    #[serde(rename = "context_resolved")]
    ContextResolved {
        total_items: usize,
        total_tokens: usize,
    },

    // ========================================================================
    // a3s-lane integration events
    // ========================================================================
    /// Command moved to dead letter queue after exhausting retries
    #[serde(rename = "command_dead_lettered")]
    CommandDeadLettered {
        command_id: String,
        command_type: String,
        lane: String,
        error: String,
        attempts: u32,
    },

    /// Command retry attempt
    #[serde(rename = "command_retry")]
    CommandRetry {
        command_id: String,
        command_type: String,
        lane: String,
        attempt: u32,
        delay_ms: u64,
    },

    /// Queue alert (depth warning, latency alert, etc.)
    #[serde(rename = "queue_alert")]
    QueueAlert {
        level: String,
        alert_type: String,
        message: String,
    },

    // ========================================================================
    // Task tracking events
    // ========================================================================
    /// Task list updated
    #[serde(rename = "task_updated")]
    TaskUpdated {
        session_id: String,
        tasks: Vec<crate::planning::Task>,
    },

    // ========================================================================
    // Memory System events (Phase 3)
    // ========================================================================
    /// Memory stored
    #[serde(rename = "memory_stored")]
    MemoryStored {
        memory_id: String,
        memory_type: String,
        importance: f32,
        tags: Vec<String>,
    },

    /// Memory recalled
    #[serde(rename = "memory_recalled")]
    MemoryRecalled {
        memory_id: String,
        content: String,
        relevance: f32,
    },

    /// Memories searched
    #[serde(rename = "memories_searched")]
    MemoriesSearched {
        query: Option<String>,
        tags: Vec<String>,
        result_count: usize,
    },

    /// Memory cleared
    #[serde(rename = "memory_cleared")]
    MemoryCleared {
        tier: String, // "long_term", "short_term", "working"
        count: u64,
    },

    // ========================================================================
    // Subagent events
    // ========================================================================
    /// Subagent task started
    #[serde(rename = "subagent_start")]
    SubagentStart {
        /// Unique task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Parent session ID
        parent_session_id: String,
        /// Agent type (e.g., "explore", "general")
        agent: String,
        /// Short description of the task
        description: String,
    },

    /// Subagent task progress update
    #[serde(rename = "subagent_progress")]
    SubagentProgress {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Progress status message
        status: String,
        /// Additional metadata
        metadata: serde_json::Value,
    },

    /// Subagent task completed
    #[serde(rename = "subagent_end")]
    SubagentEnd {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Agent type
        agent: String,
        /// Task output/result
        output: String,
        /// Whether the task succeeded
        success: bool,
    },

    // ========================================================================
    // Planning and Goal Tracking Events (Phase 1)
    // ========================================================================
    /// Planning phase started
    #[serde(rename = "planning_start")]
    PlanningStart { prompt: String },

    /// Planning phase completed
    #[serde(rename = "planning_end")]
    PlanningEnd {
        plan: ExecutionPlan,
        estimated_steps: usize,
    },

    /// Step execution started
    #[serde(rename = "step_start")]
    StepStart {
        step_id: String,
        description: String,
        step_number: usize,
        total_steps: usize,
    },

    /// Step execution completed
    #[serde(rename = "step_end")]
    StepEnd {
        step_id: String,
        status: TaskStatus,
        step_number: usize,
        total_steps: usize,
    },

    /// Goal extracted from prompt
    #[serde(rename = "goal_extracted")]
    GoalExtracted { goal: AgentGoal },

    /// Goal progress update
    #[serde(rename = "goal_progress")]
    GoalProgress {
        goal: String,
        progress: f32,
        completed_steps: usize,
        total_steps: usize,
    },

    /// Goal achieved
    #[serde(rename = "goal_achieved")]
    GoalAchieved {
        goal: String,
        total_steps: usize,
        duration_ms: i64,
    },

    // ========================================================================
    // Context Compaction events
    // ========================================================================
    /// Context automatically compacted due to high usage
    #[serde(rename = "context_compacted")]
    ContextCompacted {
        session_id: String,
        before_messages: usize,
        after_messages: usize,
        percent_before: f32,
    },

    // ========================================================================
    // Persistence events
    // ========================================================================
    /// Session persistence failed — SDK clients should handle this
    #[serde(rename = "persistence_failed")]
    PersistenceFailed {
        session_id: String,
        operation: String,
        error: String,
    },

    // ========================================================================
    // Side question (btw)
    // ========================================================================
    /// Ephemeral side question answered.
    ///
    /// Emitted by [`crate::agent_api::AgentSession::btw()`] in streaming mode.
    /// The answer is never added to conversation history.
    #[serde(rename = "btw_answer")]
    BtwAnswer {
        question: String,
        answer: String,
        usage: TokenUsage,
    },
}
572
/// Result of agent execution
#[derive(Debug, Clone)]
pub struct AgentResult {
    /// Text produced by the agent run.
    pub text: String,
    /// Messages associated with the run (NOTE(review): presumably the full
    /// conversation history after execution — confirm against the loop).
    pub messages: Vec<Message>,
    /// Token usage reported for the run.
    pub usage: TokenUsage,
    /// Number of tool calls made during the run.
    pub tool_calls_count: usize,
    /// Verification reports collected during the run; summarized by
    /// `verification_summary()`.
    pub verification_reports: Vec<crate::verification::VerificationReport>,
}
582
583impl AgentResult {
584    pub fn verification_summary(&self) -> crate::verification::VerificationSummary {
585        crate::verification::VerificationSummary::from_reports(&self.verification_reports)
586    }
587
588    pub fn verification_summary_text(&self) -> String {
589        crate::verification::format_verification_summary(&self.verification_summary())
590    }
591
592    pub fn has_pending_verification(&self) -> bool {
593        matches!(
594            self.verification_summary().status,
595            crate::verification::VerificationStatus::NeedsReview
596        )
597    }
598}
599
600// ============================================================================
601// ToolCommand — bridges ToolExecutor to SessionCommand for queue submission
602// ============================================================================
603
/// Adapter that implements `SessionCommand` for tool execution via the queue.
///
/// Wraps a `ToolExecutor` call so it can be submitted to `SessionLaneQueue`.
pub struct ToolCommand {
    /// Executor that actually runs the tool.
    tool_executor: Arc<ToolExecutor>,
    /// Name of the tool to invoke; also reported as the command type.
    tool_name: String,
    /// JSON arguments passed to the tool; also reported as the command payload.
    tool_args: Value,
    /// Context handed to the executor alongside the arguments.
    tool_context: ToolContext,
    /// Optional registry consulted for skill-based tool restrictions before execution.
    skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
}
614
615impl ToolCommand {
616    /// Create a new ToolCommand
617    pub fn new(
618        tool_executor: Arc<ToolExecutor>,
619        tool_name: String,
620        tool_args: Value,
621        tool_context: ToolContext,
622        skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
623    ) -> Self {
624        Self {
625            tool_executor,
626            tool_name,
627            tool_args,
628            tool_context,
629            skill_registry,
630        }
631    }
632}
633
634#[async_trait]
635impl SessionCommand for ToolCommand {
636    async fn execute(&self) -> Result<Value> {
637        // Check skill-based tool permissions
638        if let Some(registry) = &self.skill_registry {
639            let instruction_skills = registry.by_kind(crate::skills::SkillKind::Instruction);
640
641            // If there are instruction skills with tool restrictions, check permissions
642            let has_restrictions = instruction_skills.iter().any(|s| s.allowed_tools.is_some());
643
644            if has_restrictions {
645                let mut allowed = false;
646
647                for skill in &instruction_skills {
648                    if skill.is_tool_allowed(&self.tool_name) {
649                        allowed = true;
650                        break;
651                    }
652                }
653
654                if !allowed {
655                    return Err(anyhow::anyhow!(
656                        "Tool '{}' is not allowed by any active skill. Active skills restrict tools to their allowed-tools lists.",
657                        self.tool_name
658                    ));
659                }
660            }
661        }
662
663        // Execute the tool
664        let result = self
665            .tool_executor
666            .execute_with_context(&self.tool_name, &self.tool_args, &self.tool_context)
667            .await?;
668        Ok(serde_json::json!({
669            "output": result.output,
670            "exit_code": result.exit_code,
671            "metadata": result.metadata,
672        }))
673    }
674
675    fn command_type(&self) -> &str {
676        &self.tool_name
677    }
678
679    fn payload(&self) -> Value {
680        self.tool_args.clone()
681    }
682}
683
684// ============================================================================
685// AgentLoop
686// ============================================================================
687
/// Internal agent loop executor.
///
/// Cloning copies the configuration and tool context and shares the
/// `Arc`-held client, executor, and queue.
#[derive(Clone)]
pub(crate) struct AgentLoop {
    /// LLM client used by the loop.
    llm_client: Arc<dyn LlmClient>,
    /// Executor used to run tools.
    tool_executor: Arc<ToolExecutor>,
    /// Context passed along with tool executions.
    tool_context: ToolContext,
    /// Loop configuration.
    config: AgentConfig,
    /// Optional lane queue for priority-based tool execution
    command_queue: Option<Arc<SessionLaneQueue>>,
}
698
699// ============================================================================
700// Intent Detection Helpers (for AHP Context Perception)
701// ============================================================================
702
/// Extract a target name from the prompt (e.g., function name, file path).
///
/// Resolution order:
/// 1. The first non-empty span delimited by double quotes, then single
///    quotes, then backticks. An empty span such as `""` is skipped so that a
///    later delimiter (or the word fallback) can still supply a target.
/// 2. Otherwise, for prompts of more than two words, the first word that is
///    longer than three bytes and is not a common question/stop word. The
///    stop-word comparison is case-insensitive, and trailing sentence
///    punctuation (`, . ; : ! ?`) is removed from the word.
///
/// Returns an empty string when nothing suitable is found. `_patterns` is
/// accepted for interface compatibility and is currently unused.
fn extract_target_name_from_prompt(prompt: &str, _patterns: &[&str]) -> String {
    // Words skipped by the fallback scan (matched against the lowercased word).
    const STOP_WORDS: [&str; 7] = ["where", "what", "find", "the", "how", "is", "are"];
    // Punctuation stripped from the end of a fallback word.
    const TRAILING_PUNCTUATION: [char; 6] = [',', '.', ';', ':', '!', '?'];
    // Delimiters tried in priority order.
    const DELIMITERS: [char; 3] = ['"', '\'', '`'];

    // Text between the first two occurrences of `delimiter`, when both exist
    // and the enclosed text is non-empty.
    fn delimited(text: &str, delimiter: char) -> Option<&str> {
        let open = text.find(delimiter)? + delimiter.len_utf8();
        let rest = &text[open..];
        let close = rest.find(delimiter)?;
        if close == 0 {
            None
        } else {
            Some(&rest[..close])
        }
    }

    if let Some(span) = DELIMITERS.iter().find_map(|&d| delimited(prompt, d)) {
        return span.to_string();
    }

    // Very short prompts carry too little signal for the word heuristic.
    let words: Vec<&str> = prompt.split_whitespace().collect();
    if words.len() <= 2 {
        return String::new();
    }

    words
        .iter()
        .map(|word| word.trim_end_matches(&TRAILING_PUNCTUATION[..]))
        .find(|word| word.len() > 3 && !STOP_WORDS.contains(&word.to_lowercase().as_str()))
        .map(|word| word.to_string())
        .unwrap_or_default()
}
742
/// Detect the domain from prompt keywords.
///
/// The prompt is lowercased and checked against keyword groups in a fixed
/// priority order (rust, javascript, python, go, java, docker, kubernetes,
/// database, api, security, testing); the first matching group wins and
/// `"general"` is returned when none match.
///
/// Most keywords are matched as substrings. The exception is Go: the keyword
/// "go" is only two letters long and, as a substring, matched unrelated words
/// such as "google", "algorithm", or "category" — shadowing every domain that
/// is checked later. It is therefore matched as a whole word. A file name
/// like `main.go` still matches because `.` separates words.
fn detect_domain_from_prompt(prompt: &str) -> String {
    let lower = prompt.to_lowercase();
    let has = |needle: &str| lower.contains(needle);

    // Whole-word match: split on anything that is not a letter or digit.
    let mentions_go = lower
        .split(|c: char| !c.is_alphanumeric())
        .any(|word| word == "go" || word == "golang");

    let domain = if has("rust") || has("cargo") || has(".rs") {
        "rust"
    } else if has("javascript") || has("typescript") || has("node") || has(".js") || has(".ts") {
        "javascript"
    } else if has("python") || has(".py") {
        "python"
    } else if mentions_go {
        "go"
    } else if has("java") || has(".java") {
        "java"
    } else if has("docker") || has("container") {
        "docker"
    } else if has("kubernetes") || has("k8s") {
        "kubernetes"
    } else if has("sql") || has("database") || has("postgres") || has("mysql") {
        "database"
    } else if has("api") || has("rest") || has("grpc") {
        "api"
    } else if has("auth") || has("login") || has("password") || has("token") {
        "security"
    } else if has("test") || has("spec") || has("mock") {
        "testing"
    } else {
        "general"
    };

    domain.to_string()
}
786
/// Result from IntentDetection harness
///
/// `target_hints` is omitted from the serialized output when it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntentDetectionResult {
    /// Detected intent: "locate" | "understand" | "retrieve" | "explore" | "reason" | "validate" | "compare" | "track"
    pub detected_intent: String,
    /// Confidence score 0.0 - 1.0
    pub confidence: f32,
    /// Optional target hints from the harness
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_hints: Option<TargetHints>,
}
798
/// Target hints from IntentDetection harness
///
/// Every field is optional and is omitted from the serialized output when
/// it is `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TargetHints {
    /// Hinted kind of target, if the harness supplied one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_type: Option<String>,
    /// Hinted name of the target, if the harness supplied one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_name: Option<String>,
    /// Hinted domain, if the harness supplied one.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub domain: Option<String>,
}
809
/// Detect language hint from prompt characters.
///
/// Returns a two-letter hint based on the Unicode blocks present in the
/// prompt, or `None` when no listed script is found (e.g. plain Latin text).
///
/// Checks run from most to least specific script:
/// 1. Hiragana / Katakana → `"ja"`
/// 2. Hangul syllables → `"ko"`
/// 3. CJK unified ideographs → `"zh"`
/// 4. Arabic → `"ar"`
/// 5. Cyrillic → `"ru"`
///
/// Kana and Hangul must be tested before Han ideographs: Japanese text
/// normally mixes kana with kanji (and Korean may include Hanja), both of
/// which live in the same CJK ideograph block used by Chinese. Testing Han
/// first would label such prompts as Chinese.
fn detect_language_hint(prompt: &str) -> Option<String> {
    let contains_any =
        |block: std::ops::RangeInclusive<char>| prompt.chars().any(|c| block.contains(&c));

    let hint = if contains_any('\u{3040}'..='\u{309f}') || contains_any('\u{30a0}'..='\u{30ff}') {
        // Hiragana or Katakana
        "ja"
    } else if contains_any('\u{ac00}'..='\u{d7af}') {
        // Hangul syllables
        "ko"
    } else if contains_any('\u{4e00}'..='\u{9fff}') {
        // CJK unified ideographs with no kana/Hangul present
        "zh"
    } else if contains_any('\u{0600}'..='\u{06ff}') {
        // Arabic
        "ar"
    } else if contains_any('\u{0400}'..='\u{04ff}') {
        // Cyrillic
        "ru"
    } else {
        return None;
    };

    Some(hint.to_string())
}
849
850/// Build PreContextPerceptionEvent from IntentDetection result.
851fn build_pre_context_perception_from_intent(
852    result: IntentDetectionResult,
853    prompt: &str,
854    session_id: &str,
855    workspace: &str,
856) -> PreContextPerceptionEvent {
857    let target_hints = result.target_hints;
858    PreContextPerceptionEvent {
859        session_id: session_id.to_string(),
860        intent: result.detected_intent,
861        target_type: target_hints
862            .as_ref()
863            .and_then(|h| h.target_type.clone())
864            .unwrap_or_else(|| "unknown".to_string()),
865        target_name: target_hints
866            .as_ref()
867            .and_then(|h| h.target_name.clone())
868            .unwrap_or_else(|| extract_target_name_from_prompt(prompt, &[])),
869        domain: target_hints
870            .as_ref()
871            .and_then(|h| h.domain.clone())
872            .unwrap_or_else(|| detect_domain_from_prompt(prompt)),
873        query: Some(prompt.to_string()),
874        working_directory: workspace.to_string(),
875        urgency: "normal".to_string(),
876    }
877}
878
879/// Rough token estimation (~4 chars per token for English/code).
880#[cfg(feature = "ahp")]
fn estimate_tokens(text: &str) -> usize {
    // A quarter of the byte length, rounded down, but never below one token.
    match text.len() / 4 {
        0 => 1,
        quarter => quarter,
    }
}
884
/// Wrap `items` in a `ContextResult` attributed to the `ahp_harness` provider.
///
/// Returns `None` for an empty list. Otherwise `total_tokens` is the sum of
/// the items' `token_count` values and `truncated` is `false`.
#[cfg(feature = "ahp")]
fn ahp_context_result(items: Vec<ContextItem>) -> Option<ContextResult> {
    (!items.is_empty()).then(|| {
        let total_tokens = items.iter().map(|item| item.token_count).sum();
        ContextResult {
            items,
            total_tokens,
            provider: "ahp_harness".to_string(),
            truncated: false,
        }
    })
}
899
/// Convert harness-injected context into a list of `ContextResult`s.
///
/// Each populated section of `injected` (facts, file snippets, project
/// summary, knowledge, suggestions) yields at most one result, in that order.
/// Sections that are absent or empty contribute nothing.
#[cfg(feature = "ahp")]
fn injected_context_to_results(injected: InjectedContext) -> Vec<ContextResult> {
    let mut collected = Vec::new();

    // Facts: trust and relevance both follow the fact's own confidence.
    let facts: Vec<_> = injected
        .facts
        .into_iter()
        .map(|fact| {
            let tokens = estimate_tokens(&fact.content);
            ContextItem::new(
                uuid::Uuid::new_v4().to_string(),
                ContextType::Resource,
                fact.content,
            )
            .with_source(fact.source)
            .with_provenance("ahp_fact")
            .with_priority(0.75)
            .with_trust(fact.confidence)
            .with_freshness(0.85)
            .with_relevance(fact.confidence)
            .with_token_count(tokens)
        })
        .collect();
    collected.extend(ahp_context_result(facts));

    // File snippets: relevance comes from the harness score.
    if let Some(files) = injected.file_contents {
        let snippets: Vec<_> = files
            .into_iter()
            .map(|file| {
                let tokens = estimate_tokens(&file.snippet);
                ContextItem::new(
                    uuid::Uuid::new_v4().to_string(),
                    ContextType::Resource,
                    file.snippet,
                )
                .with_source(file.path)
                .with_provenance("ahp_file_snippet")
                .with_priority(0.8)
                .with_trust(0.8)
                .with_freshness(0.8)
                .with_relevance(file.relevance_score)
                .with_token_count(tokens)
            })
            .collect();
        collected.extend(ahp_context_result(snippets));
    }

    // Project summary: flattened into one newline-separated item.
    if let Some(summary) = injected.project_summary {
        let mut lines = vec![
            format!("Project: {}", summary.project_name),
            summary.structure_description,
        ];
        if let Some(language) = summary.language {
            lines.push(format!("Language: {language}"));
        }
        match summary.key_files {
            Some(files) if !files.is_empty() => {
                lines.push(format!("Key files: {}", files.join(", ")));
            }
            _ => {}
        }

        let body = lines.join("\n");
        let tokens = estimate_tokens(&body);
        let item = ContextItem::new(
            uuid::Uuid::new_v4().to_string(),
            ContextType::Resource,
            body,
        )
        .with_source("ahp://project-summary")
        .with_provenance("ahp_project_summary")
        .with_priority(0.7)
        .with_trust(0.75)
        .with_freshness(0.8)
        .with_relevance(0.9)
        .with_token_count(tokens);
        collected.extend(ahp_context_result(vec![item]));
    }

    // Knowledge entries: one item per entry.
    if let Some(entries) = injected.knowledge {
        let knowledge_items: Vec<_> = entries
            .into_iter()
            .map(|entry| {
                let tokens = estimate_tokens(&entry);
                ContextItem::new(
                    uuid::Uuid::new_v4().to_string(),
                    ContextType::Resource,
                    entry,
                )
                .with_source("ahp://knowledge")
                .with_provenance("ahp_knowledge")
                .with_priority(0.55)
                .with_trust(0.65)
                .with_freshness(0.6)
                .with_relevance(0.8)
                .with_token_count(tokens)
            })
            .collect();
        collected.extend(ahp_context_result(knowledge_items));
    }

    // Suggestions: a non-empty list is merged into a single bulleted item.
    match injected.suggestions {
        Some(suggestions) if !suggestions.is_empty() => {
            let body = format!("Harness suggestions:\n- {}", suggestions.join("\n- "));
            let tokens = estimate_tokens(&body);
            let item = ContextItem::new(
                uuid::Uuid::new_v4().to_string(),
                ContextType::Resource,
                body,
            )
            .with_source("ahp://suggestions")
            .with_provenance("ahp_suggestions")
            .with_priority(0.45)
            .with_trust(0.6)
            .with_freshness(0.8)
            .with_relevance(0.7)
            .with_token_count(tokens);
            collected.extend(ahp_context_result(vec![item]));
        }
        _ => {}
    }

    collected
}
1027
1028impl AgentLoop {
1029    pub(crate) fn new(
1030        llm_client: Arc<dyn LlmClient>,
1031        tool_executor: Arc<ToolExecutor>,
1032        tool_context: ToolContext,
1033        config: AgentConfig,
1034    ) -> Self {
1035        Self {
1036            llm_client,
1037            tool_executor,
1038            tool_context,
1039            config,
1040            command_queue: None,
1041        }
1042    }
1043
1044    /// Set the lane queue for priority-based tool execution.
1045    ///
1046    /// When set, tools are routed through the lane queue which supports
1047    /// External task handling for multi-machine parallel processing.
1048    pub fn with_queue(mut self, queue: Arc<SessionLaneQueue>) -> Self {
1049        self.command_queue = Some(queue);
1050        self
1051    }
1052
1053    /// Track a tool call result in telemetry.
1054    fn track_tool_result(&self, tool_name: &str, args: &serde_json::Value, exit_code: i32) {
1055        let _ = (tool_name, args, exit_code);
1056    }
1057
1058    fn should_run_pre_analysis(&self) -> bool {
1059        match self.config.planning_mode {
1060            PlanningMode::Disabled => false,
1061            PlanningMode::Enabled => true,
1062            PlanningMode::Auto => true,
1063        }
1064    }
1065
1066    async fn emit_task_updated(
1067        &self,
1068        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1069        session_id: &str,
1070        plan: &ExecutionPlan,
1071    ) {
1072        if let Some(tx) = event_tx {
1073            tx.send(AgentEvent::TaskUpdated {
1074                session_id: session_id.to_string(),
1075                tasks: plan.steps.clone(),
1076            })
1077            .await
1078            .ok();
1079        }
1080    }
1081
1082    fn normalized_plan_tool(step: &Task) -> Option<&str> {
1083        step.tool
1084            .as_deref()
1085            .map(str::trim)
1086            .filter(|tool| !tool.is_empty())
1087    }
1088
1089    fn should_delegate_plan_step(step: &Task) -> bool {
1090        matches!(
1091            Self::normalized_plan_tool(step),
1092            Some("task") | Some("parallel_task")
1093        )
1094    }
1095
1096    fn delegated_agent_for_step(step: &Task) -> &'static str {
1097        let text = format!(
1098            "{}\n{}",
1099            step.content,
1100            step.success_criteria.as_deref().unwrap_or_default()
1101        )
1102        .to_lowercase();
1103
1104        if text.contains("review")
1105            || text.contains("code review")
1106            || text.contains("regression")
1107            || text.contains("审查")
1108            || text.contains("评审")
1109            || text.contains("回归")
1110        {
1111            "review"
1112        } else if text.contains("verify")
1113            || text.contains("verification")
1114            || text.contains("validate")
1115            || text.contains("test")
1116            || text.contains("release")
1117            || text.contains("smoke")
1118            || text.contains("验证")
1119            || text.contains("测试")
1120            || text.contains("发布")
1121        {
1122            "verification"
1123        } else if text.contains("plan")
1124            || text.contains("design")
1125            || text.contains("architecture")
1126            || text.contains("规划")
1127            || text.contains("设计")
1128            || text.contains("架构")
1129        {
1130            "plan"
1131        } else if text.contains("explore")
1132            || text.contains("find")
1133            || text.contains("search")
1134            || text.contains("locate")
1135            || text.contains("inspect")
1136            || text.contains("查找")
1137            || text.contains("搜索")
1138            || text.contains("定位")
1139            || text.contains("探索")
1140            || text.contains("检查")
1141        {
1142            "explore"
1143        } else {
1144            "general"
1145        }
1146    }
1147
1148    fn delegated_prompt_for_step(step: &Task, step_number: usize, total_steps: usize) -> String {
1149        let mut prompt = format!(
1150            "Execute plan step {}/{}.\n\nTask:\n{}\n",
1151            step_number, total_steps, step.content
1152        );
1153        if let Some(criteria) = step
1154            .success_criteria
1155            .as_deref()
1156            .filter(|s| !s.trim().is_empty())
1157        {
1158            prompt.push_str("\nSuccess criteria:\n");
1159            prompt.push_str(criteria);
1160            prompt.push('\n');
1161        }
1162        prompt.push_str("\nReturn a compact result with summary, evidence, risks, and confidence.");
1163        prompt
1164    }
1165
1166    fn delegated_task_args(step: &Task, step_number: usize, total_steps: usize) -> Value {
1167        json!({
1168            "agent": Self::delegated_agent_for_step(step),
1169            "description": step.content,
1170            "prompt": Self::delegated_prompt_for_step(step, step_number, total_steps),
1171        })
1172    }
1173
1174    fn parallel_delegated_task_args(steps: &[(Task, usize)], total_steps: usize) -> Value {
1175        let tasks = steps
1176            .iter()
1177            .map(|(step, step_number)| Self::delegated_task_args(step, *step_number, total_steps))
1178            .collect::<Vec<_>>();
1179        json!({ "tasks": tasks })
1180    }
1181
1182    fn tool_context_for_plan(&self, session_id: Option<&str>) -> ToolContext {
1183        let mut ctx = self.tool_context.clone();
1184        if ctx.session_id.is_none() {
1185            if let Some(session_id) = session_id.filter(|id| !id.is_empty()) {
1186                ctx = ctx.with_session_id(session_id);
1187            }
1188        }
1189        ctx
1190    }
1191
1192    async fn execute_delegated_plan_tool(
1193        &self,
1194        tool_name: &str,
1195        args: &Value,
1196        session_id: Option<&str>,
1197        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1198    ) -> (String, i32, bool, Option<Value>) {
1199        let call_id = format!("plan-{}-{}", tool_name, uuid::Uuid::new_v4());
1200        if let Some(tx) = event_tx {
1201            tx.send(AgentEvent::ToolStart {
1202                id: call_id.clone(),
1203                name: tool_name.to_string(),
1204            })
1205            .await
1206            .ok();
1207        }
1208
1209        let ctx = self.tool_context_for_plan(session_id);
1210        let (output, exit_code, is_error, metadata, _) =
1211            Self::tool_result_to_tuple(self.execute_tool_timed(tool_name, args, &ctx).await);
1212
1213        if let Some(tx) = event_tx {
1214            tx.send(AgentEvent::ToolEnd {
1215                id: call_id,
1216                name: tool_name.to_string(),
1217                output: output.clone(),
1218                exit_code,
1219                metadata: metadata.clone(),
1220            })
1221            .await
1222            .ok();
1223        }
1224
1225        (output, exit_code, is_error, metadata)
1226    }
1227
1228    /// Execute a tool, applying the configured timeout if set.
1229    ///
1230    /// On timeout, returns an error describing which tool timed out and after
1231    /// how many milliseconds. The caller converts this to a tool-result error
1232    /// message that is fed back to the LLM.
1233    async fn execute_tool_timed(
1234        &self,
1235        name: &str,
1236        args: &serde_json::Value,
1237        ctx: &ToolContext,
1238    ) -> anyhow::Result<crate::tools::ToolResult> {
1239        let fut = self.tool_executor.execute_with_context(name, args, ctx);
1240        if let Some(timeout_ms) = self.config.tool_timeout_ms {
1241            match tokio::time::timeout(Duration::from_millis(timeout_ms), fut).await {
1242                Ok(result) => result,
1243                Err(_) => Err(anyhow::anyhow!(
1244                    "Tool '{}' timed out after {}ms",
1245                    name,
1246                    timeout_ms
1247                )),
1248            }
1249        } else {
1250            fut.await
1251        }
1252    }
1253
1254    /// Convert a tool execution result into the (output, exit_code, is_error, metadata, images) tuple.
1255    fn tool_result_to_tuple(
1256        result: anyhow::Result<crate::tools::ToolResult>,
1257    ) -> (
1258        String,
1259        i32,
1260        bool,
1261        Option<serde_json::Value>,
1262        Vec<crate::llm::Attachment>,
1263    ) {
1264        match result {
1265            Ok(r) => (
1266                r.output,
1267                r.exit_code,
1268                r.exit_code != 0,
1269                r.metadata,
1270                r.images,
1271            ),
1272            Err(e) => {
1273                let msg = e.to_string();
1274                // Classify the error so the LLM knows whether retrying makes sense.
1275                let hint = if Self::is_transient_error(&msg) {
1276                    " [transient — you may retry this tool call]"
1277                } else {
1278                    " [permanent — do not retry without changing the arguments]"
1279                };
1280                (
1281                    format!("Tool execution error: {}{}", msg, hint),
1282                    1,
1283                    true,
1284                    None,
1285                    Vec::new(),
1286                )
1287            }
1288        }
1289    }
1290
1291    fn collect_verification_report(
1292        reports: &mut Vec<crate::verification::VerificationReport>,
1293        metadata: &Option<serde_json::Value>,
1294    ) {
1295        let Some(metadata) = metadata else {
1296            return;
1297        };
1298        let Some(report) = metadata.get("verification_report") else {
1299            return;
1300        };
1301
1302        match serde_json::from_value::<crate::verification::VerificationReport>(report.clone()) {
1303            Ok(report) => reports.push(report),
1304            Err(err) => tracing::warn!(
1305                error = %err,
1306                "Ignoring malformed verification_report tool metadata"
1307            ),
1308        }
1309    }
1310
1311    /// Inspect the workspace for well-known project marker files and return a short
1312    /// `## Project Context` section that the agent can use without any manual configuration.
1313    /// Returns an empty string when the workspace type cannot be determined.
1314    fn detect_project_hint(workspace: &std::path::Path) -> String {
1315        struct Marker {
1316            file: &'static str,
1317            lang: &'static str,
1318            tip: &'static str,
1319        }
1320
1321        let markers = [
1322            Marker {
1323                file: "Cargo.toml",
1324                lang: "Rust",
1325                tip: "Use `cargo build`, `cargo test`, `cargo clippy`, and `cargo fmt`. \
1326                  Prefer `anyhow` / `thiserror` for error handling. \
1327                  Follow the Microsoft Rust Guidelines (no panics in library code, \
1328                  async-first with Tokio).",
1329            },
1330            Marker {
1331                file: "package.json",
1332                lang: "Node.js / TypeScript",
1333                tip: "Check `package.json` for the package manager (npm/yarn/pnpm/bun) \
1334                  and available scripts. Prefer TypeScript with strict mode. \
1335                  Use ESM imports unless the project is CommonJS.",
1336            },
1337            Marker {
1338                file: "pyproject.toml",
1339                lang: "Python",
1340                tip: "Use the package manager declared in `pyproject.toml` \
1341                  (uv, poetry, hatch, etc.). Prefer type hints and async/await for I/O.",
1342            },
1343            Marker {
1344                file: "setup.py",
1345                lang: "Python",
1346                tip: "Legacy Python project. Prefer type hints and async/await for I/O.",
1347            },
1348            Marker {
1349                file: "requirements.txt",
1350                lang: "Python",
1351                tip: "Python project with pip-style dependencies. \
1352                  Prefer type hints and async/await for I/O.",
1353            },
1354            Marker {
1355                file: "go.mod",
1356                lang: "Go",
1357                tip: "Use `go build ./...` and `go test ./...`. \
1358                  Follow standard Go project layout. Use `gofmt` for formatting.",
1359            },
1360            Marker {
1361                file: "pom.xml",
1362                lang: "Java / Maven",
1363                tip: "Use `mvn compile`, `mvn test`, `mvn package`. \
1364                  Follow standard Maven project structure.",
1365            },
1366            Marker {
1367                file: "build.gradle",
1368                lang: "Java / Gradle",
1369                tip: "Use `./gradlew build` and `./gradlew test`. \
1370                  Follow standard Gradle project structure.",
1371            },
1372            Marker {
1373                file: "build.gradle.kts",
1374                lang: "Kotlin / Gradle",
1375                tip: "Use `./gradlew build` and `./gradlew test`. \
1376                  Prefer Kotlin coroutines for async work.",
1377            },
1378            Marker {
1379                file: "CMakeLists.txt",
1380                lang: "C / C++",
1381                tip: "Use `cmake -B build && cmake --build build`. \
1382                  Check for `compile_commands.json` for IDE tooling.",
1383            },
1384            Marker {
1385                file: "Makefile",
1386                lang: "C / C++ (or generic)",
1387                tip: "Use `make` or `make <target>`. \
1388                  Check available targets with `make help` or by reading the Makefile.",
1389            },
1390        ];
1391
1392        // Check for C# / .NET — no single fixed filename, so glob for *.csproj / *.sln
1393        let is_dotnet = workspace.join("*.csproj").exists() || {
1394            // Fast check: look for any .csproj or .sln in the workspace root
1395            std::fs::read_dir(workspace)
1396                .map(|entries| {
1397                    entries.flatten().any(|e| {
1398                        let name = e.file_name();
1399                        let s = name.to_string_lossy();
1400                        s.ends_with(".csproj") || s.ends_with(".sln")
1401                    })
1402                })
1403                .unwrap_or(false)
1404        };
1405
1406        if is_dotnet {
1407            return "## Project Context\n\nThis is a **C# / .NET** project. \
1408             Use `dotnet build`, `dotnet test`, and `dotnet run`. \
1409             Follow C# coding conventions and async/await patterns."
1410                .to_string();
1411        }
1412
1413        for marker in &markers {
1414            if workspace.join(marker.file).exists() {
1415                return format!(
1416                    "## Project Context\n\nThis is a **{}** project. {}",
1417                    marker.lang, marker.tip
1418                );
1419            }
1420        }
1421
1422        String::new()
1423    }
1424
1425    /// Returns `true` for errors that are likely transient (network, timeout, I/O contention).
1426    /// Used to annotate tool error messages so the LLM knows whether retrying is safe.
1427    fn is_transient_error(msg: &str) -> bool {
1428        let lower = msg.to_lowercase();
1429        lower.contains("timeout")
1430        || lower.contains("timed out")
1431        || lower.contains("connection refused")
1432        || lower.contains("connection reset")
1433        || lower.contains("broken pipe")
1434        || lower.contains("temporarily unavailable")
1435        || lower.contains("resource temporarily unavailable")
1436        || lower.contains("os error 11")  // EAGAIN
1437        || lower.contains("os error 35")  // EAGAIN on macOS
1438        || lower.contains("rate limit")
1439        || lower.contains("too many requests")
1440        || lower.contains("service unavailable")
1441        || lower.contains("network unreachable")
1442    }
1443
1444    /// Returns `true` when a tool writes a file and is safe to run concurrently with other
1445    /// independent writes (no ordering dependencies, no side-channel output).
1446    fn is_parallel_safe_write(name: &str, _args: &serde_json::Value) -> bool {
1447        matches!(
1448            name,
1449            "write_file" | "edit_file" | "create_file" | "append_to_file" | "replace_in_file"
1450        )
1451    }
1452
1453    /// Extract the target file path from write-tool arguments so we can check for conflicts.
1454    fn extract_write_path(args: &serde_json::Value) -> Option<String> {
1455        // write_file / create_file / append_to_file / replace_in_file use "path"
1456        // edit_file uses "path" as well
1457        args.get("path")
1458            .and_then(|v| v.as_str())
1459            .map(|s| s.to_string())
1460    }
1461
1462    /// Execute a tool through the lane queue (if configured) or directly.
1463    async fn execute_tool_queued_or_direct(
1464        &self,
1465        name: &str,
1466        args: &serde_json::Value,
1467        ctx: &ToolContext,
1468    ) -> anyhow::Result<crate::tools::ToolResult> {
1469        self.execute_tool_queued_or_direct_inner(name, args, ctx)
1470            .await
1471    }
1472
1473    /// Inner execution without task lifecycle wrapping.
1474    async fn execute_tool_queued_or_direct_inner(
1475        &self,
1476        name: &str,
1477        args: &serde_json::Value,
1478        ctx: &ToolContext,
1479    ) -> anyhow::Result<crate::tools::ToolResult> {
1480        if let Some(ref queue) = self.command_queue {
1481            let command = ToolCommand::new(
1482                Arc::clone(&self.tool_executor),
1483                name.to_string(),
1484                args.clone(),
1485                ctx.clone(),
1486                self.config.skill_registry.clone(),
1487            );
1488            let rx = queue.submit_by_tool(name, Box::new(command)).await;
1489            match rx.await {
1490                Ok(Ok(value)) => {
1491                    let output = value["output"]
1492                        .as_str()
1493                        .ok_or_else(|| {
1494                            anyhow::anyhow!(
1495                                "Queue result missing 'output' field for tool '{}'",
1496                                name
1497                            )
1498                        })?
1499                        .to_string();
1500                    let exit_code = value["exit_code"].as_i64().unwrap_or(0) as i32;
1501                    return Ok(crate::tools::ToolResult {
1502                        name: name.to_string(),
1503                        output,
1504                        exit_code,
1505                        metadata: None,
1506                        images: Vec::new(),
1507                    });
1508                }
1509                Ok(Err(e)) => {
1510                    tracing::warn!(
1511                        "Queue execution failed for tool '{}', falling back to direct: {}",
1512                        name,
1513                        e
1514                    );
1515                }
1516                Err(_) => {
1517                    tracing::warn!(
1518                        "Queue channel closed for tool '{}', falling back to direct",
1519                        name
1520                    );
1521                }
1522            }
1523        }
1524        self.execute_tool_timed(name, args, ctx).await
1525    }
1526
1527    /// Call the LLM, handling streaming vs non-streaming internally.
1528    ///
1529    /// Streaming events (`TextDelta`, `ToolStart`) are forwarded to `event_tx`
1530    /// as they arrive. Non-streaming mode simply awaits the complete response.
1531    ///
1532    /// Tool definitions are selected per turn by the centralized tool selector.
1533    ///
1534    /// Returns `Err` on any LLM API failure. The circuit breaker in
1535    /// `execute_loop` wraps this call with retry logic for non-streaming mode.
1536    async fn call_llm(
1537        &self,
1538        messages: &[Message],
1539        system: Option<&str>,
1540        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1541        cancel_token: &tokio_util::sync::CancellationToken,
1542    ) -> anyhow::Result<LlmResponse> {
1543        let tools = crate::tools::select_tools_for_messages(&self.config.tools, messages);
1544
1545        if event_tx.is_some() {
1546            let mut stream_rx = match self
1547                .llm_client
1548                .complete_streaming(messages, system, &tools, cancel_token.clone())
1549                .await
1550            {
1551                Ok(rx) => rx,
1552                Err(stream_error) => {
1553                    // Do not fall back to non-streaming if cancelled — propagate cancellation
1554                    if cancel_token.is_cancelled() {
1555                        anyhow::bail!("Operation cancelled by user");
1556                    }
1557                    tracing::warn!(
1558                        error = %stream_error,
1559                        "LLM streaming setup failed; falling back to non-streaming completion"
1560                    );
1561                    return self
1562                        .llm_client
1563                        .complete(messages, system, &tools)
1564                        .await
1565                        .with_context(|| {
1566                            format!(
1567                                "LLM streaming call failed ({stream_error}); non-streaming fallback also failed"
1568                            )
1569                        });
1570                }
1571            };
1572
1573            let mut final_response: Option<LlmResponse> = None;
1574            loop {
1575                tokio::select! {
1576                    _ = cancel_token.cancelled() => {
1577                        tracing::info!("🛑 LLM streaming cancelled by CancellationToken");
1578                        anyhow::bail!("Operation cancelled by user");
1579                    }
1580                    event = stream_rx.recv() => {
1581                        match event {
1582                            Some(crate::llm::StreamEvent::TextDelta(text)) => {
1583                                if let Some(tx) = event_tx {
1584                                    tx.send(AgentEvent::TextDelta { text }).await.ok();
1585                                }
1586                            }
1587                            Some(crate::llm::StreamEvent::ReasoningDelta(text)) => {
1588                                if let Some(tx) = event_tx {
1589                                    tx.send(AgentEvent::ReasoningDelta { text }).await.ok();
1590                                }
1591                            }
1592                            Some(crate::llm::StreamEvent::ToolUseStart { id, name }) => {
1593                                if let Some(tx) = event_tx {
1594                                    tx.send(AgentEvent::ToolStart { id, name }).await.ok();
1595                                }
1596                            }
1597                            Some(crate::llm::StreamEvent::ToolUseInputDelta(delta)) => {
1598                                if let Some(tx) = event_tx {
1599                                    tx.send(AgentEvent::ToolInputDelta { delta }).await.ok();
1600                                }
1601                            }
1602                            Some(crate::llm::StreamEvent::Done(resp)) => {
1603                                final_response = Some(resp);
1604                                break;
1605                            }
1606                            None => break,
1607                        }
1608                    }
1609                }
1610            }
1611            final_response.context("Stream ended without final response")
1612        } else {
1613            self.llm_client
1614                .complete(messages, system, &tools)
1615                .await
1616                .context("LLM call failed")
1617        }
1618    }
1619
1620    /// Create a tool context with streaming support.
1621    ///
1622    /// When `event_tx` is Some, spawns a forwarder task that converts
1623    /// `ToolStreamEvent::OutputDelta` into `AgentEvent::ToolOutputDelta`
1624    /// and sends them to the agent event channel.
1625    ///
1626    /// Returns the augmented `ToolContext`. The forwarder task runs until
1627    /// the tool-side sender is dropped (i.e., tool execution finishes).
1628    fn streaming_tool_context(
1629        &self,
1630        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1631        tool_id: &str,
1632        tool_name: &str,
1633    ) -> ToolContext {
1634        let mut ctx = self.tool_context.clone();
1635        if let Some(agent_tx) = event_tx {
1636            let (tool_tx, mut tool_rx) = mpsc::channel::<ToolStreamEvent>(64);
1637            ctx.event_tx = Some(tool_tx);
1638
1639            let agent_tx = agent_tx.clone();
1640            let tool_id = tool_id.to_string();
1641            let tool_name = tool_name.to_string();
1642            tokio::spawn(async move {
1643                while let Some(event) = tool_rx.recv().await {
1644                    match event {
1645                        ToolStreamEvent::OutputDelta(delta) => {
1646                            agent_tx
1647                                .send(AgentEvent::ToolOutputDelta {
1648                                    id: tool_id.clone(),
1649                                    name: tool_name.clone(),
1650                                    delta,
1651                                })
1652                                .await
1653                                .ok();
1654                        }
1655                    }
1656                }
1657            });
1658        }
1659        ctx
1660    }
1661
1662    /// Resolve context from all providers for a given prompt
1663    ///
1664    /// Returns aggregated context results from all configured providers.
1665    async fn resolve_context(&self, prompt: &str, session_id: Option<&str>) -> Vec<ContextResult> {
1666        if self.config.context_providers.is_empty() {
1667            return Vec::new();
1668        }
1669
1670        let query = ContextQuery::new(prompt).with_session_id(session_id.unwrap_or(""));
1671
1672        let futures = self
1673            .config
1674            .context_providers
1675            .iter()
1676            .map(|p| p.query(&query));
1677        let outcomes = join_all(futures).await;
1678
1679        outcomes
1680            .into_iter()
1681            .enumerate()
1682            .filter_map(|(i, r)| match r {
1683                Ok(result) if !result.is_empty() => Some(result),
1684                Ok(_) => None,
1685                Err(e) => {
1686                    tracing::warn!(
1687                        "Context provider '{}' failed: {}",
1688                        self.config.context_providers[i].name(),
1689                        e
1690                    );
1691                    None
1692                }
1693            })
1694            .collect()
1695    }
1696
/// Heuristically decide whether a tool-free LLM reply is an intermediate step
/// rather than a finished answer.
///
/// Returns `true` (continuation should be injected) when any of these holds:
/// - the reply is empty or whitespace-only;
/// - the reply is a single line shorter than 80 characters that ends with
///   `:`, `...`, `…` or `,`;
/// - the reply contains, case-insensitively, a phrase announcing future work
///   (for example `"let me "` or `"i'm going to"`).
///
/// Length is measured in characters rather than bytes, so short non-ASCII
/// replies (accented or CJK text) are held to the same 80-character threshold
/// as ASCII ones.
fn looks_incomplete(text: &str) -> bool {
    /// Single-line replies below this many characters count as "short".
    const SHORT_REPLY_CHARS: usize = 80;
    /// Lower-case phrases that describe upcoming work instead of doing it.
    const CONTINUATION_PHRASES: &[&str] = &[
        "i'll ",
        "i will ",
        "let me ",
        "i need to ",
        "i should ",
        "next, i",
        "first, i",
        "now i",
        "i'll start",
        "i'll begin",
        "i'll now",
        "let's start",
        "let's begin",
        "to do this",
        "i'm going to",
    ];

    let t = text.trim();
    if t.is_empty() {
        return true;
    }

    // `take(N).count() < N` caps the scan at N characters, so a long reply is
    // never walked in full just to learn that it is long.
    let is_short_single_line =
        !t.contains('\n') && t.chars().take(SHORT_REPLY_CHARS).count() < SHORT_REPLY_CHARS;
    if is_short_single_line {
        // A short one-liner may be a genuine answer; only trailing punctuation
        // that signals "more to come" marks it as incomplete.
        let ends_continuation =
            t.ends_with(':') || t.ends_with("...") || t.ends_with('…') || t.ends_with(',');
        if ends_continuation {
            return true;
        }
    }

    let lower = t.to_lowercase();
    CONTINUATION_PHRASES
        .iter()
        .any(|phrase| lower.contains(phrase))
}
1745
1746    /// Get the assembled system prompt from slots.
1747    #[allow(dead_code)]
1748    fn system_prompt(&self) -> String {
1749        self.config.prompt_slots.build()
1750    }
1751
1752    /// Get the assembled system prompt from slots with an explicit style.
1753    fn system_prompt_for_style(&self, style: AgentStyle) -> String {
1754        let mut slots = self.config.prompt_slots.clone();
1755        slots.style = Some(style);
1756        slots.build()
1757    }
1758
1759    async fn resolve_effective_style(&self, prompt: &str) -> AgentStyle {
1760        if let Some(style) = self.config.prompt_slots.style {
1761            return style;
1762        }
1763
1764        let (style, confidence) = AgentStyle::detect_with_confidence(prompt);
1765        tracing::debug!(
1766            intent.classification = ?style,
1767            intent.confidence = ?confidence,
1768            intent.source = "local",
1769            "Intent classified locally"
1770        );
1771        style
1772    }
1773
1774    /// Detect if context perception is needed based on user prompt.
1775    ///
1776    /// Returns `Some(PreContextPerceptionEvent)` if the prompt suggests the model
1777    /// needs workspace knowledge (finding files, understanding code, etc.).
1778    pub fn detect_context_perception_intent(
1779        &self,
1780        prompt: &str,
1781        session_id: &str,
1782        workspace: &str,
1783    ) -> Option<PreContextPerceptionEvent> {
1784        let lower = prompt.to_lowercase();
1785
1786        // Pattern matching for different intents that suggest context perception is needed
1787        let intents: &[(&[&str], &str)] = &[
1788            // Locate: finding files, functions, resources
1789            (
1790                &[
1791                    "where is",
1792                    "where are",
1793                    "find the file",
1794                    "find all",
1795                    "find files",
1796                    "who wrote",
1797                    "locate",
1798                    "search for",
1799                    "look for",
1800                    "search",
1801                ],
1802                "locate",
1803            ),
1804            // Understand: explaining how something works
1805            (
1806                &[
1807                    "how does",
1808                    "what does",
1809                    "explain",
1810                    "understand",
1811                    "what is this",
1812                    "how does this work",
1813                ],
1814                "understand",
1815            ),
1816            // Retrieve: recalling from memory/past
1817            (
1818                &[
1819                    "remember",
1820                    "earlier",
1821                    "before",
1822                    "previously",
1823                    "last time",
1824                    "past",
1825                    "previous",
1826                ],
1827                "retrieve",
1828            ),
1829            // Explore: understanding structure
1830            (
1831                &[
1832                    "how is organized",
1833                    "project structure",
1834                    "what files",
1835                    "show me the structure",
1836                    "explore",
1837                ],
1838                "explore",
1839            ),
1840            // Reason: asking why/causality
1841            (
1842                &[
1843                    "why did",
1844                    "why is",
1845                    "cause",
1846                    "reason",
1847                    "what happened",
1848                    "why does",
1849                ],
1850                "reason",
1851            ),
1852            // Validate: checking correctness
1853            (
1854                &["is this correct", "verify", "validate", "check if", "debug"],
1855                "validate",
1856            ),
1857            // Compare: comparing things
1858            (
1859                &[
1860                    "difference between",
1861                    "compare",
1862                    "versus",
1863                    " vs ",
1864                    "different from",
1865                ],
1866                "compare",
1867            ),
1868            // Track: status/history
1869            (
1870                &[
1871                    "status",
1872                    "progress",
1873                    "how far",
1874                    "history",
1875                    "what's the current",
1876                ],
1877                "track",
1878            ),
1879        ];
1880
1881        // Detect target type from keywords
1882        let target_type = if lower.contains("function") || lower.contains("method") {
1883            "function"
1884        } else if lower.contains("file") || lower.contains("config") {
1885            "file"
1886        } else if lower.contains("class") {
1887            "entity"
1888        } else if lower.contains("module") || lower.contains("package") {
1889            "module"
1890        } else if lower.contains("test") {
1891            "test"
1892        } else {
1893            "unknown"
1894        };
1895
1896        // Find matching intent
1897        let matched_intent = intents
1898            .iter()
1899            .find(|(patterns, _)| patterns.iter().any(|p| lower.contains(p)));
1900
1901        matched_intent.map(|(patterns, intent)| {
1902            // Extract target name if possible (simplified extraction)
1903            let target_name = extract_target_name_from_prompt(prompt, patterns);
1904
1905            PreContextPerceptionEvent {
1906                session_id: session_id.to_string(),
1907                intent: intent.to_string(),
1908                target_type: target_type.to_string(),
1909                target_name,
1910                domain: detect_domain_from_prompt(prompt),
1911                query: Some(prompt.to_string()),
1912                working_directory: workspace.to_string(),
1913                urgency: "normal".to_string(),
1914            }
1915        })
1916    }
1917
1918    /// Fire PreContextPerception hook and wait for harness decision.
1919    async fn fire_pre_context_perception(&self, event: &PreContextPerceptionEvent) -> HookResult {
1920        if let Some(he) = &self.config.hook_engine {
1921            let hook_event = HookEvent::PreContextPerception(event.clone());
1922            he.fire(&hook_event).await
1923        } else {
1924            HookResult::continue_()
1925        }
1926    }
1927
1928    /// Fire IntentDetection hook and wait for harness decision.
1929    ///
1930    /// This is called on every prompt to detect user intent via the AHP harness.
1931    /// Returns the detected intent if the harness provides one, or None if blocked/failed.
1932    async fn fire_intent_detection(
1933        &self,
1934        prompt: &str,
1935        session_id: &str,
1936        workspace: &str,
1937    ) -> Option<IntentDetectionResult> {
1938        let event = IntentDetectionEvent {
1939            session_id: session_id.to_string(),
1940            prompt: prompt.to_string(),
1941            workspace: workspace.to_string(),
1942            language_hint: detect_language_hint(prompt),
1943        };
1944
1945        let hook_result = if let Some(he) = &self.config.hook_engine {
1946            let hook_event = HookEvent::IntentDetection(event);
1947            he.fire(&hook_event).await
1948        } else {
1949            return None;
1950        };
1951
1952        match hook_result {
1953            HookResult::Continue(Some(modified)) => {
1954                // Parse the intent detection result
1955                serde_json::from_value::<IntentDetectionResult>(modified).ok()
1956            }
1957            HookResult::Block(_) => {
1958                // Harness blocked intent detection - use fallback
1959                tracing::info!("AHP harness blocked intent detection");
1960                None
1961            }
1962            _ => None,
1963        }
1964    }
1965
/// Convert context injected by an AHP harness decision into context results.
///
/// Thin delegate to `injected_context_to_results`; compiled only with the
/// `ahp` feature.
#[cfg(feature = "ahp")]
fn apply_injected_context(&self, injected: InjectedContext) -> Vec<ContextResult> {
    injected_context_to_results(injected)
}
1971
1972    /// Build augmented system prompt with context
1973    #[allow(dead_code)]
1974    fn build_augmented_system_prompt(&self, context_results: &[ContextResult]) -> Option<String> {
1975        let base = self.system_prompt();
1976        let context_assembly = self.assemble_context_results(context_results);
1977        self.build_augmented_system_prompt_with_base(&base, &context_assembly)
1978    }
1979
1980    fn assemble_context_results(&self, context_results: &[ContextResult]) -> ContextAssembly {
1981        let mut results = context_results.to_vec();
1982
1983        if self.config.prompt_slots.guidelines.is_none() {
1984            let project_hint = Self::detect_project_hint(&self.tool_context.workspace);
1985            if !project_hint.is_empty() {
1986                let token_count = project_hint.split_whitespace().count().max(1);
1987                let mut result = ContextResult::new("project_hint");
1988                result.add_item(
1989                    ContextItem::new("project_hint", ContextType::Resource, project_hint)
1990                        .with_source("a3s://project-hint")
1991                        .with_provenance("workspace_marker")
1992                        .with_priority(0.65)
1993                        .with_trust(0.8)
1994                        .with_freshness(1.0)
1995                        .with_relevance(0.9)
1996                        .with_token_count(token_count),
1997                );
1998                results.push(result);
1999            }
2000        }
2001
2002        ContextAssembler::with_default_budget().assemble(&results)
2003    }
2004
2005    fn build_augmented_system_prompt_with_base(
2006        &self,
2007        base: &str,
2008        context_assembly: &ContextAssembly,
2009    ) -> Option<String> {
2010        let base = base.to_string();
2011
2012        // MCP tool definitions are selected per turn by ToolSelector. Keep the
2013        // system prompt small instead of listing every external tool here.
2014        let has_mcp_tools = self
2015            .tool_executor
2016            .definitions()
2017            .iter()
2018            .any(|t| t.name.starts_with("mcp__"));
2019
2020        let mcp_section = if has_mcp_tools {
2021            "## MCP Tools\n\nExternal MCP tools are available on demand when relevant to the current request.".to_string()
2022        } else {
2023            String::new()
2024        };
2025
2026        let parts: Vec<&str> = [base.as_str(), mcp_section.as_str()]
2027            .iter()
2028            .filter(|s| !s.is_empty())
2029            .copied()
2030            .collect();
2031
2032        if context_assembly.is_empty() {
2033            return Some(parts.join("\n\n"));
2034        }
2035
2036        let context_xml = context_assembly.to_xml();
2037        Some(format!("{}\n\n{}", parts.join("\n\n"), context_xml))
2038    }
2039
2040    /// Notify providers of turn completion for memory extraction
2041    async fn notify_turn_complete(&self, session_id: &str, prompt: &str, response: &str) {
2042        let futures = self
2043            .config
2044            .context_providers
2045            .iter()
2046            .map(|p| p.on_turn_complete(session_id, prompt, response));
2047        let outcomes = join_all(futures).await;
2048
2049        for (i, result) in outcomes.into_iter().enumerate() {
2050            if let Err(e) = result {
2051                tracing::warn!(
2052                    "Context provider '{}' on_turn_complete failed: {}",
2053                    self.config.context_providers[i].name(),
2054                    e
2055                );
2056            }
2057        }
2058    }
2059
2060    /// Fire PreToolUse hook event before tool execution.
2061    /// Returns the HookResult which may block the tool call.
2062    async fn fire_pre_tool_use(
2063        &self,
2064        session_id: &str,
2065        tool_name: &str,
2066        args: &serde_json::Value,
2067        recent_tools: Vec<String>,
2068    ) -> Option<HookResult> {
2069        if let Some(he) = &self.config.hook_engine {
2070            // Convert null args to empty object so JS callbacks don't get null.input errors
2071            let safe_args = if args.is_null() {
2072                serde_json::Value::Object(Default::default())
2073            } else {
2074                args.clone()
2075            };
2076            let event = HookEvent::PreToolUse(PreToolUseEvent {
2077                session_id: session_id.to_string(),
2078                tool: tool_name.to_string(),
2079                args: safe_args,
2080                working_directory: self.tool_context.workspace.to_string_lossy().to_string(),
2081                recent_tools,
2082            });
2083            let result = he.fire(&event).await;
2084            if result.is_block() {
2085                return Some(result);
2086            }
2087        }
2088        None
2089    }
2090
2091    /// Fire PostToolUse hook event after tool execution (fire-and-forget).
2092    async fn fire_post_tool_use(
2093        &self,
2094        session_id: &str,
2095        tool_name: &str,
2096        args: &serde_json::Value,
2097        output: &str,
2098        success: bool,
2099        duration_ms: u64,
2100    ) {
2101        if let Some(he) = &self.config.hook_engine {
2102            // Convert null args to empty object so JS callbacks don't get null.input errors
2103            let safe_args = if args.is_null() {
2104                serde_json::Value::Object(Default::default())
2105            } else {
2106                args.clone()
2107            };
2108            let event = HookEvent::PostToolUse(PostToolUseEvent {
2109                session_id: session_id.to_string(),
2110                tool: tool_name.to_string(),
2111                args: safe_args,
2112                result: ToolResultData {
2113                    success,
2114                    output: output.to_string(),
2115                    exit_code: if success { Some(0) } else { Some(1) },
2116                    duration_ms,
2117                },
2118            });
2119            let he = Arc::clone(he);
2120            tokio::spawn(async move {
2121                let _ = he.fire(&event).await;
2122            });
2123        }
2124    }
2125
2126    /// Fire GenerateStart hook event before an LLM call.
2127    async fn fire_generate_start(
2128        &self,
2129        session_id: &str,
2130        prompt: &str,
2131        system_prompt: &Option<String>,
2132    ) {
2133        if let Some(he) = &self.config.hook_engine {
2134            let event = HookEvent::GenerateStart(GenerateStartEvent {
2135                session_id: session_id.to_string(),
2136                prompt: prompt.to_string(),
2137                system_prompt: system_prompt.clone(),
2138                model_provider: String::new(),
2139                model_name: String::new(),
2140                available_tools: self.config.tools.iter().map(|t| t.name.clone()).collect(),
2141            });
2142            let _ = he.fire(&event).await;
2143        }
2144    }
2145
2146    /// Fire GenerateEnd hook event after an LLM call.
2147    async fn fire_generate_end(
2148        &self,
2149        session_id: &str,
2150        prompt: &str,
2151        response: &LlmResponse,
2152        duration_ms: u64,
2153    ) {
2154        if let Some(he) = &self.config.hook_engine {
2155            let tool_calls: Vec<ToolCallInfo> = response
2156                .tool_calls()
2157                .iter()
2158                .map(|tc| {
2159                    let args = if tc.args.is_null() {
2160                        serde_json::Value::Object(Default::default())
2161                    } else {
2162                        tc.args.clone()
2163                    };
2164                    ToolCallInfo {
2165                        name: tc.name.clone(),
2166                        args,
2167                    }
2168                })
2169                .collect();
2170
2171            let event = HookEvent::GenerateEnd(GenerateEndEvent {
2172                session_id: session_id.to_string(),
2173                prompt: prompt.to_string(),
2174                response_text: response.text().to_string(),
2175                tool_calls,
2176                usage: TokenUsageInfo {
2177                    prompt_tokens: response.usage.prompt_tokens as i32,
2178                    completion_tokens: response.usage.completion_tokens as i32,
2179                    total_tokens: response.usage.total_tokens as i32,
2180                },
2181                duration_ms,
2182            });
2183            let _ = he.fire(&event).await;
2184        }
2185    }
2186
2187    /// Fire PrePrompt hook event before prompt augmentation.
2188    /// Returns optional modified prompt text from the hook.
2189    async fn fire_pre_prompt(
2190        &self,
2191        session_id: &str,
2192        prompt: &str,
2193        system_prompt: &Option<String>,
2194        message_count: usize,
2195    ) -> Option<String> {
2196        if let Some(he) = &self.config.hook_engine {
2197            let event = HookEvent::PrePrompt(PrePromptEvent {
2198                session_id: session_id.to_string(),
2199                prompt: prompt.to_string(),
2200                system_prompt: system_prompt.clone(),
2201                message_count,
2202            });
2203            let result = he.fire(&event).await;
2204            if let HookResult::Continue(Some(modified)) = result {
2205                // Extract modified prompt from hook response
2206                if let Some(new_prompt) = modified.get("prompt").and_then(|v| v.as_str()) {
2207                    return Some(new_prompt.to_string());
2208                }
2209            }
2210        }
2211        None
2212    }
2213
2214    /// Fire PostResponse hook event after the agent loop completes.
2215    async fn fire_post_response(
2216        &self,
2217        session_id: &str,
2218        response_text: &str,
2219        tool_calls_count: usize,
2220        usage: &TokenUsage,
2221        duration_ms: u64,
2222    ) {
2223        if let Some(he) = &self.config.hook_engine {
2224            let event = HookEvent::PostResponse(PostResponseEvent {
2225                session_id: session_id.to_string(),
2226                response_text: response_text.to_string(),
2227                tool_calls_count,
2228                usage: TokenUsageInfo {
2229                    prompt_tokens: usage.prompt_tokens as i32,
2230                    completion_tokens: usage.completion_tokens as i32,
2231                    total_tokens: usage.total_tokens as i32,
2232                },
2233                duration_ms,
2234            });
2235            let he = Arc::clone(he);
2236            tokio::spawn(async move {
2237                let _ = he.fire(&event).await;
2238            });
2239        }
2240    }
2241
2242    /// Fire OnError hook event when an error occurs.
2243    async fn fire_on_error(
2244        &self,
2245        session_id: &str,
2246        error_type: ErrorType,
2247        error_message: &str,
2248        context: serde_json::Value,
2249    ) {
2250        if let Some(he) = &self.config.hook_engine {
2251            let event = HookEvent::OnError(OnErrorEvent {
2252                session_id: session_id.to_string(),
2253                error_type,
2254                error_message: error_message.to_string(),
2255                context,
2256            });
2257            let he = Arc::clone(he);
2258            tokio::spawn(async move {
2259                let _ = he.fire(&event).await;
2260            });
2261        }
2262    }
2263
2264    /// Execute the agent loop for a prompt
2265    ///
2266    /// Takes the conversation history and a new user prompt.
2267    /// Returns the agent result and updated message history.
2268    /// When event_tx is provided, uses streaming LLM API for real-time text output.
2269    pub async fn execute(
2270        &self,
2271        history: &[Message],
2272        prompt: &str,
2273        event_tx: Option<mpsc::Sender<AgentEvent>>,
2274    ) -> Result<AgentResult> {
2275        self.execute_with_session(history, prompt, None, event_tx, None)
2276            .await
2277    }
2278
2279    /// Execute the agent loop with pre-built messages (user message already included).
2280    ///
2281    /// Used by `send_with_attachments` / `stream_with_attachments` where the
2282    /// user message contains multi-modal content and is already appended to
2283    /// the messages vec.
2284    pub async fn execute_from_messages(
2285        &self,
2286        messages: Vec<Message>,
2287        session_id: Option<&str>,
2288        event_tx: Option<mpsc::Sender<AgentEvent>>,
2289        cancel_token: Option<&tokio_util::sync::CancellationToken>,
2290    ) -> Result<AgentResult> {
2291        let default_token = tokio_util::sync::CancellationToken::new();
2292        let token = cancel_token.unwrap_or(&default_token);
2293        tracing::info!(
2294            a3s.session.id = session_id.unwrap_or("none"),
2295            a3s.agent.max_turns = self.config.max_tool_rounds,
2296            "a3s.agent.execute_from_messages started"
2297        );
2298
2299        // Extract the last user message text for hooks, memory recall, and events.
2300        // Pass empty prompt so execute_loop skips adding a duplicate user message,
2301        // but provide effective_prompt for hook/memory/event purposes.
2302        let effective_prompt = messages
2303            .iter()
2304            .rev()
2305            .find(|m| m.role == "user")
2306            .map(|m| m.text())
2307            .unwrap_or_default();
2308
2309        let result = self
2310            .execute_loop_inner(
2311                &messages,
2312                "",
2313                &effective_prompt,
2314                None, // no pre-computed style; resolve inside the loop
2315                session_id,
2316                event_tx,
2317                token,
2318                true, // emit_end: this is a standalone execution
2319            )
2320            .await;
2321
2322        match &result {
2323            Ok(r) => tracing::info!(
2324                a3s.agent.tool_calls_count = r.tool_calls_count,
2325                a3s.llm.total_tokens = r.usage.total_tokens,
2326                "a3s.agent.execute_from_messages completed"
2327            ),
2328            Err(e) => tracing::warn!(
2329                error = %e,
2330                "a3s.agent.execute_from_messages failed"
2331            ),
2332        }
2333
2334        result
2335    }
2336
2337    /// Execute the agent loop for a prompt with session context
2338    ///
2339    /// Takes the conversation history, user prompt, and optional session ID.
2340    /// When session_id is provided, context providers can use it for session-specific context.
2341    pub async fn execute_with_session(
2342        &self,
2343        history: &[Message],
2344        prompt: &str,
2345        session_id: Option<&str>,
2346        event_tx: Option<mpsc::Sender<AgentEvent>>,
2347        cancel_token: Option<&tokio_util::sync::CancellationToken>,
2348    ) -> Result<AgentResult> {
2349        let default_token = tokio_util::sync::CancellationToken::new();
2350        let token = cancel_token.unwrap_or(&default_token);
2351        tracing::info!(
2352            a3s.session.id = session_id.unwrap_or("none"),
2353            a3s.agent.max_turns = self.config.max_tool_rounds,
2354            "a3s.agent.execute started"
2355        );
2356
2357        // Step 1: pre-analysis — intent + goal + plan + input optimization in ONE LLM call.
2358        // Auto mode lets structured pre-analysis decide whether planning is needed.
2359        let pre_analysis: Option<PreAnalysis> = {
2360            let needs_llm_prep = self.should_run_pre_analysis();
2361
2362            if !needs_llm_prep {
2363                None
2364            } else {
2365                match LlmPlanner::pre_analyze(&self.llm_client.clone(), prompt).await {
2366                    Ok(analysis) => {
2367                        tracing::debug!(
2368                            intent = ?analysis.intent,
2369                            requires_planning = analysis.requires_planning,
2370                            plan_steps = analysis.execution_plan.steps.len(),
2371                            "Pre-analysis completed"
2372                        );
2373                        Some(analysis)
2374                    }
2375                    Err(e) => {
2376                        tracing::warn!(error = %e, "Pre-analysis failed; using local style fallback");
2377                        None
2378                    }
2379                }
2380            }
2381        };
2382
2383        // Step 2: choose the execution style. A successful pre-analysis is
2384        // authoritative; local deterministic detection is only a no-LLM fallback
2385        // for disabled planning or failed pre-analysis.
2386        let exec_style = if let Some(analysis) = pre_analysis.as_ref() {
2387            analysis.intent
2388        } else {
2389            let (style, confidence) = AgentStyle::detect_with_confidence(prompt);
2390            tracing::debug!(
2391                intent.classification = ?style,
2392                intent.confidence = ?confidence,
2393                intent.source = "local_fallback",
2394                "Intent classified locally"
2395            );
2396            style
2397        };
2398
2399        // Determine whether to use planning mode.
2400        let use_planning = match self.config.planning_mode {
2401            PlanningMode::Disabled => false,
2402            PlanningMode::Enabled => true,
2403            PlanningMode::Auto => pre_analysis
2404                .as_ref()
2405                .map(|analysis| analysis.requires_planning)
2406                .unwrap_or_else(|| exec_style.requires_planning()),
2407        };
2408
2409        // Determine the effective prompt: use optimized input from pre-analysis if available.
2410        // Clone to avoid borrow conflict when pre_analysis is moved.
2411        let effective_prompt: String = match pre_analysis.as_ref() {
2412            Some(a) => a.optimized_input.clone(),
2413            None => prompt.to_string(),
2414        };
2415
2416        let result = if use_planning {
2417            self.execute_with_planning(
2418                history,
2419                &effective_prompt,
2420                session_id,
2421                event_tx,
2422                pre_analysis,
2423            )
2424            .await
2425        } else {
2426            self.execute_loop(
2427                history,
2428                &effective_prompt,
2429                exec_style,
2430                session_id,
2431                event_tx,
2432                token,
2433                true,
2434            )
2435            .await
2436        };
2437
2438        match &result {
2439            Ok(r) => {
2440                tracing::info!(
2441                    a3s.agent.tool_calls_count = r.tool_calls_count,
2442                    a3s.llm.total_tokens = r.usage.total_tokens,
2443                    "a3s.agent.execute completed"
2444                );
2445                // Fire PostResponse hook
2446                self.fire_post_response(
2447                    session_id.unwrap_or(""),
2448                    &r.text,
2449                    r.tool_calls_count,
2450                    &r.usage,
2451                    0, // duration tracked externally
2452                )
2453                .await;
2454            }
2455            Err(e) => {
2456                tracing::warn!(
2457                    error = %e,
2458                    "a3s.agent.execute failed"
2459                );
2460                // Fire OnError hook
2461                self.fire_on_error(
2462                    session_id.unwrap_or(""),
2463                    ErrorType::Other,
2464                    &e.to_string(),
2465                    serde_json::json!({"phase": "execute"}),
2466                )
2467                .await;
2468            }
2469        }
2470
2471        result
2472    }
2473
2474    /// Core execution loop (without planning routing).
2475    ///
2476    /// This is the inner loop that runs LLM calls and tool executions.
2477    /// Called directly by `execute_with_session` (after planning check)
2478    /// and by `execute_plan` (for individual steps, bypassing planning).
2479    #[allow(clippy::too_many_arguments)]
2480    async fn execute_loop(
2481        &self,
2482        history: &[Message],
2483        prompt: &str,
2484        effective_style: AgentStyle,
2485        session_id: Option<&str>,
2486        event_tx: Option<mpsc::Sender<AgentEvent>>,
2487        cancel_token: &tokio_util::sync::CancellationToken,
2488        emit_end: bool,
2489    ) -> Result<AgentResult> {
2490        // When called via execute_loop, the prompt is used for both
2491        // message-adding and hook/memory/event purposes.
2492        self.execute_loop_inner(
2493            history,
2494            prompt,
2495            prompt,
2496            Some(effective_style),
2497            session_id,
2498            event_tx,
2499            cancel_token,
2500            emit_end,
2501        )
2502        .await
2503    }
2504
2505    /// Inner execution loop.
2506    ///
2507    /// `msg_prompt` controls whether a user message is appended (empty = skip).
2508    /// `effective_prompt` is used for hooks, memory recall, taint tracking, and events.
2509    /// `effective_style` pre-computed style to skip redundant LLM-based intent detection.
2510    /// `emit_end` controls whether to send `AgentEvent::End` when the loop completes
2511    /// (should be false when called from `execute_plan` to avoid duplicate End events).
2512    #[allow(clippy::too_many_arguments)]
2513    async fn execute_loop_inner(
2514        &self,
2515        history: &[Message],
2516        msg_prompt: &str,
2517        effective_prompt: &str,
2518        effective_style: Option<AgentStyle>,
2519        session_id: Option<&str>,
2520        event_tx: Option<mpsc::Sender<AgentEvent>>,
2521        cancel_token: &tokio_util::sync::CancellationToken,
2522        emit_end: bool,
2523    ) -> Result<AgentResult> {
2524        let mut messages = history.to_vec();
2525        let mut total_usage = TokenUsage::default();
2526        let mut tool_calls_count = 0;
2527        let mut verification_reports = Vec::new();
2528        let mut turn = 0;
2529        // Consecutive malformed-tool-args errors (4.1 parse error recovery)
2530        let mut parse_error_count: u32 = 0;
2531        // Continuation injection counter
2532        let mut continuation_count: u32 = 0;
2533        let mut recent_tool_signatures: Vec<String> = Vec::new();
2534
2535        // Start execution timer for timeout protection
2536        let execution_start = std::time::Instant::now();
2537
2538        let style_prompt = if effective_prompt.is_empty() {
2539            msg_prompt
2540        } else {
2541            effective_prompt
2542        };
2543        let effective_style = match effective_style {
2544            Some(s) => s,
2545            None => self.resolve_effective_style(style_prompt).await,
2546        };
2547        let effective_system_prompt = self.system_prompt_for_style(effective_style);
2548        if let Some(tx) = &event_tx {
2549            tx.send(AgentEvent::AgentModeChanged {
2550                mode: effective_style.runtime_mode().to_string(),
2551                agent: effective_style.builtin_agent_name().to_string(),
2552                description: effective_style.description().to_string(),
2553            })
2554            .await
2555            .ok();
2556        }
2557
2558        // Send start event
2559        if let Some(tx) = &event_tx {
2560            tx.send(AgentEvent::Start {
2561                prompt: effective_prompt.to_string(),
2562            })
2563            .await
2564            .ok();
2565        }
2566
2567        // Forward queue events (CommandDeadLettered, CommandRetry, QueueAlert) to event stream
2568        let queue_forward_handle =
2569            if let (Some(ref queue), Some(ref tx)) = (&self.command_queue, &event_tx) {
2570                let mut rx = queue.subscribe();
2571                let tx = tx.clone();
2572                let cancel = cancel_token.clone();
2573                Some(tokio::spawn(async move {
2574                    loop {
2575                        tokio::select! {
2576                            event = rx.recv() => {
2577                                match event {
2578                                    Ok(e) => {
2579                                        if tx.send(e).await.is_err() {
2580                                            break;
2581                                        }
2582                                    }
2583                                    Err(_) => break,
2584                                }
2585                            }
2586                            _ = cancel.cancelled() => {
2587                                break;
2588                            }
2589                        }
2590                    }
2591                }))
2592            } else {
2593                None
2594            };
2595
2596        // Fire PrePrompt hook (may modify the prompt)
2597        let built_system_prompt = Some(effective_system_prompt.clone());
2598        let hooked_prompt = if let Some(modified) = self
2599            .fire_pre_prompt(
2600                session_id.unwrap_or(""),
2601                effective_prompt,
2602                &built_system_prompt,
2603                messages.len(),
2604            )
2605            .await
2606        {
2607            modified
2608        } else {
2609            effective_prompt.to_string()
2610        };
2611        let effective_prompt = hooked_prompt.as_str();
2612
2613        // Taint-track the incoming prompt for sensitive data detection
2614        if let Some(ref sp) = self.config.security_provider {
2615            sp.taint_input(effective_prompt);
2616        }
2617
2618        // Resolve context from providers on first turn (before adding user message)
2619        // Intent-driven: detect context perception need first via AHP harness, then fire hook
2620        let workspace = self.tool_context.workspace.display().to_string();
2621        let session_id_str = session_id.unwrap_or("");
2622        let mut context_results = if !self.config.context_providers.is_empty() {
2623            if let Some(tx) = &event_tx {
2624                tx.send(AgentEvent::ContextResolving {
2625                    providers: self
2626                        .config
2627                        .context_providers
2628                        .iter()
2629                        .map(|p| p.name().to_string())
2630                        .collect(),
2631                })
2632                .await
2633                .ok();
2634            }
2635
2636            // Step 1: Fire IntentDetection harness point on EVERY prompt
2637            #[allow(clippy::needless_borrow)]
2638            let harness_intent = self
2639                .fire_intent_detection(effective_prompt, &session_id_str, &workspace)
2640                .await;
2641
2642            // Step 2: Build perception event from harness result, or fallback to local detection
2643            #[allow(clippy::needless_borrow)]
2644            let perception_event = if let Some(detected) = harness_intent {
2645                tracing::info!(
2646                    intent = %detected.detected_intent,
2647                    confidence = %detected.confidence,
2648                    "Intent detected from AHP harness"
2649                );
2650                Some(build_pre_context_perception_from_intent(
2651                    detected,
2652                    effective_prompt,
2653                    &session_id_str,
2654                    &workspace,
2655                ))
2656            } else {
2657                // Fallback to local keyword detection
2658                tracing::debug!("No intent from harness, using local keyword detection");
2659                self.detect_context_perception_intent(effective_prompt, &session_id_str, &workspace)
2660            };
2661
2662            if let Some(perception_event) = perception_event {
2663                // Step 3: Fire PreContextPerception hook to get harness decision
2664                tracing::info!(
2665                    intent = %perception_event.intent,
2666                    target_type = %perception_event.target_type,
2667                    "Context perception intent detected, firing AHP hook"
2668                );
2669
2670                let hook_result = self.fire_pre_context_perception(&perception_event).await;
2671
2672                match hook_result {
2673                    HookResult::Continue(Some(modified_context)) => {
2674                        // AHP harness returned injected context - parse and use it
2675                        #[cfg(feature = "ahp")]
2676                        {
2677                            if let Ok(injected) =
2678                                serde_json::from_value::<InjectedContext>(modified_context)
2679                            {
2680                                tracing::info!(
2681                                    facts = injected.facts.len(),
2682                                    "Using injected context from AHP harness"
2683                                );
2684                                self.apply_injected_context(injected)
2685                            } else {
2686                                // Fall back to normal providers if parsing fails
2687                                tracing::warn!(
2688                                    "Failed to parse injected context, falling back to providers"
2689                                );
2690                                self.resolve_context(effective_prompt, session_id).await
2691                            }
2692                        }
2693                        #[cfg(not(feature = "ahp"))]
2694                        {
2695                            // Without AHP, fall back to normal providers
2696                            let _ = modified_context; // suppress unused warning
2697                            self.resolve_context(effective_prompt, session_id).await
2698                        }
2699                    }
2700                    HookResult::Block(_) => {
2701                        // Harness blocked context injection - skip
2702                        tracing::info!("AHP harness blocked context injection");
2703                        Vec::new()
2704                    }
2705                    _ => {
2706                        // No modification or unknown result, proceed with normal providers
2707                        self.resolve_context(effective_prompt, session_id).await
2708                    }
2709                }
2710            } else {
2711                // No intent detected, proceed with normal providers
2712                self.resolve_context(effective_prompt, session_id).await
2713            }
2714        } else {
2715            Vec::new()
2716        };
2717
2718        // Recall relevant memories as structured context instead of directly
2719        // rewriting the system prompt.
2720        if let Some(ref memory) = self.config.memory {
2721            match memory.recall_similar(effective_prompt, 5).await {
2722                Ok(items) if !items.is_empty() => {
2723                    if let Some(tx) = &event_tx {
2724                        for item in &items {
2725                            tx.send(AgentEvent::MemoryRecalled {
2726                                memory_id: item.id.clone(),
2727                                content: item.content.clone(),
2728                                relevance: item.relevance_score(),
2729                            })
2730                            .await
2731                            .ok();
2732                        }
2733                        tx.send(AgentEvent::MemoriesSearched {
2734                            query: Some(effective_prompt.to_string()),
2735                            tags: Vec::new(),
2736                            result_count: items.len(),
2737                        })
2738                        .await
2739                        .ok();
2740                    }
2741                    context_results.push(crate::memory::memory_items_to_context_result(
2742                        "memory", items,
2743                    ));
2744                }
2745                Ok(_) => {}
2746                Err(e) => {
2747                    tracing::warn!(error = %e, "Failed to recall memory context");
2748                }
2749            }
2750        }
2751
2752        let context_assembly = self.assemble_context_results(&context_results);
2753
2754        // Send context resolved event
2755        if let Some(tx) = &event_tx {
2756            let total_items = context_assembly.items.len();
2757            let total_tokens = context_assembly.total_tokens;
2758
2759            tracing::info!(
2760                context_items = total_items,
2761                context_tokens = total_tokens,
2762                context_truncated = context_assembly.truncated,
2763                "Context resolution completed"
2764            );
2765
2766            tx.send(AgentEvent::ContextResolved {
2767                total_items,
2768                total_tokens,
2769            })
2770            .await
2771            .ok();
2772        }
2773
2774        let augmented_system = self
2775            .build_augmented_system_prompt_with_base(&effective_system_prompt, &context_assembly);
2776
2777        // Add user message
2778        if !msg_prompt.is_empty() {
2779            messages.push(Message::user(msg_prompt));
2780        }
2781
2782        loop {
2783            turn += 1;
2784
2785            // Check execution timeout
2786            if let Some(max_time_ms) = self.config.max_execution_time_ms {
2787                let elapsed_ms = execution_start.elapsed().as_millis() as u64;
2788                if elapsed_ms > max_time_ms {
2789                    let error = format!(
2790                        "Execution timeout after {} seconds (limit: {} seconds). Completed {} turns.",
2791                        elapsed_ms / 1000,
2792                        max_time_ms / 1000,
2793                        turn - 1
2794                    );
2795                    tracing::warn!(
2796                        elapsed_ms = elapsed_ms,
2797                        max_time_ms = max_time_ms,
2798                        turns = turn - 1,
2799                        "Execution timeout exceeded"
2800                    );
2801                    if let Some(tx) = &event_tx {
2802                        tx.send(AgentEvent::Error {
2803                            message: error.clone(),
2804                        })
2805                        .await
2806                        .ok();
2807                    }
2808                    anyhow::bail!(error);
2809                }
2810            }
2811
2812            if turn > self.config.max_tool_rounds {
2813                let error = format!("Max tool rounds ({}) exceeded", self.config.max_tool_rounds);
2814                if let Some(tx) = &event_tx {
2815                    tx.send(AgentEvent::Error {
2816                        message: error.clone(),
2817                    })
2818                    .await
2819                    .ok();
2820                }
2821                anyhow::bail!(error);
2822            }
2823
2824            // Send turn start event
2825            if let Some(tx) = &event_tx {
2826                tx.send(AgentEvent::TurnStart { turn }).await.ok();
2827            }
2828
2829            tracing::info!(
2830                turn = turn,
2831                max_turns = self.config.max_tool_rounds,
2832                "Agent turn started"
2833            );
2834
2835            // Call LLM - use streaming if we have an event channel
2836            tracing::info!(
2837                a3s.llm.streaming = event_tx.is_some(),
2838                "LLM completion started"
2839            );
2840
2841            // Fire GenerateStart hook
2842            self.fire_generate_start(
2843                session_id.unwrap_or(""),
2844                effective_prompt,
2845                &augmented_system,
2846            )
2847            .await;
2848
2849            let llm_start = std::time::Instant::now();
2850            // Circuit breaker (4.3): retry non-streaming LLM calls on transient failures.
2851            // Each failure increments `consecutive_llm_errors`; on success it resets to 0.
2852            // Streaming mode bails immediately on failure (events can't be replayed).
2853            let response = {
2854                let threshold = self.config.circuit_breaker_threshold.max(1);
2855                let mut attempt = 0u32;
2856                loop {
2857                    attempt += 1;
2858                    let result = self
2859                        .call_llm(
2860                            &messages,
2861                            augmented_system.as_deref(),
2862                            &event_tx,
2863                            cancel_token,
2864                        )
2865                        .await;
2866                    match result {
2867                        Ok(r) => {
2868                            break r;
2869                        }
2870                        // Never retry if cancelled
2871                        Err(e) if cancel_token.is_cancelled() => {
2872                            anyhow::bail!(e);
2873                        }
2874                        // Retry when: non-streaming under threshold, OR first streaming attempt
2875                        Err(e) if attempt < threshold && (event_tx.is_none() || attempt == 1) => {
2876                            tracing::warn!(
2877                                turn = turn,
2878                                attempt = attempt,
2879                                threshold = threshold,
2880                                error = %e,
2881                                "LLM call failed, will retry"
2882                            );
2883                            tokio::time::sleep(Duration::from_millis(100 * attempt as u64)).await;
2884                        }
2885                        // Threshold exceeded or streaming mid-stream: bail
2886                        Err(e) => {
2887                            let msg = if attempt > 1 {
2888                                format!(
2889                                    "LLM circuit breaker triggered: failed after {} attempt(s): {}",
2890                                    attempt, e
2891                                )
2892                            } else {
2893                                format!("LLM call failed: {}", e)
2894                            };
2895                            tracing::error!(turn = turn, attempt = attempt, "{}", msg);
2896                            // Fire OnError hook for LLM failure
2897                            self.fire_on_error(
2898                                session_id.unwrap_or(""),
2899                                ErrorType::LlmFailure,
2900                                &msg,
2901                                serde_json::json!({"turn": turn, "attempt": attempt}),
2902                            )
2903                            .await;
2904                            if let Some(tx) = &event_tx {
2905                                tx.send(AgentEvent::Error {
2906                                    message: msg.clone(),
2907                                })
2908                                .await
2909                                .ok();
2910                            }
2911                            anyhow::bail!(msg);
2912                        }
2913                    }
2914                }
2915            };
2916
2917            // Update usage
2918            total_usage.prompt_tokens += response.usage.prompt_tokens;
2919            total_usage.completion_tokens += response.usage.completion_tokens;
2920            total_usage.total_tokens += response.usage.total_tokens;
2921
2922            // Record LLM completion telemetry
2923            let llm_duration = llm_start.elapsed();
2924            tracing::info!(
2925                turn = turn,
2926                streaming = event_tx.is_some(),
2927                prompt_tokens = response.usage.prompt_tokens,
2928                completion_tokens = response.usage.completion_tokens,
2929                total_tokens = response.usage.total_tokens,
2930                stop_reason = response.stop_reason.as_deref().unwrap_or("unknown"),
2931                duration_ms = llm_duration.as_millis() as u64,
2932                "LLM completion finished"
2933            );
2934
2935            // Fire GenerateEnd hook
2936            self.fire_generate_end(
2937                session_id.unwrap_or(""),
2938                effective_prompt,
2939                &response,
2940                llm_duration.as_millis() as u64,
2941            )
2942            .await;
2943
2944            // Record LLM usage on the llm span
2945            crate::telemetry::record_llm_usage(
2946                response.usage.prompt_tokens,
2947                response.usage.completion_tokens,
2948                response.usage.total_tokens,
2949                response.stop_reason.as_deref(),
2950            );
2951            // Log turn token usage
2952            tracing::info!(
2953                turn = turn,
2954                a3s.llm.total_tokens = response.usage.total_tokens,
2955                "Turn token usage"
2956            );
2957
2958            // Add assistant message to history
2959            messages.push(response.message.clone());
2960
2961            // Check for tool calls
2962            let tool_calls = response.tool_calls();
2963
2964            // Send turn end event
2965            if let Some(tx) = &event_tx {
2966                tx.send(AgentEvent::TurnEnd {
2967                    turn,
2968                    usage: response.usage.clone(),
2969                })
2970                .await
2971                .ok();
2972            }
2973
2974            // Auto-compact: check if context usage exceeds threshold
2975            if self.config.auto_compact {
2976                let used = response.usage.prompt_tokens;
2977                let max = self.config.max_context_tokens;
2978                let threshold = self.config.auto_compact_threshold;
2979
2980                if crate::compaction::should_auto_compact(used, max, threshold) {
2981                    let before_len = messages.len();
2982                    let percent_before = used as f32 / max as f32;
2983
2984                    tracing::info!(
2985                        used_tokens = used,
2986                        max_tokens = max,
2987                        percent = percent_before,
2988                        threshold = threshold,
2989                        "Auto-compact triggered"
2990                    );
2991
2992                    // Step 1: Prune large tool outputs first (cheap, no LLM call)
2993                    if let Some(pruned) = crate::compaction::prune_tool_outputs(&messages) {
2994                        messages = pruned;
2995                        tracing::info!("Tool output pruning applied");
2996                    }
2997
2998                    // Step 2: Full summarization using the agent's LLM client
2999                    if let Ok(Some(compacted)) = crate::compaction::compact_messages(
3000                        session_id.unwrap_or(""),
3001                        &messages,
3002                        &self.llm_client,
3003                    )
3004                    .await
3005                    {
3006                        messages = compacted;
3007                    }
3008
3009                    // Emit compaction event
3010                    if let Some(tx) = &event_tx {
3011                        tx.send(AgentEvent::ContextCompacted {
3012                            session_id: session_id.unwrap_or("").to_string(),
3013                            before_messages: before_len,
3014                            after_messages: messages.len(),
3015                            percent_before,
3016                        })
3017                        .await
3018                        .ok();
3019                    }
3020                }
3021            }
3022
3023            if tool_calls.is_empty() {
3024                // No tool calls — check if we should inject a continuation message
3025                // before treating this as a final answer.
3026                let final_text = response.text();
3027
3028                if self.config.continuation_enabled
3029                    && continuation_count < self.config.max_continuation_turns
3030                    && turn < self.config.max_tool_rounds  // never inject past the turn limit
3031                    && Self::looks_incomplete(&final_text)
3032                {
3033                    continuation_count += 1;
3034                    tracing::info!(
3035                        turn = turn,
3036                        continuation = continuation_count,
3037                        max_continuation = self.config.max_continuation_turns,
3038                        "Injecting continuation message — response looks incomplete"
3039                    );
3040                    // Inject continuation as a user message and keep looping
3041                    messages.push(Message::user(crate::prompts::CONTINUATION));
3042                    continue;
3043                }
3044
3045                // Sanitize output to redact any sensitive data before returning
3046                let final_text = if let Some(ref sp) = self.config.security_provider {
3047                    sp.sanitize_output(&final_text)
3048                } else {
3049                    final_text
3050                };
3051
3052                // Record final totals
3053                tracing::info!(
3054                    tool_calls_count = tool_calls_count,
3055                    total_prompt_tokens = total_usage.prompt_tokens,
3056                    total_completion_tokens = total_usage.completion_tokens,
3057                    total_tokens = total_usage.total_tokens,
3058                    turns = turn,
3059                    "Agent execution completed"
3060                );
3061
3062                if emit_end {
3063                    if let Some(tx) = &event_tx {
3064                        let verification_summary =
3065                            crate::verification::VerificationSummary::from_reports(
3066                                &verification_reports,
3067                            );
3068                        tx.send(AgentEvent::End {
3069                            text: final_text.clone(),
3070                            usage: total_usage.clone(),
3071                            verification_summary: Box::new(verification_summary),
3072                            meta: response.meta.clone(),
3073                        })
3074                        .await
3075                        .ok();
3076                    }
3077                }
3078
3079                // Notify context providers of turn completion for memory extraction
3080                if let Some(sid) = session_id {
3081                    self.notify_turn_complete(sid, effective_prompt, &final_text)
3082                        .await;
3083                }
3084
3085                // Cleanup: abort queue forward handle if it exists
3086                if let Some(handle) = queue_forward_handle {
3087                    handle.abort();
3088                }
3089
3090                return Ok(AgentResult {
3091                    text: final_text,
3092                    messages,
3093                    usage: total_usage,
3094                    tool_calls_count,
3095                    verification_reports,
3096                });
3097            }
3098
3099            // Execute tools sequentially
3100            // Fast path: when all tool calls are independent file writes and no hooks/HITL
3101            // are configured, execute them concurrently to avoid serial I/O bottleneck.
3102            let tool_calls = if self.config.hook_engine.is_none()
3103                && self.config.confirmation_manager.is_none()
3104                && tool_calls.len() > 1
3105                && tool_calls
3106                    .iter()
3107                    .all(|tc| Self::is_parallel_safe_write(&tc.name, &tc.args))
3108                && {
3109                    // All target paths must be distinct (no write-write conflicts)
3110                    let paths: Vec<_> = tool_calls
3111                        .iter()
3112                        .filter_map(|tc| Self::extract_write_path(&tc.args))
3113                        .collect();
3114                    paths.len() == tool_calls.len()
3115                        && paths.iter().collect::<std::collections::HashSet<_>>().len()
3116                            == paths.len()
3117                } {
3118                tracing::info!(
3119                    count = tool_calls.len(),
3120                    "Parallel write batch: executing {} independent file writes concurrently",
3121                    tool_calls.len()
3122                );
3123
3124                let futures: Vec<_> = tool_calls
3125                    .iter()
3126                    .map(|tc| {
3127                        let ctx = self.tool_context.clone();
3128                        let executor = Arc::clone(&self.tool_executor);
3129                        let name = tc.name.clone();
3130                        let args = tc.args.clone();
3131                        async move { executor.execute_with_context(&name, &args, &ctx).await }
3132                    })
3133                    .collect();
3134
3135                let results = join_all(futures).await;
3136
3137                // Post-process results in original order (sequential, preserves message ordering)
3138                for (tc, result) in tool_calls.iter().zip(results) {
3139                    tool_calls_count += 1;
3140                    let (output, exit_code, is_error, metadata, images) =
3141                        Self::tool_result_to_tuple(result);
3142                    Self::collect_verification_report(&mut verification_reports, &metadata);
3143
3144                    // Track tool call in progress tracker
3145                    self.track_tool_result(&tc.name, &tc.args, exit_code);
3146
3147                    let output = if let Some(ref sp) = self.config.security_provider {
3148                        sp.sanitize_output(&output)
3149                    } else {
3150                        output
3151                    };
3152
3153                    if let Some(tx) = &event_tx {
3154                        tx.send(AgentEvent::ToolEnd {
3155                            id: tc.id.clone(),
3156                            name: tc.name.clone(),
3157                            output: output.clone(),
3158                            exit_code,
3159                            metadata,
3160                        })
3161                        .await
3162                        .ok();
3163                    }
3164
3165                    if images.is_empty() {
3166                        messages.push(Message::tool_result(&tc.id, &output, is_error));
3167                    } else {
3168                        messages.push(Message::tool_result_with_images(
3169                            &tc.id, &output, &images, is_error,
3170                        ));
3171                    }
3172                }
3173
3174                // Skip the sequential loop below
3175                continue;
3176            } else {
3177                tool_calls
3178            };
3179
3180            for tool_call in tool_calls {
3181                tool_calls_count += 1;
3182
3183                let tool_start = std::time::Instant::now();
3184
3185                tracing::info!(
3186                    tool_name = tool_call.name.as_str(),
3187                    tool_id = tool_call.id.as_str(),
3188                    "Tool execution started"
3189                );
3190
3191                // Send tool start event (only if not already sent during streaming)
3192                // In streaming mode, ToolStart is sent when we receive ToolUseStart from LLM
3193                // But we still need to send ToolEnd after execution
3194
3195                // Check for duplicate tool calls to prevent infinite loops
3196                let tool_signature = format!(
3197                    "{}:{}",
3198                    tool_call.name,
3199                    serde_json::to_string(&tool_call.args).unwrap_or_default()
3200                );
3201
3202                let duplicate_count = recent_tool_signatures
3203                    .iter()
3204                    .filter(|sig| sig.starts_with(&tool_signature))
3205                    .count();
3206
3207                if duplicate_count >= self.config.duplicate_tool_call_threshold as usize {
3208                    let error_msg = format!(
3209                        "Tool '{}' has been called {} times with identical arguments. \
3210                         Aborting to prevent infinite loop. Consider modifying your approach.",
3211                        tool_call.name, duplicate_count
3212                    );
3213
3214                    tracing::warn!(
3215                        tool_name = tool_call.name.as_str(),
3216                        duplicate_count = duplicate_count,
3217                        threshold = self.config.duplicate_tool_call_threshold,
3218                        "Duplicate tool call threshold exceeded"
3219                    );
3220
3221                    // Send error event
3222                    if let Some(tx) = &event_tx {
3223                        tx.send(AgentEvent::Error {
3224                            message: error_msg.clone(),
3225                        })
3226                        .await
3227                        .ok();
3228                    }
3229
3230                    // Return error result to LLM so it can adjust its approach
3231                    messages.push(Message::tool_result(&tool_call.id, &error_msg, true));
3232                    continue;
3233                }
3234
3235                // Check for malformed tool arguments from LLM (4.1 parse error recovery)
3236                if let Some(parse_error) =
3237                    tool_call.args.get("__parse_error").and_then(|v| v.as_str())
3238                {
3239                    parse_error_count += 1;
3240                    let error_msg = format!("Error: {}", parse_error);
3241                    tracing::warn!(
3242                        tool = tool_call.name.as_str(),
3243                        parse_error_count = parse_error_count,
3244                        max_parse_retries = self.config.max_parse_retries,
3245                        "Malformed tool arguments from LLM"
3246                    );
3247
3248                    if let Some(tx) = &event_tx {
3249                        tx.send(AgentEvent::ToolEnd {
3250                            id: tool_call.id.clone(),
3251                            name: tool_call.name.clone(),
3252                            output: error_msg.clone(),
3253                            exit_code: 1,
3254                            metadata: None,
3255                        })
3256                        .await
3257                        .ok();
3258                    }
3259
3260                    messages.push(Message::tool_result(&tool_call.id, &error_msg, true));
3261
3262                    if parse_error_count > self.config.max_parse_retries {
3263                        let msg = format!(
3264                            "LLM produced malformed tool arguments {} time(s) in a row \
3265                             (max_parse_retries={}); giving up",
3266                            parse_error_count, self.config.max_parse_retries
3267                        );
3268                        tracing::error!("{}", msg);
3269                        if let Some(tx) = &event_tx {
3270                            tx.send(AgentEvent::Error {
3271                                message: msg.clone(),
3272                            })
3273                            .await
3274                            .ok();
3275                        }
3276                        anyhow::bail!(msg);
3277                    }
3278                    continue;
3279                }
3280
3281                // Tool args are valid — reset parse error counter
3282                parse_error_count = 0;
3283
3284                // Check skill-based tool permissions
3285                if let Some(ref registry) = self.config.skill_registry {
3286                    let instruction_skills =
3287                        registry.by_kind(crate::skills::SkillKind::Instruction);
3288                    let has_restrictions =
3289                        instruction_skills.iter().any(|s| s.allowed_tools.is_some());
3290                    if has_restrictions {
3291                        let allowed = instruction_skills
3292                            .iter()
3293                            .any(|s| s.is_tool_allowed(&tool_call.name));
3294                        if !allowed {
3295                            let msg = format!(
3296                                "Tool '{}' is not allowed by any active skill.",
3297                                tool_call.name
3298                            );
3299                            tracing::info!(
3300                                tool_name = tool_call.name.as_str(),
3301                                "Tool blocked by skill registry"
3302                            );
3303                            if let Some(tx) = &event_tx {
3304                                tx.send(AgentEvent::PermissionDenied {
3305                                    tool_id: tool_call.id.clone(),
3306                                    tool_name: tool_call.name.clone(),
3307                                    args: tool_call.args.clone(),
3308                                    reason: msg.clone(),
3309                                })
3310                                .await
3311                                .ok();
3312                            }
3313                            messages.push(Message::tool_result(&tool_call.id, &msg, true));
3314                            continue;
3315                        }
3316                    }
3317                }
3318
3319                // Fire PreToolUse hook (may block the tool call)
3320                if let Some(HookResult::Block(reason)) = self
3321                    .fire_pre_tool_use(
3322                        session_id.unwrap_or(""),
3323                        &tool_call.name,
3324                        &tool_call.args,
3325                        recent_tool_signatures.clone(),
3326                    )
3327                    .await
3328                {
3329                    let msg = format!("Tool '{}' blocked by hook: {}", tool_call.name, reason);
3330                    tracing::info!(
3331                        tool_name = tool_call.name.as_str(),
3332                        "Tool blocked by PreToolUse hook"
3333                    );
3334
3335                    if let Some(tx) = &event_tx {
3336                        tx.send(AgentEvent::PermissionDenied {
3337                            tool_id: tool_call.id.clone(),
3338                            tool_name: tool_call.name.clone(),
3339                            args: tool_call.args.clone(),
3340                            reason: reason.clone(),
3341                        })
3342                        .await
3343                        .ok();
3344                    }
3345
3346                    messages.push(Message::tool_result(&tool_call.id, &msg, true));
3347                    continue;
3348                }
3349
3350                // Check permission before executing tool
3351                let permission_decision = if let Some(checker) = &self.config.permission_checker {
3352                    checker.check(&tool_call.name, &tool_call.args)
3353                } else {
3354                    // No policy configured — default to Ask so HITL can still intervene
3355                    PermissionDecision::Ask
3356                };
3357
3358                let (output, exit_code, is_error, metadata, images) = match permission_decision {
3359                    PermissionDecision::Deny => {
3360                        tracing::info!(
3361                            tool_name = tool_call.name.as_str(),
3362                            permission = "deny",
3363                            "Tool permission denied"
3364                        );
3365                        // Tool execution denied by permission policy
3366                        let denial_msg = format!(
3367                            "Permission denied: Tool '{}' is blocked by permission policy.",
3368                            tool_call.name
3369                        );
3370
3371                        // Send permission denied event
3372                        if let Some(tx) = &event_tx {
3373                            tx.send(AgentEvent::PermissionDenied {
3374                                tool_id: tool_call.id.clone(),
3375                                tool_name: tool_call.name.clone(),
3376                                args: tool_call.args.clone(),
3377                                reason: "Blocked by deny rule in permission policy".to_string(),
3378                            })
3379                            .await
3380                            .ok();
3381                        }
3382
3383                        (denial_msg, 1, true, None, Vec::new())
3384                    }
3385                    PermissionDecision::Allow => {
3386                        tracing::info!(
3387                            tool_name = tool_call.name.as_str(),
3388                            permission = "allow",
3389                            "Tool permission: allow"
3390                        );
3391                        // Permission explicitly allows — execute directly, no HITL
3392                        let stream_ctx =
3393                            self.streaming_tool_context(&event_tx, &tool_call.id, &tool_call.name);
3394                        let result = self
3395                            .execute_tool_queued_or_direct(
3396                                &tool_call.name,
3397                                &tool_call.args,
3398                                &stream_ctx,
3399                            )
3400                            .await;
3401
3402                        let tuple = Self::tool_result_to_tuple(result);
3403                        // Track tool call in progress tracker
3404                        let (_, exit_code, _, _, _) = tuple;
3405                        self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
3406                        tuple
3407                    }
3408                    PermissionDecision::Ask => {
3409                        tracing::info!(
3410                            tool_name = tool_call.name.as_str(),
3411                            permission = "ask",
3412                            "Tool permission: ask"
3413                        );
3414                        // Permission says Ask — delegate to HITL confirmation manager
3415                        if let Some(cm) = &self.config.confirmation_manager {
3416                            // Check YOLO lanes: if the tool's lane is in YOLO mode, skip confirmation
3417                            if !cm.requires_confirmation(&tool_call.name).await {
3418                                let stream_ctx = self.streaming_tool_context(
3419                                    &event_tx,
3420                                    &tool_call.id,
3421                                    &tool_call.name,
3422                                );
3423                                let result = self
3424                                    .execute_tool_queued_or_direct(
3425                                        &tool_call.name,
3426                                        &tool_call.args,
3427                                        &stream_ctx,
3428                                    )
3429                                    .await;
3430
3431                                let (output, exit_code, is_error, metadata, images) =
3432                                    Self::tool_result_to_tuple(result);
3433                                Self::collect_verification_report(
3434                                    &mut verification_reports,
3435                                    &metadata,
3436                                );
3437
3438                                // Track tool call in progress tracker
3439                                self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
3440
3441                                // Add tool result to messages
3442                                if images.is_empty() {
3443                                    messages.push(Message::tool_result(
3444                                        &tool_call.id,
3445                                        &output,
3446                                        is_error,
3447                                    ));
3448                                } else {
3449                                    messages.push(Message::tool_result_with_images(
3450                                        &tool_call.id,
3451                                        &output,
3452                                        &images,
3453                                        is_error,
3454                                    ));
3455                                }
3456
3457                                // Record tool result on the tool span for early exit
3458                                let tool_duration = tool_start.elapsed();
3459                                crate::telemetry::record_tool_result(exit_code, tool_duration);
3460
3461                                // Send ToolEnd event
3462                                if let Some(tx) = &event_tx {
3463                                    tx.send(AgentEvent::ToolEnd {
3464                                        id: tool_call.id.clone(),
3465                                        name: tool_call.name.clone(),
3466                                        output: output.clone(),
3467                                        exit_code,
3468                                        metadata,
3469                                    })
3470                                    .await
3471                                    .ok();
3472                                }
3473
3474                                // Fire PostToolUse hook (fire-and-forget)
3475                                self.fire_post_tool_use(
3476                                    session_id.unwrap_or(""),
3477                                    &tool_call.name,
3478                                    &tool_call.args,
3479                                    &output,
3480                                    exit_code == 0,
3481                                    tool_duration.as_millis() as u64,
3482                                )
3483                                .await;
3484
3485                                continue; // Skip the rest, move to next tool call
3486                            }
3487
3488                            // Get timeout from policy
3489                            let policy = cm.policy().await;
3490                            let timeout_ms = policy.default_timeout_ms;
3491                            let timeout_action = policy.timeout_action;
3492
3493                            // Request confirmation (this emits ConfirmationRequired event)
3494                            let rx = cm
3495                                .request_confirmation(
3496                                    &tool_call.id,
3497                                    &tool_call.name,
3498                                    &tool_call.args,
3499                                )
3500                                .await;
3501
3502                            // Forward ConfirmationRequired to the streaming event channel
3503                            // so external consumers (e.g. SafeClaw engine) can relay it
3504                            // to the browser UI.
3505                            if let Some(tx) = &event_tx {
3506                                tx.send(AgentEvent::ConfirmationRequired {
3507                                    tool_id: tool_call.id.clone(),
3508                                    tool_name: tool_call.name.clone(),
3509                                    args: tool_call.args.clone(),
3510                                    timeout_ms,
3511                                })
3512                                .await
3513                                .ok();
3514                            }
3515
3516                            // Wait for confirmation with timeout
3517                            let confirmation_result =
3518                                tokio::time::timeout(Duration::from_millis(timeout_ms), rx).await;
3519
3520                            match confirmation_result {
3521                                Ok(Ok(response)) => {
3522                                    // Forward ConfirmationReceived
3523                                    if let Some(tx) = &event_tx {
3524                                        tx.send(AgentEvent::ConfirmationReceived {
3525                                            tool_id: tool_call.id.clone(),
3526                                            approved: response.approved,
3527                                            reason: response.reason.clone(),
3528                                        })
3529                                        .await
3530                                        .ok();
3531                                    }
3532                                    if response.approved {
3533                                        let stream_ctx = self.streaming_tool_context(
3534                                            &event_tx,
3535                                            &tool_call.id,
3536                                            &tool_call.name,
3537                                        );
3538                                        let result = self
3539                                            .execute_tool_queued_or_direct(
3540                                                &tool_call.name,
3541                                                &tool_call.args,
3542                                                &stream_ctx,
3543                                            )
3544                                            .await;
3545
3546                                        let tuple = Self::tool_result_to_tuple(result);
3547                                        // Track tool call in progress tracker
3548                                        let (_, exit_code, _, _, _) = tuple;
3549                                        self.track_tool_result(
3550                                            &tool_call.name,
3551                                            &tool_call.args,
3552                                            exit_code,
3553                                        );
3554                                        tuple
3555                                    } else {
3556                                        let rejection_msg = format!(
3557                                            "Tool '{}' execution was REJECTED by the user. Reason: {}. \
3558                                             DO NOT retry this tool call unless the user explicitly asks you to.",
3559                                            tool_call.name,
3560                                            response.reason.unwrap_or_else(|| "No reason provided".to_string())
3561                                        );
3562                                        (rejection_msg, 1, true, None, Vec::new())
3563                                    }
3564                                }
3565                                Ok(Err(_)) => {
3566                                    // Forward ConfirmationTimeout (channel closed = effectively timed out)
3567                                    if let Some(tx) = &event_tx {
3568                                        tx.send(AgentEvent::ConfirmationTimeout {
3569                                            tool_id: tool_call.id.clone(),
3570                                            action_taken: "rejected".to_string(),
3571                                        })
3572                                        .await
3573                                        .ok();
3574                                    }
3575                                    let msg = format!(
3576                                        "Tool '{}' confirmation failed: confirmation channel closed",
3577                                        tool_call.name
3578                                    );
3579                                    (msg, 1, true, None, Vec::new())
3580                                }
3581                                Err(_) => {
3582                                    cm.check_timeouts().await;
3583
3584                                    // Forward ConfirmationTimeout
3585                                    if let Some(tx) = &event_tx {
3586                                        tx.send(AgentEvent::ConfirmationTimeout {
3587                                            tool_id: tool_call.id.clone(),
3588                                            action_taken: match timeout_action {
3589                                                crate::hitl::TimeoutAction::Reject => {
3590                                                    "rejected".to_string()
3591                                                }
3592                                                crate::hitl::TimeoutAction::AutoApprove => {
3593                                                    "auto_approved".to_string()
3594                                                }
3595                                            },
3596                                        })
3597                                        .await
3598                                        .ok();
3599                                    }
3600
3601                                    match timeout_action {
3602                                        crate::hitl::TimeoutAction::Reject => {
3603                                            let msg = format!(
3604                                                "Tool '{}' execution was REJECTED: user confirmation timed out after {}ms. \
3605                                                 DO NOT retry this tool call — the user did not approve it. \
3606                                                 Inform the user that the operation requires their approval and ask them to try again.",
3607                                                tool_call.name, timeout_ms
3608                                            );
3609                                            (msg, 1, true, None, Vec::new())
3610                                        }
3611                                        crate::hitl::TimeoutAction::AutoApprove => {
3612                                            let stream_ctx = self.streaming_tool_context(
3613                                                &event_tx,
3614                                                &tool_call.id,
3615                                                &tool_call.name,
3616                                            );
3617                                            let result = self
3618                                                .execute_tool_queued_or_direct(
3619                                                    &tool_call.name,
3620                                                    &tool_call.args,
3621                                                    &stream_ctx,
3622                                                )
3623                                                .await;
3624
3625                                            let tuple = Self::tool_result_to_tuple(result);
3626                                            // Track tool call in progress tracker
3627                                            let (_, exit_code, _, _, _) = tuple;
3628                                            self.track_tool_result(
3629                                                &tool_call.name,
3630                                                &tool_call.args,
3631                                                exit_code,
3632                                            );
3633                                            tuple
3634                                        }
3635                                    }
3636                                }
3637                            }
3638                        } else {
3639                            // Ask without confirmation manager — safe deny
3640                            let msg = format!(
3641                                "Tool '{}' requires confirmation but no HITL confirmation manager is configured. \
3642                                 Configure a confirmation policy to enable tool execution.",
3643                                tool_call.name
3644                            );
3645                            tracing::warn!(
3646                                tool_name = tool_call.name.as_str(),
3647                                "Tool requires confirmation but no HITL manager configured"
3648                            );
3649                            (msg, 1, true, None, Vec::new())
3650                        }
3651                    }
3652                };
3653
3654                let tool_duration = tool_start.elapsed();
3655                crate::telemetry::record_tool_result(exit_code, tool_duration);
3656                Self::collect_verification_report(&mut verification_reports, &metadata);
3657
3658                // Sanitize tool output for sensitive data before it enters the message history
3659                let output = if let Some(ref sp) = self.config.security_provider {
3660                    sp.sanitize_output(&output)
3661                } else {
3662                    output
3663                };
3664
3665                recent_tool_signatures.push(format!(
3666                    "{}:{} => {}",
3667                    tool_call.name,
3668                    serde_json::to_string(&tool_call.args).unwrap_or_default(),
3669                    if is_error { "error" } else { "ok" }
3670                ));
3671                if recent_tool_signatures.len() > 8 {
3672                    let overflow = recent_tool_signatures.len() - 8;
3673                    recent_tool_signatures.drain(0..overflow);
3674                }
3675
3676                // Fire PostToolUse hook (fire-and-forget)
3677                self.fire_post_tool_use(
3678                    session_id.unwrap_or(""),
3679                    &tool_call.name,
3680                    &tool_call.args,
3681                    &output,
3682                    exit_code == 0,
3683                    tool_duration.as_millis() as u64,
3684                )
3685                .await;
3686
3687                // Auto-remember tool result in long-term memory
3688                if let Some(ref memory) = self.config.memory {
3689                    let tools_used = [tool_call.name.clone()];
3690                    let remember_result = if exit_code == 0 {
3691                        memory
3692                            .remember_success(effective_prompt, &tools_used, &output)
3693                            .await
3694                    } else {
3695                        memory
3696                            .remember_failure(effective_prompt, &output, &tools_used)
3697                            .await
3698                    };
3699                    match remember_result {
3700                        Ok(()) => {
3701                            if let Some(tx) = &event_tx {
3702                                let item_type = if exit_code == 0 { "success" } else { "failure" };
3703                                tx.send(AgentEvent::MemoryStored {
3704                                    memory_id: uuid::Uuid::new_v4().to_string(),
3705                                    memory_type: item_type.to_string(),
3706                                    importance: if exit_code == 0 { 0.8 } else { 0.9 },
3707                                    tags: vec![item_type.to_string(), tool_call.name.clone()],
3708                                })
3709                                .await
3710                                .ok();
3711                            }
3712                        }
3713                        Err(e) => {
3714                            tracing::warn!("Failed to store memory after tool execution: {}", e);
3715                        }
3716                    }
3717                }
3718
3719                // Send tool end event
3720                if let Some(tx) = &event_tx {
3721                    tx.send(AgentEvent::ToolEnd {
3722                        id: tool_call.id.clone(),
3723                        name: tool_call.name.clone(),
3724                        output: output.clone(),
3725                        exit_code,
3726                        metadata,
3727                    })
3728                    .await
3729                    .ok();
3730                }
3731
3732                // Add tool result to messages
3733                if images.is_empty() {
3734                    messages.push(Message::tool_result(&tool_call.id, &output, is_error));
3735                } else {
3736                    messages.push(Message::tool_result_with_images(
3737                        &tool_call.id,
3738                        &output,
3739                        &images,
3740                        is_error,
3741                    ));
3742                }
3743            }
3744        }
3745    }
3746
3747    /// Create an execution plan for a prompt
3748    ///
3749    /// Delegates to [`LlmPlanner`] for structured JSON plan generation,
3750    /// falling back to heuristic planning if the LLM call fails.
3751    pub async fn plan(&self, prompt: &str, _context: Option<&str>) -> Result<ExecutionPlan> {
3752        use crate::planning::LlmPlanner;
3753
3754        match LlmPlanner::create_plan(&self.llm_client, prompt).await {
3755            Ok(plan) => Ok(plan),
3756            Err(e) => {
3757                tracing::warn!("LLM plan creation failed, using fallback: {}", e);
3758                Ok(LlmPlanner::fallback_plan(prompt))
3759            }
3760        }
3761    }
3762
3763    /// Execute with planning phase.
3764    ///
3765    /// If `pre_analysis` is provided (from a single pre-analysis LLM call in
3766    /// `execute_with_session`), the goal and plan are already available and no
3767    /// additional LLM calls are needed for planning. Otherwise, falls back to
3768    /// calling `extract_goal` and `plan` individually.
3769    pub async fn execute_with_planning(
3770        &self,
3771        history: &[Message],
3772        prompt: &str,
3773        session_id: Option<&str>,
3774        event_tx: Option<mpsc::Sender<AgentEvent>>,
3775        pre_analysis: Option<PreAnalysis>,
3776    ) -> Result<AgentResult> {
3777        // Send planning start event
3778        if let Some(tx) = &event_tx {
3779            tx.send(AgentEvent::PlanningStart {
3780                prompt: prompt.to_string(),
3781            })
3782            .await
3783            .ok();
3784        }
3785
3786        // Use pre-analysis result if available (goal + plan already computed in one LLM call).
3787        let (goal, plan) = if let Some(analysis) = pre_analysis {
3788            (Some(analysis.goal.clone()), analysis.execution_plan.clone())
3789        } else {
3790            // Fall back: extract goal and create plan via separate LLM calls.
3791            let g = if self.config.goal_tracking {
3792                Some(self.extract_goal(prompt).await?)
3793            } else {
3794                None
3795            };
3796            let p = self.plan(prompt, None).await?;
3797            (g, p)
3798        };
3799
3800        // Send GoalExtracted event if goal_tracking is enabled.
3801        if self.config.goal_tracking {
3802            if let Some(ref g) = goal {
3803                if let Some(tx) = &event_tx {
3804                    tx.send(AgentEvent::GoalExtracted { goal: g.clone() })
3805                        .await
3806                        .ok();
3807                }
3808            }
3809        }
3810
3811        // Send planning end event
3812        if let Some(tx) = &event_tx {
3813            tx.send(AgentEvent::PlanningEnd {
3814                estimated_steps: plan.steps.len(),
3815                plan: plan.clone(),
3816            })
3817            .await
3818            .ok();
3819        }
3820
3821        let plan_start = std::time::Instant::now();
3822
3823        // Execute the plan step by step
3824        let result = self
3825            .execute_plan(history, &plan, session_id, event_tx.clone())
3826            .await?;
3827
3828        // Emit the final End event (execute_loop_inner does not emit End in planning mode)
3829        if let Some(tx) = &event_tx {
3830            tx.send(AgentEvent::End {
3831                text: result.text.clone(),
3832                usage: result.usage.clone(),
3833                verification_summary: Box::new(result.verification_summary()),
3834                meta: None,
3835            })
3836            .await
3837            .ok();
3838        }
3839
3840        // Check goal achievement when goal_tracking is enabled
3841        if self.config.goal_tracking {
3842            if let Some(ref g) = goal {
3843                let achieved = self.check_goal_achievement(g, &result.text).await?;
3844                if achieved {
3845                    if let Some(tx) = &event_tx {
3846                        tx.send(AgentEvent::GoalAchieved {
3847                            goal: g.description.clone(),
3848                            total_steps: result.messages.len(),
3849                            duration_ms: plan_start.elapsed().as_millis() as i64,
3850                        })
3851                        .await
3852                        .ok();
3853                    }
3854                }
3855            }
3856        }
3857
3858        Ok(result)
3859    }
3860
3861    /// Execute an execution plan using wave-based dependency-aware scheduling.
3862    ///
3863    /// Steps with no unmet dependencies are grouped into "waves". A wave with
3864    /// a single step executes sequentially (preserving the history chain). A
3865    /// wave with multiple independent steps executes them in parallel via
3866    /// `JoinSet`, then merges their results back into the shared history.
3867    async fn execute_plan(
3868        &self,
3869        history: &[Message],
3870        plan: &ExecutionPlan,
3871        session_id: Option<&str>,
3872        event_tx: Option<mpsc::Sender<AgentEvent>>,
3873    ) -> Result<AgentResult> {
3874        let mut plan = plan.clone();
3875        let task_session_id = session_id.unwrap_or("").to_string();
3876        let mut current_history = history.to_vec();
3877        let mut total_usage = TokenUsage::default();
3878        let mut tool_calls_count = 0;
3879        let total_steps = plan.steps.len();
3880
3881        // Add initial user message with the goal
3882        let steps_text = plan
3883            .steps
3884            .iter()
3885            .enumerate()
3886            .map(|(i, step)| format!("{}. {}", i + 1, step.content))
3887            .collect::<Vec<_>>()
3888            .join("\n");
3889        current_history.push(Message::user(&crate::prompts::render(
3890            crate::prompts::PLAN_EXECUTE_GOAL,
3891            &[("goal", &plan.goal), ("steps", &steps_text)],
3892        )));
3893        self.emit_task_updated(&event_tx, &task_session_id, &plan)
3894            .await;
3895
3896        loop {
3897            let ready: Vec<String> = plan
3898                .get_ready_steps()
3899                .iter()
3900                .map(|s| s.id.clone())
3901                .collect();
3902
3903            if ready.is_empty() {
3904                // All done or deadlock
3905                if plan.has_deadlock() {
3906                    tracing::warn!(
3907                        "Plan deadlock detected: {} pending steps with unresolvable dependencies",
3908                        plan.pending_count()
3909                    );
3910                }
3911                break;
3912            }
3913
3914            if ready.len() == 1 {
3915                // === Single step: sequential execution (preserves history chain) ===
3916                let step_id = &ready[0];
3917                let step = plan
3918                    .steps
3919                    .iter()
3920                    .find(|s| s.id == *step_id)
3921                    .ok_or_else(|| anyhow::anyhow!("step '{}' not found in plan", step_id))?
3922                    .clone();
3923                let step_number = plan
3924                    .steps
3925                    .iter()
3926                    .position(|s| s.id == *step_id)
3927                    .unwrap_or(0)
3928                    + 1;
3929
3930                // Send step start event
3931                plan.mark_status(&step.id, TaskStatus::InProgress);
3932                self.emit_task_updated(&event_tx, &task_session_id, &plan)
3933                    .await;
3934
3935                if let Some(tx) = &event_tx {
3936                    tx.send(AgentEvent::StepStart {
3937                        step_id: step.id.clone(),
3938                        description: step.content.clone(),
3939                        step_number,
3940                        total_steps,
3941                    })
3942                    .await
3943                    .ok();
3944                }
3945
3946                if Self::should_delegate_plan_step(&step) {
3947                    let tool_name = match Self::normalized_plan_tool(&step) {
3948                        Some("parallel_task") => "parallel_task",
3949                        _ => "task",
3950                    };
3951                    let args = if tool_name == "parallel_task" {
3952                        json!({ "tasks": [Self::delegated_task_args(&step, step_number, total_steps)] })
3953                    } else {
3954                        Self::delegated_task_args(&step, step_number, total_steps)
3955                    };
3956                    let (output, _exit_code, is_error, _metadata) = self
3957                        .execute_delegated_plan_tool(tool_name, &args, session_id, &event_tx)
3958                        .await;
3959                    tool_calls_count += 1;
3960
3961                    if is_error {
3962                        tracing::error!("Delegated plan step '{}' failed: {}", step.id, output);
3963                        current_history.push(Message::user(&format!(
3964                            "Delegated plan step '{}' failed:\n{}",
3965                            step.content, output
3966                        )));
3967                        plan.mark_status(&step.id, TaskStatus::Failed);
3968                        self.emit_task_updated(&event_tx, &task_session_id, &plan)
3969                            .await;
3970
3971                        if let Some(tx) = &event_tx {
3972                            tx.send(AgentEvent::StepEnd {
3973                                step_id: step.id.clone(),
3974                                status: TaskStatus::Failed,
3975                                step_number,
3976                                total_steps,
3977                            })
3978                            .await
3979                            .ok();
3980                        }
3981                    } else {
3982                        current_history.push(Message {
3983                            role: "assistant".to_string(),
3984                            content: vec![crate::llm::ContentBlock::Text { text: output }],
3985                            reasoning_content: None,
3986                        });
3987                        plan.mark_status(&step.id, TaskStatus::Completed);
3988                        self.emit_task_updated(&event_tx, &task_session_id, &plan)
3989                            .await;
3990
3991                        if let Some(tx) = &event_tx {
3992                            tx.send(AgentEvent::StepEnd {
3993                                step_id: step.id.clone(),
3994                                status: TaskStatus::Completed,
3995                                step_number,
3996                                total_steps,
3997                            })
3998                            .await
3999                            .ok();
4000                        }
4001                    }
4002                } else {
4003                    let step_prompt = crate::prompts::render(
4004                        crate::prompts::PLAN_EXECUTE_STEP,
4005                        &[
4006                            ("step_num", &step_number.to_string()),
4007                            ("description", &step.content),
4008                        ],
4009                    );
4010
4011                    match self
4012                        .execute_loop(
4013                            &current_history,
4014                            &step_prompt,
4015                            AgentStyle::GeneralPurpose,
4016                            None,
4017                            event_tx.clone(),
4018                            &tokio_util::sync::CancellationToken::new(),
4019                            false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
4020                        )
4021                        .await
4022                    {
4023                        Ok(result) => {
4024                            current_history = result.messages.clone();
4025                            total_usage.prompt_tokens += result.usage.prompt_tokens;
4026                            total_usage.completion_tokens += result.usage.completion_tokens;
4027                            total_usage.total_tokens += result.usage.total_tokens;
4028                            tool_calls_count += result.tool_calls_count;
4029                            plan.mark_status(&step.id, TaskStatus::Completed);
4030                            self.emit_task_updated(&event_tx, &task_session_id, &plan)
4031                                .await;
4032
4033                            if let Some(tx) = &event_tx {
4034                                tx.send(AgentEvent::StepEnd {
4035                                    step_id: step.id.clone(),
4036                                    status: TaskStatus::Completed,
4037                                    step_number,
4038                                    total_steps,
4039                                })
4040                                .await
4041                                .ok();
4042                            }
4043                        }
4044                        Err(e) => {
4045                            tracing::error!("Plan step '{}' failed: {}", step.id, e);
4046                            plan.mark_status(&step.id, TaskStatus::Failed);
4047                            self.emit_task_updated(&event_tx, &task_session_id, &plan)
4048                                .await;
4049
4050                            if let Some(tx) = &event_tx {
4051                                tx.send(AgentEvent::StepEnd {
4052                                    step_id: step.id.clone(),
4053                                    status: TaskStatus::Failed,
4054                                    step_number,
4055                                    total_steps,
4056                                })
4057                                .await
4058                                .ok();
4059                            }
4060                        }
4061                    }
4062                }
4063            } else {
4064                // === Multiple steps: parallel execution via JoinSet ===
4065                // NOTE: Each parallel branch gets a clone of the base history.
4066                // Individual branch histories (tool calls, LLM turns) are NOT merged
4067                // back — only a summary message is appended. This is a deliberate
4068                // trade-off: merging divergent histories in a deterministic order is
4069                // complex and the summary approach keeps the context window manageable.
4070                let ready_steps: Vec<_> = ready
4071                    .iter()
4072                    .filter_map(|id| {
4073                        let step = plan.steps.iter().find(|s| s.id == *id)?.clone();
4074                        let step_number =
4075                            plan.steps.iter().position(|s| s.id == *id).unwrap_or(0) + 1;
4076                        Some((step, step_number))
4077                    })
4078                    .collect();
4079
4080                // Mark all as InProgress and emit one task-list snapshot for the wave.
4081                for (step, _) in &ready_steps {
4082                    plan.mark_status(&step.id, TaskStatus::InProgress);
4083                }
4084                self.emit_task_updated(&event_tx, &task_session_id, &plan)
4085                    .await;
4086
4087                // Emit StepStart events after the authoritative task-list snapshot.
4088                for (step, step_number) in &ready_steps {
4089                    if let Some(tx) = &event_tx {
4090                        tx.send(AgentEvent::StepStart {
4091                            step_id: step.id.clone(),
4092                            description: step.content.clone(),
4093                            step_number: *step_number,
4094                            total_steps,
4095                        })
4096                        .await
4097                        .ok();
4098                    }
4099                }
4100
4101                let mut parallel_results: Vec<ParallelStepResult> = Vec::new();
4102                if ready_steps
4103                    .iter()
4104                    .all(|(step, _)| Self::should_delegate_plan_step(step))
4105                {
4106                    let args = Self::parallel_delegated_task_args(&ready_steps, total_steps);
4107                    let (output, _exit_code, is_error, metadata) = self
4108                        .execute_delegated_plan_tool("parallel_task", &args, session_id, &event_tx)
4109                        .await;
4110                    tool_calls_count += 1;
4111
4112                    let status = if is_error {
4113                        TaskStatus::Failed
4114                    } else {
4115                        TaskStatus::Completed
4116                    };
4117                    for (step, step_number) in &ready_steps {
4118                        plan.mark_status(&step.id, status);
4119                        self.emit_task_updated(&event_tx, &task_session_id, &plan)
4120                            .await;
4121
4122                        parallel_results.push(ParallelStepResult {
4123                            step_id: step.id.clone(),
4124                            step_number: *step_number as u32,
4125                            status: if is_error { "failed" } else { "completed" }.to_string(),
4126                            summary: if is_error {
4127                                String::new()
4128                            } else {
4129                                output.trim().to_string()
4130                            },
4131                            key_findings: None,
4132                            error: is_error.then(|| output.clone()),
4133                            data: metadata.clone(),
4134                        });
4135
4136                        if let Some(tx) = &event_tx {
4137                            tx.send(AgentEvent::StepEnd {
4138                                step_id: step.id.clone(),
4139                                status,
4140                                step_number: *step_number,
4141                                total_steps,
4142                            })
4143                            .await
4144                            .ok();
4145                        }
4146                    }
4147
4148                    if is_error {
4149                        current_history.push(Message::user(&format!(
4150                            "Delegated parallel plan wave failed:\n{}",
4151                            output
4152                        )));
4153                    } else {
4154                        current_history.push(Message {
4155                            role: "assistant".to_string(),
4156                            content: vec![crate::llm::ContentBlock::Text {
4157                                text: output.clone(),
4158                            }],
4159                            reasoning_content: None,
4160                        });
4161                    }
4162                } else {
4163                    // Spawn all into JoinSet, each with a clone of the base history
4164                    let mut join_set = tokio::task::JoinSet::new();
4165                    for (step, step_number) in &ready_steps {
4166                        let base_history = current_history.clone();
4167                        let agent_clone = self.clone();
4168                        let tx = event_tx.clone();
4169                        let step_clone = step.clone();
4170                        let sn = *step_number;
4171
4172                        join_set.spawn(async move {
4173                            let prompt = crate::prompts::render(
4174                                crate::prompts::PLAN_EXECUTE_STEP,
4175                                &[
4176                                    ("step_num", &sn.to_string()),
4177                                    ("description", &step_clone.content),
4178                                ],
4179                            );
4180                            let result = agent_clone
4181                                .execute_loop(
4182                                    &base_history,
4183                                    &prompt,
4184                                    AgentStyle::GeneralPurpose,
4185                                    None,
4186                                    tx,
4187                                    &tokio_util::sync::CancellationToken::new(),
4188                                    false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
4189                                )
4190                                .await;
4191                            (step_clone.id, sn, result)
4192                        });
4193                    }
4194
4195                    // Collect results
4196                    while let Some(join_result) = join_set.join_next().await {
4197                        match join_result {
4198                            Ok((step_id, step_number, step_result)) => match step_result {
4199                                Ok(result) => {
4200                                    total_usage.prompt_tokens += result.usage.prompt_tokens;
4201                                    total_usage.completion_tokens += result.usage.completion_tokens;
4202                                    total_usage.total_tokens += result.usage.total_tokens;
4203                                    tool_calls_count += result.tool_calls_count;
4204                                    plan.mark_status(&step_id, TaskStatus::Completed);
4205                                    self.emit_task_updated(&event_tx, &task_session_id, &plan)
4206                                        .await;
4207
4208                                    parallel_results.push(ParallelStepResult {
4209                                        step_id: step_id.clone(),
4210                                        step_number: step_number as u32,
4211                                        status: "completed".to_string(),
4212                                        summary: result.text.trim().to_string(),
4213                                        key_findings: None,
4214                                        error: None,
4215                                        data: None,
4216                                    });
4217
4218                                    if let Some(tx) = &event_tx {
4219                                        tx.send(AgentEvent::StepEnd {
4220                                            step_id,
4221                                            status: TaskStatus::Completed,
4222                                            step_number,
4223                                            total_steps,
4224                                        })
4225                                        .await
4226                                        .ok();
4227                                    }
4228                                }
4229                                Err(e) => {
4230                                    tracing::error!("Plan step '{}' failed: {}", step_id, e);
4231                                    plan.mark_status(&step_id, TaskStatus::Failed);
4232                                    self.emit_task_updated(&event_tx, &task_session_id, &plan)
4233                                        .await;
4234
4235                                    parallel_results.push(ParallelStepResult {
4236                                        step_id: step_id.clone(),
4237                                        step_number: step_number as u32,
4238                                        status: "failed".to_string(),
4239                                        summary: String::new(),
4240                                        key_findings: None,
4241                                        error: Some(e.to_string()),
4242                                        data: None,
4243                                    });
4244
4245                                    if let Some(tx) = &event_tx {
4246                                        tx.send(AgentEvent::StepEnd {
4247                                            step_id,
4248                                            status: TaskStatus::Failed,
4249                                            step_number,
4250                                            total_steps,
4251                                        })
4252                                        .await
4253                                        .ok();
4254                                    }
4255                                }
4256                            },
4257                            Err(e) => {
4258                                tracing::error!("JoinSet task panicked: {}", e);
4259                            }
4260                        }
4261                    }
4262                }
4263
4264                // Merge parallel results into history for subsequent steps.
4265                // Emit as a structured JSON USER message so the frontend can
4266                // parse and render it in the user's language.
4267                if !parallel_results.is_empty() {
4268                    parallel_results.sort_by_key(|r| r.step_number);
4269                    let envelope = ParallelStepResult::build_envelope(parallel_results);
4270                    current_history.push(Message::user(
4271                        &serde_json::to_string(&envelope).unwrap_or_default(),
4272                    ));
4273                }
4274            }
4275
4276            // Emit GoalProgress after each wave
4277            if self.config.goal_tracking {
4278                let completed = plan
4279                    .steps
4280                    .iter()
4281                    .filter(|s| s.status == TaskStatus::Completed)
4282                    .count();
4283                if let Some(tx) = &event_tx {
4284                    tx.send(AgentEvent::GoalProgress {
4285                        goal: plan.goal.clone(),
4286                        progress: plan.progress(),
4287                        completed_steps: completed,
4288                        total_steps,
4289                    })
4290                    .await
4291                    .ok();
4292                }
4293            }
4294        }
4295
4296        // Get final response — find the last assistant message (not the last history
4297        // entry, which may be a user-summary injected after parallel execution)
4298        let final_text = current_history
4299            .iter()
4300            .rev()
4301            .find(|m| m.role == "assistant")
4302            .map(|m| {
4303                m.content
4304                    .iter()
4305                    .filter_map(|block| {
4306                        if let crate::llm::ContentBlock::Text { text } = block {
4307                            Some(text.as_str())
4308                        } else {
4309                            None
4310                        }
4311                    })
4312                    .collect::<Vec<_>>()
4313                    .join("\n")
4314            })
4315            .unwrap_or_default();
4316
4317        Ok(AgentResult {
4318            text: final_text,
4319            messages: current_history,
4320            usage: total_usage,
4321            tool_calls_count,
4322            verification_reports: Vec::new(),
4323        })
4324    }
4325
4326    /// Extract goal from prompt
4327    ///
4328    /// Delegates to [`LlmPlanner`] for structured JSON goal extraction,
4329    /// falling back to heuristic logic if the LLM call fails.
4330    pub async fn extract_goal(&self, prompt: &str) -> Result<AgentGoal> {
4331        use crate::planning::LlmPlanner;
4332
4333        match LlmPlanner::extract_goal(&self.llm_client, prompt).await {
4334            Ok(goal) => Ok(goal),
4335            Err(e) => {
4336                tracing::warn!("LLM goal extraction failed, using fallback: {}", e);
4337                Ok(LlmPlanner::fallback_goal(prompt))
4338            }
4339        }
4340    }
4341
4342    /// Check if goal is achieved
4343    ///
4344    /// Delegates to [`LlmPlanner`] for structured JSON achievement check,
4345    /// falling back to heuristic logic if the LLM call fails.
4346    pub async fn check_goal_achievement(
4347        &self,
4348        goal: &AgentGoal,
4349        current_state: &str,
4350    ) -> Result<bool> {
4351        use crate::planning::LlmPlanner;
4352
4353        match LlmPlanner::check_achievement(&self.llm_client, goal, current_state).await {
4354            Ok(result) => Ok(result.achieved),
4355            Err(e) => {
4356                tracing::warn!("LLM achievement check failed, using fallback: {}", e);
4357                let result = LlmPlanner::fallback_check_achievement(goal, current_state);
4358                Ok(result.achieved)
4359            }
4360        }
4361    }
4362}
4363
4364#[cfg(test)]
4365mod tests {
4366    use super::*;
4367    use crate::llm::{ContentBlock, StreamEvent};
4368    use crate::permissions::PermissionPolicy;
4369    use crate::tools::ToolExecutor;
4370    use std::path::PathBuf;
4371    use std::sync::atomic::{AtomicUsize, Ordering};
4372
4373    /// Create a default ToolContext for tests
4374    fn test_tool_context() -> ToolContext {
4375        ToolContext::new(PathBuf::from("/tmp"))
4376    }
4377
/// Steps tagged with the `task` or `parallel_task` tool are delegated; a step
/// without a tool is not.
#[test]
fn test_plan_step_delegation_detection() {
    use crate::planning::Task;

    let cases = [
        (
            Task::new("s1", "Find relevant files").with_tool("task"),
            true,
        ),
        (
            Task::new("s2", "Check independent areas").with_tool("parallel_task"),
            true,
        ),
        (Task::new("s3", "Implement directly"), false),
    ];

    for (task, should_delegate) in cases.iter() {
        assert_eq!(
            AgentLoop::should_delegate_plan_step(task),
            *should_delegate
        );
    }
}
4393
/// The delegated agent is chosen from the step's text; each row is
/// `(step id, step text, expected agent)`.
#[test]
fn test_delegated_agent_selection_from_step_text() {
    use crate::planning::Task;

    let expectations = [
        ("s1", "查找相关实现", "explore"),
        ("s2", "Run release verification tests", "verification"),
        ("s3", "Review risky code changes", "review"),
        ("s4", "Design the architecture", "plan"),
        ("s5", "Implement the change", "general"),
    ];

    for &(id, content, agent) in expectations.iter() {
        assert_eq!(
            AgentLoop::delegated_agent_for_step(&Task::new(id, content)),
            agent
        );
    }
}
4419
/// Delegated task arguments carry the chosen agent, the step description, and
/// a prompt that mentions the step position and the success criteria.
#[test]
fn test_delegated_task_args_include_prompt_contract() {
    use crate::planning::Task;

    let step = Task::new("s1", "验证 program 工具")
        .with_tool("task")
        .with_success_criteria("All integration checks pass.");

    let args = AgentLoop::delegated_task_args(&step, 2, 5);

    assert_eq!(args["agent"], "verification");
    assert_eq!(args["description"], "验证 program 工具");

    let prompt = args["prompt"].as_str().unwrap();
    assert!(prompt.contains("2/5"));
    assert!(prompt.contains("All integration checks pass."));
}
4437
/// The `tasks` array produced for a parallel wave keeps the input step order.
#[test]
fn test_parallel_delegated_task_args_preserve_order() {
    use crate::planning::Task;

    let steps = vec![
        (Task::new("s1", "Find docs").with_tool("task"), 1),
        (Task::new("s2", "Run tests").with_tool("task"), 2),
    ];

    let args = AgentLoop::parallel_delegated_task_args(&steps, 2);
    let tasks = args["tasks"].as_array().unwrap();
    let expected_agents = ["explore", "verification"];

    assert_eq!(tasks.len(), 2);
    for (task, agent) in tasks.iter().zip(expected_agents.iter()) {
        assert_eq!(task["agent"], *agent);
    }
}
4453
/// A memory item converts into a single `ContextType::Memory` context item
/// that keeps its id and content and has a `memory://<id>` source.
#[test]
fn test_memory_items_become_context_result() {
    let item = a3s_memory::MemoryItem::new("Use focused regression tests for context changes.")
        .with_importance(0.8);

    let result = crate::memory::memory_items_to_context_result("memory", vec![item.clone()]);

    assert_eq!(result.provider, "memory");
    assert_eq!(result.items.len(), 1);

    let converted = &result.items[0];
    let expected_source = format!("memory://{}", item.id);

    assert_eq!(converted.id, item.id.as_str());
    assert_eq!(converted.context_type, ContextType::Memory);
    assert_eq!(converted.source.as_deref(), Some(expected_source.as_str()));
    assert!(converted.content.contains("focused regression tests"));
    assert!(converted.token_count > 0);
}
4473
4474    #[cfg(feature = "ahp")]
4475    #[test]
4476    fn test_injected_context_to_results_includes_all_context_shapes() {
4477        let injected = a3s_ahp::InjectedContext {
4478            facts: vec![a3s_ahp::Fact {
4479                content: "Fact from harness".to_string(),
4480                source: "ahp://fact/source".to_string(),
4481                confidence: 0.92,
4482            }],
4483            file_contents: Some(vec![a3s_ahp::FileContentSnippet {
4484                path: "src/lib.rs".to_string(),
4485                snippet: "pub fn important() {}".to_string(),
4486                relevance_score: 0.88,
4487            }]),
4488            project_summary: Some(a3s_ahp::ProjectSummary {
4489                project_name: "demo".to_string(),
4490                language: Some("Rust".to_string()),
4491                key_files: Some(vec!["Cargo.toml".to_string(), "src/lib.rs".to_string()]),
4492                structure_description: "Small Rust crate".to_string(),
4493            }),
4494            knowledge: Some(vec!["Use context budgets".to_string()]),
4495            suggestions: Some(vec!["Prefer focused verification".to_string()]),
4496        };
4497
4498        let results = injected_context_to_results(injected);
4499        let items = results
4500            .iter()
4501            .flat_map(|result| result.items.iter())
4502            .collect::<Vec<_>>();
4503
4504        assert_eq!(results.len(), 5);
4505        assert!(items.iter().any(|item| item.content == "Fact from harness"
4506            && item.source.as_deref() == Some("ahp://fact/source")));
4507        assert!(items
4508            .iter()
4509            .any(|item| item.content == "pub fn important() {}"
4510                && item.source.as_deref() == Some("src/lib.rs")));
4511        assert!(items
4512            .iter()
4513            .any(|item| item.content.contains("Key files: Cargo.toml, src/lib.rs")));
4514        assert!(items
4515            .iter()
4516            .any(|item| item.source.as_deref() == Some("ahp://suggestions")
4517                && item.content.contains("Prefer focused verification")));
4518        assert!(results
4519            .iter()
4520            .all(|result| result.provider == "ahp_harness"));
4521    }
4522
4523    #[test]
4524    fn test_agent_config_default() {
4525        let config = AgentConfig::default();
4526        assert!(config.prompt_slots.is_empty());
4527        assert!(config.tools.is_empty()); // Tools are provided externally
4528        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
4529        assert!(config.permission_checker.is_none());
4530        assert!(config.context_providers.is_empty());
4531        // Built-in skills are always present by default
4532        let registry = config
4533            .skill_registry
4534            .expect("skill_registry must be Some by default");
4535        assert!(registry.len() >= 4, "expected at least 4 built-in skills");
4536        assert!(registry.get("code-search").is_some());
4537        assert!(registry.get("find-bugs").is_some());
4538    }
4539
4540    // ========================================================================
4541    // Mock LLM Client for Testing
4542    // ========================================================================
4543
    /// Test double for [`LlmClient`] that replays a scripted list of responses.
    ///
    /// Each non-pre-analysis call takes the front-most queued response; once the
    /// queue is empty the call fails with "No more mock responses available".
    pub(crate) struct MockLlmClient {
        /// Scripted responses, handed out front-to-back (one per call).
        responses: std::sync::Mutex<Vec<LlmResponse>>,
        /// Number of calls that reached the scripted-response queue
        /// (pre-analysis requests answered by `complete` are not counted).
        pub(crate) call_count: AtomicUsize,
    }
4551
4552    impl MockLlmClient {
4553        pub(crate) fn new(responses: Vec<LlmResponse>) -> Self {
4554            Self {
4555                responses: std::sync::Mutex::new(responses),
4556                call_count: AtomicUsize::new(0),
4557            }
4558        }
4559
4560        /// Create a response with text only (no tool calls)
4561        pub(crate) fn text_response(text: &str) -> LlmResponse {
4562            LlmResponse {
4563                message: Message {
4564                    role: "assistant".to_string(),
4565                    content: vec![ContentBlock::Text {
4566                        text: text.to_string(),
4567                    }],
4568                    reasoning_content: None,
4569                },
4570                usage: TokenUsage {
4571                    prompt_tokens: 10,
4572                    completion_tokens: 5,
4573                    total_tokens: 15,
4574                    cache_read_tokens: None,
4575                    cache_write_tokens: None,
4576                },
4577                stop_reason: Some("end_turn".to_string()),
4578                meta: None,
4579            }
4580        }
4581
4582        /// Create a response with a tool call
4583        pub(crate) fn tool_call_response(
4584            tool_id: &str,
4585            tool_name: &str,
4586            args: serde_json::Value,
4587        ) -> LlmResponse {
4588            LlmResponse {
4589                message: Message {
4590                    role: "assistant".to_string(),
4591                    content: vec![ContentBlock::ToolUse {
4592                        id: tool_id.to_string(),
4593                        name: tool_name.to_string(),
4594                        input: args,
4595                    }],
4596                    reasoning_content: None,
4597                },
4598                usage: TokenUsage {
4599                    prompt_tokens: 10,
4600                    completion_tokens: 5,
4601                    total_tokens: 15,
4602                    cache_read_tokens: None,
4603                    cache_write_tokens: None,
4604                },
4605                stop_reason: Some("tool_use".to_string()),
4606                meta: None,
4607            }
4608        }
4609    }
4610
4611    #[async_trait::async_trait]
4612    impl LlmClient for MockLlmClient {
4613        async fn complete(
4614            &self,
4615            messages: &[Message],
4616            system: Option<&str>,
4617            _tools: &[ToolDefinition],
4618        ) -> Result<LlmResponse> {
4619            if system == Some(crate::prompts::PRE_ANALYSIS_SYSTEM) {
4620                let prompt = messages
4621                    .last()
4622                    .and_then(|m| {
4623                        m.content.iter().find_map(|block| {
4624                            if let ContentBlock::Text { text } = block {
4625                                Some(text.as_str())
4626                            } else {
4627                                None
4628                            }
4629                        })
4630                    })
4631                    .unwrap_or("");
4632                let response = serde_json::json!({
4633                    "intent": "GeneralPurpose",
4634                    "requires_planning": false,
4635                    "goal": {
4636                        "description": prompt,
4637                        "success_criteria": []
4638                    },
4639                    "execution_plan": {
4640                        "complexity": "Simple",
4641                        "steps": [
4642                            {
4643                                "id": "step-1",
4644                                "description": prompt,
4645                                "tool": null,
4646                                "dependencies": [],
4647                                "success_criteria": "Complete the request"
4648                            }
4649                        ],
4650                        "required_tools": []
4651                    },
4652                    "optimized_input": prompt
4653                });
4654                return Ok(MockLlmClient::text_response(&response.to_string()));
4655            }
4656            self.call_count.fetch_add(1, Ordering::SeqCst);
4657            let mut responses = self.responses.lock().unwrap();
4658            if responses.is_empty() {
4659                anyhow::bail!("No more mock responses available");
4660            }
4661            Ok(responses.remove(0))
4662        }
4663
4664        async fn complete_streaming(
4665            &self,
4666            _messages: &[Message],
4667            _system: Option<&str>,
4668            _tools: &[ToolDefinition],
4669            _cancel_token: tokio_util::sync::CancellationToken,
4670        ) -> Result<mpsc::Receiver<StreamEvent>> {
4671            self.call_count.fetch_add(1, Ordering::SeqCst);
4672            let mut responses = self.responses.lock().unwrap();
4673            if responses.is_empty() {
4674                anyhow::bail!("No more mock responses available");
4675            }
4676            let response = responses.remove(0);
4677
4678            let (tx, rx) = mpsc::channel(10);
4679            tokio::spawn(async move {
4680                // Send text deltas if any
4681                for block in &response.message.content {
4682                    if let ContentBlock::Text { text } = block {
4683                        tx.send(StreamEvent::TextDelta(text.clone())).await.ok();
4684                    }
4685                }
4686                tx.send(StreamEvent::Done(response)).await.ok();
4687            });
4688
4689            Ok(rx)
4690        }
4691    }
4692
4693    // ========================================================================
4694    // Agent Loop Tests
4695    // ========================================================================
4696
4697    #[tokio::test]
4698    async fn test_agent_simple_response() {
4699        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4700            "Hello, I'm an AI assistant.",
4701        )]));
4702
4703        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4704        let config = AgentConfig::default();
4705
4706        let agent = AgentLoop::new(
4707            mock_client.clone(),
4708            tool_executor,
4709            test_tool_context(),
4710            config,
4711        );
4712        let result = agent.execute(&[], "Hello", None).await.unwrap();
4713
4714        assert_eq!(result.text, "Hello, I'm an AI assistant.");
4715        assert_eq!(result.tool_calls_count, 0);
4716        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
4717    }
4718
4719    #[tokio::test]
4720    async fn test_agent_with_tool_call() {
4721        let mock_client = Arc::new(MockLlmClient::new(vec![
4722            // First response: tool call
4723            MockLlmClient::tool_call_response(
4724                "tool-1",
4725                "bash",
4726                serde_json::json!({"command": "echo hello"}),
4727            ),
4728            // Second response: final text
4729            MockLlmClient::text_response("The command output was: hello"),
4730        ]));
4731
4732        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4733        let config = AgentConfig::default();
4734
4735        let agent = AgentLoop::new(
4736            mock_client.clone(),
4737            tool_executor,
4738            test_tool_context(),
4739            config,
4740        );
4741        let result = agent.execute(&[], "Run echo hello", None).await.unwrap();
4742
4743        assert_eq!(result.text, "The command output was: hello");
4744        assert_eq!(result.tool_calls_count, 1);
4745        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
4746    }
4747
4748    #[tokio::test]
4749    async fn test_agent_permission_deny() {
4750        let mock_client = Arc::new(MockLlmClient::new(vec![
4751            // First response: tool call that will be denied
4752            MockLlmClient::tool_call_response(
4753                "tool-1",
4754                "bash",
4755                serde_json::json!({"command": "rm -rf /tmp/test"}),
4756            ),
4757            // Second response: LLM responds to the denial
4758            MockLlmClient::text_response(
4759                "I cannot execute that command due to permission restrictions.",
4760            ),
4761        ]));
4762
4763        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4764
4765        // Create permission policy that denies rm commands
4766        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4767
4768        let config = AgentConfig {
4769            permission_checker: Some(Arc::new(permission_policy)),
4770            ..Default::default()
4771        };
4772
4773        let (tx, mut rx) = mpsc::channel(100);
4774        let agent = AgentLoop::new(
4775            mock_client.clone(),
4776            tool_executor,
4777            test_tool_context(),
4778            config,
4779        );
4780        let result = agent.execute(&[], "Delete files", Some(tx)).await.unwrap();
4781
4782        // Check that we received a PermissionDenied event
4783        let mut found_permission_denied = false;
4784        while let Ok(event) = rx.try_recv() {
4785            if let AgentEvent::PermissionDenied { tool_name, .. } = event {
4786                assert_eq!(tool_name, "bash");
4787                found_permission_denied = true;
4788            }
4789        }
4790        assert!(
4791            found_permission_denied,
4792            "Should have received PermissionDenied event"
4793        );
4794
4795        assert_eq!(result.tool_calls_count, 1);
4796    }
4797
4798    #[tokio::test]
4799    async fn test_agent_permission_allow() {
4800        let mock_client = Arc::new(MockLlmClient::new(vec![
4801            // First response: tool call that will be allowed
4802            MockLlmClient::tool_call_response(
4803                "tool-1",
4804                "bash",
4805                serde_json::json!({"command": "echo hello"}),
4806            ),
4807            // Second response: final text
4808            MockLlmClient::text_response("Done!"),
4809        ]));
4810
4811        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4812
4813        // Create permission policy that allows echo commands
4814        let permission_policy = PermissionPolicy::new()
4815            .allow("bash(echo:*)")
4816            .deny("bash(rm:*)");
4817
4818        let config = AgentConfig {
4819            permission_checker: Some(Arc::new(permission_policy)),
4820            ..Default::default()
4821        };
4822
4823        let agent = AgentLoop::new(
4824            mock_client.clone(),
4825            tool_executor,
4826            test_tool_context(),
4827            config,
4828        );
4829        let result = agent.execute(&[], "Echo hello", None).await.unwrap();
4830
4831        assert_eq!(result.text, "Done!");
4832        assert_eq!(result.tool_calls_count, 1);
4833    }
4834
4835    #[tokio::test]
4836    async fn test_agent_streaming_events() {
4837        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4838            "Hello!",
4839        )]));
4840
4841        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4842        let config = AgentConfig::default();
4843
4844        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4845        let (tx, mut rx) = mpsc::channel(100);
4846        let cancel_token = tokio_util::sync::CancellationToken::new();
4847
4848        let result = agent
4849            .execute_with_session(&[], "Hi", None, Some(tx), Some(&cancel_token))
4850            .await
4851            .unwrap();
4852        let mut events = Vec::new();
4853        while let Some(event) = rx.recv().await {
4854            events.push(event);
4855        }
4856
4857        assert_eq!(result.text, "Hello!");
4858
4859        // Check we received Start and End events
4860        assert!(events.iter().any(|e| matches!(e, AgentEvent::Start { .. })));
4861        assert!(events.iter().any(|e| matches!(e, AgentEvent::End { .. })));
4862    }
4863
4864    #[tokio::test]
4865    async fn test_agent_max_tool_rounds() {
4866        // Create a mock that always returns tool calls (infinite loop)
4867        let responses: Vec<LlmResponse> = (0..100)
4868            .map(|i| {
4869                MockLlmClient::tool_call_response(
4870                    &format!("tool-{}", i),
4871                    "bash",
4872                    serde_json::json!({"command": "echo loop"}),
4873                )
4874            })
4875            .collect();
4876
4877        let mock_client = Arc::new(MockLlmClient::new(responses));
4878        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4879
4880        let config = AgentConfig {
4881            max_tool_rounds: 3,
4882            ..Default::default()
4883        };
4884
4885        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4886        let result = agent.execute(&[], "Loop forever", None).await;
4887
4888        // Should fail due to max tool rounds exceeded
4889        assert!(result.is_err());
4890        assert!(result.unwrap_err().to_string().contains("Max tool rounds"));
4891    }
4892
4893    #[tokio::test]
4894    async fn test_agent_no_permission_policy_defaults_to_ask() {
4895        // When no permission policy is set, tools default to Ask.
4896        // Without a confirmation manager, Ask = safe deny.
4897        let mock_client = Arc::new(MockLlmClient::new(vec![
4898            MockLlmClient::tool_call_response(
4899                "tool-1",
4900                "bash",
4901                serde_json::json!({"command": "rm -rf /tmp/test"}),
4902            ),
4903            MockLlmClient::text_response("Denied!"),
4904        ]));
4905
4906        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4907        let config = AgentConfig {
4908            permission_checker: None, // No policy → defaults to Ask
4909            // No confirmation_manager → safe deny
4910            ..Default::default()
4911        };
4912
4913        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4914        let result = agent.execute(&[], "Delete", None).await.unwrap();
4915
4916        // Should be denied (no policy + no CM = safe deny)
4917        assert_eq!(result.text, "Denied!");
4918        assert_eq!(result.tool_calls_count, 1);
4919    }
4920
4921    #[tokio::test]
4922    async fn test_agent_permission_ask_without_cm_denies() {
4923        // When permission is Ask and no confirmation manager configured,
4924        // tool execution should be denied (safe default).
4925        let mock_client = Arc::new(MockLlmClient::new(vec![
4926            MockLlmClient::tool_call_response(
4927                "tool-1",
4928                "bash",
4929                serde_json::json!({"command": "echo test"}),
4930            ),
4931            MockLlmClient::text_response("Denied!"),
4932        ]));
4933
4934        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4935
4936        // Create policy where bash falls through to Ask (default)
4937        let permission_policy = PermissionPolicy::new(); // Default decision is Ask
4938
4939        let config = AgentConfig {
4940            permission_checker: Some(Arc::new(permission_policy)),
4941            // No confirmation_manager — safe deny
4942            ..Default::default()
4943        };
4944
4945        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4946        let result = agent.execute(&[], "Echo", None).await.unwrap();
4947
4948        // Should deny (Ask without CM = safe deny)
4949        assert_eq!(result.text, "Denied!");
4950        // The tool result should contain the denial message
4951        assert!(result.tool_calls_count >= 1);
4952    }
4953
4954    // ========================================================================
4955    // HITL (Human-in-the-Loop) Tests
4956    // ========================================================================
4957
4958    #[tokio::test]
4959    async fn test_agent_hitl_approved() {
4960        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4961        use tokio::sync::broadcast;
4962
4963        let mock_client = Arc::new(MockLlmClient::new(vec![
4964            MockLlmClient::tool_call_response(
4965                "tool-1",
4966                "bash",
4967                serde_json::json!({"command": "echo hello"}),
4968            ),
4969            MockLlmClient::text_response("Command executed!"),
4970        ]));
4971
4972        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4973
4974        // Create HITL confirmation manager with policy enabled
4975        let (event_tx, _event_rx) = broadcast::channel(100);
4976        let hitl_policy = ConfirmationPolicy {
4977            enabled: true,
4978            ..Default::default()
4979        };
4980        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4981
4982        // Create permission policy that returns Ask for bash
4983        let permission_policy = PermissionPolicy::new(); // Default is Ask
4984
4985        let config = AgentConfig {
4986            permission_checker: Some(Arc::new(permission_policy)),
4987            confirmation_manager: Some(confirmation_manager.clone()),
4988            ..Default::default()
4989        };
4990
4991        // Spawn a task to approve the confirmation
4992        let cm_clone = confirmation_manager.clone();
4993        tokio::spawn(async move {
4994            // Wait a bit for the confirmation request to be created
4995            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
4996            // Approve it
4997            cm_clone.confirm("tool-1", true, None).await.ok();
4998        });
4999
5000        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5001        let result = agent.execute(&[], "Run echo", None).await.unwrap();
5002
5003        assert_eq!(result.text, "Command executed!");
5004        assert_eq!(result.tool_calls_count, 1);
5005    }
5006
5007    #[tokio::test]
5008    async fn test_agent_hitl_rejected() {
5009        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5010        use tokio::sync::broadcast;
5011
5012        let mock_client = Arc::new(MockLlmClient::new(vec![
5013            MockLlmClient::tool_call_response(
5014                "tool-1",
5015                "bash",
5016                serde_json::json!({"command": "rm -rf /"}),
5017            ),
5018            MockLlmClient::text_response("Understood, I won't do that."),
5019        ]));
5020
5021        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5022
5023        // Create HITL confirmation manager
5024        let (event_tx, _event_rx) = broadcast::channel(100);
5025        let hitl_policy = ConfirmationPolicy {
5026            enabled: true,
5027            ..Default::default()
5028        };
5029        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5030
5031        // Permission policy returns Ask
5032        let permission_policy = PermissionPolicy::new();
5033
5034        let config = AgentConfig {
5035            permission_checker: Some(Arc::new(permission_policy)),
5036            confirmation_manager: Some(confirmation_manager.clone()),
5037            ..Default::default()
5038        };
5039
5040        // Spawn a task to reject the confirmation
5041        let cm_clone = confirmation_manager.clone();
5042        tokio::spawn(async move {
5043            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
5044            cm_clone
5045                .confirm("tool-1", false, Some("Too dangerous".to_string()))
5046                .await
5047                .ok();
5048        });
5049
5050        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5051        let result = agent.execute(&[], "Delete everything", None).await.unwrap();
5052
5053        // LLM should respond to the rejection
5054        assert_eq!(result.text, "Understood, I won't do that.");
5055    }
5056
5057    #[tokio::test]
5058    async fn test_agent_hitl_timeout_reject() {
5059        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
5060        use tokio::sync::broadcast;
5061
5062        let mock_client = Arc::new(MockLlmClient::new(vec![
5063            MockLlmClient::tool_call_response(
5064                "tool-1",
5065                "bash",
5066                serde_json::json!({"command": "echo test"}),
5067            ),
5068            MockLlmClient::text_response("Timed out, I understand."),
5069        ]));
5070
5071        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5072
5073        // Create HITL with very short timeout and Reject action
5074        let (event_tx, _event_rx) = broadcast::channel(100);
5075        let hitl_policy = ConfirmationPolicy {
5076            enabled: true,
5077            default_timeout_ms: 50, // Very short timeout
5078            timeout_action: TimeoutAction::Reject,
5079            ..Default::default()
5080        };
5081        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5082
5083        let permission_policy = PermissionPolicy::new();
5084
5085        let config = AgentConfig {
5086            permission_checker: Some(Arc::new(permission_policy)),
5087            confirmation_manager: Some(confirmation_manager),
5088            ..Default::default()
5089        };
5090
5091        // Don't approve - let it timeout
5092        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5093        let result = agent.execute(&[], "Echo", None).await.unwrap();
5094
5095        // Should get timeout rejection response from LLM
5096        assert_eq!(result.text, "Timed out, I understand.");
5097    }
5098
5099    #[tokio::test]
5100    async fn test_agent_hitl_timeout_auto_approve() {
5101        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
5102        use tokio::sync::broadcast;
5103
5104        let mock_client = Arc::new(MockLlmClient::new(vec![
5105            MockLlmClient::tool_call_response(
5106                "tool-1",
5107                "bash",
5108                serde_json::json!({"command": "echo hello"}),
5109            ),
5110            MockLlmClient::text_response("Auto-approved and executed!"),
5111        ]));
5112
5113        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5114
5115        // Create HITL with very short timeout and AutoApprove action
5116        let (event_tx, _event_rx) = broadcast::channel(100);
5117        let hitl_policy = ConfirmationPolicy {
5118            enabled: true,
5119            default_timeout_ms: 50, // Very short timeout
5120            timeout_action: TimeoutAction::AutoApprove,
5121            ..Default::default()
5122        };
5123        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5124
5125        let permission_policy = PermissionPolicy::new();
5126
5127        let config = AgentConfig {
5128            permission_checker: Some(Arc::new(permission_policy)),
5129            confirmation_manager: Some(confirmation_manager),
5130            ..Default::default()
5131        };
5132
5133        // Don't approve - let it timeout and auto-approve
5134        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5135        let result = agent.execute(&[], "Echo", None).await.unwrap();
5136
5137        // Should auto-approve on timeout and execute
5138        assert_eq!(result.text, "Auto-approved and executed!");
5139        assert_eq!(result.tool_calls_count, 1);
5140    }
5141
5142    #[tokio::test]
5143    async fn test_agent_hitl_confirmation_events() {
5144        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5145        use tokio::sync::broadcast;
5146
5147        let mock_client = Arc::new(MockLlmClient::new(vec![
5148            MockLlmClient::tool_call_response(
5149                "tool-1",
5150                "bash",
5151                serde_json::json!({"command": "echo test"}),
5152            ),
5153            MockLlmClient::text_response("Done!"),
5154        ]));
5155
5156        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5157
5158        // Create HITL confirmation manager
5159        let (event_tx, mut event_rx) = broadcast::channel(100);
5160        let hitl_policy = ConfirmationPolicy {
5161            enabled: true,
5162            default_timeout_ms: 5000, // Long enough timeout
5163            ..Default::default()
5164        };
5165        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5166
5167        let permission_policy = PermissionPolicy::new();
5168
5169        let config = AgentConfig {
5170            permission_checker: Some(Arc::new(permission_policy)),
5171            confirmation_manager: Some(confirmation_manager.clone()),
5172            ..Default::default()
5173        };
5174
5175        // Spawn task to approve and collect events
5176        let cm_clone = confirmation_manager.clone();
5177        let event_handle = tokio::spawn(async move {
5178            let mut events = Vec::new();
5179            // Wait for ConfirmationRequired event
5180            while let Ok(event) = event_rx.recv().await {
5181                events.push(event.clone());
5182                if let AgentEvent::ConfirmationRequired { tool_id, .. } = event {
5183                    // Approve it
5184                    cm_clone.confirm(&tool_id, true, None).await.ok();
5185                    // Wait for ConfirmationReceived
5186                    if let Ok(recv_event) = event_rx.recv().await {
5187                        events.push(recv_event);
5188                    }
5189                    break;
5190                }
5191            }
5192            events
5193        });
5194
5195        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5196        let _result = agent.execute(&[], "Echo", None).await.unwrap();
5197
5198        // Check events
5199        let events = event_handle.await.unwrap();
5200        assert!(
5201            events
5202                .iter()
5203                .any(|e| matches!(e, AgentEvent::ConfirmationRequired { .. })),
5204            "Should have ConfirmationRequired event"
5205        );
5206        assert!(
5207            events
5208                .iter()
5209                .any(|e| matches!(e, AgentEvent::ConfirmationReceived { approved: true, .. })),
5210            "Should have ConfirmationReceived event with approved=true"
5211        );
5212    }
5213
5214    #[tokio::test]
5215    async fn test_agent_hitl_disabled_auto_executes() {
5216        // When HITL is disabled, tools should execute automatically even with Ask permission
5217        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5218        use tokio::sync::broadcast;
5219
5220        let mock_client = Arc::new(MockLlmClient::new(vec![
5221            MockLlmClient::tool_call_response(
5222                "tool-1",
5223                "bash",
5224                serde_json::json!({"command": "echo auto"}),
5225            ),
5226            MockLlmClient::text_response("Auto executed!"),
5227        ]));
5228
5229        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5230
5231        // Create HITL with enabled=false
5232        let (event_tx, _event_rx) = broadcast::channel(100);
5233        let hitl_policy = ConfirmationPolicy {
5234            enabled: false, // HITL disabled
5235            ..Default::default()
5236        };
5237        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5238
5239        let permission_policy = PermissionPolicy::new(); // Default is Ask
5240
5241        let config = AgentConfig {
5242            permission_checker: Some(Arc::new(permission_policy)),
5243            confirmation_manager: Some(confirmation_manager),
5244            ..Default::default()
5245        };
5246
5247        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5248        let result = agent.execute(&[], "Echo", None).await.unwrap();
5249
5250        // Should execute without waiting for confirmation
5251        assert_eq!(result.text, "Auto executed!");
5252        assert_eq!(result.tool_calls_count, 1);
5253    }
5254
5255    #[tokio::test]
5256    async fn test_agent_hitl_with_permission_deny_skips_hitl() {
5257        // When permission is Deny, HITL should not be triggered
5258        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5259        use tokio::sync::broadcast;
5260
5261        let mock_client = Arc::new(MockLlmClient::new(vec![
5262            MockLlmClient::tool_call_response(
5263                "tool-1",
5264                "bash",
5265                serde_json::json!({"command": "rm -rf /"}),
5266            ),
5267            MockLlmClient::text_response("Blocked by permission."),
5268        ]));
5269
5270        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5271
5272        // Create HITL enabled
5273        let (event_tx, mut event_rx) = broadcast::channel(100);
5274        let hitl_policy = ConfirmationPolicy {
5275            enabled: true,
5276            ..Default::default()
5277        };
5278        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5279
5280        // Permission policy denies rm commands
5281        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
5282
5283        let config = AgentConfig {
5284            permission_checker: Some(Arc::new(permission_policy)),
5285            confirmation_manager: Some(confirmation_manager),
5286            ..Default::default()
5287        };
5288
5289        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5290        let result = agent.execute(&[], "Delete", None).await.unwrap();
5291
5292        // Should be denied without HITL
5293        assert_eq!(result.text, "Blocked by permission.");
5294
5295        // Should NOT have any ConfirmationRequired events
5296        let mut found_confirmation = false;
5297        while let Ok(event) = event_rx.try_recv() {
5298            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
5299                found_confirmation = true;
5300            }
5301        }
5302        assert!(
5303            !found_confirmation,
5304            "HITL should not be triggered when permission is Deny"
5305        );
5306    }
5307
5308    #[tokio::test]
5309    async fn test_agent_hitl_with_permission_allow_skips_hitl() {
5310        // When permission is Allow, HITL confirmation is skipped entirely.
5311        // PermissionPolicy is the declarative rule engine; Allow = execute directly.
5312        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5313        use tokio::sync::broadcast;
5314
5315        let mock_client = Arc::new(MockLlmClient::new(vec![
5316            MockLlmClient::tool_call_response(
5317                "tool-1",
5318                "bash",
5319                serde_json::json!({"command": "echo hello"}),
5320            ),
5321            MockLlmClient::text_response("Allowed!"),
5322        ]));
5323
5324        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5325
5326        // Create HITL enabled
5327        let (event_tx, mut event_rx) = broadcast::channel(100);
5328        let hitl_policy = ConfirmationPolicy {
5329            enabled: true,
5330            ..Default::default()
5331        };
5332        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5333
5334        // Permission policy allows echo commands
5335        let permission_policy = PermissionPolicy::new().allow("bash(echo:*)");
5336
5337        let config = AgentConfig {
5338            permission_checker: Some(Arc::new(permission_policy)),
5339            confirmation_manager: Some(confirmation_manager.clone()),
5340            ..Default::default()
5341        };
5342
5343        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5344        let result = agent.execute(&[], "Echo", None).await.unwrap();
5345
5346        // Should execute directly without HITL (permission Allow skips confirmation)
5347        assert_eq!(result.text, "Allowed!");
5348
5349        // Should NOT have ConfirmationRequired event (Allow bypasses HITL)
5350        let mut found_confirmation = false;
5351        while let Ok(event) = event_rx.try_recv() {
5352            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
5353                found_confirmation = true;
5354            }
5355        }
5356        assert!(
5357            !found_confirmation,
5358            "Permission Allow should skip HITL confirmation"
5359        );
5360    }
5361
5362    #[tokio::test]
5363    async fn test_agent_hitl_multiple_tool_calls() {
5364        // Test multiple tool calls in sequence with HITL
5365        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5366        use tokio::sync::broadcast;
5367
5368        let mock_client = Arc::new(MockLlmClient::new(vec![
5369            // First response: two tool calls
5370            LlmResponse {
5371                message: Message {
5372                    role: "assistant".to_string(),
5373                    content: vec![
5374                        ContentBlock::ToolUse {
5375                            id: "tool-1".to_string(),
5376                            name: "bash".to_string(),
5377                            input: serde_json::json!({"command": "echo first"}),
5378                        },
5379                        ContentBlock::ToolUse {
5380                            id: "tool-2".to_string(),
5381                            name: "bash".to_string(),
5382                            input: serde_json::json!({"command": "echo second"}),
5383                        },
5384                    ],
5385                    reasoning_content: None,
5386                },
5387                usage: TokenUsage {
5388                    prompt_tokens: 10,
5389                    completion_tokens: 5,
5390                    total_tokens: 15,
5391                    cache_read_tokens: None,
5392                    cache_write_tokens: None,
5393                },
5394                stop_reason: Some("tool_use".to_string()),
5395                meta: None,
5396            },
5397            MockLlmClient::text_response("Both executed!"),
5398        ]));
5399
5400        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5401
5402        // Create HITL
5403        let (event_tx, _event_rx) = broadcast::channel(100);
5404        let hitl_policy = ConfirmationPolicy {
5405            enabled: true,
5406            default_timeout_ms: 5000,
5407            ..Default::default()
5408        };
5409        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5410
5411        let permission_policy = PermissionPolicy::new(); // Default Ask
5412
5413        let config = AgentConfig {
5414            permission_checker: Some(Arc::new(permission_policy)),
5415            confirmation_manager: Some(confirmation_manager.clone()),
5416            ..Default::default()
5417        };
5418
5419        // Spawn task to approve both tools
5420        let cm_clone = confirmation_manager.clone();
5421        tokio::spawn(async move {
5422            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
5423            cm_clone.confirm("tool-1", true, None).await.ok();
5424            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
5425            cm_clone.confirm("tool-2", true, None).await.ok();
5426        });
5427
5428        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5429        let result = agent
5430            .execute_loop(
5431                &[],
5432                "run both commands now",
5433                AgentStyle::GeneralPurpose,
5434                None,
5435                None,
5436                &tokio_util::sync::CancellationToken::new(),
5437                true,
5438            )
5439            .await
5440            .unwrap();
5441
5442        assert_eq!(result.text, "Both executed!");
5443        assert_eq!(result.tool_calls_count, 2);
5444    }
5445
5446    #[tokio::test]
5447    async fn test_agent_hitl_partial_approval() {
5448        // Test: first tool approved, second rejected
5449        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5450        use tokio::sync::broadcast;
5451
5452        let mock_client = Arc::new(MockLlmClient::new(vec![
5453            // First response: two tool calls
5454            LlmResponse {
5455                message: Message {
5456                    role: "assistant".to_string(),
5457                    content: vec![
5458                        ContentBlock::ToolUse {
5459                            id: "tool-1".to_string(),
5460                            name: "bash".to_string(),
5461                            input: serde_json::json!({"command": "echo safe"}),
5462                        },
5463                        ContentBlock::ToolUse {
5464                            id: "tool-2".to_string(),
5465                            name: "bash".to_string(),
5466                            input: serde_json::json!({"command": "rm -rf /"}),
5467                        },
5468                    ],
5469                    reasoning_content: None,
5470                },
5471                usage: TokenUsage {
5472                    prompt_tokens: 10,
5473                    completion_tokens: 5,
5474                    total_tokens: 15,
5475                    cache_read_tokens: None,
5476                    cache_write_tokens: None,
5477                },
5478                stop_reason: Some("tool_use".to_string()),
5479                meta: None,
5480            },
5481            MockLlmClient::text_response("First worked, second rejected."),
5482        ]));
5483
5484        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5485
5486        let (event_tx, _event_rx) = broadcast::channel(100);
5487        let hitl_policy = ConfirmationPolicy {
5488            enabled: true,
5489            default_timeout_ms: 5000,
5490            ..Default::default()
5491        };
5492        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5493
5494        let permission_policy = PermissionPolicy::new();
5495
5496        let config = AgentConfig {
5497            permission_checker: Some(Arc::new(permission_policy)),
5498            confirmation_manager: Some(confirmation_manager.clone()),
5499            ..Default::default()
5500        };
5501
5502        // Approve first, reject second
5503        let cm_clone = confirmation_manager.clone();
5504        tokio::spawn(async move {
5505            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
5506            cm_clone.confirm("tool-1", true, None).await.ok();
5507            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
5508            cm_clone
5509                .confirm("tool-2", false, Some("Dangerous".to_string()))
5510                .await
5511                .ok();
5512        });
5513
5514        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5515        let result = agent.execute(&[], "Run both", None).await.unwrap();
5516
5517        assert_eq!(result.text, "First worked, second rejected.");
5518        assert_eq!(result.tool_calls_count, 2);
5519    }
5520
5521    #[tokio::test]
5522    async fn test_agent_hitl_yolo_mode_auto_approves() {
5523        // YOLO mode: specific lanes auto-approve without confirmation
5524        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5525        use crate::queue::SessionLane;
5526        use tokio::sync::broadcast;
5527
5528        let mock_client = Arc::new(MockLlmClient::new(vec![
5529            MockLlmClient::tool_call_response(
5530                "tool-1",
5531                "read", // Query lane tool
5532                serde_json::json!({"path": "/tmp/test.txt"}),
5533            ),
5534            MockLlmClient::text_response("File read!"),
5535        ]));
5536
5537        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5538
5539        // YOLO mode for Query lane (read, glob, ls, grep)
5540        let (event_tx, mut event_rx) = broadcast::channel(100);
5541        let mut yolo_lanes = std::collections::HashSet::new();
5542        yolo_lanes.insert(SessionLane::Query);
5543        let hitl_policy = ConfirmationPolicy {
5544            enabled: true,
5545            yolo_lanes, // Auto-approve query operations
5546            ..Default::default()
5547        };
5548        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5549
5550        let permission_policy = PermissionPolicy::new();
5551
5552        let config = AgentConfig {
5553            permission_checker: Some(Arc::new(permission_policy)),
5554            confirmation_manager: Some(confirmation_manager),
5555            ..Default::default()
5556        };
5557
5558        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5559        let result = agent.execute(&[], "Read file", None).await.unwrap();
5560
5561        // Should auto-execute without confirmation (YOLO mode)
5562        assert_eq!(result.text, "File read!");
5563
5564        // Should NOT have ConfirmationRequired for yolo lane
5565        let mut found_confirmation = false;
5566        while let Ok(event) = event_rx.try_recv() {
5567            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
5568                found_confirmation = true;
5569            }
5570        }
5571        assert!(
5572            !found_confirmation,
5573            "YOLO mode should not trigger confirmation"
5574        );
5575    }
5576
5577    #[tokio::test]
5578    async fn test_agent_config_with_all_options() {
5579        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5580        use tokio::sync::broadcast;
5581
5582        let (event_tx, _) = broadcast::channel(100);
5583        let hitl_policy = ConfirmationPolicy::default();
5584        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5585
5586        let permission_policy = PermissionPolicy::new().allow("bash(*)");
5587
5588        let config = AgentConfig {
5589            prompt_slots: SystemPromptSlots {
5590                extra: Some("Test system prompt".to_string()),
5591                ..Default::default()
5592            },
5593            tools: vec![],
5594            max_tool_rounds: 10,
5595            permission_checker: Some(Arc::new(permission_policy)),
5596            confirmation_manager: Some(confirmation_manager),
5597            context_providers: vec![],
5598            planning_mode: PlanningMode::default(),
5599            goal_tracking: false,
5600            hook_engine: None,
5601            skill_registry: None,
5602            ..AgentConfig::default()
5603        };
5604
5605        assert!(config.prompt_slots.build().contains("Test system prompt"));
5606        assert_eq!(config.max_tool_rounds, 10);
5607        assert!(config.permission_checker.is_some());
5608        assert!(config.confirmation_manager.is_some());
5609        assert!(config.context_providers.is_empty());
5610
5611        // Test Debug trait
5612        let debug_str = format!("{:?}", config);
5613        assert!(debug_str.contains("AgentConfig"));
5614        assert!(debug_str.contains("permission_checker: true"));
5615        assert!(debug_str.contains("confirmation_manager: true"));
5616        assert!(debug_str.contains("context_providers: 0"));
5617    }
5618
5619    // ========================================================================
5620    // Context Provider Tests
5621    // ========================================================================
5622
5623    use crate::context::{ContextItem, ContextType};
5624
5625    /// Mock context provider for testing
5626    struct MockContextProvider {
5627        name: String,
5628        items: Vec<ContextItem>,
5629        on_turn_calls: std::sync::Arc<tokio::sync::RwLock<Vec<(String, String, String)>>>,
5630    }
5631
5632    impl MockContextProvider {
5633        fn new(name: &str) -> Self {
5634            Self {
5635                name: name.to_string(),
5636                items: Vec::new(),
5637                on_turn_calls: std::sync::Arc::new(tokio::sync::RwLock::new(Vec::new())),
5638            }
5639        }
5640
5641        fn with_items(mut self, items: Vec<ContextItem>) -> Self {
5642            self.items = items;
5643            self
5644        }
5645    }
5646
5647    #[async_trait::async_trait]
5648    impl ContextProvider for MockContextProvider {
5649        fn name(&self) -> &str {
5650            &self.name
5651        }
5652
5653        async fn query(&self, _query: &ContextQuery) -> anyhow::Result<ContextResult> {
5654            let mut result = ContextResult::new(&self.name);
5655            for item in &self.items {
5656                result.add_item(item.clone());
5657            }
5658            Ok(result)
5659        }
5660
5661        async fn on_turn_complete(
5662            &self,
5663            session_id: &str,
5664            prompt: &str,
5665            response: &str,
5666        ) -> anyhow::Result<()> {
5667            let mut calls = self.on_turn_calls.write().await;
5668            calls.push((
5669                session_id.to_string(),
5670                prompt.to_string(),
5671                response.to_string(),
5672            ));
5673            Ok(())
5674        }
5675    }
5676
5677    #[tokio::test]
5678    async fn test_agent_with_context_provider() {
5679        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5680            "Response using context",
5681        )]));
5682
5683        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5684
5685        let provider =
5686            MockContextProvider::new("test-provider").with_items(vec![ContextItem::new(
5687                "ctx-1",
5688                ContextType::Resource,
5689                "Relevant context here",
5690            )
5691            .with_source("test://docs/example")]);
5692
5693        let config = AgentConfig {
5694            prompt_slots: SystemPromptSlots {
5695                extra: Some("You are helpful.".to_string()),
5696                ..Default::default()
5697            },
5698            context_providers: vec![Arc::new(provider)],
5699            ..Default::default()
5700        };
5701
5702        let agent = AgentLoop::new(
5703            mock_client.clone(),
5704            tool_executor,
5705            test_tool_context(),
5706            config,
5707        );
5708        let result = agent
5709            .execute(&[], "verify context provider output", None)
5710            .await
5711            .unwrap();
5712
5713        assert_eq!(result.text, "Response using context");
5714        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
5715    }
5716
5717    #[tokio::test]
5718    async fn test_agent_context_provider_events() {
5719        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5720            "Answer",
5721        )]));
5722
5723        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5724
5725        let provider =
5726            MockContextProvider::new("event-provider").with_items(vec![ContextItem::new(
5727                "item-1",
5728                ContextType::Memory,
5729                "Memory content",
5730            )
5731            .with_token_count(50)]);
5732
5733        let config = AgentConfig {
5734            context_providers: vec![Arc::new(provider)],
5735            ..Default::default()
5736        };
5737
5738        let (tx, mut rx) = mpsc::channel(100);
5739        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5740        let _result = agent.execute(&[], "Test prompt", Some(tx)).await.unwrap();
5741
5742        // Collect events
5743        let mut events = Vec::new();
5744        while let Ok(event) = rx.try_recv() {
5745            events.push(event);
5746        }
5747
5748        // Should have ContextResolving and ContextResolved events
5749        assert!(
5750            events
5751                .iter()
5752                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
5753            "Should have ContextResolving event"
5754        );
5755        assert!(
5756            events
5757                .iter()
5758                .any(|e| matches!(e, AgentEvent::ContextResolved { .. })),
5759            "Should have ContextResolved event"
5760        );
5761
5762        // Check context resolved values
5763        for event in &events {
5764            if let AgentEvent::ContextResolved {
5765                total_items,
5766                total_tokens,
5767            } = event
5768            {
5769                assert_eq!(*total_items, 1);
5770                assert_eq!(*total_tokens, 50);
5771            }
5772        }
5773    }
5774
5775    #[tokio::test]
5776    async fn test_agent_multiple_context_providers() {
5777        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5778            "Combined response",
5779        )]));
5780
5781        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5782
5783        let provider1 = MockContextProvider::new("provider-1").with_items(vec![ContextItem::new(
5784            "p1-1",
5785            ContextType::Resource,
5786            "Resource from P1",
5787        )
5788        .with_token_count(100)]);
5789
5790        let provider2 = MockContextProvider::new("provider-2").with_items(vec![
5791            ContextItem::new("p2-1", ContextType::Memory, "Memory from P2").with_token_count(50),
5792            ContextItem::new("p2-2", ContextType::Skill, "Skill from P2").with_token_count(75),
5793        ]);
5794
5795        let config = AgentConfig {
5796            prompt_slots: SystemPromptSlots {
5797                extra: Some("Base system prompt.".to_string()),
5798                ..Default::default()
5799            },
5800            context_providers: vec![Arc::new(provider1), Arc::new(provider2)],
5801            ..Default::default()
5802        };
5803
5804        let (tx, mut rx) = mpsc::channel(100);
5805        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5806        let result = agent
5807            .execute(&[], "verify combined context", Some(tx))
5808            .await
5809            .unwrap();
5810
5811        assert_eq!(result.text, "Combined response");
5812
5813        // Check context resolved event has combined totals
5814        while let Ok(event) = rx.try_recv() {
5815            if let AgentEvent::ContextResolved {
5816                total_items,
5817                total_tokens,
5818            } = event
5819            {
5820                assert_eq!(total_items, 3); // 1 + 2
5821                assert_eq!(total_tokens, 225); // 100 + 50 + 75
5822            }
5823        }
5824    }
5825
5826    #[tokio::test]
5827    async fn test_agent_no_context_providers() {
5828        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5829            "No context",
5830        )]));
5831
5832        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5833
5834        // No context providers
5835        let config = AgentConfig::default();
5836
5837        let (tx, mut rx) = mpsc::channel(100);
5838        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5839        let result = agent
5840            .execute(&[], "verify simple prompt", Some(tx))
5841            .await
5842            .unwrap();
5843
5844        assert_eq!(result.text, "No context");
5845
5846        // Should NOT have context events when no providers
5847        let mut events = Vec::new();
5848        while let Ok(event) = rx.try_recv() {
5849            events.push(event);
5850        }
5851
5852        assert!(
5853            !events
5854                .iter()
5855                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
5856            "Should NOT have ContextResolving event"
5857        );
5858    }
5859
5860    #[tokio::test]
5861    async fn test_agent_memory_recall_routes_through_context_assembly() {
5862        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5863            "Memory-aware response",
5864        )]));
5865
5866        let memory = crate::memory::AgentMemory::new(Arc::new(a3s_memory::InMemoryStore::new()));
5867        memory
5868            .remember(
5869                a3s_memory::MemoryItem::new(
5870                    "verify focused regression tests caught context regressions.",
5871                )
5872                .with_importance(0.9),
5873            )
5874            .await
5875            .unwrap();
5876
5877        let temp_dir = tempfile::tempdir().unwrap();
5878        let tool_executor = Arc::new(ToolExecutor::new(temp_dir.path().display().to_string()));
5879        let config = AgentConfig {
5880            memory: Some(Arc::new(memory)),
5881            ..Default::default()
5882        };
5883
5884        let (tx, mut rx) = mpsc::channel(100);
5885        let agent = AgentLoop::new(
5886            mock_client,
5887            tool_executor,
5888            ToolContext::new(temp_dir.path().to_path_buf()),
5889            config,
5890        );
5891        let result = agent
5892            .execute(&[], "verify focused regression tests", Some(tx))
5893            .await
5894            .unwrap();
5895
5896        assert_eq!(result.text, "Memory-aware response");
5897
5898        let mut recalled = false;
5899        let mut resolved_items = None;
5900        while let Ok(event) = rx.try_recv() {
5901            match event {
5902                AgentEvent::MemoryRecalled { content, .. } => {
5903                    recalled = content.contains("focused regression tests");
5904                }
5905                AgentEvent::ContextResolved { total_items, .. } => {
5906                    resolved_items = Some(total_items);
5907                }
5908                _ => {}
5909            }
5910        }
5911
5912        assert!(recalled);
5913        assert_eq!(resolved_items, Some(1));
5914    }
5915
5916    #[tokio::test]
5917    async fn test_agent_context_on_turn_complete() {
5918        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5919            "Final response",
5920        )]));
5921
5922        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5923
5924        let provider = Arc::new(MockContextProvider::new("memory-provider"));
5925        let on_turn_calls = provider.on_turn_calls.clone();
5926
5927        let config = AgentConfig {
5928            context_providers: vec![provider],
5929            ..Default::default()
5930        };
5931
5932        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5933
5934        // Execute with session ID
5935        let result = agent
5936            .execute_with_session(&[], "verify user prompt", Some("sess-123"), None, None)
5937            .await
5938            .unwrap();
5939
5940        assert_eq!(result.text, "Final response");
5941
5942        // Check on_turn_complete was called
5943        let calls = on_turn_calls.read().await;
5944        assert_eq!(calls.len(), 1);
5945        assert_eq!(calls[0].0, "sess-123");
5946        assert_eq!(calls[0].1, "verify user prompt");
5947        assert_eq!(calls[0].2, "Final response");
5948    }
5949
5950    #[tokio::test]
5951    async fn test_agent_context_on_turn_complete_no_session() {
5952        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5953            "Response",
5954        )]));
5955
5956        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5957
5958        let provider = Arc::new(MockContextProvider::new("memory-provider"));
5959        let on_turn_calls = provider.on_turn_calls.clone();
5960
5961        let config = AgentConfig {
5962            context_providers: vec![provider],
5963            ..Default::default()
5964        };
5965
5966        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5967
5968        // Execute without session ID (uses execute() which passes None)
5969        let _result = agent.execute(&[], "Prompt", None).await.unwrap();
5970
5971        // on_turn_complete should NOT be called when session_id is None
5972        let calls = on_turn_calls.read().await;
5973        assert!(calls.is_empty());
5974    }
5975
5976    #[tokio::test]
5977    async fn test_agent_build_augmented_system_prompt() {
5978        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response("OK")]));
5979
5980        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5981
5982        let provider = MockContextProvider::new("test").with_items(vec![ContextItem::new(
5983            "doc-1",
5984            ContextType::Resource,
5985            "Auth uses JWT tokens.",
5986        )
5987        .with_source("viking://docs/auth")]);
5988
5989        let config = AgentConfig {
5990            prompt_slots: SystemPromptSlots {
5991                extra: Some("You are helpful.".to_string()),
5992                ..Default::default()
5993            },
5994            context_providers: vec![Arc::new(provider)],
5995            ..Default::default()
5996        };
5997
5998        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5999
6000        // Test building augmented prompt
6001        let context_results = agent.resolve_context("test", None).await;
6002        let augmented = agent.build_augmented_system_prompt(&context_results);
6003
6004        let augmented_str = augmented.unwrap();
6005        assert!(augmented_str.contains("You are helpful."));
6006        assert!(augmented_str.contains("<context source=\"viking://docs/auth\" type=\"Resource\">"));
6007        assert!(augmented_str.contains("Auth uses JWT tokens."));
6008    }
6009
6010    // ========================================================================
6011    // Agentic Loop Integration Tests
6012    // ========================================================================
6013
6014    /// Helper: collect all events from a channel
6015    async fn collect_events(mut rx: mpsc::Receiver<AgentEvent>) -> Vec<AgentEvent> {
6016        let mut events = Vec::new();
6017        while let Ok(event) = rx.try_recv() {
6018            events.push(event);
6019        }
6020        // Drain remaining
6021        while let Some(event) = rx.recv().await {
6022            events.push(event);
6023        }
6024        events
6025    }
6026
6027    #[tokio::test]
6028    async fn test_agent_multi_turn_tool_chain() {
6029        // LLM calls tool A → sees result → calls tool B → sees result → final answer
6030        let mock_client = Arc::new(MockLlmClient::new(vec![
6031            // Turn 1: call ls
6032            MockLlmClient::tool_call_response(
6033                "t1",
6034                "bash",
6035                serde_json::json!({"command": "echo step1"}),
6036            ),
6037            // Turn 2: call another tool based on first result
6038            MockLlmClient::tool_call_response(
6039                "t2",
6040                "bash",
6041                serde_json::json!({"command": "echo step2"}),
6042            ),
6043            // Turn 3: final answer
6044            MockLlmClient::text_response("Completed both steps: step1 then step2"),
6045        ]));
6046
6047        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6048        let config = AgentConfig::default();
6049
6050        let agent = AgentLoop::new(
6051            mock_client.clone(),
6052            tool_executor,
6053            test_tool_context(),
6054            config,
6055        );
6056        let result = agent.execute(&[], "Run two steps", None).await.unwrap();
6057
6058        assert_eq!(result.text, "Completed both steps: step1 then step2");
6059        assert_eq!(result.tool_calls_count, 2);
6060        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 3);
6061
6062        // Verify message history: user → assistant(tool_use) → user(tool_result) → assistant(tool_use) → user(tool_result) → assistant(text)
6063        assert_eq!(result.messages[0].role, "user");
6064        assert_eq!(result.messages[1].role, "assistant"); // tool call 1
6065        assert_eq!(result.messages[2].role, "user"); // tool result 1 (Anthropic convention)
6066        assert_eq!(result.messages[3].role, "assistant"); // tool call 2
6067        assert_eq!(result.messages[4].role, "user"); // tool result 2
6068        assert_eq!(result.messages[5].role, "assistant"); // final text
6069        assert_eq!(result.messages.len(), 6);
6070    }
6071
6072    #[tokio::test]
6073    async fn test_agent_conversation_history_preserved() {
6074        // Pass existing history, verify it's preserved in output
6075        let existing_history = vec![
6076            Message::user("What is Rust?"),
6077            Message {
6078                role: "assistant".to_string(),
6079                content: vec![ContentBlock::Text {
6080                    text: "Rust is a systems programming language.".to_string(),
6081                }],
6082                reasoning_content: None,
6083            },
6084        ];
6085
6086        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6087            "Rust was created by Graydon Hoare at Mozilla.",
6088        )]));
6089
6090        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6091        let agent = AgentLoop::new(
6092            mock_client.clone(),
6093            tool_executor,
6094            test_tool_context(),
6095            AgentConfig {
6096                prompt_slots: SystemPromptSlots {
6097                    style: Some(AgentStyle::GeneralPurpose),
6098                    ..Default::default()
6099                },
6100                ..Default::default()
6101            },
6102        );
6103
6104        let result = agent
6105            .execute(&existing_history, "Who created it?", None)
6106            .await
6107            .unwrap();
6108
6109        // History should contain: old user + old assistant + new user + new assistant
6110        assert_eq!(result.messages.len(), 4);
6111        assert_eq!(result.messages[0].text(), "What is Rust?");
6112        assert_eq!(
6113            result.messages[1].text(),
6114            "Rust is a systems programming language."
6115        );
6116        assert_eq!(result.messages[2].text(), "Who created it?");
6117        assert_eq!(
6118            result.messages[3].text(),
6119            "Rust was created by Graydon Hoare at Mozilla."
6120        );
6121    }
6122
6123    #[tokio::test]
6124    async fn test_agent_event_stream_completeness() {
6125        // Verify full event sequence for a single tool call loop
6126        let mock_client = Arc::new(MockLlmClient::new(vec![
6127            MockLlmClient::tool_call_response(
6128                "t1",
6129                "bash",
6130                serde_json::json!({"command": "echo hi"}),
6131            ),
6132            MockLlmClient::text_response("Done"),
6133        ]));
6134
6135        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6136        let agent = AgentLoop::new(
6137            mock_client,
6138            tool_executor,
6139            test_tool_context(),
6140            AgentConfig {
6141                permission_checker: Some(Arc::new(PermissionPolicy::new().allow("bash(echo:*)"))),
6142                ..Default::default()
6143            },
6144        );
6145
6146        let (tx, rx) = mpsc::channel(100);
6147        let result = agent.execute(&[], "Say hi", Some(tx)).await.unwrap();
6148        assert_eq!(result.text, "Done");
6149
6150        let events = collect_events(rx).await;
6151
6152        // Verify event sequence
6153        let event_types: Vec<&str> = events
6154            .iter()
6155            .map(|e| match e {
6156                AgentEvent::Start { .. } => "Start",
6157                AgentEvent::TurnStart { .. } => "TurnStart",
6158                AgentEvent::TurnEnd { .. } => "TurnEnd",
6159                AgentEvent::ToolEnd { .. } => "ToolEnd",
6160                AgentEvent::End { .. } => "End",
6161                _ => "Other",
6162            })
6163            .collect();
6164
6165        // Mode/context events may precede Start; the execution lifecycle still
6166        // needs a Start before turns and an End as the final event.
6167        let start_index = event_types
6168            .iter()
6169            .position(|t| *t == "Start")
6170            .expect("Start event should be present");
6171        let first_turn_index = event_types
6172            .iter()
6173            .position(|t| *t == "TurnStart")
6174            .expect("TurnStart event should be present");
6175        assert!(start_index < first_turn_index);
6176        assert_eq!(event_types.last(), Some(&"End"));
6177
6178        // Must have 2 TurnStarts (tool call turn + final answer turn)
6179        let turn_starts = event_types.iter().filter(|&&t| t == "TurnStart").count();
6180        assert_eq!(turn_starts, 2);
6181
6182        // Must have 1 ToolEnd
6183        let tool_ends = event_types.iter().filter(|&&t| t == "ToolEnd").count();
6184        assert_eq!(tool_ends, 1);
6185    }
6186
6187    #[tokio::test]
6188    async fn test_agent_multiple_tools_single_turn() {
6189        // LLM returns 2 tool calls in one response
6190        let mock_client = Arc::new(MockLlmClient::new(vec![
6191            LlmResponse {
6192                message: Message {
6193                    role: "assistant".to_string(),
6194                    content: vec![
6195                        ContentBlock::ToolUse {
6196                            id: "t1".to_string(),
6197                            name: "bash".to_string(),
6198                            input: serde_json::json!({"command": "echo first"}),
6199                        },
6200                        ContentBlock::ToolUse {
6201                            id: "t2".to_string(),
6202                            name: "bash".to_string(),
6203                            input: serde_json::json!({"command": "echo second"}),
6204                        },
6205                    ],
6206                    reasoning_content: None,
6207                },
6208                usage: TokenUsage {
6209                    prompt_tokens: 10,
6210                    completion_tokens: 5,
6211                    total_tokens: 15,
6212                    cache_read_tokens: None,
6213                    cache_write_tokens: None,
6214                },
6215                stop_reason: Some("tool_use".to_string()),
6216                meta: None,
6217            },
6218            MockLlmClient::text_response("Both commands ran"),
6219            MockLlmClient::text_response("Both commands ran"),
6220        ]));
6221
6222        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6223        let agent = AgentLoop::new(
6224            mock_client.clone(),
6225            tool_executor,
6226            test_tool_context(),
6227            AgentConfig {
6228                prompt_slots: SystemPromptSlots {
6229                    style: Some(AgentStyle::GeneralPurpose),
6230                    ..Default::default()
6231                },
6232                ..Default::default()
6233            },
6234        );
6235
6236        let result = agent
6237            .execute_loop(
6238                &[],
6239                "run both commands now",
6240                AgentStyle::GeneralPurpose,
6241                None,
6242                None,
6243                &tokio_util::sync::CancellationToken::new(),
6244                true,
6245            )
6246            .await
6247            .unwrap();
6248
6249        assert_eq!(result.text, "Both commands ran");
6250        assert_eq!(result.tool_calls_count, 2);
6251        assert!(
6252            mock_client.call_count.load(Ordering::SeqCst) >= 2,
6253            "expected at least the tool-call turn and final response turn"
6254        );
6255
6256        // Messages: user → assistant(2 tools) → user(tool_result) → user(tool_result) → assistant(text)
6257        assert_eq!(result.messages[0].role, "user");
6258        assert_eq!(result.messages[1].role, "assistant");
6259        assert_eq!(result.messages[2].role, "user"); // tool result 1
6260        assert_eq!(result.messages[3].role, "user"); // tool result 2
6261        assert_eq!(result.messages[4].role, "assistant");
6262    }
6263
6264    #[tokio::test]
6265    async fn test_agent_token_usage_accumulation() {
6266        // Verify usage sums across multiple turns
6267        let mock_client = Arc::new(MockLlmClient::new(vec![
6268            MockLlmClient::tool_call_response(
6269                "t1",
6270                "bash",
6271                serde_json::json!({"command": "echo x"}),
6272            ),
6273            MockLlmClient::text_response("Done"),
6274        ]));
6275
6276        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6277        let agent = AgentLoop::new(
6278            mock_client,
6279            tool_executor,
6280            test_tool_context(),
6281            AgentConfig::default(),
6282        );
6283
6284        let result = agent.execute(&[], "test", None).await.unwrap();
6285
6286        // Each mock response has prompt=10, completion=5, total=15
6287        // 2 LLM calls → 20 prompt, 10 completion, 30 total
6288        assert_eq!(result.usage.prompt_tokens, 20);
6289        assert_eq!(result.usage.completion_tokens, 10);
6290        assert_eq!(result.usage.total_tokens, 30);
6291    }
6292
6293    #[tokio::test]
6294    async fn test_agent_system_prompt_passed() {
6295        // Verify system prompt is used (MockLlmClient captures calls)
6296        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6297            "I am a coding assistant.",
6298        )]));
6299
6300        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6301        let config = AgentConfig {
6302            prompt_slots: SystemPromptSlots {
6303                extra: Some("You are a coding assistant.".to_string()),
6304                ..Default::default()
6305            },
6306            ..Default::default()
6307        };
6308
6309        let agent = AgentLoop::new(
6310            mock_client.clone(),
6311            tool_executor,
6312            test_tool_context(),
6313            config,
6314        );
6315        let result = agent.execute(&[], "What are you?", None).await.unwrap();
6316
6317        assert_eq!(result.text, "I am a coding assistant.");
6318        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
6319    }
6320
6321    #[tokio::test]
6322    async fn test_agent_max_rounds_with_persistent_tool_calls() {
6323        // LLM keeps calling tools forever — should hit max_tool_rounds
6324        let mut responses = Vec::new();
6325        for i in 0..15 {
6326            responses.push(MockLlmClient::tool_call_response(
6327                &format!("t{}", i),
6328                "bash",
6329                serde_json::json!({"command": format!("echo round{}", i)}),
6330            ));
6331        }
6332
6333        let mock_client = Arc::new(MockLlmClient::new(responses));
6334        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6335        let config = AgentConfig {
6336            max_tool_rounds: 5,
6337            ..Default::default()
6338        };
6339
6340        let agent = AgentLoop::new(
6341            mock_client.clone(),
6342            tool_executor,
6343            test_tool_context(),
6344            config,
6345        );
6346        let result = agent.execute(&[], "Loop forever", None).await;
6347
6348        assert!(result.is_err());
6349        let err = result.unwrap_err().to_string();
6350        assert!(err.contains("Max tool rounds (5) exceeded"));
6351    }
6352
6353    #[tokio::test]
6354    async fn test_agent_end_event_contains_final_text() {
6355        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6356            "Final answer here",
6357        )]));
6358
6359        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6360        let agent = AgentLoop::new(
6361            mock_client,
6362            tool_executor,
6363            test_tool_context(),
6364            AgentConfig::default(),
6365        );
6366
6367        let (tx, rx) = mpsc::channel(100);
6368        agent.execute(&[], "test", Some(tx)).await.unwrap();
6369
6370        let events = collect_events(rx).await;
6371        let end_event = events.iter().find(|e| matches!(e, AgentEvent::End { .. }));
6372        assert!(end_event.is_some());
6373
6374        if let AgentEvent::End { text, usage, .. } = end_event.unwrap() {
6375            assert_eq!(text, "Final answer here");
6376            assert_eq!(usage.total_tokens, 15);
6377        }
6378    }
6379}
6380
6381#[cfg(test)]
6382mod extra_agent_tests {
6383    use super::*;
6384    use crate::agent::tests::MockLlmClient;
6385    use crate::queue::SessionQueueConfig;
6386    use crate::tools::ToolExecutor;
6387    use std::path::PathBuf;
6388    use std::sync::atomic::{AtomicUsize, Ordering};
6389
6390    fn test_tool_context() -> ToolContext {
6391        ToolContext::new(PathBuf::from("/tmp"))
6392    }
6393
6394    // ========================================================================
6395    // AgentConfig
6396    // ========================================================================
6397
6398    #[test]
6399    fn test_agent_config_debug() {
6400        let config = AgentConfig {
6401            prompt_slots: SystemPromptSlots {
6402                extra: Some("You are helpful".to_string()),
6403                ..Default::default()
6404            },
6405            tools: vec![],
6406            max_tool_rounds: 10,
6407            permission_checker: None,
6408            confirmation_manager: None,
6409            context_providers: vec![],
6410            planning_mode: PlanningMode::Enabled,
6411            goal_tracking: false,
6412            hook_engine: None,
6413            skill_registry: None,
6414            ..AgentConfig::default()
6415        };
6416        let debug = format!("{:?}", config);
6417        assert!(debug.contains("AgentConfig"));
6418        assert!(debug.contains("planning_mode"));
6419    }
6420
6421    #[test]
6422    fn test_agent_config_default_values() {
6423        let config = AgentConfig::default();
6424        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
6425        assert_eq!(config.planning_mode, PlanningMode::Auto);
6426        assert!(!config.goal_tracking);
6427        assert!(config.context_providers.is_empty());
6428    }
6429
6430    #[test]
6431    fn test_auto_pre_analysis_runs_without_keyword_gate() {
6432        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6433        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6434        let agent = AgentLoop::new(
6435            mock_client,
6436            tool_executor,
6437            test_tool_context(),
6438            AgentConfig::default(),
6439        );
6440
6441        assert!(agent.should_run_pre_analysis());
6442    }
6443
6444    #[test]
6445    fn test_disabled_planning_never_runs_pre_analysis() {
6446        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6447        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6448        let config = AgentConfig {
6449            planning_mode: PlanningMode::Disabled,
6450            ..AgentConfig::default()
6451        };
6452        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6453
6454        assert!(!agent.should_run_pre_analysis());
6455    }
6456
6457    // ========================================================================
6458    // AgentEvent serialization
6459    // ========================================================================
6460
6461    #[test]
6462    fn test_agent_event_serialize_start() {
6463        let event = AgentEvent::Start {
6464            prompt: "Hello".to_string(),
6465        };
6466        let json = serde_json::to_string(&event).unwrap();
6467        assert!(json.contains("agent_start"));
6468        assert!(json.contains("Hello"));
6469    }
6470
6471    #[test]
6472    fn test_agent_event_serialize_text_delta() {
6473        let event = AgentEvent::TextDelta {
6474            text: "chunk".to_string(),
6475        };
6476        let json = serde_json::to_string(&event).unwrap();
6477        assert!(json.contains("text_delta"));
6478    }
6479
6480    #[test]
6481    fn test_agent_event_serialize_tool_start() {
6482        let event = AgentEvent::ToolStart {
6483            id: "t1".to_string(),
6484            name: "bash".to_string(),
6485        };
6486        let json = serde_json::to_string(&event).unwrap();
6487        assert!(json.contains("tool_start"));
6488        assert!(json.contains("bash"));
6489    }
6490
6491    #[test]
6492    fn test_agent_event_serialize_tool_end() {
6493        let event = AgentEvent::ToolEnd {
6494            id: "t1".to_string(),
6495            name: "bash".to_string(),
6496            output: "hello".to_string(),
6497            exit_code: 0,
6498            metadata: None,
6499        };
6500        let json = serde_json::to_string(&event).unwrap();
6501        assert!(json.contains("tool_end"));
6502    }
6503
6504    #[test]
6505    fn test_agent_event_tool_end_has_metadata_field() {
6506        let event = AgentEvent::ToolEnd {
6507            id: "t1".to_string(),
6508            name: "write".to_string(),
6509            output: "Wrote 5 bytes".to_string(),
6510            exit_code: 0,
6511            metadata: Some(
6512                serde_json::json!({ "before": "old", "after": "new", "file_path": "f.txt" }),
6513            ),
6514        };
6515        let json = serde_json::to_string(&event).unwrap();
6516        assert!(json.contains("\"before\""));
6517    }
6518
6519    #[test]
6520    fn test_agent_event_serialize_error() {
6521        let event = AgentEvent::Error {
6522            message: "oops".to_string(),
6523        };
6524        let json = serde_json::to_string(&event).unwrap();
6525        assert!(json.contains("error"));
6526        assert!(json.contains("oops"));
6527    }
6528
6529    #[test]
6530    fn test_agent_event_serialize_confirmation_required() {
6531        let event = AgentEvent::ConfirmationRequired {
6532            tool_id: "t1".to_string(),
6533            tool_name: "bash".to_string(),
6534            args: serde_json::json!({"cmd": "rm"}),
6535            timeout_ms: 30000,
6536        };
6537        let json = serde_json::to_string(&event).unwrap();
6538        assert!(json.contains("confirmation_required"));
6539    }
6540
6541    #[test]
6542    fn test_agent_event_serialize_confirmation_received() {
6543        let event = AgentEvent::ConfirmationReceived {
6544            tool_id: "t1".to_string(),
6545            approved: true,
6546            reason: Some("safe".to_string()),
6547        };
6548        let json = serde_json::to_string(&event).unwrap();
6549        assert!(json.contains("confirmation_received"));
6550    }
6551
6552    #[test]
6553    fn test_agent_event_serialize_confirmation_timeout() {
6554        let event = AgentEvent::ConfirmationTimeout {
6555            tool_id: "t1".to_string(),
6556            action_taken: "rejected".to_string(),
6557        };
6558        let json = serde_json::to_string(&event).unwrap();
6559        assert!(json.contains("confirmation_timeout"));
6560    }
6561
6562    #[test]
6563    fn test_agent_event_serialize_external_task_pending() {
6564        let event = AgentEvent::ExternalTaskPending {
6565            task_id: "task-1".to_string(),
6566            session_id: "sess-1".to_string(),
6567            lane: crate::queue::SessionLane::Execute,
6568            command_type: "bash".to_string(),
6569            payload: serde_json::json!({}),
6570            timeout_ms: 60000,
6571        };
6572        let json = serde_json::to_string(&event).unwrap();
6573        assert!(json.contains("external_task_pending"));
6574    }
6575
6576    #[test]
6577    fn test_agent_event_serialize_external_task_completed() {
6578        let event = AgentEvent::ExternalTaskCompleted {
6579            task_id: "task-1".to_string(),
6580            session_id: "sess-1".to_string(),
6581            success: false,
6582        };
6583        let json = serde_json::to_string(&event).unwrap();
6584        assert!(json.contains("external_task_completed"));
6585    }
6586
6587    #[test]
6588    fn test_agent_event_serialize_permission_denied() {
6589        let event = AgentEvent::PermissionDenied {
6590            tool_id: "t1".to_string(),
6591            tool_name: "bash".to_string(),
6592            args: serde_json::json!({}),
6593            reason: "denied".to_string(),
6594        };
6595        let json = serde_json::to_string(&event).unwrap();
6596        assert!(json.contains("permission_denied"));
6597    }
6598
6599    #[test]
6600    fn test_agent_event_serialize_context_compacted() {
6601        let event = AgentEvent::ContextCompacted {
6602            session_id: "sess-1".to_string(),
6603            before_messages: 100,
6604            after_messages: 20,
6605            percent_before: 0.85,
6606        };
6607        let json = serde_json::to_string(&event).unwrap();
6608        assert!(json.contains("context_compacted"));
6609    }
6610
6611    #[test]
6612    fn test_agent_event_serialize_turn_start() {
6613        let event = AgentEvent::TurnStart { turn: 3 };
6614        let json = serde_json::to_string(&event).unwrap();
6615        assert!(json.contains("turn_start"));
6616    }
6617
6618    #[test]
6619    fn test_agent_event_serialize_turn_end() {
6620        let event = AgentEvent::TurnEnd {
6621            turn: 3,
6622            usage: TokenUsage::default(),
6623        };
6624        let json = serde_json::to_string(&event).unwrap();
6625        assert!(json.contains("turn_end"));
6626    }
6627
6628    #[test]
6629    fn test_agent_event_serialize_end() {
6630        let event = AgentEvent::End {
6631            text: "Done".to_string(),
6632            usage: TokenUsage {
6633                prompt_tokens: 100,
6634                completion_tokens: 50,
6635                total_tokens: 150,
6636                cache_read_tokens: None,
6637                cache_write_tokens: None,
6638            },
6639            verification_summary: Box::new(crate::verification::VerificationSummary::from_reports(
6640                &[],
6641            )),
6642            meta: None,
6643        };
6644        let json = serde_json::to_string(&event).unwrap();
6645        assert!(json.contains("agent_end"));
6646        assert!(json.contains("verification_summary"));
6647    }
6648
6649    // ========================================================================
6650    // AgentResult
6651    // ========================================================================
6652
6653    #[test]
6654    fn test_agent_result_fields() {
6655        let result = AgentResult {
6656            text: "output".to_string(),
6657            messages: vec![Message::user("hello")],
6658            usage: TokenUsage::default(),
6659            tool_calls_count: 3,
6660            verification_reports: Vec::new(),
6661        };
6662        assert_eq!(result.text, "output");
6663        assert_eq!(result.messages.len(), 1);
6664        assert_eq!(result.tool_calls_count, 3);
6665        assert!(result.verification_reports.is_empty());
6666        assert_eq!(
6667            result.verification_summary().status,
6668            crate::verification::VerificationStatus::Skipped
6669        );
6670        assert!(!result.has_pending_verification());
6671    }
6672
6673    #[test]
6674    fn test_collect_verification_report_from_tool_metadata() {
6675        let report = crate::verification::VerificationReport::new(
6676            "program:example",
6677            vec![crate::verification::VerificationCheck::required(
6678                "check:inspect",
6679                "inspect_artifacts",
6680                "Inspect artifacts",
6681            )],
6682        );
6683        let metadata = Some(serde_json::json!({
6684            "verification_report": report.to_value()
6685        }));
6686        let mut reports = Vec::new();
6687
6688        AgentLoop::collect_verification_report(&mut reports, &metadata);
6689
6690        assert_eq!(reports.len(), 1);
6691        assert_eq!(reports[0].subject, "program:example");
6692        assert_eq!(
6693            reports[0].status,
6694            crate::verification::VerificationStatus::NeedsReview
6695        );
6696    }
6697
6698    #[test]
6699    fn test_agent_result_verification_summary() {
6700        let report = crate::verification::VerificationReport::new(
6701            "program:example",
6702            vec![crate::verification::VerificationCheck::required(
6703                "check:inspect",
6704                "inspect_artifacts",
6705                "Inspect artifacts",
6706            )],
6707        );
6708        let result = AgentResult {
6709            text: "output".to_string(),
6710            messages: Vec::new(),
6711            usage: TokenUsage::default(),
6712            tool_calls_count: 1,
6713            verification_reports: vec![report],
6714        };
6715
6716        let summary = result.verification_summary();
6717
6718        assert_eq!(
6719            summary.status,
6720            crate::verification::VerificationStatus::NeedsReview
6721        );
6722        assert_eq!(summary.pending_required_check_count, 1);
6723        assert!(result
6724            .verification_summary_text()
6725            .contains("Verification needs review"));
6726        assert!(result.has_pending_verification());
6727    }
6728
6729    // ========================================================================
6730    // Missing AgentEvent serialization tests
6731    // ========================================================================
6732
6733    #[test]
6734    fn test_agent_event_serialize_context_resolving() {
6735        let event = AgentEvent::ContextResolving {
6736            providers: vec!["provider1".to_string(), "provider2".to_string()],
6737        };
6738        let json = serde_json::to_string(&event).unwrap();
6739        assert!(json.contains("context_resolving"));
6740        assert!(json.contains("provider1"));
6741    }
6742
6743    #[test]
6744    fn test_agent_event_serialize_context_resolved() {
6745        let event = AgentEvent::ContextResolved {
6746            total_items: 5,
6747            total_tokens: 1000,
6748        };
6749        let json = serde_json::to_string(&event).unwrap();
6750        assert!(json.contains("context_resolved"));
6751        assert!(json.contains("1000"));
6752    }
6753
6754    #[test]
6755    fn test_agent_event_serialize_command_dead_lettered() {
6756        let event = AgentEvent::CommandDeadLettered {
6757            command_id: "cmd-1".to_string(),
6758            command_type: "bash".to_string(),
6759            lane: "execute".to_string(),
6760            error: "timeout".to_string(),
6761            attempts: 3,
6762        };
6763        let json = serde_json::to_string(&event).unwrap();
6764        assert!(json.contains("command_dead_lettered"));
6765        assert!(json.contains("cmd-1"));
6766    }
6767
6768    #[test]
6769    fn test_agent_event_serialize_command_retry() {
6770        let event = AgentEvent::CommandRetry {
6771            command_id: "cmd-2".to_string(),
6772            command_type: "read".to_string(),
6773            lane: "query".to_string(),
6774            attempt: 2,
6775            delay_ms: 1000,
6776        };
6777        let json = serde_json::to_string(&event).unwrap();
6778        assert!(json.contains("command_retry"));
6779        assert!(json.contains("cmd-2"));
6780    }
6781
6782    #[test]
6783    fn test_agent_event_serialize_queue_alert() {
6784        let event = AgentEvent::QueueAlert {
6785            level: "warning".to_string(),
6786            alert_type: "depth".to_string(),
6787            message: "Queue depth exceeded".to_string(),
6788        };
6789        let json = serde_json::to_string(&event).unwrap();
6790        assert!(json.contains("queue_alert"));
6791        assert!(json.contains("warning"));
6792    }
6793
6794    #[test]
6795    fn test_agent_event_serialize_task_updated() {
6796        let event = AgentEvent::TaskUpdated {
6797            session_id: "sess-1".to_string(),
6798            tasks: vec![],
6799        };
6800        let json = serde_json::to_string(&event).unwrap();
6801        assert!(json.contains("task_updated"));
6802        assert!(json.contains("sess-1"));
6803    }
6804
6805    #[test]
6806    fn test_agent_event_serialize_memory_stored() {
6807        let event = AgentEvent::MemoryStored {
6808            memory_id: "mem-1".to_string(),
6809            memory_type: "conversation".to_string(),
6810            importance: 0.8,
6811            tags: vec!["important".to_string()],
6812        };
6813        let json = serde_json::to_string(&event).unwrap();
6814        assert!(json.contains("memory_stored"));
6815        assert!(json.contains("mem-1"));
6816    }
6817
6818    #[test]
6819    fn test_agent_event_serialize_memory_recalled() {
6820        let event = AgentEvent::MemoryRecalled {
6821            memory_id: "mem-2".to_string(),
6822            content: "Previous conversation".to_string(),
6823            relevance: 0.9,
6824        };
6825        let json = serde_json::to_string(&event).unwrap();
6826        assert!(json.contains("memory_recalled"));
6827        assert!(json.contains("mem-2"));
6828    }
6829
6830    #[test]
6831    fn test_agent_event_serialize_memories_searched() {
6832        let event = AgentEvent::MemoriesSearched {
6833            query: Some("search term".to_string()),
6834            tags: vec!["tag1".to_string()],
6835            result_count: 5,
6836        };
6837        let json = serde_json::to_string(&event).unwrap();
6838        assert!(json.contains("memories_searched"));
6839        assert!(json.contains("search term"));
6840    }
6841
6842    #[test]
6843    fn test_agent_event_serialize_memory_cleared() {
6844        let event = AgentEvent::MemoryCleared {
6845            tier: "short_term".to_string(),
6846            count: 10,
6847        };
6848        let json = serde_json::to_string(&event).unwrap();
6849        assert!(json.contains("memory_cleared"));
6850        assert!(json.contains("short_term"));
6851    }
6852
6853    #[test]
6854    fn test_agent_event_serialize_subagent_start() {
6855        let event = AgentEvent::SubagentStart {
6856            task_id: "task-1".to_string(),
6857            session_id: "child-sess".to_string(),
6858            parent_session_id: "parent-sess".to_string(),
6859            agent: "explore".to_string(),
6860            description: "Explore codebase".to_string(),
6861        };
6862        let json = serde_json::to_string(&event).unwrap();
6863        assert!(json.contains("subagent_start"));
6864        assert!(json.contains("explore"));
6865    }
6866
6867    #[test]
6868    fn test_agent_event_serialize_subagent_progress() {
6869        let event = AgentEvent::SubagentProgress {
6870            task_id: "task-1".to_string(),
6871            session_id: "child-sess".to_string(),
6872            status: "processing".to_string(),
6873            metadata: serde_json::json!({"progress": 50}),
6874        };
6875        let json = serde_json::to_string(&event).unwrap();
6876        assert!(json.contains("subagent_progress"));
6877        assert!(json.contains("processing"));
6878    }
6879
6880    #[test]
6881    fn test_agent_event_serialize_subagent_end() {
6882        let event = AgentEvent::SubagentEnd {
6883            task_id: "task-1".to_string(),
6884            session_id: "child-sess".to_string(),
6885            agent: "explore".to_string(),
6886            output: "Found 10 files".to_string(),
6887            success: true,
6888        };
6889        let json = serde_json::to_string(&event).unwrap();
6890        assert!(json.contains("subagent_end"));
6891        assert!(json.contains("Found 10 files"));
6892    }
6893
6894    #[test]
6895    fn test_agent_event_serialize_planning_start() {
6896        let event = AgentEvent::PlanningStart {
6897            prompt: "Build a web app".to_string(),
6898        };
6899        let json = serde_json::to_string(&event).unwrap();
6900        assert!(json.contains("planning_start"));
6901        assert!(json.contains("Build a web app"));
6902    }
6903
6904    #[test]
6905    fn test_agent_event_serialize_planning_end() {
6906        use crate::planning::{Complexity, ExecutionPlan};
6907        let plan = ExecutionPlan::new("Test goal".to_string(), Complexity::Simple);
6908        let event = AgentEvent::PlanningEnd {
6909            plan,
6910            estimated_steps: 3,
6911        };
6912        let json = serde_json::to_string(&event).unwrap();
6913        assert!(json.contains("planning_end"));
6914        assert!(json.contains("estimated_steps"));
6915    }
6916
6917    #[test]
6918    fn test_agent_event_serialize_step_start() {
6919        let event = AgentEvent::StepStart {
6920            step_id: "step-1".to_string(),
6921            description: "Initialize project".to_string(),
6922            step_number: 1,
6923            total_steps: 5,
6924        };
6925        let json = serde_json::to_string(&event).unwrap();
6926        assert!(json.contains("step_start"));
6927        assert!(json.contains("Initialize project"));
6928    }
6929
6930    #[test]
6931    fn test_agent_event_serialize_step_end() {
6932        let event = AgentEvent::StepEnd {
6933            step_id: "step-1".to_string(),
6934            status: TaskStatus::Completed,
6935            step_number: 1,
6936            total_steps: 5,
6937        };
6938        let json = serde_json::to_string(&event).unwrap();
6939        assert!(json.contains("step_end"));
6940        assert!(json.contains("step-1"));
6941    }
6942
6943    #[test]
6944    fn test_agent_event_serialize_goal_extracted() {
6945        use crate::planning::AgentGoal;
6946        let goal = AgentGoal::new("Complete the task".to_string());
6947        let event = AgentEvent::GoalExtracted { goal };
6948        let json = serde_json::to_string(&event).unwrap();
6949        assert!(json.contains("goal_extracted"));
6950    }
6951
6952    #[test]
6953    fn test_agent_event_serialize_goal_progress() {
6954        let event = AgentEvent::GoalProgress {
6955            goal: "Build app".to_string(),
6956            progress: 0.5,
6957            completed_steps: 2,
6958            total_steps: 4,
6959        };
6960        let json = serde_json::to_string(&event).unwrap();
6961        assert!(json.contains("goal_progress"));
6962        assert!(json.contains("0.5"));
6963    }
6964
6965    #[test]
6966    fn test_agent_event_serialize_goal_achieved() {
6967        let event = AgentEvent::GoalAchieved {
6968            goal: "Build app".to_string(),
6969            total_steps: 4,
6970            duration_ms: 5000,
6971        };
6972        let json = serde_json::to_string(&event).unwrap();
6973        assert!(json.contains("goal_achieved"));
6974        assert!(json.contains("5000"));
6975    }
6976
6977    #[tokio::test]
6978    async fn test_extract_goal_with_json_response() {
6979        // LlmPlanner expects JSON with "description" and "success_criteria" fields
6980        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6981            r#"{"description": "Build web app", "success_criteria": ["App runs on port 3000", "Has login page"]}"#,
6982        )]));
6983        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6984        let agent = AgentLoop::new(
6985            mock_client,
6986            tool_executor,
6987            test_tool_context(),
6988            AgentConfig::default(),
6989        );
6990
6991        let goal = agent.extract_goal("Build a web app").await.unwrap();
6992        assert_eq!(goal.description, "Build web app");
6993        assert_eq!(goal.success_criteria.len(), 2);
6994        assert_eq!(goal.success_criteria[0], "App runs on port 3000");
6995    }
6996
6997    #[tokio::test]
6998    async fn test_extract_goal_fallback_on_non_json() {
6999        // Non-JSON response triggers fallback: returns the original prompt as goal
7000        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
7001            "Some non-JSON response",
7002        )]));
7003        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7004        let agent = AgentLoop::new(
7005            mock_client,
7006            tool_executor,
7007            test_tool_context(),
7008            AgentConfig::default(),
7009        );
7010
7011        let goal = agent.extract_goal("Do something").await.unwrap();
7012        // Fallback uses the original prompt as description
7013        assert_eq!(goal.description, "Do something");
7014        // Fallback adds 2 generic criteria
7015        assert_eq!(goal.success_criteria.len(), 2);
7016    }
7017
7018    #[tokio::test]
7019    async fn test_check_goal_achievement_json_yes() {
7020        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
7021            r#"{"achieved": true, "progress": 1.0, "remaining_criteria": []}"#,
7022        )]));
7023        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7024        let agent = AgentLoop::new(
7025            mock_client,
7026            tool_executor,
7027            test_tool_context(),
7028            AgentConfig::default(),
7029        );
7030
7031        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
7032        let achieved = agent
7033            .check_goal_achievement(&goal, "All done")
7034            .await
7035            .unwrap();
7036        assert!(achieved);
7037    }
7038
7039    #[tokio::test]
7040    async fn test_check_goal_achievement_fallback_not_done() {
7041        // Non-JSON response triggers heuristic fallback
7042        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
7043            "invalid json",
7044        )]));
7045        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7046        let agent = AgentLoop::new(
7047            mock_client,
7048            tool_executor,
7049            test_tool_context(),
7050            AgentConfig::default(),
7051        );
7052
7053        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
7054        // "still working" doesn't contain "complete"/"done"/"finished"
7055        let achieved = agent
7056            .check_goal_achievement(&goal, "still working")
7057            .await
7058            .unwrap();
7059        assert!(!achieved);
7060    }
7061
7062    // ========================================================================
7063    // build_augmented_system_prompt Tests
7064    // ========================================================================
7065
7066    #[test]
7067    fn test_build_augmented_system_prompt_empty_context() {
7068        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7069        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7070        let config = AgentConfig {
7071            prompt_slots: SystemPromptSlots {
7072                extra: Some("Base prompt".to_string()),
7073                ..Default::default()
7074            },
7075            ..Default::default()
7076        };
7077        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
7078
7079        let result = agent.build_augmented_system_prompt(&[]);
7080        assert!(result.unwrap().contains("Base prompt"));
7081    }
7082
7083    #[test]
7084    fn test_build_augmented_system_prompt_no_custom_slots() {
7085        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7086        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7087        let agent = AgentLoop::new(
7088            mock_client,
7089            tool_executor,
7090            test_tool_context(),
7091            AgentConfig::default(),
7092        );
7093
7094        let result = agent.build_augmented_system_prompt(&[]);
7095        // Default slots still produce the default agentic prompt
7096        assert!(result.is_some());
7097        assert!(result.unwrap().contains("Core Behaviour"));
7098    }
7099
7100    #[test]
7101    fn test_project_hint_is_assembled_as_context_item() {
7102        let temp_dir = tempfile::tempdir().unwrap();
7103        std::fs::write(
7104            temp_dir.path().join("Cargo.toml"),
7105            "[package]\nname = \"demo\"\n",
7106        )
7107        .unwrap();
7108
7109        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7110        let tool_executor = Arc::new(ToolExecutor::new(temp_dir.path().display().to_string()));
7111        let agent = AgentLoop::new(
7112            mock_client,
7113            tool_executor,
7114            ToolContext::new(temp_dir.path().to_path_buf()),
7115            AgentConfig::default(),
7116        );
7117
7118        let assembly = agent.assemble_context_results(&[]);
7119        assert_eq!(assembly.items.len(), 1);
7120        assert_eq!(
7121            assembly.items[0].source.as_deref(),
7122            Some("a3s://project-hint")
7123        );
7124        assert!(assembly.items[0].content.contains("Rust"));
7125
7126        let text = agent.build_augmented_system_prompt(&[]).unwrap();
7127        assert!(text.contains("<context source=\"a3s://project-hint\" type=\"Resource\">"));
7128    }
7129
7130    #[test]
7131    fn test_build_augmented_system_prompt_with_context_no_base() {
7132        use crate::context::{ContextItem, ContextResult, ContextType};
7133
7134        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7135        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7136        let agent = AgentLoop::new(
7137            mock_client,
7138            tool_executor,
7139            test_tool_context(),
7140            AgentConfig::default(),
7141        );
7142
7143        let context = vec![ContextResult {
7144            provider: "test".to_string(),
7145            items: vec![ContextItem::new("id1", ContextType::Resource, "Content")],
7146            total_tokens: 10,
7147            truncated: false,
7148        }];
7149
7150        let result = agent.build_augmented_system_prompt(&context);
7151        assert!(result.is_some());
7152        let text = result.unwrap();
7153        assert!(text.contains("<context"));
7154        assert!(text.contains("Content"));
7155    }
7156
7157    // ========================================================================
7158    // AgentResult Clone and Debug
7159    // ========================================================================
7160
7161    #[test]
7162    fn test_agent_result_clone() {
7163        let result = AgentResult {
7164            text: "output".to_string(),
7165            messages: vec![Message::user("hello")],
7166            usage: TokenUsage::default(),
7167            tool_calls_count: 3,
7168            verification_reports: Vec::new(),
7169        };
7170        let cloned = result.clone();
7171        assert_eq!(cloned.text, result.text);
7172        assert_eq!(cloned.tool_calls_count, result.tool_calls_count);
7173    }
7174
7175    #[test]
7176    fn test_agent_result_debug() {
7177        let result = AgentResult {
7178            text: "output".to_string(),
7179            messages: vec![Message::user("hello")],
7180            usage: TokenUsage::default(),
7181            tool_calls_count: 3,
7182            verification_reports: Vec::new(),
7183        };
7184        let debug = format!("{:?}", result);
7185        assert!(debug.contains("AgentResult"));
7186        assert!(debug.contains("output"));
7187    }
7188
7189    // ========================================================================
7190    // handle_post_execution_metadata Tests
7191    // ========================================================================
7192
7193    // ========================================================================
7194    // ToolCommand adapter tests
7195    // ========================================================================
7196
7197    #[tokio::test]
7198    async fn test_tool_command_command_type() {
7199        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7200        let cmd = ToolCommand {
7201            tool_executor: executor,
7202            tool_name: "read".to_string(),
7203            tool_args: serde_json::json!({"file": "test.rs"}),
7204            skill_registry: None,
7205            tool_context: test_tool_context(),
7206        };
7207        assert_eq!(cmd.command_type(), "read");
7208    }
7209
7210    #[tokio::test]
7211    async fn test_tool_command_payload() {
7212        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7213        let args = serde_json::json!({"file": "test.rs", "offset": 10});
7214        let cmd = ToolCommand {
7215            tool_executor: executor,
7216            tool_name: "read".to_string(),
7217            tool_args: args.clone(),
7218            skill_registry: None,
7219            tool_context: test_tool_context(),
7220        };
7221        assert_eq!(cmd.payload(), args);
7222    }
7223
7224    // ========================================================================
7225    // AgentLoop with queue builder tests
7226    // ========================================================================
7227
7228    #[tokio::test(flavor = "multi_thread")]
7229    async fn test_agent_loop_with_queue() {
7230        use tokio::sync::broadcast;
7231
7232        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
7233            "Hello",
7234        )]));
7235        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7236        let config = AgentConfig::default();
7237
7238        let (event_tx, _) = broadcast::channel(100);
7239        let queue = SessionLaneQueue::new("test-session", SessionQueueConfig::default(), event_tx)
7240            .await
7241            .unwrap();
7242
7243        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config)
7244            .with_queue(Arc::new(queue));
7245
7246        assert!(agent.command_queue.is_some());
7247    }
7248
7249    #[tokio::test]
7250    async fn test_agent_loop_without_queue() {
7251        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
7252            "Hello",
7253        )]));
7254        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7255        let config = AgentConfig::default();
7256
7257        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
7258
7259        assert!(agent.command_queue.is_none());
7260    }
7261
7262    // ========================================================================
7263    // Parallel Plan Execution Tests
7264    // ========================================================================
7265
7266    #[tokio::test]
7267    async fn test_execute_plan_parallel_independent() {
7268        use crate::planning::{Complexity, ExecutionPlan, Task};
7269
7270        // 3 independent steps (no dependencies) — should all execute.
7271        // MockLlmClient needs one response per execute_loop call per step.
7272        let mock_client = Arc::new(MockLlmClient::new(vec![
7273            MockLlmClient::text_response("Step 1 done"),
7274            MockLlmClient::text_response("Step 2 done"),
7275            MockLlmClient::text_response("Step 3 done"),
7276        ]));
7277
7278        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7279        let config = AgentConfig::default();
7280        let agent = AgentLoop::new(
7281            mock_client.clone(),
7282            tool_executor,
7283            test_tool_context(),
7284            config,
7285        );
7286
7287        let mut plan = ExecutionPlan::new("Test parallel", Complexity::Simple);
7288        plan.add_step(Task::new("s1", "First step"));
7289        plan.add_step(Task::new("s2", "Second step"));
7290        plan.add_step(Task::new("s3", "Third step"));
7291
7292        let (tx, mut rx) = mpsc::channel(100);
7293        let result = agent
7294            .execute_plan(&[], &plan, Some("test-session"), Some(tx))
7295            .await
7296            .unwrap();
7297
7298        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
7299        assert_eq!(result.usage.total_tokens, 45);
7300
7301        // Verify we received StepStart and StepEnd events for all 3 steps
7302        let mut step_starts = Vec::new();
7303        let mut step_ends = Vec::new();
7304        rx.close();
7305        while let Some(event) = rx.recv().await {
7306            match event {
7307                AgentEvent::StepStart { step_id, .. } => step_starts.push(step_id),
7308                AgentEvent::StepEnd {
7309                    step_id, status, ..
7310                } => {
7311                    assert_eq!(status, TaskStatus::Completed);
7312                    step_ends.push(step_id);
7313                }
7314                _ => {}
7315            }
7316        }
7317        assert_eq!(step_starts.len(), 3);
7318        assert_eq!(step_ends.len(), 3);
7319    }
7320
7321    #[tokio::test]
7322    async fn test_execute_plan_emits_task_list_snapshots() {
7323        use crate::planning::{Complexity, ExecutionPlan, Task};
7324
7325        let mock_client = Arc::new(MockLlmClient::new(vec![
7326            MockLlmClient::text_response("Step 1 done"),
7327            MockLlmClient::text_response("Step 2 done"),
7328        ]));
7329
7330        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7331        let agent = AgentLoop::new(
7332            mock_client,
7333            tool_executor,
7334            test_tool_context(),
7335            AgentConfig::default(),
7336        );
7337
7338        let mut plan = ExecutionPlan::new("Track task list", Complexity::Simple);
7339        plan.add_step(Task::new("s1", "First step"));
7340        plan.add_step(Task::new("s2", "Second step").with_dependencies(vec!["s1".to_string()]));
7341
7342        let (tx, mut rx) = mpsc::channel(100);
7343        let _ = agent
7344            .execute_plan(&[], &plan, Some("task-session"), Some(tx))
7345            .await
7346            .unwrap();
7347
7348        let mut snapshots = Vec::new();
7349        rx.close();
7350        while let Some(event) = rx.recv().await {
7351            if let AgentEvent::TaskUpdated { session_id, tasks } = event {
7352                assert_eq!(session_id, "task-session");
7353                snapshots.push(tasks);
7354            }
7355        }
7356
7357        assert!(
7358            snapshots
7359                .first()
7360                .unwrap()
7361                .iter()
7362                .all(|task| task.status == TaskStatus::Pending),
7363            "initial snapshot should expose the pending task list"
7364        );
7365        assert!(snapshots.iter().any(|tasks| tasks
7366            .iter()
7367            .any(|task| task.id == "s1" && task.status == TaskStatus::InProgress)));
7368        assert!(snapshots.iter().any(|tasks| tasks
7369            .iter()
7370            .any(|task| task.id == "s1" && task.status == TaskStatus::Completed)));
7371        assert!(snapshots
7372            .last()
7373            .unwrap()
7374            .iter()
7375            .all(|task| task.status == TaskStatus::Completed));
7376    }
7377
7378    #[tokio::test]
7379    async fn test_execute_plan_delegates_task_tool_steps() {
7380        use crate::planning::{Complexity, ExecutionPlan, Task};
7381        use crate::subagent::AgentRegistry;
7382        use crate::tools::register_task;
7383
7384        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
7385            "delegated search complete",
7386        )]));
7387        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7388        register_task(
7389            tool_executor.registry(),
7390            mock_client,
7391            Arc::new(AgentRegistry::new()),
7392            "/tmp".to_string(),
7393        );
7394        let agent = AgentLoop::new(
7395            Arc::new(MockLlmClient::new(vec![])),
7396            tool_executor,
7397            test_tool_context(),
7398            AgentConfig::default(),
7399        );
7400
7401        let mut plan = ExecutionPlan::new("Delegate a step", Complexity::Simple);
7402        plan.add_step(Task::new("s1", "Find the relevant docs").with_tool("task"));
7403
7404        let (tx, mut rx) = mpsc::channel(100);
7405        let result = agent
7406            .execute_plan(&[], &plan, Some("task-session"), Some(tx))
7407            .await
7408            .unwrap();
7409
7410        assert_eq!(result.tool_calls_count, 1);
7411        assert!(result.text.contains("delegated search complete"));
7412
7413        let mut saw_task_tool_start = false;
7414        let mut saw_completed_step = false;
7415        rx.close();
7416        while let Some(event) = rx.recv().await {
7417            match event {
7418                AgentEvent::ToolStart { name, .. } if name == "task" => {
7419                    saw_task_tool_start = true;
7420                }
7421                AgentEvent::StepEnd { status, .. } if status == TaskStatus::Completed => {
7422                    saw_completed_step = true;
7423                }
7424                _ => {}
7425            }
7426        }
7427
7428        assert!(saw_task_tool_start);
7429        assert!(saw_completed_step);
7430    }
7431
7432    #[tokio::test]
7433    async fn test_execute_plan_delegates_parallel_task_wave_once() {
7434        use crate::planning::{Complexity, ExecutionPlan, Task};
7435        use crate::subagent::AgentRegistry;
7436        use crate::tools::register_task;
7437
7438        let child_client = Arc::new(MockLlmClient::new(vec![
7439            MockLlmClient::text_response("delegated docs complete"),
7440            MockLlmClient::text_response("delegated tests complete"),
7441        ]));
7442        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7443        register_task(
7444            tool_executor.registry(),
7445            child_client,
7446            Arc::new(AgentRegistry::new()),
7447            "/tmp".to_string(),
7448        );
7449        let agent = AgentLoop::new(
7450            Arc::new(MockLlmClient::new(vec![])),
7451            tool_executor,
7452            test_tool_context(),
7453            AgentConfig::default(),
7454        );
7455
7456        let mut plan = ExecutionPlan::new("Delegate independent wave", Complexity::Medium);
7457        plan.add_step(Task::new("s1", "Find relevant docs").with_tool("task"));
7458        plan.add_step(Task::new("s2", "Run verification tests").with_tool("task"));
7459
7460        let (tx, mut rx) = mpsc::channel(100);
7461        let result = agent
7462            .execute_plan(&[], &plan, Some("parallel-task-session"), Some(tx))
7463            .await
7464            .unwrap();
7465
7466        assert_eq!(
7467            result.tool_calls_count, 1,
7468            "independent delegated wave should be collapsed into one parallel_task call"
7469        );
7470        assert!(result.text.contains("delegated docs complete"));
7471        assert!(result.text.contains("delegated tests complete"));
7472
7473        let mut parallel_task_starts = 0;
7474        let mut completed_steps = Vec::new();
7475        let mut task_snapshots = Vec::new();
7476        rx.close();
7477        while let Some(event) = rx.recv().await {
7478            match event {
7479                AgentEvent::ToolStart { name, .. } if name == "parallel_task" => {
7480                    parallel_task_starts += 1;
7481                }
7482                AgentEvent::StepEnd {
7483                    step_id,
7484                    status: TaskStatus::Completed,
7485                    ..
7486                } => completed_steps.push(step_id),
7487                AgentEvent::TaskUpdated { tasks, .. } => task_snapshots.push(tasks),
7488                _ => {}
7489            }
7490        }
7491
7492        completed_steps.sort();
7493        assert_eq!(parallel_task_starts, 1);
7494        assert_eq!(completed_steps, vec!["s1".to_string(), "s2".to_string()]);
7495        assert!(task_snapshots.iter().any(|tasks| tasks
7496            .iter()
7497            .all(|task| task.status == TaskStatus::InProgress)));
7498        assert!(task_snapshots
7499            .last()
7500            .unwrap()
7501            .iter()
7502            .all(|task| task.status == TaskStatus::Completed));
7503    }
7504
7505    #[tokio::test]
7506    async fn test_execute_plan_respects_dependencies() {
7507        use crate::planning::{Complexity, ExecutionPlan, Task};
7508
7509        // s1 and s2 are independent (wave 1), s3 depends on both (wave 2).
7510        // This requires 3 responses total.
7511        let mock_client = Arc::new(MockLlmClient::new(vec![
7512            MockLlmClient::text_response("Step 1 done"),
7513            MockLlmClient::text_response("Step 2 done"),
7514            MockLlmClient::text_response("Step 3 done"),
7515        ]));
7516
7517        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7518        let config = AgentConfig::default();
7519        let agent = AgentLoop::new(
7520            mock_client.clone(),
7521            tool_executor,
7522            test_tool_context(),
7523            config,
7524        );
7525
7526        let mut plan = ExecutionPlan::new("Test deps", Complexity::Medium);
7527        plan.add_step(Task::new("s1", "Independent A"));
7528        plan.add_step(Task::new("s2", "Independent B"));
7529        plan.add_step(
7530            Task::new("s3", "Depends on A+B")
7531                .with_dependencies(vec!["s1".to_string(), "s2".to_string()]),
7532        );
7533
7534        let (tx, mut rx) = mpsc::channel(100);
7535        let result = agent
7536            .execute_plan(&[], &plan, Some("test-session"), Some(tx))
7537            .await
7538            .unwrap();
7539
7540        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
7541        assert_eq!(result.usage.total_tokens, 45);
7542
7543        // Verify ordering: s3's StepStart must come after s1 and s2's StepEnd
7544        let mut events = Vec::new();
7545        rx.close();
7546        while let Some(event) = rx.recv().await {
7547            match &event {
7548                AgentEvent::StepStart { step_id, .. } => {
7549                    events.push(format!("start:{}", step_id));
7550                }
7551                AgentEvent::StepEnd { step_id, .. } => {
7552                    events.push(format!("end:{}", step_id));
7553                }
7554                _ => {}
7555            }
7556        }
7557
7558        // s3 start must occur after both s1 end and s2 end
7559        let s1_end = events.iter().position(|e| e == "end:s1").unwrap();
7560        let s2_end = events.iter().position(|e| e == "end:s2").unwrap();
7561        let s3_start = events.iter().position(|e| e == "start:s3").unwrap();
7562        assert!(
7563            s3_start > s1_end,
7564            "s3 started before s1 ended: {:?}",
7565            events
7566        );
7567        assert!(
7568            s3_start > s2_end,
7569            "s3 started before s2 ended: {:?}",
7570            events
7571        );
7572
7573        // Final result should reflect step 3 (last sequential step)
7574        assert!(result.text.contains("Step 3 done") || !result.text.is_empty());
7575    }
7576
7577    #[tokio::test]
7578    async fn test_execute_plan_handles_step_failure() {
7579        use crate::planning::{Complexity, ExecutionPlan, Task};
7580
7581        // s1 succeeds, s2 depends on s1 (succeeds), s3 depends on nothing (succeeds),
7582        // s4 depends on a step that will fail (s_fail).
7583        // We simulate failure by providing no responses for s_fail's execute_loop.
7584        //
7585        // Simpler approach: s1 succeeds, s2 depends on s1 (will fail because no
7586        // mock response left), s3 is independent.
7587        // Layout: s1 (independent), s3 (independent) → wave 1 parallel
7588        //         s2 depends on s1 → wave 2
7589        //         s4 depends on s2 → wave 3 (should deadlock since s2 fails)
7590        let mock_client = Arc::new(MockLlmClient::new(vec![
7591            // Wave 1: s1 and s3 execute in parallel
7592            MockLlmClient::text_response("s1 done"),
7593            MockLlmClient::text_response("s3 done"),
7594            // Wave 2: s2 executes — but we give it no response, causing failure
7595            // Actually the MockLlmClient will fail with "No more mock responses"
7596        ]));
7597
7598        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7599        let config = AgentConfig::default();
7600        let agent = AgentLoop::new(
7601            mock_client.clone(),
7602            tool_executor,
7603            test_tool_context(),
7604            config,
7605        );
7606
7607        let mut plan = ExecutionPlan::new("Test failure", Complexity::Medium);
7608        plan.add_step(Task::new("s1", "Independent step"));
7609        plan.add_step(Task::new("s2", "Depends on s1").with_dependencies(vec!["s1".to_string()]));
7610        plan.add_step(Task::new("s3", "Another independent"));
7611        plan.add_step(Task::new("s4", "Depends on s2").with_dependencies(vec!["s2".to_string()]));
7612
7613        let (tx, mut rx) = mpsc::channel(100);
7614        let _result = agent
7615            .execute_plan(&[], &plan, Some("test-session"), Some(tx))
7616            .await
7617            .unwrap();
7618
7619        // s1 and s3 should succeed (wave 1), s2 should fail (wave 2),
7620        // s4 should never execute (deadlock — dep s2 failed, not completed)
7621        let mut completed_steps = Vec::new();
7622        let mut failed_steps = Vec::new();
7623        rx.close();
7624        while let Some(event) = rx.recv().await {
7625            if let AgentEvent::StepEnd {
7626                step_id, status, ..
7627            } = event
7628            {
7629                match status {
7630                    TaskStatus::Completed => completed_steps.push(step_id),
7631                    TaskStatus::Failed => failed_steps.push(step_id),
7632                    _ => {}
7633                }
7634            }
7635        }
7636
7637        assert!(
7638            completed_steps.contains(&"s1".to_string()),
7639            "s1 should complete"
7640        );
7641        assert!(
7642            completed_steps.contains(&"s3".to_string()),
7643            "s3 should complete"
7644        );
7645        assert!(failed_steps.contains(&"s2".to_string()), "s2 should fail");
7646        // s4 should NOT appear in either list — it was never started
7647        assert!(
7648            !completed_steps.contains(&"s4".to_string()),
7649            "s4 should not complete"
7650        );
7651        assert!(
7652            !failed_steps.contains(&"s4".to_string()),
7653            "s4 should not fail (never started)"
7654        );
7655    }
7656
7657    // ========================================================================
7658    // Phase 4: Error Recovery & Resilience Tests
7659    // ========================================================================
7660
7661    #[test]
7662    fn test_agent_config_resilience_defaults() {
7663        let config = AgentConfig::default();
7664        assert_eq!(config.max_parse_retries, 2);
7665        assert_eq!(config.tool_timeout_ms, None);
7666        assert_eq!(config.circuit_breaker_threshold, 3);
7667    }
7668
7669    /// 4.1 — Parse error recovery: bails after max_parse_retries exceeded
7670    #[tokio::test]
7671    async fn test_parse_error_recovery_bails_after_threshold() {
7672        // 3 parse errors with max_parse_retries=2: count reaches 3 > 2 → bail
7673        let mock_client = Arc::new(MockLlmClient::new(vec![
7674            MockLlmClient::tool_call_response(
7675                "c1",
7676                "bash",
7677                serde_json::json!({"__parse_error": "unexpected token at position 5"}),
7678            ),
7679            MockLlmClient::tool_call_response(
7680                "c2",
7681                "bash",
7682                serde_json::json!({"__parse_error": "missing closing brace"}),
7683            ),
7684            MockLlmClient::tool_call_response(
7685                "c3",
7686                "bash",
7687                serde_json::json!({"__parse_error": "still broken"}),
7688            ),
7689            MockLlmClient::text_response("Done"), // never reached
7690        ]));
7691
7692        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7693        let config = AgentConfig {
7694            max_parse_retries: 2,
7695            ..AgentConfig::default()
7696        };
7697        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
7698        let result = agent.execute(&[], "Do something", None).await;
7699        assert!(result.is_err(), "should bail after parse error threshold");
7700        let err = result.unwrap_err().to_string();
7701        assert!(
7702            err.contains("malformed tool arguments"),
7703            "error should mention malformed tool arguments, got: {}",
7704            err
7705        );
7706    }
7707
7708    /// 4.1 — Parse error recovery: counter resets after a valid tool execution
7709    #[tokio::test]
7710    async fn test_parse_error_counter_resets_on_success() {
7711        // 2 parse errors (= max_parse_retries, not yet exceeded)
7712        // Then a valid tool call (resets counter)
7713        // Then final text — should NOT bail
7714        let mock_client = Arc::new(MockLlmClient::new(vec![
7715            MockLlmClient::tool_call_response(
7716                "c1",
7717                "bash",
7718                serde_json::json!({"__parse_error": "bad args"}),
7719            ),
7720            MockLlmClient::tool_call_response(
7721                "c2",
7722                "bash",
7723                serde_json::json!({"__parse_error": "bad args again"}),
7724            ),
7725            // Valid call — resets parse_error_count to 0
7726            MockLlmClient::tool_call_response(
7727                "c3",
7728                "bash",
7729                serde_json::json!({"command": "echo ok"}),
7730            ),
7731            MockLlmClient::text_response("All done"),
7732        ]));
7733
7734        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7735        let config = AgentConfig {
7736            max_parse_retries: 2,
7737            ..AgentConfig::default()
7738        };
7739        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
7740        let result = agent.execute(&[], "Do something", None).await;
7741        assert!(
7742            result.is_ok(),
7743            "should not bail — counter reset after successful tool, got: {:?}",
7744            result.err()
7745        );
7746        assert_eq!(result.unwrap().text, "All done");
7747    }
7748
7749    /// 4.2 — Tool timeout: slow tool produces a timeout error result; session continues
7750    #[tokio::test]
7751    async fn test_tool_timeout_produces_error_result() {
7752        let mock_client = Arc::new(MockLlmClient::new(vec![
7753            MockLlmClient::tool_call_response(
7754                "t1",
7755                "bash",
7756                serde_json::json!({"command": "sleep 10"}),
7757            ),
7758            MockLlmClient::text_response("The command timed out."),
7759        ]));
7760
7761        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7762        let config = AgentConfig {
7763            // 50ms — sleep 10 will never finish
7764            tool_timeout_ms: Some(50),
7765            ..AgentConfig::default()
7766        };
7767        let agent = AgentLoop::new(
7768            mock_client.clone(),
7769            tool_executor,
7770            test_tool_context(),
7771            config,
7772        );
7773        let result = agent.execute(&[], "Run sleep", None).await;
7774        assert!(
7775            result.is_ok(),
7776            "session should continue after tool timeout: {:?}",
7777            result.err()
7778        );
7779        assert_eq!(result.unwrap().text, "The command timed out.");
7780        // LLM called twice: initial request + response after timeout error
7781        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
7782    }
7783
7784    /// 4.2 — Tool timeout: tool that finishes before the deadline succeeds normally
7785    #[tokio::test]
7786    async fn test_tool_within_timeout_succeeds() {
7787        let mock_client = Arc::new(MockLlmClient::new(vec![
7788            MockLlmClient::tool_call_response(
7789                "t1",
7790                "bash",
7791                serde_json::json!({"command": "echo fast"}),
7792            ),
7793            MockLlmClient::text_response("Command succeeded."),
7794        ]));
7795
7796        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7797        let config = AgentConfig {
7798            tool_timeout_ms: Some(5_000), // 5 s — echo completes in <100ms
7799            ..AgentConfig::default()
7800        };
7801        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
7802        let result = agent.execute(&[], "Run something fast", None).await;
7803        assert!(
7804            result.is_ok(),
7805            "fast tool should succeed: {:?}",
7806            result.err()
7807        );
7808        assert_eq!(result.unwrap().text, "Command succeeded.");
7809    }
7810
7811    /// 4.3 — Circuit breaker: retries non-streaming LLM failures up to threshold
7812    #[tokio::test]
7813    async fn test_circuit_breaker_retries_non_streaming() {
7814        // Empty response list → every call bails with "No more mock responses"
7815        // threshold=2 → tries twice, then bails with circuit-breaker message
7816        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7817
7818        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7819        let config = AgentConfig {
7820            circuit_breaker_threshold: 2,
7821            ..AgentConfig::default()
7822        };
7823        let agent = AgentLoop::new(
7824            mock_client.clone(),
7825            tool_executor,
7826            test_tool_context(),
7827            config,
7828        );
7829        let result = agent.execute(&[], "Hello", None).await;
7830        assert!(result.is_err(), "should fail when LLM always errors");
7831        let err = result.unwrap_err().to_string();
7832        assert!(
7833            err.contains("circuit breaker"),
7834            "error should mention circuit breaker, got: {}",
7835            err
7836        );
7837        assert_eq!(
7838            mock_client.call_count.load(Ordering::SeqCst),
7839            2,
7840            "should make exactly threshold=2 LLM calls"
7841        );
7842    }
7843
7844    /// 4.3 — Circuit breaker: threshold=1 bails on the very first failure
7845    #[tokio::test]
7846    async fn test_circuit_breaker_threshold_one_no_retry() {
7847        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7848
7849        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7850        let config = AgentConfig {
7851            circuit_breaker_threshold: 1,
7852            ..AgentConfig::default()
7853        };
7854        let agent = AgentLoop::new(
7855            mock_client.clone(),
7856            tool_executor,
7857            test_tool_context(),
7858            config,
7859        );
7860        let result = agent.execute(&[], "Hello", None).await;
7861        assert!(result.is_err());
7862        assert_eq!(
7863            mock_client.call_count.load(Ordering::SeqCst),
7864            1,
7865            "with threshold=1 exactly one attempt should be made"
7866        );
7867    }
7868
7869    /// 4.3 — Circuit breaker: succeeds when LLM recovers before hitting threshold
7870    #[tokio::test]
7871    async fn test_circuit_breaker_succeeds_if_llm_recovers() {
7872        // First call fails, second call succeeds; threshold=3 — recovery within threshold
7873        struct FailOnceThenSucceed {
7874            inner: MockLlmClient,
7875            failed_once: std::sync::atomic::AtomicBool,
7876            call_count: AtomicUsize,
7877        }
7878
7879        #[async_trait::async_trait]
7880        impl LlmClient for FailOnceThenSucceed {
7881            async fn complete(
7882                &self,
7883                messages: &[Message],
7884                system: Option<&str>,
7885                tools: &[ToolDefinition],
7886            ) -> Result<LlmResponse> {
7887                self.call_count.fetch_add(1, Ordering::SeqCst);
7888                let already_failed = self
7889                    .failed_once
7890                    .swap(true, std::sync::atomic::Ordering::SeqCst);
7891                if !already_failed {
7892                    anyhow::bail!("transient network error");
7893                }
7894                self.inner.complete(messages, system, tools).await
7895            }
7896
7897            async fn complete_streaming(
7898                &self,
7899                messages: &[Message],
7900                system: Option<&str>,
7901                tools: &[ToolDefinition],
7902                cancel_token: tokio_util::sync::CancellationToken,
7903            ) -> Result<tokio::sync::mpsc::Receiver<crate::llm::StreamEvent>> {
7904                self.inner
7905                    .complete_streaming(messages, system, tools, cancel_token)
7906                    .await
7907            }
7908        }
7909
7910        let mock = Arc::new(FailOnceThenSucceed {
7911            inner: MockLlmClient::new(vec![MockLlmClient::text_response("Recovered!")]),
7912            failed_once: std::sync::atomic::AtomicBool::new(false),
7913            call_count: AtomicUsize::new(0),
7914        });
7915
7916        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7917        let config = AgentConfig {
7918            circuit_breaker_threshold: 3,
7919            ..AgentConfig::default()
7920        };
7921        let agent = AgentLoop::new(mock.clone(), tool_executor, test_tool_context(), config);
7922        let result = agent.execute(&[], "Hello", None).await;
7923        assert!(
7924            result.is_ok(),
7925            "should succeed when LLM recovers within threshold: {:?}",
7926            result.err()
7927        );
7928        assert_eq!(result.unwrap().text, "Recovered!");
7929        assert_eq!(
7930            mock.call_count.load(Ordering::SeqCst),
7931            2,
7932            "should have made exactly 2 calls (1 fail + 1 success)"
7933        );
7934    }
7935
7936    // ── Continuation detection tests ─────────────────────────────────────────
7937
7938    #[test]
7939    fn test_looks_incomplete_empty() {
7940        assert!(AgentLoop::looks_incomplete(""));
7941        assert!(AgentLoop::looks_incomplete("   "));
7942    }
7943
7944    #[test]
7945    fn test_looks_incomplete_trailing_colon() {
7946        assert!(AgentLoop::looks_incomplete("Let me check the file:"));
7947        assert!(AgentLoop::looks_incomplete("Next steps:"));
7948    }
7949
7950    #[test]
7951    fn test_looks_incomplete_ellipsis() {
7952        assert!(AgentLoop::looks_incomplete("Working on it..."));
7953        assert!(AgentLoop::looks_incomplete("Processing…"));
7954    }
7955
7956    #[test]
7957    fn test_looks_incomplete_intent_phrases() {
7958        assert!(AgentLoop::looks_incomplete(
7959            "I'll start by reading the file."
7960        ));
7961        assert!(AgentLoop::looks_incomplete(
7962            "Let me check the configuration."
7963        ));
7964        assert!(AgentLoop::looks_incomplete("I will now run the tests."));
7965        assert!(AgentLoop::looks_incomplete(
7966            "I need to update the Cargo.toml."
7967        ));
7968    }
7969
7970    #[test]
7971    fn test_looks_complete_final_answer() {
7972        // Clear final answers should NOT trigger continuation
7973        assert!(!AgentLoop::looks_incomplete(
7974            "The tests pass. All changes have been applied successfully."
7975        ));
7976        assert!(!AgentLoop::looks_incomplete(
7977            "Done. I've updated the three files and verified the build succeeds."
7978        ));
7979        assert!(!AgentLoop::looks_incomplete("42"));
7980        assert!(!AgentLoop::looks_incomplete("Yes."));
7981    }
7982
7983    #[test]
7984    fn test_looks_incomplete_multiline_complete() {
7985        let text = "Here is the summary:\n\n- Fixed the bug in agent.rs\n- All tests pass\n- Build succeeds";
7986        assert!(!AgentLoop::looks_incomplete(text));
7987    }
7988}
a3s_code_core/agent.rs

a3s_code_core/
agent.rs