a3s_code_core/
agent.rs

1//! Agent Loop Implementation
2//!
3//! The agent loop handles the core conversation cycle:
4//! 1. User sends a prompt
5//! 2. LLM generates a response (possibly with tool calls)
6//! 3. If tool calls present, execute them and send results back
7//! 4. Repeat until LLM returns without tool calls
8//!
//! This implements agentic behavior where the LLM can use tools
//! to accomplish tasks autonomously.
11
12#[cfg(feature = "ahp")]
13use crate::ahp::InjectedContext;
14use crate::context::{
15    ContextAssembler, ContextAssembly, ContextItem, ContextProvider, ContextQuery, ContextResult,
16    ContextType,
17};
18use crate::hitl::ConfirmationProvider;
19use crate::hooks::{
20    ErrorType, GenerateEndEvent, GenerateStartEvent, HookEvent, HookExecutor, HookResult,
21    IntentDetectionEvent, OnErrorEvent, PostResponseEvent, PostToolUseEvent,
22    PreContextPerceptionEvent, PrePromptEvent, PreToolUseEvent, TokenUsageInfo, ToolCallInfo,
23    ToolResultData,
24};
25use crate::llm::{LlmClient, LlmResponse, Message, TokenUsage, ToolDefinition};
26use crate::permissions::{PermissionChecker, PermissionDecision};
27use crate::planning::{AgentGoal, ExecutionPlan, LlmPlanner, PreAnalysis, Task, TaskStatus};
28use crate::prompts::{AgentStyle, PlanningMode, SystemPromptSlots};
29use crate::queue::SessionCommand;
30use crate::session_lane_queue::SessionLaneQueue;
31use crate::tools::{ToolContext, ToolExecutor, ToolStreamEvent};
32use anyhow::{Context, Result};
33use async_trait::async_trait;
34use futures::future::join_all;
35use serde::{Deserialize, Serialize};
36use serde_json::{json, Value};
37use std::sync::Arc;
38use std::time::Duration;
39use tokio::sync::mpsc;
40
/// Maximum number of tool execution rounds before stopping.
///
/// Used as the default value of `AgentConfig::max_tool_rounds`.
const MAX_TOOL_ROUNDS: usize = 50;
43
/// Result of a single parallel step execution, emitted as structured JSON
/// so the frontend can render it dynamically in the user's language.
///
/// The optional fields (`key_findings`, `error`, `data`) are left out of the
/// serialized output entirely when they are `None`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ParallelStepResult {
    /// Identifier of the step this result belongs to.
    pub step_id: String,
    /// Number of the step (presumably its position in the plan — confirm
    /// against the code that produces these results).
    pub step_number: u32,
    /// Outcome of the step, encoded as a string: `"completed"` or `"failed"`.
    pub status: String, // "completed" | "failed"
    /// Brief summary of what this step did (in the LLM's language).
    pub summary: String,
    /// Optional list of notable findings; omitted from JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub key_findings: Option<Vec<String>>,
    /// Optional error text; omitted from JSON when `None`.
    /// NOTE(review): presumably only set when `status` is `"failed"` — confirm.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
    /// Optional free-form JSON payload; omitted from JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub data: Option<Value>,
}
60
61impl ParallelStepResult {
62    /// Build the full parallel-results JSON envelope injected into history.
63    pub fn build_envelope(results: Vec<ParallelStepResult>) -> Value {
64        json!({
65            "type": "parallel_results",
66            "steps": results
67        })
68    }
69}
70
/// Internal agent loop configuration.
///
/// Defaults for every field are defined by the `Default` implementation.
#[derive(Clone)]
pub(crate) struct AgentConfig {
    /// Slot-based system prompt customization.
    ///
    /// Users can customize specific parts (role, guidelines, response style, extra)
    /// without overriding the core agentic capabilities. The default agentic core
    /// (tool usage, autonomous behavior, completion criteria) is always preserved.
    pub prompt_slots: SystemPromptSlots,
    /// Tool definitions carried by this configuration. Empty by default.
    pub tools: Vec<ToolDefinition>,
    /// Upper bound on tool execution rounds (defaults to `MAX_TOOL_ROUNDS`).
    pub max_tool_rounds: usize,
    /// Optional security provider for input taint tracking and output sanitization
    pub security_provider: Option<Arc<dyn crate::security::SecurityProvider>>,
    /// Optional permission checker for tool execution control
    pub permission_checker: Option<Arc<dyn PermissionChecker>>,
    /// Optional confirmation manager for HITL (Human-in-the-Loop)
    pub confirmation_manager: Option<Arc<dyn ConfirmationProvider>>,
    /// Context providers for augmenting prompts with external context
    pub context_providers: Vec<Arc<dyn ContextProvider>>,
    /// Planning mode — Auto (detect from message), Enabled, or Disabled.
    pub planning_mode: PlanningMode,
    /// Enable goal tracking
    pub goal_tracking: bool,
    /// Optional hook engine for firing lifecycle events (PreToolUse, PostToolUse, etc.)
    pub hook_engine: Option<Arc<dyn HookExecutor>>,
    /// Optional skill registry for tool permission enforcement
    pub skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    /// Max consecutive malformed-tool-args errors before aborting (default: 2).
    ///
    /// When the LLM returns tool arguments with `__parse_error`, the error is
    /// fed back as a tool result. After this many consecutive parse errors the
    /// loop bails instead of retrying indefinitely.
    pub max_parse_retries: u32,
    /// Per-tool execution timeout in milliseconds (`None` = no timeout).
    ///
    /// When set, each tool execution is wrapped in `tokio::time::timeout`.
    /// A timeout produces an error result sent back to the LLM rather than
    /// crashing the session.
    pub tool_timeout_ms: Option<u64>,
    /// Circuit-breaker threshold: max consecutive LLM API failures before
    /// aborting (default: 3).
    ///
    /// In non-streaming mode, transient LLM failures are retried up to this
    /// many times (with short exponential backoff) before the loop bails.
    /// In streaming mode, any failure is fatal (events cannot be replayed).
    pub circuit_breaker_threshold: u32,
    /// Max consecutive identical tool signatures before aborting (default: 3).
    ///
    /// A tool signature is the exact combination of tool name + compact JSON
    /// arguments. This prevents the agent from getting stuck repeating the same
    /// tool call in a loop, for example repeatedly fetching the same URL.
    pub duplicate_tool_call_threshold: u32,
    /// Enable auto-compaction when context usage exceeds threshold.
    /// Disabled by default.
    pub auto_compact: bool,
    /// Context usage percentage threshold to trigger auto-compaction (0.0 - 1.0).
    /// Default: 0.80 (80%).
    pub auto_compact_threshold: f32,
    /// Maximum context window size in tokens (used for auto-compact calculation).
    /// Default: 200_000.
    pub max_context_tokens: usize,
    /// Optional agent memory for auto-remember after tool execution and recall before prompts.
    pub memory: Option<Arc<crate::memory::AgentMemory>>,
    /// Inject a continuation message when the LLM stops calling tools before the
    /// task is complete. Enabled by default. Set to `false` to disable.
    ///
    /// When enabled, if the LLM produces a response with no tool calls but the
    /// response text looks like an intermediate step (not a final answer), the
    /// loop injects [`crate::prompts::CONTINUATION`] as a user message and
    /// continues for up to `max_continuation_turns` additional turns.
    pub continuation_enabled: bool,
    /// Maximum number of continuation injections per execution (default: 3).
    ///
    /// Prevents infinite loops when the LLM repeatedly stops without completing.
    pub max_continuation_turns: u32,
}
146
147impl std::fmt::Debug for AgentConfig {
148    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
149        f.debug_struct("AgentConfig")
150            .field("prompt_slots", &self.prompt_slots)
151            .field("tools", &self.tools)
152            .field("max_tool_rounds", &self.max_tool_rounds)
153            .field("security_provider", &self.security_provider.is_some())
154            .field("permission_checker", &self.permission_checker.is_some())
155            .field("confirmation_manager", &self.confirmation_manager.is_some())
156            .field("context_providers", &self.context_providers.len())
157            .field("planning_mode", &self.planning_mode)
158            .field("goal_tracking", &self.goal_tracking)
159            .field("hook_engine", &self.hook_engine.is_some())
160            .field(
161                "skill_registry",
162                &self.skill_registry.as_ref().map(|r| r.len()),
163            )
164            .field("max_parse_retries", &self.max_parse_retries)
165            .field("tool_timeout_ms", &self.tool_timeout_ms)
166            .field("circuit_breaker_threshold", &self.circuit_breaker_threshold)
167            .field(
168                "duplicate_tool_call_threshold",
169                &self.duplicate_tool_call_threshold,
170            )
171            .field("auto_compact", &self.auto_compact)
172            .field("auto_compact_threshold", &self.auto_compact_threshold)
173            .field("max_context_tokens", &self.max_context_tokens)
174            .field("continuation_enabled", &self.continuation_enabled)
175            .field("max_continuation_turns", &self.max_continuation_turns)
176            .field("memory", &self.memory.is_some())
177            .finish()
178    }
179}
180
181impl Default for AgentConfig {
182    fn default() -> Self {
183        Self {
184            prompt_slots: SystemPromptSlots::default(),
185            tools: Vec::new(), // Tools are provided by ToolExecutor
186            max_tool_rounds: MAX_TOOL_ROUNDS,
187            security_provider: None,
188            permission_checker: None,
189            confirmation_manager: None,
190            context_providers: Vec::new(),
191            planning_mode: PlanningMode::default(),
192            goal_tracking: false,
193            hook_engine: None,
194            skill_registry: Some(Arc::new(crate::skills::SkillRegistry::with_builtins())),
195            max_parse_retries: 2,
196            tool_timeout_ms: None,
197            circuit_breaker_threshold: 3,
198            duplicate_tool_call_threshold: 3,
199            auto_compact: false,
200            auto_compact_threshold: 0.80,
201            max_context_tokens: 200_000,
202            memory: None,
203            continuation_enabled: true,
204            max_continuation_turns: 3,
205        }
206    }
207}
208
/// Events emitted during agent execution
///
/// Subscribe via [`crate::AgentSession::stream`].
/// New variants may be added in minor releases — always include a wildcard arm
/// (`_ => {}`) when matching.
///
/// Serialized as an internally tagged object: the `type` field carries the
/// snake_case name given by each variant's `#[serde(rename = ...)]`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum AgentEvent {
    /// Agent started processing
    #[serde(rename = "agent_start")]
    Start { prompt: String },

    /// Runtime agent style/mode selected for the current execution.
    #[serde(rename = "agent_mode_changed")]
    AgentModeChanged {
        /// Stable UI/runtime mode label, e.g. "general", "planning", "explore".
        mode: String,
        /// Canonical built-in agent name associated with this mode.
        agent: String,
        /// Human-readable explanation of the selected style.
        description: String,
    },

    /// LLM turn started
    #[serde(rename = "turn_start")]
    TurnStart { turn: usize },

    /// Text delta from streaming
    #[serde(rename = "text_delta")]
    TextDelta { text: String },

    /// Reasoning/thinking delta from streaming (for models like kimi, deepseek)
    #[serde(rename = "reasoning_delta")]
    ReasoningDelta { text: String },

    /// Tool execution started
    #[serde(rename = "tool_start")]
    ToolStart { id: String, name: String },

    /// Tool input delta from streaming (partial JSON arguments)
    #[serde(rename = "tool_input_delta")]
    ToolInputDelta { delta: String },

    /// Tool execution completed
    #[serde(rename = "tool_end")]
    ToolEnd {
        id: String,
        name: String,
        output: String,
        exit_code: i32,
        /// Optional tool metadata; omitted from the serialized event when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        metadata: Option<serde_json::Value>,
    },

    /// Intermediate tool output (streaming delta)
    #[serde(rename = "tool_output_delta")]
    ToolOutputDelta {
        id: String,
        name: String,
        delta: String,
    },

    /// LLM turn completed
    #[serde(rename = "turn_end")]
    TurnEnd { turn: usize, usage: TokenUsage },

    /// Agent completed
    #[serde(rename = "agent_end")]
    End {
        text: String,
        usage: TokenUsage,
        verification_summary: Box<crate::verification::VerificationSummary>,
        /// Optional LLM response metadata; omitted from the serialized event when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        meta: Option<crate::llm::LlmResponseMeta>,
    },

    /// Error occurred
    #[serde(rename = "error")]
    Error { message: String },

    /// Tool execution requires confirmation (HITL)
    #[serde(rename = "confirmation_required")]
    ConfirmationRequired {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        timeout_ms: u64,
    },

    /// Confirmation received from user (HITL)
    #[serde(rename = "confirmation_received")]
    ConfirmationReceived {
        tool_id: String,
        approved: bool,
        reason: Option<String>,
    },

    /// Confirmation timed out (HITL)
    #[serde(rename = "confirmation_timeout")]
    ConfirmationTimeout {
        tool_id: String,
        action_taken: String, // "rejected" or "auto_approved"
    },

    /// External task pending (needs SDK processing)
    #[serde(rename = "external_task_pending")]
    ExternalTaskPending {
        task_id: String,
        session_id: String,
        lane: crate::queue::SessionLane,
        command_type: String,
        payload: serde_json::Value,
        timeout_ms: u64,
    },

    /// External task completed
    #[serde(rename = "external_task_completed")]
    ExternalTaskCompleted {
        task_id: String,
        session_id: String,
        success: bool,
    },

    /// Tool execution denied by permission policy
    #[serde(rename = "permission_denied")]
    PermissionDenied {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        reason: String,
    },

    /// Context resolution started
    #[serde(rename = "context_resolving")]
    ContextResolving { providers: Vec<String> },

    /// Context resolution completed
    #[serde(rename = "context_resolved")]
    ContextResolved {
        total_items: usize,
        total_tokens: usize,
    },

    // ========================================================================
    // a3s-lane integration events
    // ========================================================================
    /// Command moved to dead letter queue after exhausting retries
    #[serde(rename = "command_dead_lettered")]
    CommandDeadLettered {
        command_id: String,
        command_type: String,
        lane: String,
        error: String,
        attempts: u32,
    },

    /// Command retry attempt
    #[serde(rename = "command_retry")]
    CommandRetry {
        command_id: String,
        command_type: String,
        lane: String,
        attempt: u32,
        delay_ms: u64,
    },

    /// Queue alert (depth warning, latency alert, etc.)
    #[serde(rename = "queue_alert")]
    QueueAlert {
        level: String,
        alert_type: String,
        message: String,
    },

    // ========================================================================
    // Task tracking events
    // ========================================================================
    /// Task list updated
    #[serde(rename = "task_updated")]
    TaskUpdated {
        session_id: String,
        tasks: Vec<crate::planning::Task>,
    },

    // ========================================================================
    // Memory System events (Phase 3)
    // ========================================================================
    /// Memory stored
    #[serde(rename = "memory_stored")]
    MemoryStored {
        memory_id: String,
        memory_type: String,
        importance: f32,
        tags: Vec<String>,
    },

    /// Memory recalled
    #[serde(rename = "memory_recalled")]
    MemoryRecalled {
        memory_id: String,
        content: String,
        relevance: f32,
    },

    /// Memories searched
    #[serde(rename = "memories_searched")]
    MemoriesSearched {
        query: Option<String>,
        tags: Vec<String>,
        result_count: usize,
    },

    /// Memory cleared
    #[serde(rename = "memory_cleared")]
    MemoryCleared {
        tier: String, // "long_term", "short_term", "working"
        count: u64,
    },

    // ========================================================================
    // Subagent events
    // ========================================================================
    /// Subagent task started
    #[serde(rename = "subagent_start")]
    SubagentStart {
        /// Unique task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Parent session ID
        parent_session_id: String,
        /// Agent type (e.g., "explore", "general")
        agent: String,
        /// Short description of the task
        description: String,
    },

    /// Subagent task progress update
    #[serde(rename = "subagent_progress")]
    SubagentProgress {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Progress status message
        status: String,
        /// Additional metadata
        metadata: serde_json::Value,
    },

    /// Subagent task completed
    #[serde(rename = "subagent_end")]
    SubagentEnd {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Agent type
        agent: String,
        /// Task output/result
        output: String,
        /// Whether the task succeeded
        success: bool,
    },

    // ========================================================================
    // Planning and Goal Tracking Events (Phase 1)
    // ========================================================================
    /// Planning phase started
    #[serde(rename = "planning_start")]
    PlanningStart { prompt: String },

    /// Planning phase completed
    #[serde(rename = "planning_end")]
    PlanningEnd {
        plan: ExecutionPlan,
        estimated_steps: usize,
    },

    /// Step execution started
    #[serde(rename = "step_start")]
    StepStart {
        step_id: String,
        description: String,
        step_number: usize,
        total_steps: usize,
    },

    /// Step execution completed
    #[serde(rename = "step_end")]
    StepEnd {
        step_id: String,
        status: TaskStatus,
        step_number: usize,
        total_steps: usize,
    },

    /// Goal extracted from prompt
    #[serde(rename = "goal_extracted")]
    GoalExtracted { goal: AgentGoal },

    /// Goal progress update
    #[serde(rename = "goal_progress")]
    GoalProgress {
        goal: String,
        progress: f32,
        completed_steps: usize,
        total_steps: usize,
    },

    /// Goal achieved
    #[serde(rename = "goal_achieved")]
    GoalAchieved {
        goal: String,
        total_steps: usize,
        duration_ms: i64,
    },

    // ========================================================================
    // Context Compaction events
    // ========================================================================
    /// Context automatically compacted due to high usage
    #[serde(rename = "context_compacted")]
    ContextCompacted {
        session_id: String,
        before_messages: usize,
        after_messages: usize,
        percent_before: f32,
    },

    // ========================================================================
    // Persistence events
    // ========================================================================
    /// Session persistence failed — SDK clients should handle this
    #[serde(rename = "persistence_failed")]
    PersistenceFailed {
        session_id: String,
        operation: String,
        error: String,
    },

    // ========================================================================
    // Side question (btw)
    // ========================================================================
    /// Ephemeral side question answered.
    ///
    /// Emitted by [`crate::agent_api::AgentSession::btw()`] in streaming mode.
    /// The answer is never added to conversation history.
    #[serde(rename = "btw_answer")]
    BtwAnswer {
        question: String,
        answer: String,
        usage: TokenUsage,
    },
}
565
/// Result of agent execution
#[derive(Debug, Clone)]
pub struct AgentResult {
    /// Response text of the execution.
    /// NOTE(review): presumably the final assistant text — confirm against the loop.
    pub text: String,
    /// Conversation messages associated with the execution.
    /// NOTE(review): presumably the full history after the run — confirm.
    pub messages: Vec<Message>,
    /// Token usage reported for the execution.
    pub usage: TokenUsage,
    /// Number of tool calls counted during the execution.
    pub tool_calls_count: usize,
    /// Verification reports; the input to `verification_summary()`.
    pub verification_reports: Vec<crate::verification::VerificationReport>,
}
575
576impl AgentResult {
577    pub fn verification_summary(&self) -> crate::verification::VerificationSummary {
578        crate::verification::VerificationSummary::from_reports(&self.verification_reports)
579    }
580
581    pub fn verification_summary_text(&self) -> String {
582        crate::verification::format_verification_summary(&self.verification_summary())
583    }
584
585    pub fn has_pending_verification(&self) -> bool {
586        matches!(
587            self.verification_summary().status,
588            crate::verification::VerificationStatus::NeedsReview
589        )
590    }
591}
592
593// ============================================================================
594// ToolCommand — bridges ToolExecutor to SessionCommand for queue submission
595// ============================================================================
596
/// Adapter that implements `SessionCommand` for tool execution via the queue.
///
/// Wraps a `ToolExecutor` call so it can be submitted to `SessionLaneQueue`.
pub struct ToolCommand {
    /// Executor that actually runs the tool.
    tool_executor: Arc<ToolExecutor>,
    /// Name of the tool to run; also reported as the command type.
    tool_name: String,
    /// JSON arguments passed to the tool; also reported as the command payload.
    tool_args: Value,
    /// Context handed to the executor together with the arguments.
    tool_context: ToolContext,
    /// Optional skill registry; when present, its instruction skills can
    /// restrict which tools may be executed.
    skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
}
607
608impl ToolCommand {
609    /// Create a new ToolCommand
610    pub fn new(
611        tool_executor: Arc<ToolExecutor>,
612        tool_name: String,
613        tool_args: Value,
614        tool_context: ToolContext,
615        skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
616    ) -> Self {
617        Self {
618            tool_executor,
619            tool_name,
620            tool_args,
621            tool_context,
622            skill_registry,
623        }
624    }
625}
626
627#[async_trait]
628impl SessionCommand for ToolCommand {
629    async fn execute(&self) -> Result<Value> {
630        // Check skill-based tool permissions
631        if let Some(registry) = &self.skill_registry {
632            let instruction_skills = registry.by_kind(crate::skills::SkillKind::Instruction);
633
634            // If there are instruction skills with tool restrictions, check permissions
635            let has_restrictions = instruction_skills.iter().any(|s| s.allowed_tools.is_some());
636
637            if has_restrictions {
638                let mut allowed = false;
639
640                for skill in &instruction_skills {
641                    if skill.is_tool_allowed(&self.tool_name) {
642                        allowed = true;
643                        break;
644                    }
645                }
646
647                if !allowed {
648                    return Err(anyhow::anyhow!(
649                        "Tool '{}' is not allowed by any active skill. Active skills restrict tools to their allowed-tools lists.",
650                        self.tool_name
651                    ));
652                }
653            }
654        }
655
656        // Execute the tool
657        let result = self
658            .tool_executor
659            .execute_with_context(&self.tool_name, &self.tool_args, &self.tool_context)
660            .await?;
661        Ok(serde_json::json!({
662            "output": result.output,
663            "exit_code": result.exit_code,
664            "metadata": result.metadata,
665        }))
666    }
667
668    fn command_type(&self) -> &str {
669        &self.tool_name
670    }
671
672    fn payload(&self) -> Value {
673        self.tool_args.clone()
674    }
675}
676
677// ============================================================================
678// AgentLoop
679// ============================================================================
680
/// Internal agent loop executor.
///
/// Holds the collaborators the loop works with: the LLM client, the tool
/// executor with its context, the loop configuration and an optional queue.
#[derive(Clone)]
pub(crate) struct AgentLoop {
    /// Client used to talk to the LLM.
    llm_client: Arc<dyn LlmClient>,
    /// Executor used to run tools.
    tool_executor: Arc<ToolExecutor>,
    /// Context passed along with tool executions.
    tool_context: ToolContext,
    /// Loop configuration (limits, providers, feature switches).
    config: AgentConfig,
    /// Optional lane queue for priority-based tool execution
    command_queue: Option<Arc<SessionLaneQueue>>,
}
691
692// ============================================================================
693// Intent Detection Helpers (for AHP Context Perception)
694// ============================================================================
695
/// Extract a target name from the prompt (e.g., function name, file path).
///
/// Heuristics, tried in order:
///
/// 1. The first span enclosed in double quotes, then single quotes, then
///    backticks. A delimiter that is opened but never closed is ignored and
///    the next delimiter kind is tried.
/// 2. If the prompt has more than two whitespace-separated words, the first
///    word longer than three bytes that is not a common question/filler word.
///    The comparison ignores ASCII case, so a capitalised leading "Where",
///    "What" or "Find" is skipped instead of being returned as the target.
///
/// Returns an empty string when nothing matches. `_patterns` is currently
/// unused and kept only for signature compatibility.
fn extract_target_name_from_prompt(prompt: &str, _patterns: &[&str]) -> String {
    // Words that never name a target.
    const FILLER_WORDS: [&str; 7] = ["where", "what", "find", "the", "how", "is", "are"];

    // Text between the first `delim` and the next one, if both exist.
    fn delimited(text: &str, delim: char) -> Option<&str> {
        let start = text.find(delim)? + delim.len_utf8();
        let len = text[start..].find(delim)?;
        Some(&text[start..start + len])
    }

    // Explicitly quoted text wins over any word-based guess.
    if let Some(quoted) = ['"', '\'', '`']
        .iter()
        .find_map(|&delim| delimited(prompt, delim))
    {
        return quoted.to_string();
    }

    // Very short prompts carry too little signal to guess from.
    if prompt.split_whitespace().count() <= 2 {
        return String::new();
    }

    prompt
        .split_whitespace()
        .find(|word| {
            word.len() > 3
                && !FILLER_WORDS
                    .iter()
                    .any(|filler| word.eq_ignore_ascii_case(filler))
        })
        .map(|word| word.to_string())
        .unwrap_or_default()
}
735
/// Detect the domain from prompt keywords.
///
/// The prompt is lower-cased and checked against keyword groups in a fixed
/// priority order: rust, javascript, python, go, java, docker, kubernetes,
/// database, api, security, testing. The first group that matches wins;
/// `"general"` is returned when none does.
///
/// Keywords are matched as substrings, except for the Go language keyword:
/// `go` / `golang` must appear as a whole word. A plain substring test for
/// "go" also fires on words such as "google", "algorithm" or "mongodb" and,
/// because of the priority order, would hide every later domain.
fn detect_domain_from_prompt(prompt: &str) -> String {
    // True when any of `needles` occurs anywhere in `haystack`.
    fn contains_any(haystack: &str, needles: &[&str]) -> bool {
        needles.iter().any(|needle| haystack.contains(*needle))
    }

    // True when any of `words` occurs as a whole alphanumeric token.
    fn has_word(haystack: &str, words: &[&str]) -> bool {
        haystack
            .split(|c: char| !c.is_alphanumeric())
            .any(|token| words.iter().any(|word| *word == token))
    }

    let lower = prompt.to_lowercase();

    let domain = if contains_any(&lower, &["rust", "cargo", ".rs"]) {
        "rust"
    } else if contains_any(&lower, &["javascript", "typescript", "node", ".js", ".ts"]) {
        "javascript"
    } else if contains_any(&lower, &["python", ".py"]) {
        "python"
    } else if has_word(&lower, &["go", "golang"]) || lower.contains(".go") {
        "go"
    } else if contains_any(&lower, &["java", ".java"]) {
        "java"
    } else if contains_any(&lower, &["docker", "container"]) {
        "docker"
    } else if contains_any(&lower, &["kubernetes", "k8s"]) {
        "kubernetes"
    } else if contains_any(&lower, &["sql", "database", "postgres", "mysql"]) {
        "database"
    } else if contains_any(&lower, &["api", "rest", "grpc"]) {
        "api"
    } else if contains_any(&lower, &["auth", "login", "password", "token"]) {
        "security"
    } else if contains_any(&lower, &["test", "spec", "mock"]) {
        "testing"
    } else {
        "general"
    };

    domain.to_string()
}
779
/// Result from IntentDetection harness
///
/// Consumed by `build_pre_context_perception_from_intent`, which copies the
/// intent and fills in any missing target hints from the prompt.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IntentDetectionResult {
    /// Detected intent: "locate" | "understand" | "retrieve" | "explore" | "reason" | "validate" | "compare" | "track"
    pub detected_intent: String,
    /// Confidence score 0.0 - 1.0
    pub confidence: f32,
    /// Optional target hints from the harness; omitted from the serialized
    /// output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_hints: Option<TargetHints>,
}
791
/// Target hints from IntentDetection harness
///
/// Every field is optional and is omitted from the serialized output when
/// `None`. `build_pre_context_perception_from_intent` substitutes a fallback
/// for each missing hint.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TargetHints {
    /// Kind of target; the fallback when absent is `"unknown"`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_type: Option<String>,
    /// Name of the target; when absent it is extracted from the prompt text.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub target_name: Option<String>,
    /// Domain of the request; when absent it is detected from prompt keywords.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub domain: Option<String>,
}
802
/// Detect language hint from prompt characters.
///
/// Scans the prompt for characters from a few Unicode blocks and returns the
/// code of the first script, in priority order, that has at least one
/// character present:
///
/// 1. `"ja"` — Hiragana or Katakana
/// 2. `"zh"` — CJK Unified Ideographs
/// 3. `"ko"` — Hangul Syllables
/// 4. `"ar"` — Arabic
/// 5. `"ru"` — Cyrillic
///
/// Kana is tested before the ideograph block on purpose: Japanese text
/// routinely mixes kanji (which live in the CJK Unified Ideographs block) with
/// kana, so testing ideographs first would label such prompts as Chinese.
///
/// Returns `None` when no character from these blocks is found (including for
/// an empty prompt).
fn detect_language_hint(prompt: &str) -> Option<String> {
    // (language code, inclusive code-point ranges), in priority order.
    const SCRIPTS: [(&str, &[(char, char)]); 5] = [
        ("ja", &[('\u{3040}', '\u{309f}'), ('\u{30a0}', '\u{30ff}')]),
        ("zh", &[('\u{4e00}', '\u{9fff}')]),
        ("ko", &[('\u{ac00}', '\u{d7af}')]),
        ("ar", &[('\u{0600}', '\u{06ff}')]),
        // The whole Cyrillic block is reported as Russian.
        ("ru", &[('\u{0400}', '\u{04ff}')]),
    ];

    for (code, ranges) in SCRIPTS.iter() {
        let present = prompt
            .chars()
            .any(|c| ranges.iter().any(|(lo, hi)| (*lo..=*hi).contains(&c)));
        if present {
            return Some((*code).to_string());
        }
    }

    None
}
842
843/// Build PreContextPerceptionEvent from IntentDetection result.
844fn build_pre_context_perception_from_intent(
845    result: IntentDetectionResult,
846    prompt: &str,
847    session_id: &str,
848    workspace: &str,
849) -> PreContextPerceptionEvent {
850    let target_hints = result.target_hints;
851    PreContextPerceptionEvent {
852        session_id: session_id.to_string(),
853        intent: result.detected_intent,
854        target_type: target_hints
855            .as_ref()
856            .and_then(|h| h.target_type.clone())
857            .unwrap_or_else(|| "unknown".to_string()),
858        target_name: target_hints
859            .as_ref()
860            .and_then(|h| h.target_name.clone())
861            .unwrap_or_else(|| extract_target_name_from_prompt(prompt, &[])),
862        domain: target_hints
863            .as_ref()
864            .and_then(|h| h.domain.clone())
865            .unwrap_or_else(|| detect_domain_from_prompt(prompt)),
866        query: Some(prompt.to_string()),
867        working_directory: workspace.to_string(),
868        urgency: "normal".to_string(),
869    }
870}
871
872/// Rough token estimation (~4 chars per token for English/code).
873#[cfg(feature = "ahp")]
///
/// Divides the UTF-8 byte length of `text` by four (integer division) and
/// never returns less than 1, so even an empty string counts as one token.
fn estimate_tokens(text: &str) -> usize {
    let quarter_of_bytes = text.len() / 4;
    std::cmp::max(quarter_of_bytes, 1)
}
877
/// Wrap `items` in a [`ContextResult`] attributed to the `"ahp_harness"` provider.
///
/// Returns `None` for an empty list. Otherwise `total_tokens` is the sum of the
/// items' `token_count` values and `truncated` is `false`.
#[cfg(feature = "ahp")]
fn ahp_context_result(items: Vec<ContextItem>) -> Option<ContextResult> {
    (!items.is_empty()).then(|| {
        // Sum before `items` is moved into the result.
        let total_tokens = items.iter().map(|item| item.token_count).sum();
        ContextResult {
            items,
            total_tokens,
            provider: "ahp_harness".to_string(),
            truncated: false,
        }
    })
}
892
/// Convert harness-injected context into [`ContextResult`]s.
///
/// Up to five results are produced, in this order: facts, file snippets, the
/// project summary, knowledge entries, and suggestions. A category that is
/// absent or empty contributes nothing. Every item is a
/// [`ContextType::Resource`] with a freshly generated UUID as its id, a token
/// count from `estimate_tokens`, and fixed per-category priority / trust /
/// freshness / relevance scores (facts and file snippets take part of their
/// scores from the injected data).
#[cfg(feature = "ahp")]
fn injected_context_to_results(injected: InjectedContext) -> Vec<ContextResult> {
    let new_id = || uuid::Uuid::new_v4().to_string();
    let mut results = Vec::new();

    // Facts: trust and relevance both come from the fact's own confidence.
    let mut fact_items = Vec::new();
    for fact in injected.facts {
        let token_count = estimate_tokens(&fact.content);
        let item = ContextItem::new(new_id(), ContextType::Resource, fact.content)
            .with_source(fact.source)
            .with_provenance("ahp_fact")
            .with_priority(0.75)
            .with_trust(fact.confidence)
            .with_freshness(0.85)
            .with_relevance(fact.confidence)
            .with_token_count(token_count);
        fact_items.push(item);
    }
    // `ahp_context_result` yields `None` for an empty list, so `extend` adds 0 or 1 entries.
    results.extend(ahp_context_result(fact_items));

    // File snippets: relevance comes from the harness-provided score.
    if let Some(files) = injected.file_contents {
        let mut file_items = Vec::new();
        for file in files {
            let token_count = estimate_tokens(&file.snippet);
            let item = ContextItem::new(new_id(), ContextType::Resource, file.snippet)
                .with_source(file.path)
                .with_provenance("ahp_file_snippet")
                .with_priority(0.8)
                .with_trust(0.8)
                .with_freshness(0.8)
                .with_relevance(file.relevance_score)
                .with_token_count(token_count);
            file_items.push(item);
        }
        results.extend(ahp_context_result(file_items));
    }

    // Project summary: rendered as one newline-separated text item.
    if let Some(summary) = injected.project_summary {
        let mut lines = vec![
            format!("Project: {}", summary.project_name),
            summary.structure_description,
        ];
        lines.extend(
            summary
                .language
                .map(|language| format!("Language: {language}")),
        );
        lines.extend(
            summary
                .key_files
                .filter(|files| !files.is_empty())
                .map(|files| format!("Key files: {}", files.join(", "))),
        );
        let content = lines.join("\n");
        let token_count = estimate_tokens(&content);
        let item = ContextItem::new(new_id(), ContextType::Resource, content)
            .with_source("ahp://project-summary")
            .with_provenance("ahp_project_summary")
            .with_priority(0.7)
            .with_trust(0.75)
            .with_freshness(0.8)
            .with_relevance(0.9)
            .with_token_count(token_count);
        results.extend(ahp_context_result(vec![item]));
    }

    // Knowledge entries: one item per entry.
    if let Some(entries) = injected.knowledge {
        let mut knowledge_items = Vec::new();
        for content in entries {
            let token_count = estimate_tokens(&content);
            let item = ContextItem::new(new_id(), ContextType::Resource, content)
                .with_source("ahp://knowledge")
                .with_provenance("ahp_knowledge")
                .with_priority(0.55)
                .with_trust(0.65)
                .with_freshness(0.6)
                .with_relevance(0.8)
                .with_token_count(token_count);
            knowledge_items.push(item);
        }
        results.extend(ahp_context_result(knowledge_items));
    }

    // Suggestions: collapsed into a single bulleted item; skipped when the list is empty.
    if let Some(suggestions) = injected.suggestions.filter(|items| !items.is_empty()) {
        let content = format!("Harness suggestions:\n- {}", suggestions.join("\n- "));
        let token_count = estimate_tokens(&content);
        let item = ContextItem::new(new_id(), ContextType::Resource, content)
            .with_source("ahp://suggestions")
            .with_provenance("ahp_suggestions")
            .with_priority(0.45)
            .with_trust(0.6)
            .with_freshness(0.8)
            .with_relevance(0.7)
            .with_token_count(token_count);
        results.extend(ahp_context_result(vec![item]));
    }

    results
}
1020
1021impl AgentLoop {
1022    pub(crate) fn new(
1023        llm_client: Arc<dyn LlmClient>,
1024        tool_executor: Arc<ToolExecutor>,
1025        tool_context: ToolContext,
1026        config: AgentConfig,
1027    ) -> Self {
1028        Self {
1029            llm_client,
1030            tool_executor,
1031            tool_context,
1032            config,
1033            command_queue: None,
1034        }
1035    }
1036
1037    /// Set the lane queue for priority-based tool execution.
1038    ///
1039    /// When set, tools are routed through the lane queue which supports
1040    /// External task handling for multi-machine parallel processing.
1041    pub fn with_queue(mut self, queue: Arc<SessionLaneQueue>) -> Self {
1042        self.command_queue = Some(queue);
1043        self
1044    }
1045
1046    /// Track a tool call result in telemetry.
1047    fn track_tool_result(&self, tool_name: &str, args: &serde_json::Value, exit_code: i32) {
1048        let _ = (tool_name, args, exit_code);
1049    }
1050
1051    fn should_run_pre_analysis(&self) -> bool {
1052        match self.config.planning_mode {
1053            PlanningMode::Disabled => false,
1054            PlanningMode::Enabled => true,
1055            PlanningMode::Auto => true,
1056        }
1057    }
1058
1059    async fn emit_task_updated(
1060        &self,
1061        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1062        session_id: &str,
1063        plan: &ExecutionPlan,
1064    ) {
1065        if let Some(tx) = event_tx {
1066            tx.send(AgentEvent::TaskUpdated {
1067                session_id: session_id.to_string(),
1068                tasks: plan.steps.clone(),
1069            })
1070            .await
1071            .ok();
1072        }
1073    }
1074
1075    fn normalized_plan_tool(step: &Task) -> Option<&str> {
1076        step.tool
1077            .as_deref()
1078            .map(str::trim)
1079            .filter(|tool| !tool.is_empty())
1080    }
1081
1082    fn should_delegate_plan_step(step: &Task) -> bool {
1083        matches!(
1084            Self::normalized_plan_tool(step),
1085            Some("task") | Some("parallel_task")
1086        )
1087    }
1088
1089    fn delegated_agent_for_step(step: &Task) -> &'static str {
1090        let text = format!(
1091            "{}\n{}",
1092            step.content,
1093            step.success_criteria.as_deref().unwrap_or_default()
1094        )
1095        .to_lowercase();
1096
1097        if text.contains("review")
1098            || text.contains("code review")
1099            || text.contains("regression")
1100            || text.contains("审查")
1101            || text.contains("评审")
1102            || text.contains("回归")
1103        {
1104            "review"
1105        } else if text.contains("verify")
1106            || text.contains("verification")
1107            || text.contains("validate")
1108            || text.contains("test")
1109            || text.contains("release")
1110            || text.contains("smoke")
1111            || text.contains("验证")
1112            || text.contains("测试")
1113            || text.contains("发布")
1114        {
1115            "verification"
1116        } else if text.contains("plan")
1117            || text.contains("design")
1118            || text.contains("architecture")
1119            || text.contains("规划")
1120            || text.contains("设计")
1121            || text.contains("架构")
1122        {
1123            "plan"
1124        } else if text.contains("explore")
1125            || text.contains("find")
1126            || text.contains("search")
1127            || text.contains("locate")
1128            || text.contains("inspect")
1129            || text.contains("查找")
1130            || text.contains("搜索")
1131            || text.contains("定位")
1132            || text.contains("探索")
1133            || text.contains("检查")
1134        {
1135            "explore"
1136        } else {
1137            "general"
1138        }
1139    }
1140
1141    fn delegated_prompt_for_step(step: &Task, step_number: usize, total_steps: usize) -> String {
1142        let mut prompt = format!(
1143            "Execute plan step {}/{}.\n\nTask:\n{}\n",
1144            step_number, total_steps, step.content
1145        );
1146        if let Some(criteria) = step
1147            .success_criteria
1148            .as_deref()
1149            .filter(|s| !s.trim().is_empty())
1150        {
1151            prompt.push_str("\nSuccess criteria:\n");
1152            prompt.push_str(criteria);
1153            prompt.push('\n');
1154        }
1155        prompt.push_str("\nReturn a compact result with summary, evidence, risks, and confidence.");
1156        prompt
1157    }
1158
1159    fn delegated_task_args(step: &Task, step_number: usize, total_steps: usize) -> Value {
1160        json!({
1161            "agent": Self::delegated_agent_for_step(step),
1162            "description": step.content,
1163            "prompt": Self::delegated_prompt_for_step(step, step_number, total_steps),
1164        })
1165    }
1166
1167    fn parallel_delegated_task_args(steps: &[(Task, usize)], total_steps: usize) -> Value {
1168        let tasks = steps
1169            .iter()
1170            .map(|(step, step_number)| Self::delegated_task_args(step, *step_number, total_steps))
1171            .collect::<Vec<_>>();
1172        json!({ "tasks": tasks })
1173    }
1174
1175    fn tool_context_for_plan(&self, session_id: Option<&str>) -> ToolContext {
1176        let mut ctx = self.tool_context.clone();
1177        if ctx.session_id.is_none() {
1178            if let Some(session_id) = session_id.filter(|id| !id.is_empty()) {
1179                ctx = ctx.with_session_id(session_id);
1180            }
1181        }
1182        ctx
1183    }
1184
1185    async fn execute_delegated_plan_tool(
1186        &self,
1187        tool_name: &str,
1188        args: &Value,
1189        session_id: Option<&str>,
1190        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1191    ) -> (String, i32, bool, Option<Value>) {
1192        let call_id = format!("plan-{}-{}", tool_name, uuid::Uuid::new_v4());
1193        if let Some(tx) = event_tx {
1194            tx.send(AgentEvent::ToolStart {
1195                id: call_id.clone(),
1196                name: tool_name.to_string(),
1197            })
1198            .await
1199            .ok();
1200        }
1201
1202        let ctx = self.tool_context_for_plan(session_id);
1203        let (output, exit_code, is_error, metadata, _) =
1204            Self::tool_result_to_tuple(self.execute_tool_timed(tool_name, args, &ctx).await);
1205
1206        if let Some(tx) = event_tx {
1207            tx.send(AgentEvent::ToolEnd {
1208                id: call_id,
1209                name: tool_name.to_string(),
1210                output: output.clone(),
1211                exit_code,
1212                metadata: metadata.clone(),
1213            })
1214            .await
1215            .ok();
1216        }
1217
1218        (output, exit_code, is_error, metadata)
1219    }
1220
1221    /// Execute a tool, applying the configured timeout if set.
1222    ///
1223    /// On timeout, returns an error describing which tool timed out and after
1224    /// how many milliseconds. The caller converts this to a tool-result error
1225    /// message that is fed back to the LLM.
1226    async fn execute_tool_timed(
1227        &self,
1228        name: &str,
1229        args: &serde_json::Value,
1230        ctx: &ToolContext,
1231    ) -> anyhow::Result<crate::tools::ToolResult> {
1232        let fut = self.tool_executor.execute_with_context(name, args, ctx);
1233        if let Some(timeout_ms) = self.config.tool_timeout_ms {
1234            match tokio::time::timeout(Duration::from_millis(timeout_ms), fut).await {
1235                Ok(result) => result,
1236                Err(_) => Err(anyhow::anyhow!(
1237                    "Tool '{}' timed out after {}ms",
1238                    name,
1239                    timeout_ms
1240                )),
1241            }
1242        } else {
1243            fut.await
1244        }
1245    }
1246
1247    /// Convert a tool execution result into the (output, exit_code, is_error, metadata, images) tuple.
1248    fn tool_result_to_tuple(
1249        result: anyhow::Result<crate::tools::ToolResult>,
1250    ) -> (
1251        String,
1252        i32,
1253        bool,
1254        Option<serde_json::Value>,
1255        Vec<crate::llm::Attachment>,
1256    ) {
1257        match result {
1258            Ok(r) => (
1259                r.output,
1260                r.exit_code,
1261                r.exit_code != 0,
1262                r.metadata,
1263                r.images,
1264            ),
1265            Err(e) => {
1266                let msg = e.to_string();
1267                // Classify the error so the LLM knows whether retrying makes sense.
1268                let hint = if Self::is_transient_error(&msg) {
1269                    " [transient — you may retry this tool call]"
1270                } else {
1271                    " [permanent — do not retry without changing the arguments]"
1272                };
1273                (
1274                    format!("Tool execution error: {}{}", msg, hint),
1275                    1,
1276                    true,
1277                    None,
1278                    Vec::new(),
1279                )
1280            }
1281        }
1282    }
1283
1284    fn collect_verification_report(
1285        reports: &mut Vec<crate::verification::VerificationReport>,
1286        metadata: &Option<serde_json::Value>,
1287    ) {
1288        let Some(metadata) = metadata else {
1289            return;
1290        };
1291        let Some(report) = metadata.get("verification_report") else {
1292            return;
1293        };
1294
1295        match serde_json::from_value::<crate::verification::VerificationReport>(report.clone()) {
1296            Ok(report) => reports.push(report),
1297            Err(err) => tracing::warn!(
1298                error = %err,
1299                "Ignoring malformed verification_report tool metadata"
1300            ),
1301        }
1302    }
1303
1304    /// Inspect the workspace for well-known project marker files and return a short
1305    /// `## Project Context` section that the agent can use without any manual configuration.
1306    /// Returns an empty string when the workspace type cannot be determined.
1307    fn detect_project_hint(workspace: &std::path::Path) -> String {
1308        struct Marker {
1309            file: &'static str,
1310            lang: &'static str,
1311            tip: &'static str,
1312        }
1313
1314        let markers = [
1315            Marker {
1316                file: "Cargo.toml",
1317                lang: "Rust",
1318                tip: "Use `cargo build`, `cargo test`, `cargo clippy`, and `cargo fmt`. \
1319                  Prefer `anyhow` / `thiserror` for error handling. \
1320                  Follow the Microsoft Rust Guidelines (no panics in library code, \
1321                  async-first with Tokio).",
1322            },
1323            Marker {
1324                file: "package.json",
1325                lang: "Node.js / TypeScript",
1326                tip: "Check `package.json` for the package manager (npm/yarn/pnpm/bun) \
1327                  and available scripts. Prefer TypeScript with strict mode. \
1328                  Use ESM imports unless the project is CommonJS.",
1329            },
1330            Marker {
1331                file: "pyproject.toml",
1332                lang: "Python",
1333                tip: "Use the package manager declared in `pyproject.toml` \
1334                  (uv, poetry, hatch, etc.). Prefer type hints and async/await for I/O.",
1335            },
1336            Marker {
1337                file: "setup.py",
1338                lang: "Python",
1339                tip: "Legacy Python project. Prefer type hints and async/await for I/O.",
1340            },
1341            Marker {
1342                file: "requirements.txt",
1343                lang: "Python",
1344                tip: "Python project with pip-style dependencies. \
1345                  Prefer type hints and async/await for I/O.",
1346            },
1347            Marker {
1348                file: "go.mod",
1349                lang: "Go",
1350                tip: "Use `go build ./...` and `go test ./...`. \
1351                  Follow standard Go project layout. Use `gofmt` for formatting.",
1352            },
1353            Marker {
1354                file: "pom.xml",
1355                lang: "Java / Maven",
1356                tip: "Use `mvn compile`, `mvn test`, `mvn package`. \
1357                  Follow standard Maven project structure.",
1358            },
1359            Marker {
1360                file: "build.gradle",
1361                lang: "Java / Gradle",
1362                tip: "Use `./gradlew build` and `./gradlew test`. \
1363                  Follow standard Gradle project structure.",
1364            },
1365            Marker {
1366                file: "build.gradle.kts",
1367                lang: "Kotlin / Gradle",
1368                tip: "Use `./gradlew build` and `./gradlew test`. \
1369                  Prefer Kotlin coroutines for async work.",
1370            },
1371            Marker {
1372                file: "CMakeLists.txt",
1373                lang: "C / C++",
1374                tip: "Use `cmake -B build && cmake --build build`. \
1375                  Check for `compile_commands.json` for IDE tooling.",
1376            },
1377            Marker {
1378                file: "Makefile",
1379                lang: "C / C++ (or generic)",
1380                tip: "Use `make` or `make <target>`. \
1381                  Check available targets with `make help` or by reading the Makefile.",
1382            },
1383        ];
1384
1385        // Check for C# / .NET — no single fixed filename, so glob for *.csproj / *.sln
1386        let is_dotnet = workspace.join("*.csproj").exists() || {
1387            // Fast check: look for any .csproj or .sln in the workspace root
1388            std::fs::read_dir(workspace)
1389                .map(|entries| {
1390                    entries.flatten().any(|e| {
1391                        let name = e.file_name();
1392                        let s = name.to_string_lossy();
1393                        s.ends_with(".csproj") || s.ends_with(".sln")
1394                    })
1395                })
1396                .unwrap_or(false)
1397        };
1398
1399        if is_dotnet {
1400            return "## Project Context\n\nThis is a **C# / .NET** project. \
1401             Use `dotnet build`, `dotnet test`, and `dotnet run`. \
1402             Follow C# coding conventions and async/await patterns."
1403                .to_string();
1404        }
1405
1406        for marker in &markers {
1407            if workspace.join(marker.file).exists() {
1408                return format!(
1409                    "## Project Context\n\nThis is a **{}** project. {}",
1410                    marker.lang, marker.tip
1411                );
1412            }
1413        }
1414
1415        String::new()
1416    }
1417
1418    /// Returns `true` for errors that are likely transient (network, timeout, I/O contention).
1419    /// Used to annotate tool error messages so the LLM knows whether retrying is safe.
1420    fn is_transient_error(msg: &str) -> bool {
1421        let lower = msg.to_lowercase();
1422        lower.contains("timeout")
1423        || lower.contains("timed out")
1424        || lower.contains("connection refused")
1425        || lower.contains("connection reset")
1426        || lower.contains("broken pipe")
1427        || lower.contains("temporarily unavailable")
1428        || lower.contains("resource temporarily unavailable")
1429        || lower.contains("os error 11")  // EAGAIN
1430        || lower.contains("os error 35")  // EAGAIN on macOS
1431        || lower.contains("rate limit")
1432        || lower.contains("too many requests")
1433        || lower.contains("service unavailable")
1434        || lower.contains("network unreachable")
1435    }
1436
1437    /// Returns `true` when a tool writes a file and is safe to run concurrently with other
1438    /// independent writes (no ordering dependencies, no side-channel output).
1439    fn is_parallel_safe_write(name: &str, _args: &serde_json::Value) -> bool {
1440        matches!(
1441            name,
1442            "write_file" | "edit_file" | "create_file" | "append_to_file" | "replace_in_file"
1443        )
1444    }
1445
1446    /// Extract the target file path from write-tool arguments so we can check for conflicts.
1447    fn extract_write_path(args: &serde_json::Value) -> Option<String> {
1448        // write_file / create_file / append_to_file / replace_in_file use "path"
1449        // edit_file uses "path" as well
1450        args.get("path")
1451            .and_then(|v| v.as_str())
1452            .map(|s| s.to_string())
1453    }
1454
1455    /// Execute a tool through the lane queue (if configured) or directly.
1456    async fn execute_tool_queued_or_direct(
1457        &self,
1458        name: &str,
1459        args: &serde_json::Value,
1460        ctx: &ToolContext,
1461    ) -> anyhow::Result<crate::tools::ToolResult> {
1462        self.execute_tool_queued_or_direct_inner(name, args, ctx)
1463            .await
1464    }
1465
1466    /// Inner execution without task lifecycle wrapping.
1467    async fn execute_tool_queued_or_direct_inner(
1468        &self,
1469        name: &str,
1470        args: &serde_json::Value,
1471        ctx: &ToolContext,
1472    ) -> anyhow::Result<crate::tools::ToolResult> {
1473        if let Some(ref queue) = self.command_queue {
1474            let command = ToolCommand::new(
1475                Arc::clone(&self.tool_executor),
1476                name.to_string(),
1477                args.clone(),
1478                ctx.clone(),
1479                self.config.skill_registry.clone(),
1480            );
1481            let rx = queue.submit_by_tool(name, Box::new(command)).await;
1482            match rx.await {
1483                Ok(Ok(value)) => {
1484                    let output = value["output"]
1485                        .as_str()
1486                        .ok_or_else(|| {
1487                            anyhow::anyhow!(
1488                                "Queue result missing 'output' field for tool '{}'",
1489                                name
1490                            )
1491                        })?
1492                        .to_string();
1493                    let exit_code = value["exit_code"].as_i64().unwrap_or(0) as i32;
1494                    return Ok(crate::tools::ToolResult {
1495                        name: name.to_string(),
1496                        output,
1497                        exit_code,
1498                        metadata: None,
1499                        images: Vec::new(),
1500                    });
1501                }
1502                Ok(Err(e)) => {
1503                    tracing::warn!(
1504                        "Queue execution failed for tool '{}', falling back to direct: {}",
1505                        name,
1506                        e
1507                    );
1508                }
1509                Err(_) => {
1510                    tracing::warn!(
1511                        "Queue channel closed for tool '{}', falling back to direct",
1512                        name
1513                    );
1514                }
1515            }
1516        }
1517        self.execute_tool_timed(name, args, ctx).await
1518    }
1519
1520    /// Call the LLM, handling streaming vs non-streaming internally.
1521    ///
1522    /// Streaming events (`TextDelta`, `ToolStart`) are forwarded to `event_tx`
1523    /// as they arrive. Non-streaming mode simply awaits the complete response.
1524    ///
1525    /// Tool definitions are selected per turn by the centralized tool selector.
1526    ///
1527    /// Returns `Err` on any LLM API failure. The circuit breaker in
1528    /// `execute_loop` wraps this call with retry logic for non-streaming mode.
1529    async fn call_llm(
1530        &self,
1531        messages: &[Message],
1532        system: Option<&str>,
1533        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1534        cancel_token: &tokio_util::sync::CancellationToken,
1535    ) -> anyhow::Result<LlmResponse> {
1536        let tools = crate::tools::select_tools_for_messages(&self.config.tools, messages);
1537
1538        if event_tx.is_some() {
1539            let mut stream_rx = match self
1540                .llm_client
1541                .complete_streaming(messages, system, &tools, cancel_token.clone())
1542                .await
1543            {
1544                Ok(rx) => rx,
1545                Err(stream_error) => {
1546                    // Do not fall back to non-streaming if cancelled — propagate cancellation
1547                    if cancel_token.is_cancelled() {
1548                        anyhow::bail!("Operation cancelled by user");
1549                    }
1550                    tracing::warn!(
1551                        error = %stream_error,
1552                        "LLM streaming setup failed; falling back to non-streaming completion"
1553                    );
1554                    return self
1555                        .llm_client
1556                        .complete(messages, system, &tools)
1557                        .await
1558                        .with_context(|| {
1559                            format!(
1560                                "LLM streaming call failed ({stream_error}); non-streaming fallback also failed"
1561                            )
1562                        });
1563                }
1564            };
1565
1566            let mut final_response: Option<LlmResponse> = None;
1567            loop {
1568                tokio::select! {
1569                    _ = cancel_token.cancelled() => {
1570                        tracing::info!("🛑 LLM streaming cancelled by CancellationToken");
1571                        anyhow::bail!("Operation cancelled by user");
1572                    }
1573                    event = stream_rx.recv() => {
1574                        match event {
1575                            Some(crate::llm::StreamEvent::TextDelta(text)) => {
1576                                if let Some(tx) = event_tx {
1577                                    tx.send(AgentEvent::TextDelta { text }).await.ok();
1578                                }
1579                            }
1580                            Some(crate::llm::StreamEvent::ReasoningDelta(text)) => {
1581                                if let Some(tx) = event_tx {
1582                                    tx.send(AgentEvent::ReasoningDelta { text }).await.ok();
1583                                }
1584                            }
1585                            Some(crate::llm::StreamEvent::ToolUseStart { id, name }) => {
1586                                if let Some(tx) = event_tx {
1587                                    tx.send(AgentEvent::ToolStart { id, name }).await.ok();
1588                                }
1589                            }
1590                            Some(crate::llm::StreamEvent::ToolUseInputDelta(delta)) => {
1591                                if let Some(tx) = event_tx {
1592                                    tx.send(AgentEvent::ToolInputDelta { delta }).await.ok();
1593                                }
1594                            }
1595                            Some(crate::llm::StreamEvent::Done(resp)) => {
1596                                final_response = Some(resp);
1597                                break;
1598                            }
1599                            None => break,
1600                        }
1601                    }
1602                }
1603            }
1604            final_response.context("Stream ended without final response")
1605        } else {
1606            self.llm_client
1607                .complete(messages, system, &tools)
1608                .await
1609                .context("LLM call failed")
1610        }
1611    }
1612
1613    /// Create a tool context with streaming support.
1614    ///
1615    /// When `event_tx` is Some, spawns a forwarder task that converts
1616    /// `ToolStreamEvent::OutputDelta` into `AgentEvent::ToolOutputDelta`
1617    /// and sends them to the agent event channel.
1618    ///
1619    /// Returns the augmented `ToolContext`. The forwarder task runs until
1620    /// the tool-side sender is dropped (i.e., tool execution finishes).
1621    fn streaming_tool_context(
1622        &self,
1623        event_tx: &Option<mpsc::Sender<AgentEvent>>,
1624        tool_id: &str,
1625        tool_name: &str,
1626    ) -> ToolContext {
1627        let mut ctx = self.tool_context.clone();
1628        if let Some(agent_tx) = event_tx {
1629            let (tool_tx, mut tool_rx) = mpsc::channel::<ToolStreamEvent>(64);
1630            ctx.event_tx = Some(tool_tx);
1631
1632            let agent_tx = agent_tx.clone();
1633            let tool_id = tool_id.to_string();
1634            let tool_name = tool_name.to_string();
1635            tokio::spawn(async move {
1636                while let Some(event) = tool_rx.recv().await {
1637                    match event {
1638                        ToolStreamEvent::OutputDelta(delta) => {
1639                            agent_tx
1640                                .send(AgentEvent::ToolOutputDelta {
1641                                    id: tool_id.clone(),
1642                                    name: tool_name.clone(),
1643                                    delta,
1644                                })
1645                                .await
1646                                .ok();
1647                        }
1648                    }
1649                }
1650            });
1651        }
1652        ctx
1653    }
1654
1655    /// Resolve context from all providers for a given prompt
1656    ///
1657    /// Returns aggregated context results from all configured providers.
1658    async fn resolve_context(&self, prompt: &str, session_id: Option<&str>) -> Vec<ContextResult> {
1659        if self.config.context_providers.is_empty() {
1660            return Vec::new();
1661        }
1662
1663        let query = ContextQuery::new(prompt).with_session_id(session_id.unwrap_or(""));
1664
1665        let futures = self
1666            .config
1667            .context_providers
1668            .iter()
1669            .map(|p| p.query(&query));
1670        let outcomes = join_all(futures).await;
1671
1672        outcomes
1673            .into_iter()
1674            .enumerate()
1675            .filter_map(|(i, r)| match r {
1676                Ok(result) if !result.is_empty() => Some(result),
1677                Ok(_) => None,
1678                Err(e) => {
1679                    tracing::warn!(
1680                        "Context provider '{}' failed: {}",
1681                        self.config.context_providers[i].name(),
1682                        e
1683                    );
1684                    None
1685                }
1686            })
1687            .collect()
1688    }
1689
1690    /// Detect whether the LLM's no-tool-call response looks like an intermediate
1691    /// step rather than a genuine final answer.
1692    ///
1693    /// Returns `true` when continuation should be injected. Heuristics:
1694    /// - Response ends with a colon or ellipsis (mid-thought)
1695    /// - Response contains phrases that signal incomplete work
1696    /// - Response is very short (< 80 chars) and doesn't look like a summary
1697    fn looks_incomplete(text: &str) -> bool {
1698        let t = text.trim();
1699        if t.is_empty() {
1700            return true;
1701        }
1702        // Very short responses that aren't clearly a final answer
1703        if t.len() < 80 && !t.contains('\n') {
1704            // Short single-line — could be a genuine one-liner answer, keep going
1705            // only if it ends with punctuation that signals continuation
1706            let ends_continuation =
1707                t.ends_with(':') || t.ends_with("...") || t.ends_with('…') || t.ends_with(',');
1708            if ends_continuation {
1709                return true;
1710            }
1711        }
1712        // Phrases that signal the LLM is describing what it will do rather than doing it
1713        let incomplete_phrases = [
1714            "i'll ",
1715            "i will ",
1716            "let me ",
1717            "i need to ",
1718            "i should ",
1719            "next, i",
1720            "first, i",
1721            "now i",
1722            "i'll start",
1723            "i'll begin",
1724            "i'll now",
1725            "let's start",
1726            "let's begin",
1727            "to do this",
1728            "i'm going to",
1729        ];
1730        let lower = t.to_lowercase();
1731        for phrase in &incomplete_phrases {
1732            if lower.contains(phrase) {
1733                return true;
1734            }
1735        }
1736        false
1737    }
1738
1739    /// Get the assembled system prompt from slots.
1740    #[allow(dead_code)]
1741    fn system_prompt(&self) -> String {
1742        self.config.prompt_slots.build()
1743    }
1744
1745    /// Get the assembled system prompt from slots with an explicit style.
1746    fn system_prompt_for_style(&self, style: AgentStyle) -> String {
1747        let mut slots = self.config.prompt_slots.clone();
1748        slots.style = Some(style);
1749        slots.build()
1750    }
1751
1752    async fn resolve_effective_style(&self, prompt: &str) -> AgentStyle {
1753        if let Some(style) = self.config.prompt_slots.style {
1754            return style;
1755        }
1756
1757        let (style, confidence) = AgentStyle::detect_with_confidence(prompt);
1758        tracing::debug!(
1759            intent.classification = ?style,
1760            intent.confidence = ?confidence,
1761            intent.source = "local",
1762            "Intent classified locally"
1763        );
1764        style
1765    }
1766
1767    /// Detect if context perception is needed based on user prompt.
1768    ///
1769    /// Returns `Some(PreContextPerceptionEvent)` if the prompt suggests the model
1770    /// needs workspace knowledge (finding files, understanding code, etc.).
1771    pub fn detect_context_perception_intent(
1772        &self,
1773        prompt: &str,
1774        session_id: &str,
1775        workspace: &str,
1776    ) -> Option<PreContextPerceptionEvent> {
1777        let lower = prompt.to_lowercase();
1778
1779        // Pattern matching for different intents that suggest context perception is needed
1780        let intents: &[(&[&str], &str)] = &[
1781            // Locate: finding files, functions, resources
1782            (
1783                &[
1784                    "where is",
1785                    "where are",
1786                    "find the file",
1787                    "find all",
1788                    "find files",
1789                    "who wrote",
1790                    "locate",
1791                    "search for",
1792                    "look for",
1793                    "search",
1794                ],
1795                "locate",
1796            ),
1797            // Understand: explaining how something works
1798            (
1799                &[
1800                    "how does",
1801                    "what does",
1802                    "explain",
1803                    "understand",
1804                    "what is this",
1805                    "how does this work",
1806                ],
1807                "understand",
1808            ),
1809            // Retrieve: recalling from memory/past
1810            (
1811                &[
1812                    "remember",
1813                    "earlier",
1814                    "before",
1815                    "previously",
1816                    "last time",
1817                    "past",
1818                    "previous",
1819                ],
1820                "retrieve",
1821            ),
1822            // Explore: understanding structure
1823            (
1824                &[
1825                    "how is organized",
1826                    "project structure",
1827                    "what files",
1828                    "show me the structure",
1829                    "explore",
1830                ],
1831                "explore",
1832            ),
1833            // Reason: asking why/causality
1834            (
1835                &[
1836                    "why did",
1837                    "why is",
1838                    "cause",
1839                    "reason",
1840                    "what happened",
1841                    "why does",
1842                ],
1843                "reason",
1844            ),
1845            // Validate: checking correctness
1846            (
1847                &["is this correct", "verify", "validate", "check if", "debug"],
1848                "validate",
1849            ),
1850            // Compare: comparing things
1851            (
1852                &[
1853                    "difference between",
1854                    "compare",
1855                    "versus",
1856                    " vs ",
1857                    "different from",
1858                ],
1859                "compare",
1860            ),
1861            // Track: status/history
1862            (
1863                &[
1864                    "status",
1865                    "progress",
1866                    "how far",
1867                    "history",
1868                    "what's the current",
1869                ],
1870                "track",
1871            ),
1872        ];
1873
1874        // Detect target type from keywords
1875        let target_type = if lower.contains("function") || lower.contains("method") {
1876            "function"
1877        } else if lower.contains("file") || lower.contains("config") {
1878            "file"
1879        } else if lower.contains("class") {
1880            "entity"
1881        } else if lower.contains("module") || lower.contains("package") {
1882            "module"
1883        } else if lower.contains("test") {
1884            "test"
1885        } else {
1886            "unknown"
1887        };
1888
1889        // Find matching intent
1890        let matched_intent = intents
1891            .iter()
1892            .find(|(patterns, _)| patterns.iter().any(|p| lower.contains(p)));
1893
1894        matched_intent.map(|(patterns, intent)| {
1895            // Extract target name if possible (simplified extraction)
1896            let target_name = extract_target_name_from_prompt(prompt, patterns);
1897
1898            PreContextPerceptionEvent {
1899                session_id: session_id.to_string(),
1900                intent: intent.to_string(),
1901                target_type: target_type.to_string(),
1902                target_name,
1903                domain: detect_domain_from_prompt(prompt),
1904                query: Some(prompt.to_string()),
1905                working_directory: workspace.to_string(),
1906                urgency: "normal".to_string(),
1907            }
1908        })
1909    }
1910
1911    /// Fire PreContextPerception hook and wait for harness decision.
1912    async fn fire_pre_context_perception(&self, event: &PreContextPerceptionEvent) -> HookResult {
1913        if let Some(he) = &self.config.hook_engine {
1914            let hook_event = HookEvent::PreContextPerception(event.clone());
1915            he.fire(&hook_event).await
1916        } else {
1917            HookResult::continue_()
1918        }
1919    }
1920
1921    /// Fire IntentDetection hook and wait for harness decision.
1922    ///
1923    /// This is called on every prompt to detect user intent via the AHP harness.
1924    /// Returns the detected intent if the harness provides one, or None if blocked/failed.
1925    async fn fire_intent_detection(
1926        &self,
1927        prompt: &str,
1928        session_id: &str,
1929        workspace: &str,
1930    ) -> Option<IntentDetectionResult> {
1931        let event = IntentDetectionEvent {
1932            session_id: session_id.to_string(),
1933            prompt: prompt.to_string(),
1934            workspace: workspace.to_string(),
1935            language_hint: detect_language_hint(prompt),
1936        };
1937
1938        let hook_result = if let Some(he) = &self.config.hook_engine {
1939            let hook_event = HookEvent::IntentDetection(event);
1940            he.fire(&hook_event).await
1941        } else {
1942            return None;
1943        };
1944
1945        match hook_result {
1946            HookResult::Continue(Some(modified)) => {
1947                // Parse the intent detection result
1948                serde_json::from_value::<IntentDetectionResult>(modified).ok()
1949            }
1950            HookResult::Block(_) => {
1951                // Harness blocked intent detection - use fallback
1952                tracing::info!("AHP harness blocked intent detection");
1953                None
1954            }
1955            _ => None,
1956        }
1957    }
1958
1959    /// Apply injected context from AHP harness decision.
1960    #[cfg(feature = "ahp")]
1961    fn apply_injected_context(&self, injected: InjectedContext) -> Vec<ContextResult> {
1962        injected_context_to_results(injected)
1963    }
1964
1965    /// Build augmented system prompt with context
1966    #[allow(dead_code)]
1967    fn build_augmented_system_prompt(&self, context_results: &[ContextResult]) -> Option<String> {
1968        let base = self.system_prompt();
1969        let context_assembly = self.assemble_context_results(context_results);
1970        self.build_augmented_system_prompt_with_base(&base, &context_assembly)
1971    }
1972
1973    fn assemble_context_results(&self, context_results: &[ContextResult]) -> ContextAssembly {
1974        let mut results = context_results.to_vec();
1975
1976        if self.config.prompt_slots.guidelines.is_none() {
1977            let project_hint = Self::detect_project_hint(&self.tool_context.workspace);
1978            if !project_hint.is_empty() {
1979                let token_count = project_hint.split_whitespace().count().max(1);
1980                let mut result = ContextResult::new("project_hint");
1981                result.add_item(
1982                    ContextItem::new("project_hint", ContextType::Resource, project_hint)
1983                        .with_source("a3s://project-hint")
1984                        .with_provenance("workspace_marker")
1985                        .with_priority(0.65)
1986                        .with_trust(0.8)
1987                        .with_freshness(1.0)
1988                        .with_relevance(0.9)
1989                        .with_token_count(token_count),
1990                );
1991                results.push(result);
1992            }
1993        }
1994
1995        ContextAssembler::with_default_budget().assemble(&results)
1996    }
1997
1998    fn build_augmented_system_prompt_with_base(
1999        &self,
2000        base: &str,
2001        context_assembly: &ContextAssembly,
2002    ) -> Option<String> {
2003        let base = base.to_string();
2004
2005        // MCP tool definitions are selected per turn by ToolSelector. Keep the
2006        // system prompt small instead of listing every external tool here.
2007        let has_mcp_tools = self
2008            .tool_executor
2009            .definitions()
2010            .iter()
2011            .any(|t| t.name.starts_with("mcp__"));
2012
2013        let mcp_section = if has_mcp_tools {
2014            "## MCP Tools\n\nExternal MCP tools are available on demand when relevant to the current request.".to_string()
2015        } else {
2016            String::new()
2017        };
2018
2019        let parts: Vec<&str> = [base.as_str(), mcp_section.as_str()]
2020            .iter()
2021            .filter(|s| !s.is_empty())
2022            .copied()
2023            .collect();
2024
2025        if context_assembly.is_empty() {
2026            return Some(parts.join("\n\n"));
2027        }
2028
2029        let context_xml = context_assembly.to_xml();
2030        Some(format!("{}\n\n{}", parts.join("\n\n"), context_xml))
2031    }
2032
2033    /// Notify providers of turn completion for memory extraction
2034    async fn notify_turn_complete(&self, session_id: &str, prompt: &str, response: &str) {
2035        let futures = self
2036            .config
2037            .context_providers
2038            .iter()
2039            .map(|p| p.on_turn_complete(session_id, prompt, response));
2040        let outcomes = join_all(futures).await;
2041
2042        for (i, result) in outcomes.into_iter().enumerate() {
2043            if let Err(e) = result {
2044                tracing::warn!(
2045                    "Context provider '{}' on_turn_complete failed: {}",
2046                    self.config.context_providers[i].name(),
2047                    e
2048                );
2049            }
2050        }
2051    }
2052
2053    /// Fire PreToolUse hook event before tool execution.
2054    /// Returns the HookResult which may block the tool call.
2055    async fn fire_pre_tool_use(
2056        &self,
2057        session_id: &str,
2058        tool_name: &str,
2059        args: &serde_json::Value,
2060        recent_tools: Vec<String>,
2061    ) -> Option<HookResult> {
2062        if let Some(he) = &self.config.hook_engine {
2063            // Convert null args to empty object so JS callbacks don't get null.input errors
2064            let safe_args = if args.is_null() {
2065                serde_json::Value::Object(Default::default())
2066            } else {
2067                args.clone()
2068            };
2069            let event = HookEvent::PreToolUse(PreToolUseEvent {
2070                session_id: session_id.to_string(),
2071                tool: tool_name.to_string(),
2072                args: safe_args,
2073                working_directory: self.tool_context.workspace.to_string_lossy().to_string(),
2074                recent_tools,
2075            });
2076            let result = he.fire(&event).await;
2077            if result.is_block() {
2078                return Some(result);
2079            }
2080        }
2081        None
2082    }
2083
2084    /// Fire PostToolUse hook event after tool execution (fire-and-forget).
2085    async fn fire_post_tool_use(
2086        &self,
2087        session_id: &str,
2088        tool_name: &str,
2089        args: &serde_json::Value,
2090        output: &str,
2091        success: bool,
2092        duration_ms: u64,
2093    ) {
2094        if let Some(he) = &self.config.hook_engine {
2095            // Convert null args to empty object so JS callbacks don't get null.input errors
2096            let safe_args = if args.is_null() {
2097                serde_json::Value::Object(Default::default())
2098            } else {
2099                args.clone()
2100            };
2101            let event = HookEvent::PostToolUse(PostToolUseEvent {
2102                session_id: session_id.to_string(),
2103                tool: tool_name.to_string(),
2104                args: safe_args,
2105                result: ToolResultData {
2106                    success,
2107                    output: output.to_string(),
2108                    exit_code: if success { Some(0) } else { Some(1) },
2109                    duration_ms,
2110                },
2111            });
2112            let he = Arc::clone(he);
2113            tokio::spawn(async move {
2114                let _ = he.fire(&event).await;
2115            });
2116        }
2117    }
2118
2119    /// Fire GenerateStart hook event before an LLM call.
2120    async fn fire_generate_start(
2121        &self,
2122        session_id: &str,
2123        prompt: &str,
2124        system_prompt: &Option<String>,
2125    ) {
2126        if let Some(he) = &self.config.hook_engine {
2127            let event = HookEvent::GenerateStart(GenerateStartEvent {
2128                session_id: session_id.to_string(),
2129                prompt: prompt.to_string(),
2130                system_prompt: system_prompt.clone(),
2131                model_provider: String::new(),
2132                model_name: String::new(),
2133                available_tools: self.config.tools.iter().map(|t| t.name.clone()).collect(),
2134            });
2135            let _ = he.fire(&event).await;
2136        }
2137    }
2138
2139    /// Fire GenerateEnd hook event after an LLM call.
2140    async fn fire_generate_end(
2141        &self,
2142        session_id: &str,
2143        prompt: &str,
2144        response: &LlmResponse,
2145        duration_ms: u64,
2146    ) {
2147        if let Some(he) = &self.config.hook_engine {
2148            let tool_calls: Vec<ToolCallInfo> = response
2149                .tool_calls()
2150                .iter()
2151                .map(|tc| {
2152                    let args = if tc.args.is_null() {
2153                        serde_json::Value::Object(Default::default())
2154                    } else {
2155                        tc.args.clone()
2156                    };
2157                    ToolCallInfo {
2158                        name: tc.name.clone(),
2159                        args,
2160                    }
2161                })
2162                .collect();
2163
2164            let event = HookEvent::GenerateEnd(GenerateEndEvent {
2165                session_id: session_id.to_string(),
2166                prompt: prompt.to_string(),
2167                response_text: response.text().to_string(),
2168                tool_calls,
2169                usage: TokenUsageInfo {
2170                    prompt_tokens: response.usage.prompt_tokens as i32,
2171                    completion_tokens: response.usage.completion_tokens as i32,
2172                    total_tokens: response.usage.total_tokens as i32,
2173                },
2174                duration_ms,
2175            });
2176            let _ = he.fire(&event).await;
2177        }
2178    }
2179
2180    /// Fire PrePrompt hook event before prompt augmentation.
2181    /// Returns optional modified prompt text from the hook.
2182    async fn fire_pre_prompt(
2183        &self,
2184        session_id: &str,
2185        prompt: &str,
2186        system_prompt: &Option<String>,
2187        message_count: usize,
2188    ) -> Option<String> {
2189        if let Some(he) = &self.config.hook_engine {
2190            let event = HookEvent::PrePrompt(PrePromptEvent {
2191                session_id: session_id.to_string(),
2192                prompt: prompt.to_string(),
2193                system_prompt: system_prompt.clone(),
2194                message_count,
2195            });
2196            let result = he.fire(&event).await;
2197            if let HookResult::Continue(Some(modified)) = result {
2198                // Extract modified prompt from hook response
2199                if let Some(new_prompt) = modified.get("prompt").and_then(|v| v.as_str()) {
2200                    return Some(new_prompt.to_string());
2201                }
2202            }
2203        }
2204        None
2205    }
2206
2207    /// Fire PostResponse hook event after the agent loop completes.
2208    async fn fire_post_response(
2209        &self,
2210        session_id: &str,
2211        response_text: &str,
2212        tool_calls_count: usize,
2213        usage: &TokenUsage,
2214        duration_ms: u64,
2215    ) {
2216        if let Some(he) = &self.config.hook_engine {
2217            let event = HookEvent::PostResponse(PostResponseEvent {
2218                session_id: session_id.to_string(),
2219                response_text: response_text.to_string(),
2220                tool_calls_count,
2221                usage: TokenUsageInfo {
2222                    prompt_tokens: usage.prompt_tokens as i32,
2223                    completion_tokens: usage.completion_tokens as i32,
2224                    total_tokens: usage.total_tokens as i32,
2225                },
2226                duration_ms,
2227            });
2228            let he = Arc::clone(he);
2229            tokio::spawn(async move {
2230                let _ = he.fire(&event).await;
2231            });
2232        }
2233    }
2234
2235    /// Fire OnError hook event when an error occurs.
2236    async fn fire_on_error(
2237        &self,
2238        session_id: &str,
2239        error_type: ErrorType,
2240        error_message: &str,
2241        context: serde_json::Value,
2242    ) {
2243        if let Some(he) = &self.config.hook_engine {
2244            let event = HookEvent::OnError(OnErrorEvent {
2245                session_id: session_id.to_string(),
2246                error_type,
2247                error_message: error_message.to_string(),
2248                context,
2249            });
2250            let he = Arc::clone(he);
2251            tokio::spawn(async move {
2252                let _ = he.fire(&event).await;
2253            });
2254        }
2255    }
2256
2257    /// Execute the agent loop for a prompt
2258    ///
2259    /// Takes the conversation history and a new user prompt.
2260    /// Returns the agent result and updated message history.
2261    /// When event_tx is provided, uses streaming LLM API for real-time text output.
2262    pub async fn execute(
2263        &self,
2264        history: &[Message],
2265        prompt: &str,
2266        event_tx: Option<mpsc::Sender<AgentEvent>>,
2267    ) -> Result<AgentResult> {
2268        self.execute_with_session(history, prompt, None, event_tx, None)
2269            .await
2270    }
2271
2272    /// Execute the agent loop with pre-built messages (user message already included).
2273    ///
2274    /// Used by `send_with_attachments` / `stream_with_attachments` where the
2275    /// user message contains multi-modal content and is already appended to
2276    /// the messages vec.
2277    pub async fn execute_from_messages(
2278        &self,
2279        messages: Vec<Message>,
2280        session_id: Option<&str>,
2281        event_tx: Option<mpsc::Sender<AgentEvent>>,
2282        cancel_token: Option<&tokio_util::sync::CancellationToken>,
2283    ) -> Result<AgentResult> {
2284        let default_token = tokio_util::sync::CancellationToken::new();
2285        let token = cancel_token.unwrap_or(&default_token);
2286        tracing::info!(
2287            a3s.session.id = session_id.unwrap_or("none"),
2288            a3s.agent.max_turns = self.config.max_tool_rounds,
2289            "a3s.agent.execute_from_messages started"
2290        );
2291
2292        // Extract the last user message text for hooks, memory recall, and events.
2293        // Pass empty prompt so execute_loop skips adding a duplicate user message,
2294        // but provide effective_prompt for hook/memory/event purposes.
2295        let effective_prompt = messages
2296            .iter()
2297            .rev()
2298            .find(|m| m.role == "user")
2299            .map(|m| m.text())
2300            .unwrap_or_default();
2301
2302        let result = self
2303            .execute_loop_inner(
2304                &messages,
2305                "",
2306                &effective_prompt,
2307                None, // no pre-computed style; resolve inside the loop
2308                session_id,
2309                event_tx,
2310                token,
2311                true, // emit_end: this is a standalone execution
2312            )
2313            .await;
2314
2315        match &result {
2316            Ok(r) => tracing::info!(
2317                a3s.agent.tool_calls_count = r.tool_calls_count,
2318                a3s.llm.total_tokens = r.usage.total_tokens,
2319                "a3s.agent.execute_from_messages completed"
2320            ),
2321            Err(e) => tracing::warn!(
2322                error = %e,
2323                "a3s.agent.execute_from_messages failed"
2324            ),
2325        }
2326
2327        result
2328    }
2329
2330    /// Execute the agent loop for a prompt with session context
2331    ///
2332    /// Takes the conversation history, user prompt, and optional session ID.
2333    /// When session_id is provided, context providers can use it for session-specific context.
2334    pub async fn execute_with_session(
2335        &self,
2336        history: &[Message],
2337        prompt: &str,
2338        session_id: Option<&str>,
2339        event_tx: Option<mpsc::Sender<AgentEvent>>,
2340        cancel_token: Option<&tokio_util::sync::CancellationToken>,
2341    ) -> Result<AgentResult> {
2342        let default_token = tokio_util::sync::CancellationToken::new();
2343        let token = cancel_token.unwrap_or(&default_token);
2344        tracing::info!(
2345            a3s.session.id = session_id.unwrap_or("none"),
2346            a3s.agent.max_turns = self.config.max_tool_rounds,
2347            "a3s.agent.execute started"
2348        );
2349
2350        // Step 1: pre-analysis — intent + goal + plan + input optimization in ONE LLM call.
2351        // Auto mode lets structured pre-analysis decide whether planning is needed.
2352        let pre_analysis: Option<PreAnalysis> = {
2353            let needs_llm_prep = self.should_run_pre_analysis();
2354
2355            if !needs_llm_prep {
2356                None
2357            } else {
2358                match LlmPlanner::pre_analyze(&self.llm_client.clone(), prompt).await {
2359                    Ok(analysis) => {
2360                        tracing::debug!(
2361                            intent = ?analysis.intent,
2362                            requires_planning = analysis.requires_planning,
2363                            plan_steps = analysis.execution_plan.steps.len(),
2364                            "Pre-analysis completed"
2365                        );
2366                        Some(analysis)
2367                    }
2368                    Err(e) => {
2369                        tracing::warn!(error = %e, "Pre-analysis failed; using local style fallback");
2370                        None
2371                    }
2372                }
2373            }
2374        };
2375
2376        // Step 2: choose the execution style. A successful pre-analysis is
2377        // authoritative; local deterministic detection is only a no-LLM fallback
2378        // for disabled planning or failed pre-analysis.
2379        let exec_style = if let Some(analysis) = pre_analysis.as_ref() {
2380            analysis.intent
2381        } else {
2382            let (style, confidence) = AgentStyle::detect_with_confidence(prompt);
2383            tracing::debug!(
2384                intent.classification = ?style,
2385                intent.confidence = ?confidence,
2386                intent.source = "local_fallback",
2387                "Intent classified locally"
2388            );
2389            style
2390        };
2391
2392        // Determine whether to use planning mode.
2393        let use_planning = match self.config.planning_mode {
2394            PlanningMode::Disabled => false,
2395            PlanningMode::Enabled => true,
2396            PlanningMode::Auto => pre_analysis
2397                .as_ref()
2398                .map(|analysis| analysis.requires_planning)
2399                .unwrap_or_else(|| exec_style.requires_planning()),
2400        };
2401
2402        // Determine the effective prompt: use optimized input from pre-analysis if available.
2403        // Clone to avoid borrow conflict when pre_analysis is moved.
2404        let effective_prompt: String = match pre_analysis.as_ref() {
2405            Some(a) => a.optimized_input.clone(),
2406            None => prompt.to_string(),
2407        };
2408
2409        let result = if use_planning {
2410            self.execute_with_planning(
2411                history,
2412                &effective_prompt,
2413                session_id,
2414                event_tx,
2415                pre_analysis,
2416            )
2417            .await
2418        } else {
2419            self.execute_loop(
2420                history,
2421                &effective_prompt,
2422                exec_style,
2423                session_id,
2424                event_tx,
2425                token,
2426                true,
2427            )
2428            .await
2429        };
2430
2431        match &result {
2432            Ok(r) => {
2433                tracing::info!(
2434                    a3s.agent.tool_calls_count = r.tool_calls_count,
2435                    a3s.llm.total_tokens = r.usage.total_tokens,
2436                    "a3s.agent.execute completed"
2437                );
2438                // Fire PostResponse hook
2439                self.fire_post_response(
2440                    session_id.unwrap_or(""),
2441                    &r.text,
2442                    r.tool_calls_count,
2443                    &r.usage,
2444                    0, // duration tracked externally
2445                )
2446                .await;
2447            }
2448            Err(e) => {
2449                tracing::warn!(
2450                    error = %e,
2451                    "a3s.agent.execute failed"
2452                );
2453                // Fire OnError hook
2454                self.fire_on_error(
2455                    session_id.unwrap_or(""),
2456                    ErrorType::Other,
2457                    &e.to_string(),
2458                    serde_json::json!({"phase": "execute"}),
2459                )
2460                .await;
2461            }
2462        }
2463
2464        result
2465    }
2466
2467    /// Core execution loop (without planning routing).
2468    ///
2469    /// This is the inner loop that runs LLM calls and tool executions.
2470    /// Called directly by `execute_with_session` (after planning check)
2471    /// and by `execute_plan` (for individual steps, bypassing planning).
2472    #[allow(clippy::too_many_arguments)]
2473    async fn execute_loop(
2474        &self,
2475        history: &[Message],
2476        prompt: &str,
2477        effective_style: AgentStyle,
2478        session_id: Option<&str>,
2479        event_tx: Option<mpsc::Sender<AgentEvent>>,
2480        cancel_token: &tokio_util::sync::CancellationToken,
2481        emit_end: bool,
2482    ) -> Result<AgentResult> {
2483        // When called via execute_loop, the prompt is used for both
2484        // message-adding and hook/memory/event purposes.
2485        self.execute_loop_inner(
2486            history,
2487            prompt,
2488            prompt,
2489            Some(effective_style),
2490            session_id,
2491            event_tx,
2492            cancel_token,
2493            emit_end,
2494        )
2495        .await
2496    }
2497
2498    /// Inner execution loop.
2499    ///
2500    /// `msg_prompt` controls whether a user message is appended (empty = skip).
2501    /// `effective_prompt` is used for hooks, memory recall, taint tracking, and events.
2502    /// `effective_style` pre-computed style to skip redundant LLM-based intent detection.
2503    /// `emit_end` controls whether to send `AgentEvent::End` when the loop completes
2504    /// (should be false when called from `execute_plan` to avoid duplicate End events).
2505    #[allow(clippy::too_many_arguments)]
2506    async fn execute_loop_inner(
2507        &self,
2508        history: &[Message],
2509        msg_prompt: &str,
2510        effective_prompt: &str,
2511        effective_style: Option<AgentStyle>,
2512        session_id: Option<&str>,
2513        event_tx: Option<mpsc::Sender<AgentEvent>>,
2514        cancel_token: &tokio_util::sync::CancellationToken,
2515        emit_end: bool,
2516    ) -> Result<AgentResult> {
2517        let mut messages = history.to_vec();
2518        let mut total_usage = TokenUsage::default();
2519        let mut tool_calls_count = 0;
2520        let mut verification_reports = Vec::new();
2521        let mut turn = 0;
2522        // Consecutive malformed-tool-args errors (4.1 parse error recovery)
2523        let mut parse_error_count: u32 = 0;
2524        // Continuation injection counter
2525        let mut continuation_count: u32 = 0;
2526        let mut recent_tool_signatures: Vec<String> = Vec::new();
2527        let style_prompt = if effective_prompt.is_empty() {
2528            msg_prompt
2529        } else {
2530            effective_prompt
2531        };
2532        let effective_style = match effective_style {
2533            Some(s) => s,
2534            None => self.resolve_effective_style(style_prompt).await,
2535        };
2536        let effective_system_prompt = self.system_prompt_for_style(effective_style);
2537        if let Some(tx) = &event_tx {
2538            tx.send(AgentEvent::AgentModeChanged {
2539                mode: effective_style.runtime_mode().to_string(),
2540                agent: effective_style.builtin_agent_name().to_string(),
2541                description: effective_style.description().to_string(),
2542            })
2543            .await
2544            .ok();
2545        }
2546
2547        // Send start event
2548        if let Some(tx) = &event_tx {
2549            tx.send(AgentEvent::Start {
2550                prompt: effective_prompt.to_string(),
2551            })
2552            .await
2553            .ok();
2554        }
2555
2556        // Forward queue events (CommandDeadLettered, CommandRetry, QueueAlert) to event stream
2557        let _queue_forward_handle =
2558            if let (Some(ref queue), Some(ref tx)) = (&self.command_queue, &event_tx) {
2559                let mut rx = queue.subscribe();
2560                let tx = tx.clone();
2561                Some(tokio::spawn(async move {
2562                    while let Ok(event) = rx.recv().await {
2563                        if tx.send(event).await.is_err() {
2564                            break;
2565                        }
2566                    }
2567                }))
2568            } else {
2569                None
2570            };
2571
2572        // Fire PrePrompt hook (may modify the prompt)
2573        let built_system_prompt = Some(effective_system_prompt.clone());
2574        let hooked_prompt = if let Some(modified) = self
2575            .fire_pre_prompt(
2576                session_id.unwrap_or(""),
2577                effective_prompt,
2578                &built_system_prompt,
2579                messages.len(),
2580            )
2581            .await
2582        {
2583            modified
2584        } else {
2585            effective_prompt.to_string()
2586        };
2587        let effective_prompt = hooked_prompt.as_str();
2588
2589        // Taint-track the incoming prompt for sensitive data detection
2590        if let Some(ref sp) = self.config.security_provider {
2591            sp.taint_input(effective_prompt);
2592        }
2593
2594        // Resolve context from providers on first turn (before adding user message)
2595        // Intent-driven: detect context perception need first via AHP harness, then fire hook
2596        let workspace = self.tool_context.workspace.display().to_string();
2597        let session_id_str = session_id.unwrap_or("");
2598        let mut context_results = if !self.config.context_providers.is_empty() {
2599            if let Some(tx) = &event_tx {
2600                tx.send(AgentEvent::ContextResolving {
2601                    providers: self
2602                        .config
2603                        .context_providers
2604                        .iter()
2605                        .map(|p| p.name().to_string())
2606                        .collect(),
2607                })
2608                .await
2609                .ok();
2610            }
2611
2612            // Step 1: Fire IntentDetection harness point on EVERY prompt
2613            #[allow(clippy::needless_borrow)]
2614            let harness_intent = self
2615                .fire_intent_detection(effective_prompt, &session_id_str, &workspace)
2616                .await;
2617
2618            // Step 2: Build perception event from harness result, or fallback to local detection
2619            #[allow(clippy::needless_borrow)]
2620            let perception_event = if let Some(detected) = harness_intent {
2621                tracing::info!(
2622                    intent = %detected.detected_intent,
2623                    confidence = %detected.confidence,
2624                    "Intent detected from AHP harness"
2625                );
2626                Some(build_pre_context_perception_from_intent(
2627                    detected,
2628                    effective_prompt,
2629                    &session_id_str,
2630                    &workspace,
2631                ))
2632            } else {
2633                // Fallback to local keyword detection
2634                tracing::debug!("No intent from harness, using local keyword detection");
2635                self.detect_context_perception_intent(effective_prompt, &session_id_str, &workspace)
2636            };
2637
2638            if let Some(perception_event) = perception_event {
2639                // Step 3: Fire PreContextPerception hook to get harness decision
2640                tracing::info!(
2641                    intent = %perception_event.intent,
2642                    target_type = %perception_event.target_type,
2643                    "Context perception intent detected, firing AHP hook"
2644                );
2645
2646                let hook_result = self.fire_pre_context_perception(&perception_event).await;
2647
2648                match hook_result {
2649                    HookResult::Continue(Some(modified_context)) => {
2650                        // AHP harness returned injected context - parse and use it
2651                        #[cfg(feature = "ahp")]
2652                        {
2653                            if let Ok(injected) =
2654                                serde_json::from_value::<InjectedContext>(modified_context)
2655                            {
2656                                tracing::info!(
2657                                    facts = injected.facts.len(),
2658                                    "Using injected context from AHP harness"
2659                                );
2660                                self.apply_injected_context(injected)
2661                            } else {
2662                                // Fall back to normal providers if parsing fails
2663                                tracing::warn!(
2664                                    "Failed to parse injected context, falling back to providers"
2665                                );
2666                                self.resolve_context(effective_prompt, session_id).await
2667                            }
2668                        }
2669                        #[cfg(not(feature = "ahp"))]
2670                        {
2671                            // Without AHP, fall back to normal providers
2672                            let _ = modified_context; // suppress unused warning
2673                            self.resolve_context(effective_prompt, session_id).await
2674                        }
2675                    }
2676                    HookResult::Block(_) => {
2677                        // Harness blocked context injection - skip
2678                        tracing::info!("AHP harness blocked context injection");
2679                        Vec::new()
2680                    }
2681                    _ => {
2682                        // No modification or unknown result, proceed with normal providers
2683                        self.resolve_context(effective_prompt, session_id).await
2684                    }
2685                }
2686            } else {
2687                // No intent detected, proceed with normal providers
2688                self.resolve_context(effective_prompt, session_id).await
2689            }
2690        } else {
2691            Vec::new()
2692        };
2693
2694        // Recall relevant memories as structured context instead of directly
2695        // rewriting the system prompt.
2696        if let Some(ref memory) = self.config.memory {
2697            match memory.recall_similar(effective_prompt, 5).await {
2698                Ok(items) if !items.is_empty() => {
2699                    if let Some(tx) = &event_tx {
2700                        for item in &items {
2701                            tx.send(AgentEvent::MemoryRecalled {
2702                                memory_id: item.id.clone(),
2703                                content: item.content.clone(),
2704                                relevance: item.relevance_score(),
2705                            })
2706                            .await
2707                            .ok();
2708                        }
2709                        tx.send(AgentEvent::MemoriesSearched {
2710                            query: Some(effective_prompt.to_string()),
2711                            tags: Vec::new(),
2712                            result_count: items.len(),
2713                        })
2714                        .await
2715                        .ok();
2716                    }
2717                    context_results.push(crate::memory::memory_items_to_context_result(
2718                        "memory", items,
2719                    ));
2720                }
2721                Ok(_) => {}
2722                Err(e) => {
2723                    tracing::warn!(error = %e, "Failed to recall memory context");
2724                }
2725            }
2726        }
2727
2728        let context_assembly = self.assemble_context_results(&context_results);
2729
2730        // Send context resolved event
2731        if let Some(tx) = &event_tx {
2732            let total_items = context_assembly.items.len();
2733            let total_tokens = context_assembly.total_tokens;
2734
2735            tracing::info!(
2736                context_items = total_items,
2737                context_tokens = total_tokens,
2738                context_truncated = context_assembly.truncated,
2739                "Context resolution completed"
2740            );
2741
2742            tx.send(AgentEvent::ContextResolved {
2743                total_items,
2744                total_tokens,
2745            })
2746            .await
2747            .ok();
2748        }
2749
2750        let augmented_system = self
2751            .build_augmented_system_prompt_with_base(&effective_system_prompt, &context_assembly);
2752
2753        // Add user message
2754        if !msg_prompt.is_empty() {
2755            messages.push(Message::user(msg_prompt));
2756        }
2757
2758        loop {
2759            turn += 1;
2760
2761            if turn > self.config.max_tool_rounds {
2762                let error = format!("Max tool rounds ({}) exceeded", self.config.max_tool_rounds);
2763                if let Some(tx) = &event_tx {
2764                    tx.send(AgentEvent::Error {
2765                        message: error.clone(),
2766                    })
2767                    .await
2768                    .ok();
2769                }
2770                anyhow::bail!(error);
2771            }
2772
2773            // Send turn start event
2774            if let Some(tx) = &event_tx {
2775                tx.send(AgentEvent::TurnStart { turn }).await.ok();
2776            }
2777
2778            tracing::info!(
2779                turn = turn,
2780                max_turns = self.config.max_tool_rounds,
2781                "Agent turn started"
2782            );
2783
2784            // Call LLM - use streaming if we have an event channel
2785            tracing::info!(
2786                a3s.llm.streaming = event_tx.is_some(),
2787                "LLM completion started"
2788            );
2789
2790            // Fire GenerateStart hook
2791            self.fire_generate_start(
2792                session_id.unwrap_or(""),
2793                effective_prompt,
2794                &augmented_system,
2795            )
2796            .await;
2797
2798            let llm_start = std::time::Instant::now();
2799            // Circuit breaker (4.3): retry non-streaming LLM calls on transient failures.
2800            // Each failure increments `consecutive_llm_errors`; on success it resets to 0.
2801            // Streaming mode bails immediately on failure (events can't be replayed).
2802            let response = {
2803                let threshold = self.config.circuit_breaker_threshold.max(1);
2804                let mut attempt = 0u32;
2805                loop {
2806                    attempt += 1;
2807                    let result = self
2808                        .call_llm(
2809                            &messages,
2810                            augmented_system.as_deref(),
2811                            &event_tx,
2812                            cancel_token,
2813                        )
2814                        .await;
2815                    match result {
2816                        Ok(r) => {
2817                            break r;
2818                        }
2819                        // Never retry if cancelled
2820                        Err(e) if cancel_token.is_cancelled() => {
2821                            anyhow::bail!(e);
2822                        }
2823                        // Retry when: non-streaming under threshold, OR first streaming attempt
2824                        Err(e) if attempt < threshold && (event_tx.is_none() || attempt == 1) => {
2825                            tracing::warn!(
2826                                turn = turn,
2827                                attempt = attempt,
2828                                threshold = threshold,
2829                                error = %e,
2830                                "LLM call failed, will retry"
2831                            );
2832                            tokio::time::sleep(Duration::from_millis(100 * attempt as u64)).await;
2833                        }
2834                        // Threshold exceeded or streaming mid-stream: bail
2835                        Err(e) => {
2836                            let msg = if attempt > 1 {
2837                                format!(
2838                                    "LLM circuit breaker triggered: failed after {} attempt(s): {}",
2839                                    attempt, e
2840                                )
2841                            } else {
2842                                format!("LLM call failed: {}", e)
2843                            };
2844                            tracing::error!(turn = turn, attempt = attempt, "{}", msg);
2845                            // Fire OnError hook for LLM failure
2846                            self.fire_on_error(
2847                                session_id.unwrap_or(""),
2848                                ErrorType::LlmFailure,
2849                                &msg,
2850                                serde_json::json!({"turn": turn, "attempt": attempt}),
2851                            )
2852                            .await;
2853                            if let Some(tx) = &event_tx {
2854                                tx.send(AgentEvent::Error {
2855                                    message: msg.clone(),
2856                                })
2857                                .await
2858                                .ok();
2859                            }
2860                            anyhow::bail!(msg);
2861                        }
2862                    }
2863                }
2864            };
2865
2866            // Update usage
2867            total_usage.prompt_tokens += response.usage.prompt_tokens;
2868            total_usage.completion_tokens += response.usage.completion_tokens;
2869            total_usage.total_tokens += response.usage.total_tokens;
2870
2871            // Record LLM completion telemetry
2872            let llm_duration = llm_start.elapsed();
2873            tracing::info!(
2874                turn = turn,
2875                streaming = event_tx.is_some(),
2876                prompt_tokens = response.usage.prompt_tokens,
2877                completion_tokens = response.usage.completion_tokens,
2878                total_tokens = response.usage.total_tokens,
2879                stop_reason = response.stop_reason.as_deref().unwrap_or("unknown"),
2880                duration_ms = llm_duration.as_millis() as u64,
2881                "LLM completion finished"
2882            );
2883
2884            // Fire GenerateEnd hook
2885            self.fire_generate_end(
2886                session_id.unwrap_or(""),
2887                effective_prompt,
2888                &response,
2889                llm_duration.as_millis() as u64,
2890            )
2891            .await;
2892
2893            // Record LLM usage on the llm span
2894            crate::telemetry::record_llm_usage(
2895                response.usage.prompt_tokens,
2896                response.usage.completion_tokens,
2897                response.usage.total_tokens,
2898                response.stop_reason.as_deref(),
2899            );
2900            // Log turn token usage
2901            tracing::info!(
2902                turn = turn,
2903                a3s.llm.total_tokens = response.usage.total_tokens,
2904                "Turn token usage"
2905            );
2906
2907            // Add assistant message to history
2908            messages.push(response.message.clone());
2909
2910            // Check for tool calls
2911            let tool_calls = response.tool_calls();
2912
2913            // Send turn end event
2914            if let Some(tx) = &event_tx {
2915                tx.send(AgentEvent::TurnEnd {
2916                    turn,
2917                    usage: response.usage.clone(),
2918                })
2919                .await
2920                .ok();
2921            }
2922
2923            // Auto-compact: check if context usage exceeds threshold
2924            if self.config.auto_compact {
2925                let used = response.usage.prompt_tokens;
2926                let max = self.config.max_context_tokens;
2927                let threshold = self.config.auto_compact_threshold;
2928
2929                if crate::compaction::should_auto_compact(used, max, threshold) {
2930                    let before_len = messages.len();
2931                    let percent_before = used as f32 / max as f32;
2932
2933                    tracing::info!(
2934                        used_tokens = used,
2935                        max_tokens = max,
2936                        percent = percent_before,
2937                        threshold = threshold,
2938                        "Auto-compact triggered"
2939                    );
2940
2941                    // Step 1: Prune large tool outputs first (cheap, no LLM call)
2942                    if let Some(pruned) = crate::compaction::prune_tool_outputs(&messages) {
2943                        messages = pruned;
2944                        tracing::info!("Tool output pruning applied");
2945                    }
2946
2947                    // Step 2: Full summarization using the agent's LLM client
2948                    if let Ok(Some(compacted)) = crate::compaction::compact_messages(
2949                        session_id.unwrap_or(""),
2950                        &messages,
2951                        &self.llm_client,
2952                    )
2953                    .await
2954                    {
2955                        messages = compacted;
2956                    }
2957
2958                    // Emit compaction event
2959                    if let Some(tx) = &event_tx {
2960                        tx.send(AgentEvent::ContextCompacted {
2961                            session_id: session_id.unwrap_or("").to_string(),
2962                            before_messages: before_len,
2963                            after_messages: messages.len(),
2964                            percent_before,
2965                        })
2966                        .await
2967                        .ok();
2968                    }
2969                }
2970            }
2971
2972            if tool_calls.is_empty() {
2973                // No tool calls — check if we should inject a continuation message
2974                // before treating this as a final answer.
2975                let final_text = response.text();
2976
2977                if self.config.continuation_enabled
2978                    && continuation_count < self.config.max_continuation_turns
2979                    && turn < self.config.max_tool_rounds  // never inject past the turn limit
2980                    && Self::looks_incomplete(&final_text)
2981                {
2982                    continuation_count += 1;
2983                    tracing::info!(
2984                        turn = turn,
2985                        continuation = continuation_count,
2986                        max_continuation = self.config.max_continuation_turns,
2987                        "Injecting continuation message — response looks incomplete"
2988                    );
2989                    // Inject continuation as a user message and keep looping
2990                    messages.push(Message::user(crate::prompts::CONTINUATION));
2991                    continue;
2992                }
2993
2994                // Sanitize output to redact any sensitive data before returning
2995                let final_text = if let Some(ref sp) = self.config.security_provider {
2996                    sp.sanitize_output(&final_text)
2997                } else {
2998                    final_text
2999                };
3000
3001                // Record final totals
3002                tracing::info!(
3003                    tool_calls_count = tool_calls_count,
3004                    total_prompt_tokens = total_usage.prompt_tokens,
3005                    total_completion_tokens = total_usage.completion_tokens,
3006                    total_tokens = total_usage.total_tokens,
3007                    turns = turn,
3008                    "Agent execution completed"
3009                );
3010
3011                if emit_end {
3012                    if let Some(tx) = &event_tx {
3013                        let verification_summary =
3014                            crate::verification::VerificationSummary::from_reports(
3015                                &verification_reports,
3016                            );
3017                        tx.send(AgentEvent::End {
3018                            text: final_text.clone(),
3019                            usage: total_usage.clone(),
3020                            verification_summary: Box::new(verification_summary),
3021                            meta: response.meta.clone(),
3022                        })
3023                        .await
3024                        .ok();
3025                    }
3026                }
3027
3028                // Notify context providers of turn completion for memory extraction
3029                if let Some(sid) = session_id {
3030                    self.notify_turn_complete(sid, effective_prompt, &final_text)
3031                        .await;
3032                }
3033
3034                return Ok(AgentResult {
3035                    text: final_text,
3036                    messages,
3037                    usage: total_usage,
3038                    tool_calls_count,
3039                    verification_reports,
3040                });
3041            }
3042
3043            // Execute tools sequentially
3044            // Fast path: when all tool calls are independent file writes and no hooks/HITL
3045            // are configured, execute them concurrently to avoid serial I/O bottleneck.
3046            let tool_calls = if self.config.hook_engine.is_none()
3047                && self.config.confirmation_manager.is_none()
3048                && tool_calls.len() > 1
3049                && tool_calls
3050                    .iter()
3051                    .all(|tc| Self::is_parallel_safe_write(&tc.name, &tc.args))
3052                && {
3053                    // All target paths must be distinct (no write-write conflicts)
3054                    let paths: Vec<_> = tool_calls
3055                        .iter()
3056                        .filter_map(|tc| Self::extract_write_path(&tc.args))
3057                        .collect();
3058                    paths.len() == tool_calls.len()
3059                        && paths.iter().collect::<std::collections::HashSet<_>>().len()
3060                            == paths.len()
3061                } {
3062                tracing::info!(
3063                    count = tool_calls.len(),
3064                    "Parallel write batch: executing {} independent file writes concurrently",
3065                    tool_calls.len()
3066                );
3067
3068                let futures: Vec<_> = tool_calls
3069                    .iter()
3070                    .map(|tc| {
3071                        let ctx = self.tool_context.clone();
3072                        let executor = Arc::clone(&self.tool_executor);
3073                        let name = tc.name.clone();
3074                        let args = tc.args.clone();
3075                        async move { executor.execute_with_context(&name, &args, &ctx).await }
3076                    })
3077                    .collect();
3078
3079                let results = join_all(futures).await;
3080
3081                // Post-process results in original order (sequential, preserves message ordering)
3082                for (tc, result) in tool_calls.iter().zip(results) {
3083                    tool_calls_count += 1;
3084                    let (output, exit_code, is_error, metadata, images) =
3085                        Self::tool_result_to_tuple(result);
3086                    Self::collect_verification_report(&mut verification_reports, &metadata);
3087
3088                    // Track tool call in progress tracker
3089                    self.track_tool_result(&tc.name, &tc.args, exit_code);
3090
3091                    let output = if let Some(ref sp) = self.config.security_provider {
3092                        sp.sanitize_output(&output)
3093                    } else {
3094                        output
3095                    };
3096
3097                    if let Some(tx) = &event_tx {
3098                        tx.send(AgentEvent::ToolEnd {
3099                            id: tc.id.clone(),
3100                            name: tc.name.clone(),
3101                            output: output.clone(),
3102                            exit_code,
3103                            metadata,
3104                        })
3105                        .await
3106                        .ok();
3107                    }
3108
3109                    if images.is_empty() {
3110                        messages.push(Message::tool_result(&tc.id, &output, is_error));
3111                    } else {
3112                        messages.push(Message::tool_result_with_images(
3113                            &tc.id, &output, &images, is_error,
3114                        ));
3115                    }
3116                }
3117
3118                // Skip the sequential loop below
3119                continue;
3120            } else {
3121                tool_calls
3122            };
3123
3124            for tool_call in tool_calls {
3125                tool_calls_count += 1;
3126
3127                let tool_start = std::time::Instant::now();
3128
3129                tracing::info!(
3130                    tool_name = tool_call.name.as_str(),
3131                    tool_id = tool_call.id.as_str(),
3132                    "Tool execution started"
3133                );
3134
3135                // Send tool start event (only if not already sent during streaming)
3136                // In streaming mode, ToolStart is sent when we receive ToolUseStart from LLM
3137                // But we still need to send ToolEnd after execution
3138
3139                // Check for malformed tool arguments from LLM (4.1 parse error recovery)
3140                if let Some(parse_error) =
3141                    tool_call.args.get("__parse_error").and_then(|v| v.as_str())
3142                {
3143                    parse_error_count += 1;
3144                    let error_msg = format!("Error: {}", parse_error);
3145                    tracing::warn!(
3146                        tool = tool_call.name.as_str(),
3147                        parse_error_count = parse_error_count,
3148                        max_parse_retries = self.config.max_parse_retries,
3149                        "Malformed tool arguments from LLM"
3150                    );
3151
3152                    if let Some(tx) = &event_tx {
3153                        tx.send(AgentEvent::ToolEnd {
3154                            id: tool_call.id.clone(),
3155                            name: tool_call.name.clone(),
3156                            output: error_msg.clone(),
3157                            exit_code: 1,
3158                            metadata: None,
3159                        })
3160                        .await
3161                        .ok();
3162                    }
3163
3164                    messages.push(Message::tool_result(&tool_call.id, &error_msg, true));
3165
3166                    if parse_error_count > self.config.max_parse_retries {
3167                        let msg = format!(
3168                            "LLM produced malformed tool arguments {} time(s) in a row \
3169                             (max_parse_retries={}); giving up",
3170                            parse_error_count, self.config.max_parse_retries
3171                        );
3172                        tracing::error!("{}", msg);
3173                        if let Some(tx) = &event_tx {
3174                            tx.send(AgentEvent::Error {
3175                                message: msg.clone(),
3176                            })
3177                            .await
3178                            .ok();
3179                        }
3180                        anyhow::bail!(msg);
3181                    }
3182                    continue;
3183                }
3184
3185                // Tool args are valid — reset parse error counter
3186                parse_error_count = 0;
3187
3188                // Check skill-based tool permissions
3189                if let Some(ref registry) = self.config.skill_registry {
3190                    let instruction_skills =
3191                        registry.by_kind(crate::skills::SkillKind::Instruction);
3192                    let has_restrictions =
3193                        instruction_skills.iter().any(|s| s.allowed_tools.is_some());
3194                    if has_restrictions {
3195                        let allowed = instruction_skills
3196                            .iter()
3197                            .any(|s| s.is_tool_allowed(&tool_call.name));
3198                        if !allowed {
3199                            let msg = format!(
3200                                "Tool '{}' is not allowed by any active skill.",
3201                                tool_call.name
3202                            );
3203                            tracing::info!(
3204                                tool_name = tool_call.name.as_str(),
3205                                "Tool blocked by skill registry"
3206                            );
3207                            if let Some(tx) = &event_tx {
3208                                tx.send(AgentEvent::PermissionDenied {
3209                                    tool_id: tool_call.id.clone(),
3210                                    tool_name: tool_call.name.clone(),
3211                                    args: tool_call.args.clone(),
3212                                    reason: msg.clone(),
3213                                })
3214                                .await
3215                                .ok();
3216                            }
3217                            messages.push(Message::tool_result(&tool_call.id, &msg, true));
3218                            continue;
3219                        }
3220                    }
3221                }
3222
3223                // Fire PreToolUse hook (may block the tool call)
3224                if let Some(HookResult::Block(reason)) = self
3225                    .fire_pre_tool_use(
3226                        session_id.unwrap_or(""),
3227                        &tool_call.name,
3228                        &tool_call.args,
3229                        recent_tool_signatures.clone(),
3230                    )
3231                    .await
3232                {
3233                    let msg = format!("Tool '{}' blocked by hook: {}", tool_call.name, reason);
3234                    tracing::info!(
3235                        tool_name = tool_call.name.as_str(),
3236                        "Tool blocked by PreToolUse hook"
3237                    );
3238
3239                    if let Some(tx) = &event_tx {
3240                        tx.send(AgentEvent::PermissionDenied {
3241                            tool_id: tool_call.id.clone(),
3242                            tool_name: tool_call.name.clone(),
3243                            args: tool_call.args.clone(),
3244                            reason: reason.clone(),
3245                        })
3246                        .await
3247                        .ok();
3248                    }
3249
3250                    messages.push(Message::tool_result(&tool_call.id, &msg, true));
3251                    continue;
3252                }
3253
3254                // Check permission before executing tool
3255                let permission_decision = if let Some(checker) = &self.config.permission_checker {
3256                    checker.check(&tool_call.name, &tool_call.args)
3257                } else {
3258                    // No policy configured — default to Ask so HITL can still intervene
3259                    PermissionDecision::Ask
3260                };
3261
3262                let (output, exit_code, is_error, metadata, images) = match permission_decision {
3263                    PermissionDecision::Deny => {
3264                        tracing::info!(
3265                            tool_name = tool_call.name.as_str(),
3266                            permission = "deny",
3267                            "Tool permission denied"
3268                        );
3269                        // Tool execution denied by permission policy
3270                        let denial_msg = format!(
3271                            "Permission denied: Tool '{}' is blocked by permission policy.",
3272                            tool_call.name
3273                        );
3274
3275                        // Send permission denied event
3276                        if let Some(tx) = &event_tx {
3277                            tx.send(AgentEvent::PermissionDenied {
3278                                tool_id: tool_call.id.clone(),
3279                                tool_name: tool_call.name.clone(),
3280                                args: tool_call.args.clone(),
3281                                reason: "Blocked by deny rule in permission policy".to_string(),
3282                            })
3283                            .await
3284                            .ok();
3285                        }
3286
3287                        (denial_msg, 1, true, None, Vec::new())
3288                    }
3289                    PermissionDecision::Allow => {
3290                        tracing::info!(
3291                            tool_name = tool_call.name.as_str(),
3292                            permission = "allow",
3293                            "Tool permission: allow"
3294                        );
3295                        // Permission explicitly allows — execute directly, no HITL
3296                        let stream_ctx =
3297                            self.streaming_tool_context(&event_tx, &tool_call.id, &tool_call.name);
3298                        let result = self
3299                            .execute_tool_queued_or_direct(
3300                                &tool_call.name,
3301                                &tool_call.args,
3302                                &stream_ctx,
3303                            )
3304                            .await;
3305
3306                        let tuple = Self::tool_result_to_tuple(result);
3307                        // Track tool call in progress tracker
3308                        let (_, exit_code, _, _, _) = tuple;
3309                        self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
3310                        tuple
3311                    }
3312                    PermissionDecision::Ask => {
3313                        tracing::info!(
3314                            tool_name = tool_call.name.as_str(),
3315                            permission = "ask",
3316                            "Tool permission: ask"
3317                        );
3318                        // Permission says Ask — delegate to HITL confirmation manager
3319                        if let Some(cm) = &self.config.confirmation_manager {
3320                            // Check YOLO lanes: if the tool's lane is in YOLO mode, skip confirmation
3321                            if !cm.requires_confirmation(&tool_call.name).await {
3322                                let stream_ctx = self.streaming_tool_context(
3323                                    &event_tx,
3324                                    &tool_call.id,
3325                                    &tool_call.name,
3326                                );
3327                                let result = self
3328                                    .execute_tool_queued_or_direct(
3329                                        &tool_call.name,
3330                                        &tool_call.args,
3331                                        &stream_ctx,
3332                                    )
3333                                    .await;
3334
3335                                let (output, exit_code, is_error, metadata, images) =
3336                                    Self::tool_result_to_tuple(result);
3337                                Self::collect_verification_report(
3338                                    &mut verification_reports,
3339                                    &metadata,
3340                                );
3341
3342                                // Track tool call in progress tracker
3343                                self.track_tool_result(&tool_call.name, &tool_call.args, exit_code);
3344
3345                                // Add tool result to messages
3346                                if images.is_empty() {
3347                                    messages.push(Message::tool_result(
3348                                        &tool_call.id,
3349                                        &output,
3350                                        is_error,
3351                                    ));
3352                                } else {
3353                                    messages.push(Message::tool_result_with_images(
3354                                        &tool_call.id,
3355                                        &output,
3356                                        &images,
3357                                        is_error,
3358                                    ));
3359                                }
3360
3361                                // Record tool result on the tool span for early exit
3362                                let tool_duration = tool_start.elapsed();
3363                                crate::telemetry::record_tool_result(exit_code, tool_duration);
3364
3365                                // Send ToolEnd event
3366                                if let Some(tx) = &event_tx {
3367                                    tx.send(AgentEvent::ToolEnd {
3368                                        id: tool_call.id.clone(),
3369                                        name: tool_call.name.clone(),
3370                                        output: output.clone(),
3371                                        exit_code,
3372                                        metadata,
3373                                    })
3374                                    .await
3375                                    .ok();
3376                                }
3377
3378                                // Fire PostToolUse hook (fire-and-forget)
3379                                self.fire_post_tool_use(
3380                                    session_id.unwrap_or(""),
3381                                    &tool_call.name,
3382                                    &tool_call.args,
3383                                    &output,
3384                                    exit_code == 0,
3385                                    tool_duration.as_millis() as u64,
3386                                )
3387                                .await;
3388
3389                                continue; // Skip the rest, move to next tool call
3390                            }
3391
3392                            // Get timeout from policy
3393                            let policy = cm.policy().await;
3394                            let timeout_ms = policy.default_timeout_ms;
3395                            let timeout_action = policy.timeout_action;
3396
3397                            // Request confirmation (this emits ConfirmationRequired event)
3398                            let rx = cm
3399                                .request_confirmation(
3400                                    &tool_call.id,
3401                                    &tool_call.name,
3402                                    &tool_call.args,
3403                                )
3404                                .await;
3405
3406                            // Forward ConfirmationRequired to the streaming event channel
3407                            // so external consumers (e.g. SafeClaw engine) can relay it
3408                            // to the browser UI.
3409                            if let Some(tx) = &event_tx {
3410                                tx.send(AgentEvent::ConfirmationRequired {
3411                                    tool_id: tool_call.id.clone(),
3412                                    tool_name: tool_call.name.clone(),
3413                                    args: tool_call.args.clone(),
3414                                    timeout_ms,
3415                                })
3416                                .await
3417                                .ok();
3418                            }
3419
3420                            // Wait for confirmation with timeout
3421                            let confirmation_result =
3422                                tokio::time::timeout(Duration::from_millis(timeout_ms), rx).await;
3423
3424                            match confirmation_result {
3425                                Ok(Ok(response)) => {
3426                                    // Forward ConfirmationReceived
3427                                    if let Some(tx) = &event_tx {
3428                                        tx.send(AgentEvent::ConfirmationReceived {
3429                                            tool_id: tool_call.id.clone(),
3430                                            approved: response.approved,
3431                                            reason: response.reason.clone(),
3432                                        })
3433                                        .await
3434                                        .ok();
3435                                    }
3436                                    if response.approved {
3437                                        let stream_ctx = self.streaming_tool_context(
3438                                            &event_tx,
3439                                            &tool_call.id,
3440                                            &tool_call.name,
3441                                        );
3442                                        let result = self
3443                                            .execute_tool_queued_or_direct(
3444                                                &tool_call.name,
3445                                                &tool_call.args,
3446                                                &stream_ctx,
3447                                            )
3448                                            .await;
3449
3450                                        let tuple = Self::tool_result_to_tuple(result);
3451                                        // Track tool call in progress tracker
3452                                        let (_, exit_code, _, _, _) = tuple;
3453                                        self.track_tool_result(
3454                                            &tool_call.name,
3455                                            &tool_call.args,
3456                                            exit_code,
3457                                        );
3458                                        tuple
3459                                    } else {
3460                                        let rejection_msg = format!(
3461                                            "Tool '{}' execution was REJECTED by the user. Reason: {}. \
3462                                             DO NOT retry this tool call unless the user explicitly asks you to.",
3463                                            tool_call.name,
3464                                            response.reason.unwrap_or_else(|| "No reason provided".to_string())
3465                                        );
3466                                        (rejection_msg, 1, true, None, Vec::new())
3467                                    }
3468                                }
3469                                Ok(Err(_)) => {
3470                                    // Forward ConfirmationTimeout (channel closed = effectively timed out)
3471                                    if let Some(tx) = &event_tx {
3472                                        tx.send(AgentEvent::ConfirmationTimeout {
3473                                            tool_id: tool_call.id.clone(),
3474                                            action_taken: "rejected".to_string(),
3475                                        })
3476                                        .await
3477                                        .ok();
3478                                    }
3479                                    let msg = format!(
3480                                        "Tool '{}' confirmation failed: confirmation channel closed",
3481                                        tool_call.name
3482                                    );
3483                                    (msg, 1, true, None, Vec::new())
3484                                }
3485                                Err(_) => {
3486                                    cm.check_timeouts().await;
3487
3488                                    // Forward ConfirmationTimeout
3489                                    if let Some(tx) = &event_tx {
3490                                        tx.send(AgentEvent::ConfirmationTimeout {
3491                                            tool_id: tool_call.id.clone(),
3492                                            action_taken: match timeout_action {
3493                                                crate::hitl::TimeoutAction::Reject => {
3494                                                    "rejected".to_string()
3495                                                }
3496                                                crate::hitl::TimeoutAction::AutoApprove => {
3497                                                    "auto_approved".to_string()
3498                                                }
3499                                            },
3500                                        })
3501                                        .await
3502                                        .ok();
3503                                    }
3504
3505                                    match timeout_action {
3506                                        crate::hitl::TimeoutAction::Reject => {
3507                                            let msg = format!(
3508                                                "Tool '{}' execution was REJECTED: user confirmation timed out after {}ms. \
3509                                                 DO NOT retry this tool call — the user did not approve it. \
3510                                                 Inform the user that the operation requires their approval and ask them to try again.",
3511                                                tool_call.name, timeout_ms
3512                                            );
3513                                            (msg, 1, true, None, Vec::new())
3514                                        }
3515                                        crate::hitl::TimeoutAction::AutoApprove => {
3516                                            let stream_ctx = self.streaming_tool_context(
3517                                                &event_tx,
3518                                                &tool_call.id,
3519                                                &tool_call.name,
3520                                            );
3521                                            let result = self
3522                                                .execute_tool_queued_or_direct(
3523                                                    &tool_call.name,
3524                                                    &tool_call.args,
3525                                                    &stream_ctx,
3526                                                )
3527                                                .await;
3528
3529                                            let tuple = Self::tool_result_to_tuple(result);
3530                                            // Track tool call in progress tracker
3531                                            let (_, exit_code, _, _, _) = tuple;
3532                                            self.track_tool_result(
3533                                                &tool_call.name,
3534                                                &tool_call.args,
3535                                                exit_code,
3536                                            );
3537                                            tuple
3538                                        }
3539                                    }
3540                                }
3541                            }
3542                        } else {
3543                            // Ask without confirmation manager — safe deny
3544                            let msg = format!(
3545                                "Tool '{}' requires confirmation but no HITL confirmation manager is configured. \
3546                                 Configure a confirmation policy to enable tool execution.",
3547                                tool_call.name
3548                            );
3549                            tracing::warn!(
3550                                tool_name = tool_call.name.as_str(),
3551                                "Tool requires confirmation but no HITL manager configured"
3552                            );
3553                            (msg, 1, true, None, Vec::new())
3554                        }
3555                    }
3556                };
3557
3558                let tool_duration = tool_start.elapsed();
3559                crate::telemetry::record_tool_result(exit_code, tool_duration);
3560                Self::collect_verification_report(&mut verification_reports, &metadata);
3561
3562                // Sanitize tool output for sensitive data before it enters the message history
3563                let output = if let Some(ref sp) = self.config.security_provider {
3564                    sp.sanitize_output(&output)
3565                } else {
3566                    output
3567                };
3568
3569                recent_tool_signatures.push(format!(
3570                    "{}:{} => {}",
3571                    tool_call.name,
3572                    serde_json::to_string(&tool_call.args).unwrap_or_default(),
3573                    if is_error { "error" } else { "ok" }
3574                ));
3575                if recent_tool_signatures.len() > 8 {
3576                    let overflow = recent_tool_signatures.len() - 8;
3577                    recent_tool_signatures.drain(0..overflow);
3578                }
3579
3580                // Fire PostToolUse hook (fire-and-forget)
3581                self.fire_post_tool_use(
3582                    session_id.unwrap_or(""),
3583                    &tool_call.name,
3584                    &tool_call.args,
3585                    &output,
3586                    exit_code == 0,
3587                    tool_duration.as_millis() as u64,
3588                )
3589                .await;
3590
3591                // Auto-remember tool result in long-term memory
3592                if let Some(ref memory) = self.config.memory {
3593                    let tools_used = [tool_call.name.clone()];
3594                    let remember_result = if exit_code == 0 {
3595                        memory
3596                            .remember_success(effective_prompt, &tools_used, &output)
3597                            .await
3598                    } else {
3599                        memory
3600                            .remember_failure(effective_prompt, &output, &tools_used)
3601                            .await
3602                    };
3603                    match remember_result {
3604                        Ok(()) => {
3605                            if let Some(tx) = &event_tx {
3606                                let item_type = if exit_code == 0 { "success" } else { "failure" };
3607                                tx.send(AgentEvent::MemoryStored {
3608                                    memory_id: uuid::Uuid::new_v4().to_string(),
3609                                    memory_type: item_type.to_string(),
3610                                    importance: if exit_code == 0 { 0.8 } else { 0.9 },
3611                                    tags: vec![item_type.to_string(), tool_call.name.clone()],
3612                                })
3613                                .await
3614                                .ok();
3615                            }
3616                        }
3617                        Err(e) => {
3618                            tracing::warn!("Failed to store memory after tool execution: {}", e);
3619                        }
3620                    }
3621                }
3622
3623                // Send tool end event
3624                if let Some(tx) = &event_tx {
3625                    tx.send(AgentEvent::ToolEnd {
3626                        id: tool_call.id.clone(),
3627                        name: tool_call.name.clone(),
3628                        output: output.clone(),
3629                        exit_code,
3630                        metadata,
3631                    })
3632                    .await
3633                    .ok();
3634                }
3635
3636                // Add tool result to messages
3637                if images.is_empty() {
3638                    messages.push(Message::tool_result(&tool_call.id, &output, is_error));
3639                } else {
3640                    messages.push(Message::tool_result_with_images(
3641                        &tool_call.id,
3642                        &output,
3643                        &images,
3644                        is_error,
3645                    ));
3646                }
3647            }
3648        }
3649    }
3650
3651    /// Create an execution plan for a prompt
3652    ///
3653    /// Delegates to [`LlmPlanner`] for structured JSON plan generation,
3654    /// falling back to heuristic planning if the LLM call fails.
3655    pub async fn plan(&self, prompt: &str, _context: Option<&str>) -> Result<ExecutionPlan> {
3656        use crate::planning::LlmPlanner;
3657
3658        match LlmPlanner::create_plan(&self.llm_client, prompt).await {
3659            Ok(plan) => Ok(plan),
3660            Err(e) => {
3661                tracing::warn!("LLM plan creation failed, using fallback: {}", e);
3662                Ok(LlmPlanner::fallback_plan(prompt))
3663            }
3664        }
3665    }
3666
3667    /// Execute with planning phase.
3668    ///
3669    /// If `pre_analysis` is provided (from a single pre-analysis LLM call in
3670    /// `execute_with_session`), the goal and plan are already available and no
3671    /// additional LLM calls are needed for planning. Otherwise, falls back to
3672    /// calling `extract_goal` and `plan` individually.
3673    pub async fn execute_with_planning(
3674        &self,
3675        history: &[Message],
3676        prompt: &str,
3677        session_id: Option<&str>,
3678        event_tx: Option<mpsc::Sender<AgentEvent>>,
3679        pre_analysis: Option<PreAnalysis>,
3680    ) -> Result<AgentResult> {
3681        // Send planning start event
3682        if let Some(tx) = &event_tx {
3683            tx.send(AgentEvent::PlanningStart {
3684                prompt: prompt.to_string(),
3685            })
3686            .await
3687            .ok();
3688        }
3689
3690        // Use pre-analysis result if available (goal + plan already computed in one LLM call).
3691        let (goal, plan) = if let Some(analysis) = pre_analysis {
3692            (Some(analysis.goal.clone()), analysis.execution_plan.clone())
3693        } else {
3694            // Fall back: extract goal and create plan via separate LLM calls.
3695            let g = if self.config.goal_tracking {
3696                Some(self.extract_goal(prompt).await?)
3697            } else {
3698                None
3699            };
3700            let p = self.plan(prompt, None).await?;
3701            (g, p)
3702        };
3703
3704        // Send GoalExtracted event if goal_tracking is enabled.
3705        if self.config.goal_tracking {
3706            if let Some(ref g) = goal {
3707                if let Some(tx) = &event_tx {
3708                    tx.send(AgentEvent::GoalExtracted { goal: g.clone() })
3709                        .await
3710                        .ok();
3711                }
3712            }
3713        }
3714
3715        // Send planning end event
3716        if let Some(tx) = &event_tx {
3717            tx.send(AgentEvent::PlanningEnd {
3718                estimated_steps: plan.steps.len(),
3719                plan: plan.clone(),
3720            })
3721            .await
3722            .ok();
3723        }
3724
3725        let plan_start = std::time::Instant::now();
3726
3727        // Execute the plan step by step
3728        let result = self
3729            .execute_plan(history, &plan, session_id, event_tx.clone())
3730            .await?;
3731
3732        // Emit the final End event (execute_loop_inner does not emit End in planning mode)
3733        if let Some(tx) = &event_tx {
3734            tx.send(AgentEvent::End {
3735                text: result.text.clone(),
3736                usage: result.usage.clone(),
3737                verification_summary: Box::new(result.verification_summary()),
3738                meta: None,
3739            })
3740            .await
3741            .ok();
3742        }
3743
3744        // Check goal achievement when goal_tracking is enabled
3745        if self.config.goal_tracking {
3746            if let Some(ref g) = goal {
3747                let achieved = self.check_goal_achievement(g, &result.text).await?;
3748                if achieved {
3749                    if let Some(tx) = &event_tx {
3750                        tx.send(AgentEvent::GoalAchieved {
3751                            goal: g.description.clone(),
3752                            total_steps: result.messages.len(),
3753                            duration_ms: plan_start.elapsed().as_millis() as i64,
3754                        })
3755                        .await
3756                        .ok();
3757                    }
3758                }
3759            }
3760        }
3761
3762        Ok(result)
3763    }
3764
3765    /// Execute an execution plan using wave-based dependency-aware scheduling.
3766    ///
3767    /// Steps with no unmet dependencies are grouped into "waves". A wave with
3768    /// a single step executes sequentially (preserving the history chain). A
3769    /// wave with multiple independent steps executes them in parallel via
3770    /// `JoinSet`, then merges their results back into the shared history.
3771    async fn execute_plan(
3772        &self,
3773        history: &[Message],
3774        plan: &ExecutionPlan,
3775        session_id: Option<&str>,
3776        event_tx: Option<mpsc::Sender<AgentEvent>>,
3777    ) -> Result<AgentResult> {
3778        let mut plan = plan.clone();
3779        let task_session_id = session_id.unwrap_or("").to_string();
3780        let mut current_history = history.to_vec();
3781        let mut total_usage = TokenUsage::default();
3782        let mut tool_calls_count = 0;
3783        let total_steps = plan.steps.len();
3784
3785        // Add initial user message with the goal
3786        let steps_text = plan
3787            .steps
3788            .iter()
3789            .enumerate()
3790            .map(|(i, step)| format!("{}. {}", i + 1, step.content))
3791            .collect::<Vec<_>>()
3792            .join("\n");
3793        current_history.push(Message::user(&crate::prompts::render(
3794            crate::prompts::PLAN_EXECUTE_GOAL,
3795            &[("goal", &plan.goal), ("steps", &steps_text)],
3796        )));
3797        self.emit_task_updated(&event_tx, &task_session_id, &plan)
3798            .await;
3799
3800        loop {
3801            let ready: Vec<String> = plan
3802                .get_ready_steps()
3803                .iter()
3804                .map(|s| s.id.clone())
3805                .collect();
3806
3807            if ready.is_empty() {
3808                // All done or deadlock
3809                if plan.has_deadlock() {
3810                    tracing::warn!(
3811                        "Plan deadlock detected: {} pending steps with unresolvable dependencies",
3812                        plan.pending_count()
3813                    );
3814                }
3815                break;
3816            }
3817
3818            if ready.len() == 1 {
3819                // === Single step: sequential execution (preserves history chain) ===
3820                let step_id = &ready[0];
3821                let step = plan
3822                    .steps
3823                    .iter()
3824                    .find(|s| s.id == *step_id)
3825                    .ok_or_else(|| anyhow::anyhow!("step '{}' not found in plan", step_id))?
3826                    .clone();
3827                let step_number = plan
3828                    .steps
3829                    .iter()
3830                    .position(|s| s.id == *step_id)
3831                    .unwrap_or(0)
3832                    + 1;
3833
3834                // Send step start event
3835                plan.mark_status(&step.id, TaskStatus::InProgress);
3836                self.emit_task_updated(&event_tx, &task_session_id, &plan)
3837                    .await;
3838
3839                if let Some(tx) = &event_tx {
3840                    tx.send(AgentEvent::StepStart {
3841                        step_id: step.id.clone(),
3842                        description: step.content.clone(),
3843                        step_number,
3844                        total_steps,
3845                    })
3846                    .await
3847                    .ok();
3848                }
3849
3850                if Self::should_delegate_plan_step(&step) {
3851                    let tool_name = match Self::normalized_plan_tool(&step) {
3852                        Some("parallel_task") => "parallel_task",
3853                        _ => "task",
3854                    };
3855                    let args = if tool_name == "parallel_task" {
3856                        json!({ "tasks": [Self::delegated_task_args(&step, step_number, total_steps)] })
3857                    } else {
3858                        Self::delegated_task_args(&step, step_number, total_steps)
3859                    };
3860                    let (output, _exit_code, is_error, _metadata) = self
3861                        .execute_delegated_plan_tool(tool_name, &args, session_id, &event_tx)
3862                        .await;
3863                    tool_calls_count += 1;
3864
3865                    if is_error {
3866                        tracing::error!("Delegated plan step '{}' failed: {}", step.id, output);
3867                        current_history.push(Message::user(&format!(
3868                            "Delegated plan step '{}' failed:\n{}",
3869                            step.content, output
3870                        )));
3871                        plan.mark_status(&step.id, TaskStatus::Failed);
3872                        self.emit_task_updated(&event_tx, &task_session_id, &plan)
3873                            .await;
3874
3875                        if let Some(tx) = &event_tx {
3876                            tx.send(AgentEvent::StepEnd {
3877                                step_id: step.id.clone(),
3878                                status: TaskStatus::Failed,
3879                                step_number,
3880                                total_steps,
3881                            })
3882                            .await
3883                            .ok();
3884                        }
3885                    } else {
3886                        current_history.push(Message {
3887                            role: "assistant".to_string(),
3888                            content: vec![crate::llm::ContentBlock::Text { text: output }],
3889                            reasoning_content: None,
3890                        });
3891                        plan.mark_status(&step.id, TaskStatus::Completed);
3892                        self.emit_task_updated(&event_tx, &task_session_id, &plan)
3893                            .await;
3894
3895                        if let Some(tx) = &event_tx {
3896                            tx.send(AgentEvent::StepEnd {
3897                                step_id: step.id.clone(),
3898                                status: TaskStatus::Completed,
3899                                step_number,
3900                                total_steps,
3901                            })
3902                            .await
3903                            .ok();
3904                        }
3905                    }
3906                } else {
3907                    let step_prompt = crate::prompts::render(
3908                        crate::prompts::PLAN_EXECUTE_STEP,
3909                        &[
3910                            ("step_num", &step_number.to_string()),
3911                            ("description", &step.content),
3912                        ],
3913                    );
3914
3915                    match self
3916                        .execute_loop(
3917                            &current_history,
3918                            &step_prompt,
3919                            AgentStyle::GeneralPurpose,
3920                            None,
3921                            event_tx.clone(),
3922                            &tokio_util::sync::CancellationToken::new(),
3923                            false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
3924                        )
3925                        .await
3926                    {
3927                        Ok(result) => {
3928                            current_history = result.messages.clone();
3929                            total_usage.prompt_tokens += result.usage.prompt_tokens;
3930                            total_usage.completion_tokens += result.usage.completion_tokens;
3931                            total_usage.total_tokens += result.usage.total_tokens;
3932                            tool_calls_count += result.tool_calls_count;
3933                            plan.mark_status(&step.id, TaskStatus::Completed);
3934                            self.emit_task_updated(&event_tx, &task_session_id, &plan)
3935                                .await;
3936
3937                            if let Some(tx) = &event_tx {
3938                                tx.send(AgentEvent::StepEnd {
3939                                    step_id: step.id.clone(),
3940                                    status: TaskStatus::Completed,
3941                                    step_number,
3942                                    total_steps,
3943                                })
3944                                .await
3945                                .ok();
3946                            }
3947                        }
3948                        Err(e) => {
3949                            tracing::error!("Plan step '{}' failed: {}", step.id, e);
3950                            plan.mark_status(&step.id, TaskStatus::Failed);
3951                            self.emit_task_updated(&event_tx, &task_session_id, &plan)
3952                                .await;
3953
3954                            if let Some(tx) = &event_tx {
3955                                tx.send(AgentEvent::StepEnd {
3956                                    step_id: step.id.clone(),
3957                                    status: TaskStatus::Failed,
3958                                    step_number,
3959                                    total_steps,
3960                                })
3961                                .await
3962                                .ok();
3963                            }
3964                        }
3965                    }
3966                }
3967            } else {
3968                // === Multiple steps: parallel execution via JoinSet ===
3969                // NOTE: Each parallel branch gets a clone of the base history.
3970                // Individual branch histories (tool calls, LLM turns) are NOT merged
3971                // back — only a summary message is appended. This is a deliberate
3972                // trade-off: merging divergent histories in a deterministic order is
3973                // complex and the summary approach keeps the context window manageable.
3974                let ready_steps: Vec<_> = ready
3975                    .iter()
3976                    .filter_map(|id| {
3977                        let step = plan.steps.iter().find(|s| s.id == *id)?.clone();
3978                        let step_number =
3979                            plan.steps.iter().position(|s| s.id == *id).unwrap_or(0) + 1;
3980                        Some((step, step_number))
3981                    })
3982                    .collect();
3983
3984                // Mark all as InProgress and emit one task-list snapshot for the wave.
3985                for (step, _) in &ready_steps {
3986                    plan.mark_status(&step.id, TaskStatus::InProgress);
3987                }
3988                self.emit_task_updated(&event_tx, &task_session_id, &plan)
3989                    .await;
3990
3991                // Emit StepStart events after the authoritative task-list snapshot.
3992                for (step, step_number) in &ready_steps {
3993                    if let Some(tx) = &event_tx {
3994                        tx.send(AgentEvent::StepStart {
3995                            step_id: step.id.clone(),
3996                            description: step.content.clone(),
3997                            step_number: *step_number,
3998                            total_steps,
3999                        })
4000                        .await
4001                        .ok();
4002                    }
4003                }
4004
4005                let mut parallel_results: Vec<ParallelStepResult> = Vec::new();
4006                if ready_steps
4007                    .iter()
4008                    .all(|(step, _)| Self::should_delegate_plan_step(step))
4009                {
4010                    let args = Self::parallel_delegated_task_args(&ready_steps, total_steps);
4011                    let (output, _exit_code, is_error, metadata) = self
4012                        .execute_delegated_plan_tool("parallel_task", &args, session_id, &event_tx)
4013                        .await;
4014                    tool_calls_count += 1;
4015
4016                    let status = if is_error {
4017                        TaskStatus::Failed
4018                    } else {
4019                        TaskStatus::Completed
4020                    };
4021                    for (step, step_number) in &ready_steps {
4022                        plan.mark_status(&step.id, status);
4023                        self.emit_task_updated(&event_tx, &task_session_id, &plan)
4024                            .await;
4025
4026                        parallel_results.push(ParallelStepResult {
4027                            step_id: step.id.clone(),
4028                            step_number: *step_number as u32,
4029                            status: if is_error { "failed" } else { "completed" }.to_string(),
4030                            summary: if is_error {
4031                                String::new()
4032                            } else {
4033                                output.trim().to_string()
4034                            },
4035                            key_findings: None,
4036                            error: is_error.then(|| output.clone()),
4037                            data: metadata.clone(),
4038                        });
4039
4040                        if let Some(tx) = &event_tx {
4041                            tx.send(AgentEvent::StepEnd {
4042                                step_id: step.id.clone(),
4043                                status,
4044                                step_number: *step_number,
4045                                total_steps,
4046                            })
4047                            .await
4048                            .ok();
4049                        }
4050                    }
4051
4052                    if is_error {
4053                        current_history.push(Message::user(&format!(
4054                            "Delegated parallel plan wave failed:\n{}",
4055                            output
4056                        )));
4057                    } else {
4058                        current_history.push(Message {
4059                            role: "assistant".to_string(),
4060                            content: vec![crate::llm::ContentBlock::Text {
4061                                text: output.clone(),
4062                            }],
4063                            reasoning_content: None,
4064                        });
4065                    }
4066                } else {
4067                    // Spawn all into JoinSet, each with a clone of the base history
4068                    let mut join_set = tokio::task::JoinSet::new();
4069                    for (step, step_number) in &ready_steps {
4070                        let base_history = current_history.clone();
4071                        let agent_clone = self.clone();
4072                        let tx = event_tx.clone();
4073                        let step_clone = step.clone();
4074                        let sn = *step_number;
4075
4076                        join_set.spawn(async move {
4077                            let prompt = crate::prompts::render(
4078                                crate::prompts::PLAN_EXECUTE_STEP,
4079                                &[
4080                                    ("step_num", &sn.to_string()),
4081                                    ("description", &step_clone.content),
4082                                ],
4083                            );
4084                            let result = agent_clone
4085                                .execute_loop(
4086                                    &base_history,
4087                                    &prompt,
4088                                    AgentStyle::GeneralPurpose,
4089                                    None,
4090                                    tx,
4091                                    &tokio_util::sync::CancellationToken::new(),
4092                                    false, // emit_end: false — End is emitted by execute_with_planning after execute_plan
4093                                )
4094                                .await;
4095                            (step_clone.id, sn, result)
4096                        });
4097                    }
4098
4099                    // Collect results
4100                    while let Some(join_result) = join_set.join_next().await {
4101                        match join_result {
4102                            Ok((step_id, step_number, step_result)) => match step_result {
4103                                Ok(result) => {
4104                                    total_usage.prompt_tokens += result.usage.prompt_tokens;
4105                                    total_usage.completion_tokens += result.usage.completion_tokens;
4106                                    total_usage.total_tokens += result.usage.total_tokens;
4107                                    tool_calls_count += result.tool_calls_count;
4108                                    plan.mark_status(&step_id, TaskStatus::Completed);
4109                                    self.emit_task_updated(&event_tx, &task_session_id, &plan)
4110                                        .await;
4111
4112                                    parallel_results.push(ParallelStepResult {
4113                                        step_id: step_id.clone(),
4114                                        step_number: step_number as u32,
4115                                        status: "completed".to_string(),
4116                                        summary: result.text.trim().to_string(),
4117                                        key_findings: None,
4118                                        error: None,
4119                                        data: None,
4120                                    });
4121
4122                                    if let Some(tx) = &event_tx {
4123                                        tx.send(AgentEvent::StepEnd {
4124                                            step_id,
4125                                            status: TaskStatus::Completed,
4126                                            step_number,
4127                                            total_steps,
4128                                        })
4129                                        .await
4130                                        .ok();
4131                                    }
4132                                }
4133                                Err(e) => {
4134                                    tracing::error!("Plan step '{}' failed: {}", step_id, e);
4135                                    plan.mark_status(&step_id, TaskStatus::Failed);
4136                                    self.emit_task_updated(&event_tx, &task_session_id, &plan)
4137                                        .await;
4138
4139                                    parallel_results.push(ParallelStepResult {
4140                                        step_id: step_id.clone(),
4141                                        step_number: step_number as u32,
4142                                        status: "failed".to_string(),
4143                                        summary: String::new(),
4144                                        key_findings: None,
4145                                        error: Some(e.to_string()),
4146                                        data: None,
4147                                    });
4148
4149                                    if let Some(tx) = &event_tx {
4150                                        tx.send(AgentEvent::StepEnd {
4151                                            step_id,
4152                                            status: TaskStatus::Failed,
4153                                            step_number,
4154                                            total_steps,
4155                                        })
4156                                        .await
4157                                        .ok();
4158                                    }
4159                                }
4160                            },
4161                            Err(e) => {
4162                                tracing::error!("JoinSet task panicked: {}", e);
4163                            }
4164                        }
4165                    }
4166                }
4167
4168                // Merge parallel results into history for subsequent steps.
4169                // Emit as a structured JSON USER message so the frontend can
4170                // parse and render it in the user's language.
4171                if !parallel_results.is_empty() {
4172                    parallel_results.sort_by_key(|r| r.step_number);
4173                    let envelope = ParallelStepResult::build_envelope(parallel_results);
4174                    current_history.push(Message::user(
4175                        &serde_json::to_string(&envelope).unwrap_or_default(),
4176                    ));
4177                }
4178            }
4179
4180            // Emit GoalProgress after each wave
4181            if self.config.goal_tracking {
4182                let completed = plan
4183                    .steps
4184                    .iter()
4185                    .filter(|s| s.status == TaskStatus::Completed)
4186                    .count();
4187                if let Some(tx) = &event_tx {
4188                    tx.send(AgentEvent::GoalProgress {
4189                        goal: plan.goal.clone(),
4190                        progress: plan.progress(),
4191                        completed_steps: completed,
4192                        total_steps,
4193                    })
4194                    .await
4195                    .ok();
4196                }
4197            }
4198        }
4199
4200        // Get final response — find the last assistant message (not the last history
4201        // entry, which may be a user-summary injected after parallel execution)
4202        let final_text = current_history
4203            .iter()
4204            .rev()
4205            .find(|m| m.role == "assistant")
4206            .map(|m| {
4207                m.content
4208                    .iter()
4209                    .filter_map(|block| {
4210                        if let crate::llm::ContentBlock::Text { text } = block {
4211                            Some(text.as_str())
4212                        } else {
4213                            None
4214                        }
4215                    })
4216                    .collect::<Vec<_>>()
4217                    .join("\n")
4218            })
4219            .unwrap_or_default();
4220
4221        Ok(AgentResult {
4222            text: final_text,
4223            messages: current_history,
4224            usage: total_usage,
4225            tool_calls_count,
4226            verification_reports: Vec::new(),
4227        })
4228    }
4229
4230    /// Extract goal from prompt
4231    ///
4232    /// Delegates to [`LlmPlanner`] for structured JSON goal extraction,
4233    /// falling back to heuristic logic if the LLM call fails.
4234    pub async fn extract_goal(&self, prompt: &str) -> Result<AgentGoal> {
4235        use crate::planning::LlmPlanner;
4236
4237        match LlmPlanner::extract_goal(&self.llm_client, prompt).await {
4238            Ok(goal) => Ok(goal),
4239            Err(e) => {
4240                tracing::warn!("LLM goal extraction failed, using fallback: {}", e);
4241                Ok(LlmPlanner::fallback_goal(prompt))
4242            }
4243        }
4244    }
4245
4246    /// Check if goal is achieved
4247    ///
4248    /// Delegates to [`LlmPlanner`] for structured JSON achievement check,
4249    /// falling back to heuristic logic if the LLM call fails.
4250    pub async fn check_goal_achievement(
4251        &self,
4252        goal: &AgentGoal,
4253        current_state: &str,
4254    ) -> Result<bool> {
4255        use crate::planning::LlmPlanner;
4256
4257        match LlmPlanner::check_achievement(&self.llm_client, goal, current_state).await {
4258            Ok(result) => Ok(result.achieved),
4259            Err(e) => {
4260                tracing::warn!("LLM achievement check failed, using fallback: {}", e);
4261                let result = LlmPlanner::fallback_check_achievement(goal, current_state);
4262                Ok(result.achieved)
4263            }
4264        }
4265    }
4266}
4267
4268#[cfg(test)]
4269mod tests {
4270    use super::*;
4271    use crate::llm::{ContentBlock, StreamEvent};
4272    use crate::permissions::PermissionPolicy;
4273    use crate::tools::ToolExecutor;
4274    use std::path::PathBuf;
4275    use std::sync::atomic::{AtomicUsize, Ordering};
4276
4277    /// Create a default ToolContext for tests
4278    fn test_tool_context() -> ToolContext {
4279        ToolContext::new(PathBuf::from("/tmp"))
4280    }
4281
#[test]
fn test_plan_step_delegation_detection() {
    use crate::planning::Task;

    // Steps tagged with the `task` / `parallel_task` tool are expected to be delegated.
    let via_task = Task::new("s1", "Find relevant files").with_tool("task");
    let via_parallel_task = Task::new("s2", "Check independent areas").with_tool("parallel_task");
    // A step with no tool attached is expected to run directly.
    let direct = Task::new("s3", "Implement directly");

    assert!(AgentLoop::should_delegate_plan_step(&via_task));
    assert!(AgentLoop::should_delegate_plan_step(&via_parallel_task));
    assert!(!AgentLoop::should_delegate_plan_step(&direct));
}
4297
#[test]
fn test_delegated_agent_selection_from_step_text() {
    use crate::planning::Task;

    // (step id, step text, expected delegated agent)
    let cases = [
        ("s1", "查找相关实现", "explore"),
        ("s2", "Run release verification tests", "verification"),
        ("s3", "Review risky code changes", "review"),
        ("s4", "Design the architecture", "plan"),
        ("s5", "Implement the change", "general"),
    ];

    for (id, content, expected_agent) in cases.iter().copied() {
        assert_eq!(
            AgentLoop::delegated_agent_for_step(&Task::new(id, content)),
            expected_agent
        );
    }
}
4323
#[test]
fn test_delegated_task_args_include_prompt_contract() {
    use crate::planning::Task;

    let step = Task::new("s1", "验证 program 工具")
        .with_tool("task")
        .with_success_criteria("All integration checks pass.");

    // Step 2 of 5.
    let args = AgentLoop::delegated_task_args(&step, 2, 5);

    assert_eq!(args["agent"], "verification");
    assert_eq!(args["description"], "验证 program 工具");

    // The prompt must carry both the step position and the success criteria.
    let prompt = args["prompt"].as_str().unwrap();
    assert!(prompt.contains("2/5"));
    assert!(prompt.contains("All integration checks pass."));
}
4341
#[test]
fn test_parallel_delegated_task_args_preserve_order() {
    use crate::planning::Task;

    let wave = vec![
        (Task::new("s1", "Find docs").with_tool("task"), 1),
        (Task::new("s2", "Run tests").with_tool("task"), 2),
    ];

    let args = AgentLoop::parallel_delegated_task_args(&wave, 2);

    // Comparing the whole list checks both the count and the ordering.
    let agents: Vec<_> = args["tasks"]
        .as_array()
        .unwrap()
        .iter()
        .map(|task| task["agent"].as_str())
        .collect();
    assert_eq!(agents, vec![Some("explore"), Some("verification")]);
}
4357
#[test]
fn test_memory_items_become_context_result() {
    let memory = a3s_memory::MemoryItem::new("Use focused regression tests for context changes.")
        .with_importance(0.8);
    let expected_source = format!("memory://{}", memory.id);

    let result = crate::memory::memory_items_to_context_result("memory", vec![memory.clone()]);

    assert_eq!(result.provider, "memory");
    assert_eq!(result.items.len(), 1);

    // Inspect the single converted item.
    let converted = &result.items[0];
    assert_eq!(converted.id, memory.id.as_str());
    assert_eq!(converted.context_type, ContextType::Memory);
    assert_eq!(converted.source.as_deref(), Some(expected_source.as_str()));
    assert!(converted.content.contains("focused regression tests"));
    assert!(converted.token_count > 0);
}
4377
#[cfg(feature = "ahp")]
#[test]
fn test_injected_context_to_results_includes_all_context_shapes() {
    // One entry for each kind of context the harness can inject.
    let fact = a3s_ahp::Fact {
        content: "Fact from harness".to_string(),
        source: "ahp://fact/source".to_string(),
        confidence: 0.92,
    };
    let snippet = a3s_ahp::FileContentSnippet {
        path: "src/lib.rs".to_string(),
        snippet: "pub fn important() {}".to_string(),
        relevance_score: 0.88,
    };
    let summary = a3s_ahp::ProjectSummary {
        project_name: "demo".to_string(),
        language: Some("Rust".to_string()),
        key_files: Some(vec!["Cargo.toml".to_string(), "src/lib.rs".to_string()]),
        structure_description: "Small Rust crate".to_string(),
    };
    let injected = a3s_ahp::InjectedContext {
        facts: vec![fact],
        file_contents: Some(vec![snippet]),
        project_summary: Some(summary),
        knowledge: Some(vec!["Use context budgets".to_string()]),
        suggestions: Some(vec!["Prefer focused verification".to_string()]),
    };

    let results = injected_context_to_results(injected);

    // Flatten every result's items into one list for the content checks.
    let mut items = Vec::new();
    for result in results.iter() {
        items.extend(result.items.iter());
    }

    assert_eq!(results.len(), 5);

    let has_fact = items.iter().any(|item| {
        item.content == "Fact from harness" && item.source.as_deref() == Some("ahp://fact/source")
    });
    assert!(has_fact);

    let has_snippet = items.iter().any(|item| {
        item.content == "pub fn important() {}" && item.source.as_deref() == Some("src/lib.rs")
    });
    assert!(has_snippet);

    let has_key_files = items
        .iter()
        .any(|item| item.content.contains("Key files: Cargo.toml, src/lib.rs"));
    assert!(has_key_files);

    let has_suggestion = items.iter().any(|item| {
        item.source.as_deref() == Some("ahp://suggestions")
            && item.content.contains("Prefer focused verification")
    });
    assert!(has_suggestion);

    let all_from_harness = results
        .iter()
        .all(|result| result.provider == "ahp_harness");
    assert!(all_from_harness);
}
4426
#[test]
fn test_agent_config_default() {
    let defaults = AgentConfig::default();

    assert!(defaults.prompt_slots.is_empty());
    // Tools are provided externally
    assert!(defaults.tools.is_empty());
    assert_eq!(defaults.max_tool_rounds, MAX_TOOL_ROUNDS);
    assert!(defaults.permission_checker.is_none());
    assert!(defaults.context_providers.is_empty());

    // Built-in skills are always present by default
    let skills = defaults
        .skill_registry
        .expect("skill_registry must be Some by default");
    assert!(skills.len() >= 4, "expected at least 4 built-in skills");
    for name in ["code-search", "find-bugs"].iter().copied() {
        assert!(skills.get(name).is_some());
    }
}
4443
4444    // ========================================================================
4445    // Mock LLM Client for Testing
4446    // ========================================================================
4447
/// Mock LLM client for tests that holds a caller-supplied list of predefined
/// responses and counts how many calls were made.
pub(crate) struct MockLlmClient {
    /// Responses to return (consumed in order); wrapped in a `Mutex` so they
    /// can be taken through a shared reference.
    responses: std::sync::Mutex<Vec<LlmResponse>>,
    /// Number of calls made; starts at zero and is readable by tests.
    pub(crate) call_count: AtomicUsize,
}
4455
4456    impl MockLlmClient {
4457        pub(crate) fn new(responses: Vec<LlmResponse>) -> Self {
4458            Self {
4459                responses: std::sync::Mutex::new(responses),
4460                call_count: AtomicUsize::new(0),
4461            }
4462        }
4463
4464        /// Create a response with text only (no tool calls)
4465        pub(crate) fn text_response(text: &str) -> LlmResponse {
4466            LlmResponse {
4467                message: Message {
4468                    role: "assistant".to_string(),
4469                    content: vec![ContentBlock::Text {
4470                        text: text.to_string(),
4471                    }],
4472                    reasoning_content: None,
4473                },
4474                usage: TokenUsage {
4475                    prompt_tokens: 10,
4476                    completion_tokens: 5,
4477                    total_tokens: 15,
4478                    cache_read_tokens: None,
4479                    cache_write_tokens: None,
4480                },
4481                stop_reason: Some("end_turn".to_string()),
4482                meta: None,
4483            }
4484        }
4485
4486        /// Create a response with a tool call
4487        pub(crate) fn tool_call_response(
4488            tool_id: &str,
4489            tool_name: &str,
4490            args: serde_json::Value,
4491        ) -> LlmResponse {
4492            LlmResponse {
4493                message: Message {
4494                    role: "assistant".to_string(),
4495                    content: vec![ContentBlock::ToolUse {
4496                        id: tool_id.to_string(),
4497                        name: tool_name.to_string(),
4498                        input: args,
4499                    }],
4500                    reasoning_content: None,
4501                },
4502                usage: TokenUsage {
4503                    prompt_tokens: 10,
4504                    completion_tokens: 5,
4505                    total_tokens: 15,
4506                    cache_read_tokens: None,
4507                    cache_write_tokens: None,
4508                },
4509                stop_reason: Some("tool_use".to_string()),
4510                meta: None,
4511            }
4512        }
4513    }
4514
4515    #[async_trait::async_trait]
4516    impl LlmClient for MockLlmClient {
4517        async fn complete(
4518            &self,
4519            messages: &[Message],
4520            system: Option<&str>,
4521            _tools: &[ToolDefinition],
4522        ) -> Result<LlmResponse> {
4523            if system == Some(crate::prompts::PRE_ANALYSIS_SYSTEM) {
4524                let prompt = messages
4525                    .last()
4526                    .and_then(|m| {
4527                        m.content.iter().find_map(|block| {
4528                            if let ContentBlock::Text { text } = block {
4529                                Some(text.as_str())
4530                            } else {
4531                                None
4532                            }
4533                        })
4534                    })
4535                    .unwrap_or("");
4536                let response = serde_json::json!({
4537                    "intent": "GeneralPurpose",
4538                    "requires_planning": false,
4539                    "goal": {
4540                        "description": prompt,
4541                        "success_criteria": []
4542                    },
4543                    "execution_plan": {
4544                        "complexity": "Simple",
4545                        "steps": [
4546                            {
4547                                "id": "step-1",
4548                                "description": prompt,
4549                                "tool": null,
4550                                "dependencies": [],
4551                                "success_criteria": "Complete the request"
4552                            }
4553                        ],
4554                        "required_tools": []
4555                    },
4556                    "optimized_input": prompt
4557                });
4558                return Ok(MockLlmClient::text_response(&response.to_string()));
4559            }
4560            self.call_count.fetch_add(1, Ordering::SeqCst);
4561            let mut responses = self.responses.lock().unwrap();
4562            if responses.is_empty() {
4563                anyhow::bail!("No more mock responses available");
4564            }
4565            Ok(responses.remove(0))
4566        }
4567
4568        async fn complete_streaming(
4569            &self,
4570            _messages: &[Message],
4571            _system: Option<&str>,
4572            _tools: &[ToolDefinition],
4573            _cancel_token: tokio_util::sync::CancellationToken,
4574        ) -> Result<mpsc::Receiver<StreamEvent>> {
4575            self.call_count.fetch_add(1, Ordering::SeqCst);
4576            let mut responses = self.responses.lock().unwrap();
4577            if responses.is_empty() {
4578                anyhow::bail!("No more mock responses available");
4579            }
4580            let response = responses.remove(0);
4581
4582            let (tx, rx) = mpsc::channel(10);
4583            tokio::spawn(async move {
4584                // Send text deltas if any
4585                for block in &response.message.content {
4586                    if let ContentBlock::Text { text } = block {
4587                        tx.send(StreamEvent::TextDelta(text.clone())).await.ok();
4588                    }
4589                }
4590                tx.send(StreamEvent::Done(response)).await.ok();
4591            });
4592
4593            Ok(rx)
4594        }
4595    }
4596
4597    // ========================================================================
4598    // Agent Loop Tests
4599    // ========================================================================
4600
4601    #[tokio::test]
4602    async fn test_agent_simple_response() {
4603        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4604            "Hello, I'm an AI assistant.",
4605        )]));
4606
4607        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4608        let config = AgentConfig::default();
4609
4610        let agent = AgentLoop::new(
4611            mock_client.clone(),
4612            tool_executor,
4613            test_tool_context(),
4614            config,
4615        );
4616        let result = agent.execute(&[], "Hello", None).await.unwrap();
4617
4618        assert_eq!(result.text, "Hello, I'm an AI assistant.");
4619        assert_eq!(result.tool_calls_count, 0);
4620        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
4621    }
4622
4623    #[tokio::test]
4624    async fn test_agent_with_tool_call() {
4625        let mock_client = Arc::new(MockLlmClient::new(vec![
4626            // First response: tool call
4627            MockLlmClient::tool_call_response(
4628                "tool-1",
4629                "bash",
4630                serde_json::json!({"command": "echo hello"}),
4631            ),
4632            // Second response: final text
4633            MockLlmClient::text_response("The command output was: hello"),
4634        ]));
4635
4636        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4637        let config = AgentConfig::default();
4638
4639        let agent = AgentLoop::new(
4640            mock_client.clone(),
4641            tool_executor,
4642            test_tool_context(),
4643            config,
4644        );
4645        let result = agent.execute(&[], "Run echo hello", None).await.unwrap();
4646
4647        assert_eq!(result.text, "The command output was: hello");
4648        assert_eq!(result.tool_calls_count, 1);
4649        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
4650    }
4651
4652    #[tokio::test]
4653    async fn test_agent_permission_deny() {
4654        let mock_client = Arc::new(MockLlmClient::new(vec![
4655            // First response: tool call that will be denied
4656            MockLlmClient::tool_call_response(
4657                "tool-1",
4658                "bash",
4659                serde_json::json!({"command": "rm -rf /tmp/test"}),
4660            ),
4661            // Second response: LLM responds to the denial
4662            MockLlmClient::text_response(
4663                "I cannot execute that command due to permission restrictions.",
4664            ),
4665        ]));
4666
4667        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4668
4669        // Create permission policy that denies rm commands
4670        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
4671
4672        let config = AgentConfig {
4673            permission_checker: Some(Arc::new(permission_policy)),
4674            ..Default::default()
4675        };
4676
4677        let (tx, mut rx) = mpsc::channel(100);
4678        let agent = AgentLoop::new(
4679            mock_client.clone(),
4680            tool_executor,
4681            test_tool_context(),
4682            config,
4683        );
4684        let result = agent.execute(&[], "Delete files", Some(tx)).await.unwrap();
4685
4686        // Check that we received a PermissionDenied event
4687        let mut found_permission_denied = false;
4688        while let Ok(event) = rx.try_recv() {
4689            if let AgentEvent::PermissionDenied { tool_name, .. } = event {
4690                assert_eq!(tool_name, "bash");
4691                found_permission_denied = true;
4692            }
4693        }
4694        assert!(
4695            found_permission_denied,
4696            "Should have received PermissionDenied event"
4697        );
4698
4699        assert_eq!(result.tool_calls_count, 1);
4700    }
4701
4702    #[tokio::test]
4703    async fn test_agent_permission_allow() {
4704        let mock_client = Arc::new(MockLlmClient::new(vec![
4705            // First response: tool call that will be allowed
4706            MockLlmClient::tool_call_response(
4707                "tool-1",
4708                "bash",
4709                serde_json::json!({"command": "echo hello"}),
4710            ),
4711            // Second response: final text
4712            MockLlmClient::text_response("Done!"),
4713        ]));
4714
4715        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4716
4717        // Create permission policy that allows echo commands
4718        let permission_policy = PermissionPolicy::new()
4719            .allow("bash(echo:*)")
4720            .deny("bash(rm:*)");
4721
4722        let config = AgentConfig {
4723            permission_checker: Some(Arc::new(permission_policy)),
4724            ..Default::default()
4725        };
4726
4727        let agent = AgentLoop::new(
4728            mock_client.clone(),
4729            tool_executor,
4730            test_tool_context(),
4731            config,
4732        );
4733        let result = agent.execute(&[], "Echo hello", None).await.unwrap();
4734
4735        assert_eq!(result.text, "Done!");
4736        assert_eq!(result.tool_calls_count, 1);
4737    }
4738
4739    #[tokio::test]
4740    async fn test_agent_streaming_events() {
4741        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
4742            "Hello!",
4743        )]));
4744
4745        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4746        let config = AgentConfig::default();
4747
4748        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4749        let (tx, mut rx) = mpsc::channel(100);
4750        let cancel_token = tokio_util::sync::CancellationToken::new();
4751
4752        let result = agent
4753            .execute_with_session(&[], "Hi", None, Some(tx), Some(&cancel_token))
4754            .await
4755            .unwrap();
4756        let mut events = Vec::new();
4757        while let Some(event) = rx.recv().await {
4758            events.push(event);
4759        }
4760
4761        assert_eq!(result.text, "Hello!");
4762
4763        // Check we received Start and End events
4764        assert!(events.iter().any(|e| matches!(e, AgentEvent::Start { .. })));
4765        assert!(events.iter().any(|e| matches!(e, AgentEvent::End { .. })));
4766    }
4767
4768    #[tokio::test]
4769    async fn test_agent_max_tool_rounds() {
4770        // Create a mock that always returns tool calls (infinite loop)
4771        let responses: Vec<LlmResponse> = (0..100)
4772            .map(|i| {
4773                MockLlmClient::tool_call_response(
4774                    &format!("tool-{}", i),
4775                    "bash",
4776                    serde_json::json!({"command": "echo loop"}),
4777                )
4778            })
4779            .collect();
4780
4781        let mock_client = Arc::new(MockLlmClient::new(responses));
4782        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4783
4784        let config = AgentConfig {
4785            max_tool_rounds: 3,
4786            ..Default::default()
4787        };
4788
4789        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4790        let result = agent.execute(&[], "Loop forever", None).await;
4791
4792        // Should fail due to max tool rounds exceeded
4793        assert!(result.is_err());
4794        assert!(result.unwrap_err().to_string().contains("Max tool rounds"));
4795    }
4796
4797    #[tokio::test]
4798    async fn test_agent_no_permission_policy_defaults_to_ask() {
4799        // When no permission policy is set, tools default to Ask.
4800        // Without a confirmation manager, Ask = safe deny.
4801        let mock_client = Arc::new(MockLlmClient::new(vec![
4802            MockLlmClient::tool_call_response(
4803                "tool-1",
4804                "bash",
4805                serde_json::json!({"command": "rm -rf /tmp/test"}),
4806            ),
4807            MockLlmClient::text_response("Denied!"),
4808        ]));
4809
4810        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4811        let config = AgentConfig {
4812            permission_checker: None, // No policy → defaults to Ask
4813            // No confirmation_manager → safe deny
4814            ..Default::default()
4815        };
4816
4817        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4818        let result = agent.execute(&[], "Delete", None).await.unwrap();
4819
4820        // Should be denied (no policy + no CM = safe deny)
4821        assert_eq!(result.text, "Denied!");
4822        assert_eq!(result.tool_calls_count, 1);
4823    }
4824
4825    #[tokio::test]
4826    async fn test_agent_permission_ask_without_cm_denies() {
4827        // When permission is Ask and no confirmation manager configured,
4828        // tool execution should be denied (safe default).
4829        let mock_client = Arc::new(MockLlmClient::new(vec![
4830            MockLlmClient::tool_call_response(
4831                "tool-1",
4832                "bash",
4833                serde_json::json!({"command": "echo test"}),
4834            ),
4835            MockLlmClient::text_response("Denied!"),
4836        ]));
4837
4838        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4839
4840        // Create policy where bash falls through to Ask (default)
4841        let permission_policy = PermissionPolicy::new(); // Default decision is Ask
4842
4843        let config = AgentConfig {
4844            permission_checker: Some(Arc::new(permission_policy)),
4845            // No confirmation_manager — safe deny
4846            ..Default::default()
4847        };
4848
4849        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4850        let result = agent.execute(&[], "Echo", None).await.unwrap();
4851
4852        // Should deny (Ask without CM = safe deny)
4853        assert_eq!(result.text, "Denied!");
4854        // The tool result should contain the denial message
4855        assert!(result.tool_calls_count >= 1);
4856    }
4857
4858    // ========================================================================
4859    // HITL (Human-in-the-Loop) Tests
4860    // ========================================================================
4861
4862    #[tokio::test]
4863    async fn test_agent_hitl_approved() {
4864        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4865        use tokio::sync::broadcast;
4866
4867        let mock_client = Arc::new(MockLlmClient::new(vec![
4868            MockLlmClient::tool_call_response(
4869                "tool-1",
4870                "bash",
4871                serde_json::json!({"command": "echo hello"}),
4872            ),
4873            MockLlmClient::text_response("Command executed!"),
4874        ]));
4875
4876        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4877
4878        // Create HITL confirmation manager with policy enabled
4879        let (event_tx, _event_rx) = broadcast::channel(100);
4880        let hitl_policy = ConfirmationPolicy {
4881            enabled: true,
4882            ..Default::default()
4883        };
4884        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4885
4886        // Create permission policy that returns Ask for bash
4887        let permission_policy = PermissionPolicy::new(); // Default is Ask
4888
4889        let config = AgentConfig {
4890            permission_checker: Some(Arc::new(permission_policy)),
4891            confirmation_manager: Some(confirmation_manager.clone()),
4892            ..Default::default()
4893        };
4894
4895        // Spawn a task to approve the confirmation
4896        let cm_clone = confirmation_manager.clone();
4897        tokio::spawn(async move {
4898            // Wait a bit for the confirmation request to be created
4899            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
4900            // Approve it
4901            cm_clone.confirm("tool-1", true, None).await.ok();
4902        });
4903
4904        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4905        let result = agent.execute(&[], "Run echo", None).await.unwrap();
4906
4907        assert_eq!(result.text, "Command executed!");
4908        assert_eq!(result.tool_calls_count, 1);
4909    }
4910
4911    #[tokio::test]
4912    async fn test_agent_hitl_rejected() {
4913        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
4914        use tokio::sync::broadcast;
4915
4916        let mock_client = Arc::new(MockLlmClient::new(vec![
4917            MockLlmClient::tool_call_response(
4918                "tool-1",
4919                "bash",
4920                serde_json::json!({"command": "rm -rf /"}),
4921            ),
4922            MockLlmClient::text_response("Understood, I won't do that."),
4923        ]));
4924
4925        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4926
4927        // Create HITL confirmation manager
4928        let (event_tx, _event_rx) = broadcast::channel(100);
4929        let hitl_policy = ConfirmationPolicy {
4930            enabled: true,
4931            ..Default::default()
4932        };
4933        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4934
4935        // Permission policy returns Ask
4936        let permission_policy = PermissionPolicy::new();
4937
4938        let config = AgentConfig {
4939            permission_checker: Some(Arc::new(permission_policy)),
4940            confirmation_manager: Some(confirmation_manager.clone()),
4941            ..Default::default()
4942        };
4943
4944        // Spawn a task to reject the confirmation
4945        let cm_clone = confirmation_manager.clone();
4946        tokio::spawn(async move {
4947            tokio::time::sleep(std::time::Duration::from_millis(50)).await;
4948            cm_clone
4949                .confirm("tool-1", false, Some("Too dangerous".to_string()))
4950                .await
4951                .ok();
4952        });
4953
4954        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4955        let result = agent.execute(&[], "Delete everything", None).await.unwrap();
4956
4957        // LLM should respond to the rejection
4958        assert_eq!(result.text, "Understood, I won't do that.");
4959    }
4960
4961    #[tokio::test]
4962    async fn test_agent_hitl_timeout_reject() {
4963        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
4964        use tokio::sync::broadcast;
4965
4966        let mock_client = Arc::new(MockLlmClient::new(vec![
4967            MockLlmClient::tool_call_response(
4968                "tool-1",
4969                "bash",
4970                serde_json::json!({"command": "echo test"}),
4971            ),
4972            MockLlmClient::text_response("Timed out, I understand."),
4973        ]));
4974
4975        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
4976
4977        // Create HITL with very short timeout and Reject action
4978        let (event_tx, _event_rx) = broadcast::channel(100);
4979        let hitl_policy = ConfirmationPolicy {
4980            enabled: true,
4981            default_timeout_ms: 50, // Very short timeout
4982            timeout_action: TimeoutAction::Reject,
4983            ..Default::default()
4984        };
4985        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
4986
4987        let permission_policy = PermissionPolicy::new();
4988
4989        let config = AgentConfig {
4990            permission_checker: Some(Arc::new(permission_policy)),
4991            confirmation_manager: Some(confirmation_manager),
4992            ..Default::default()
4993        };
4994
4995        // Don't approve - let it timeout
4996        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
4997        let result = agent.execute(&[], "Echo", None).await.unwrap();
4998
4999        // Should get timeout rejection response from LLM
5000        assert_eq!(result.text, "Timed out, I understand.");
5001    }
5002
5003    #[tokio::test]
5004    async fn test_agent_hitl_timeout_auto_approve() {
5005        use crate::hitl::{ConfirmationManager, ConfirmationPolicy, TimeoutAction};
5006        use tokio::sync::broadcast;
5007
5008        let mock_client = Arc::new(MockLlmClient::new(vec![
5009            MockLlmClient::tool_call_response(
5010                "tool-1",
5011                "bash",
5012                serde_json::json!({"command": "echo hello"}),
5013            ),
5014            MockLlmClient::text_response("Auto-approved and executed!"),
5015        ]));
5016
5017        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5018
5019        // Create HITL with very short timeout and AutoApprove action
5020        let (event_tx, _event_rx) = broadcast::channel(100);
5021        let hitl_policy = ConfirmationPolicy {
5022            enabled: true,
5023            default_timeout_ms: 50, // Very short timeout
5024            timeout_action: TimeoutAction::AutoApprove,
5025            ..Default::default()
5026        };
5027        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5028
5029        let permission_policy = PermissionPolicy::new();
5030
5031        let config = AgentConfig {
5032            permission_checker: Some(Arc::new(permission_policy)),
5033            confirmation_manager: Some(confirmation_manager),
5034            ..Default::default()
5035        };
5036
5037        // Don't approve - let it timeout and auto-approve
5038        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5039        let result = agent.execute(&[], "Echo", None).await.unwrap();
5040
5041        // Should auto-approve on timeout and execute
5042        assert_eq!(result.text, "Auto-approved and executed!");
5043        assert_eq!(result.tool_calls_count, 1);
5044    }
5045
5046    #[tokio::test]
5047    async fn test_agent_hitl_confirmation_events() {
5048        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5049        use tokio::sync::broadcast;
5050
5051        let mock_client = Arc::new(MockLlmClient::new(vec![
5052            MockLlmClient::tool_call_response(
5053                "tool-1",
5054                "bash",
5055                serde_json::json!({"command": "echo test"}),
5056            ),
5057            MockLlmClient::text_response("Done!"),
5058        ]));
5059
5060        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5061
5062        // Create HITL confirmation manager
5063        let (event_tx, mut event_rx) = broadcast::channel(100);
5064        let hitl_policy = ConfirmationPolicy {
5065            enabled: true,
5066            default_timeout_ms: 5000, // Long enough timeout
5067            ..Default::default()
5068        };
5069        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5070
5071        let permission_policy = PermissionPolicy::new();
5072
5073        let config = AgentConfig {
5074            permission_checker: Some(Arc::new(permission_policy)),
5075            confirmation_manager: Some(confirmation_manager.clone()),
5076            ..Default::default()
5077        };
5078
5079        // Spawn task to approve and collect events
5080        let cm_clone = confirmation_manager.clone();
5081        let event_handle = tokio::spawn(async move {
5082            let mut events = Vec::new();
5083            // Wait for ConfirmationRequired event
5084            while let Ok(event) = event_rx.recv().await {
5085                events.push(event.clone());
5086                if let AgentEvent::ConfirmationRequired { tool_id, .. } = event {
5087                    // Approve it
5088                    cm_clone.confirm(&tool_id, true, None).await.ok();
5089                    // Wait for ConfirmationReceived
5090                    if let Ok(recv_event) = event_rx.recv().await {
5091                        events.push(recv_event);
5092                    }
5093                    break;
5094                }
5095            }
5096            events
5097        });
5098
5099        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5100        let _result = agent.execute(&[], "Echo", None).await.unwrap();
5101
5102        // Check events
5103        let events = event_handle.await.unwrap();
5104        assert!(
5105            events
5106                .iter()
5107                .any(|e| matches!(e, AgentEvent::ConfirmationRequired { .. })),
5108            "Should have ConfirmationRequired event"
5109        );
5110        assert!(
5111            events
5112                .iter()
5113                .any(|e| matches!(e, AgentEvent::ConfirmationReceived { approved: true, .. })),
5114            "Should have ConfirmationReceived event with approved=true"
5115        );
5116    }
5117
5118    #[tokio::test]
5119    async fn test_agent_hitl_disabled_auto_executes() {
5120        // When HITL is disabled, tools should execute automatically even with Ask permission
5121        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5122        use tokio::sync::broadcast;
5123
5124        let mock_client = Arc::new(MockLlmClient::new(vec![
5125            MockLlmClient::tool_call_response(
5126                "tool-1",
5127                "bash",
5128                serde_json::json!({"command": "echo auto"}),
5129            ),
5130            MockLlmClient::text_response("Auto executed!"),
5131        ]));
5132
5133        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5134
5135        // Create HITL with enabled=false
5136        let (event_tx, _event_rx) = broadcast::channel(100);
5137        let hitl_policy = ConfirmationPolicy {
5138            enabled: false, // HITL disabled
5139            ..Default::default()
5140        };
5141        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5142
5143        let permission_policy = PermissionPolicy::new(); // Default is Ask
5144
5145        let config = AgentConfig {
5146            permission_checker: Some(Arc::new(permission_policy)),
5147            confirmation_manager: Some(confirmation_manager),
5148            ..Default::default()
5149        };
5150
5151        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5152        let result = agent.execute(&[], "Echo", None).await.unwrap();
5153
5154        // Should execute without waiting for confirmation
5155        assert_eq!(result.text, "Auto executed!");
5156        assert_eq!(result.tool_calls_count, 1);
5157    }
5158
5159    #[tokio::test]
5160    async fn test_agent_hitl_with_permission_deny_skips_hitl() {
5161        // When permission is Deny, HITL should not be triggered
5162        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5163        use tokio::sync::broadcast;
5164
5165        let mock_client = Arc::new(MockLlmClient::new(vec![
5166            MockLlmClient::tool_call_response(
5167                "tool-1",
5168                "bash",
5169                serde_json::json!({"command": "rm -rf /"}),
5170            ),
5171            MockLlmClient::text_response("Blocked by permission."),
5172        ]));
5173
5174        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5175
5176        // Create HITL enabled
5177        let (event_tx, mut event_rx) = broadcast::channel(100);
5178        let hitl_policy = ConfirmationPolicy {
5179            enabled: true,
5180            ..Default::default()
5181        };
5182        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5183
5184        // Permission policy denies rm commands
5185        let permission_policy = PermissionPolicy::new().deny("bash(rm:*)");
5186
5187        let config = AgentConfig {
5188            permission_checker: Some(Arc::new(permission_policy)),
5189            confirmation_manager: Some(confirmation_manager),
5190            ..Default::default()
5191        };
5192
5193        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5194        let result = agent.execute(&[], "Delete", None).await.unwrap();
5195
5196        // Should be denied without HITL
5197        assert_eq!(result.text, "Blocked by permission.");
5198
5199        // Should NOT have any ConfirmationRequired events
5200        let mut found_confirmation = false;
5201        while let Ok(event) = event_rx.try_recv() {
5202            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
5203                found_confirmation = true;
5204            }
5205        }
5206        assert!(
5207            !found_confirmation,
5208            "HITL should not be triggered when permission is Deny"
5209        );
5210    }
5211
5212    #[tokio::test]
5213    async fn test_agent_hitl_with_permission_allow_skips_hitl() {
5214        // When permission is Allow, HITL confirmation is skipped entirely.
5215        // PermissionPolicy is the declarative rule engine; Allow = execute directly.
5216        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5217        use tokio::sync::broadcast;
5218
5219        let mock_client = Arc::new(MockLlmClient::new(vec![
5220            MockLlmClient::tool_call_response(
5221                "tool-1",
5222                "bash",
5223                serde_json::json!({"command": "echo hello"}),
5224            ),
5225            MockLlmClient::text_response("Allowed!"),
5226        ]));
5227
5228        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5229
5230        // Create HITL enabled
5231        let (event_tx, mut event_rx) = broadcast::channel(100);
5232        let hitl_policy = ConfirmationPolicy {
5233            enabled: true,
5234            ..Default::default()
5235        };
5236        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5237
5238        // Permission policy allows echo commands
5239        let permission_policy = PermissionPolicy::new().allow("bash(echo:*)");
5240
5241        let config = AgentConfig {
5242            permission_checker: Some(Arc::new(permission_policy)),
5243            confirmation_manager: Some(confirmation_manager.clone()),
5244            ..Default::default()
5245        };
5246
5247        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5248        let result = agent.execute(&[], "Echo", None).await.unwrap();
5249
5250        // Should execute directly without HITL (permission Allow skips confirmation)
5251        assert_eq!(result.text, "Allowed!");
5252
5253        // Should NOT have ConfirmationRequired event (Allow bypasses HITL)
5254        let mut found_confirmation = false;
5255        while let Ok(event) = event_rx.try_recv() {
5256            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
5257                found_confirmation = true;
5258            }
5259        }
5260        assert!(
5261            !found_confirmation,
5262            "Permission Allow should skip HITL confirmation"
5263        );
5264    }
5265
5266    #[tokio::test]
5267    async fn test_agent_hitl_multiple_tool_calls() {
5268        // Test multiple tool calls in sequence with HITL
5269        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5270        use tokio::sync::broadcast;
5271
5272        let mock_client = Arc::new(MockLlmClient::new(vec![
5273            // First response: two tool calls
5274            LlmResponse {
5275                message: Message {
5276                    role: "assistant".to_string(),
5277                    content: vec![
5278                        ContentBlock::ToolUse {
5279                            id: "tool-1".to_string(),
5280                            name: "bash".to_string(),
5281                            input: serde_json::json!({"command": "echo first"}),
5282                        },
5283                        ContentBlock::ToolUse {
5284                            id: "tool-2".to_string(),
5285                            name: "bash".to_string(),
5286                            input: serde_json::json!({"command": "echo second"}),
5287                        },
5288                    ],
5289                    reasoning_content: None,
5290                },
5291                usage: TokenUsage {
5292                    prompt_tokens: 10,
5293                    completion_tokens: 5,
5294                    total_tokens: 15,
5295                    cache_read_tokens: None,
5296                    cache_write_tokens: None,
5297                },
5298                stop_reason: Some("tool_use".to_string()),
5299                meta: None,
5300            },
5301            MockLlmClient::text_response("Both executed!"),
5302        ]));
5303
5304        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5305
5306        // Create HITL
5307        let (event_tx, _event_rx) = broadcast::channel(100);
5308        let hitl_policy = ConfirmationPolicy {
5309            enabled: true,
5310            default_timeout_ms: 5000,
5311            ..Default::default()
5312        };
5313        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5314
5315        let permission_policy = PermissionPolicy::new(); // Default Ask
5316
5317        let config = AgentConfig {
5318            permission_checker: Some(Arc::new(permission_policy)),
5319            confirmation_manager: Some(confirmation_manager.clone()),
5320            ..Default::default()
5321        };
5322
5323        // Spawn task to approve both tools
5324        let cm_clone = confirmation_manager.clone();
5325        tokio::spawn(async move {
5326            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
5327            cm_clone.confirm("tool-1", true, None).await.ok();
5328            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
5329            cm_clone.confirm("tool-2", true, None).await.ok();
5330        });
5331
5332        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5333        let result = agent
5334            .execute_loop(
5335                &[],
5336                "run both commands now",
5337                AgentStyle::GeneralPurpose,
5338                None,
5339                None,
5340                &tokio_util::sync::CancellationToken::new(),
5341                true,
5342            )
5343            .await
5344            .unwrap();
5345
5346        assert_eq!(result.text, "Both executed!");
5347        assert_eq!(result.tool_calls_count, 2);
5348    }
5349
5350    #[tokio::test]
5351    async fn test_agent_hitl_partial_approval() {
5352        // Test: first tool approved, second rejected
5353        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5354        use tokio::sync::broadcast;
5355
5356        let mock_client = Arc::new(MockLlmClient::new(vec![
5357            // First response: two tool calls
5358            LlmResponse {
5359                message: Message {
5360                    role: "assistant".to_string(),
5361                    content: vec![
5362                        ContentBlock::ToolUse {
5363                            id: "tool-1".to_string(),
5364                            name: "bash".to_string(),
5365                            input: serde_json::json!({"command": "echo safe"}),
5366                        },
5367                        ContentBlock::ToolUse {
5368                            id: "tool-2".to_string(),
5369                            name: "bash".to_string(),
5370                            input: serde_json::json!({"command": "rm -rf /"}),
5371                        },
5372                    ],
5373                    reasoning_content: None,
5374                },
5375                usage: TokenUsage {
5376                    prompt_tokens: 10,
5377                    completion_tokens: 5,
5378                    total_tokens: 15,
5379                    cache_read_tokens: None,
5380                    cache_write_tokens: None,
5381                },
5382                stop_reason: Some("tool_use".to_string()),
5383                meta: None,
5384            },
5385            MockLlmClient::text_response("First worked, second rejected."),
5386        ]));
5387
5388        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5389
5390        let (event_tx, _event_rx) = broadcast::channel(100);
5391        let hitl_policy = ConfirmationPolicy {
5392            enabled: true,
5393            default_timeout_ms: 5000,
5394            ..Default::default()
5395        };
5396        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5397
5398        let permission_policy = PermissionPolicy::new();
5399
5400        let config = AgentConfig {
5401            permission_checker: Some(Arc::new(permission_policy)),
5402            confirmation_manager: Some(confirmation_manager.clone()),
5403            ..Default::default()
5404        };
5405
5406        // Approve first, reject second
5407        let cm_clone = confirmation_manager.clone();
5408        tokio::spawn(async move {
5409            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
5410            cm_clone.confirm("tool-1", true, None).await.ok();
5411            tokio::time::sleep(std::time::Duration::from_millis(30)).await;
5412            cm_clone
5413                .confirm("tool-2", false, Some("Dangerous".to_string()))
5414                .await
5415                .ok();
5416        });
5417
5418        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5419        let result = agent.execute(&[], "Run both", None).await.unwrap();
5420
5421        assert_eq!(result.text, "First worked, second rejected.");
5422        assert_eq!(result.tool_calls_count, 2);
5423    }
5424
5425    #[tokio::test]
5426    async fn test_agent_hitl_yolo_mode_auto_approves() {
5427        // YOLO mode: specific lanes auto-approve without confirmation
5428        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5429        use crate::queue::SessionLane;
5430        use tokio::sync::broadcast;
5431
5432        let mock_client = Arc::new(MockLlmClient::new(vec![
5433            MockLlmClient::tool_call_response(
5434                "tool-1",
5435                "read", // Query lane tool
5436                serde_json::json!({"path": "/tmp/test.txt"}),
5437            ),
5438            MockLlmClient::text_response("File read!"),
5439        ]));
5440
5441        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5442
5443        // YOLO mode for Query lane (read, glob, ls, grep)
5444        let (event_tx, mut event_rx) = broadcast::channel(100);
5445        let mut yolo_lanes = std::collections::HashSet::new();
5446        yolo_lanes.insert(SessionLane::Query);
5447        let hitl_policy = ConfirmationPolicy {
5448            enabled: true,
5449            yolo_lanes, // Auto-approve query operations
5450            ..Default::default()
5451        };
5452        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5453
5454        let permission_policy = PermissionPolicy::new();
5455
5456        let config = AgentConfig {
5457            permission_checker: Some(Arc::new(permission_policy)),
5458            confirmation_manager: Some(confirmation_manager),
5459            ..Default::default()
5460        };
5461
5462        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5463        let result = agent.execute(&[], "Read file", None).await.unwrap();
5464
5465        // Should auto-execute without confirmation (YOLO mode)
5466        assert_eq!(result.text, "File read!");
5467
5468        // Should NOT have ConfirmationRequired for yolo lane
5469        let mut found_confirmation = false;
5470        while let Ok(event) = event_rx.try_recv() {
5471            if matches!(event, AgentEvent::ConfirmationRequired { .. }) {
5472                found_confirmation = true;
5473            }
5474        }
5475        assert!(
5476            !found_confirmation,
5477            "YOLO mode should not trigger confirmation"
5478        );
5479    }
5480
5481    #[tokio::test]
5482    async fn test_agent_config_with_all_options() {
5483        use crate::hitl::{ConfirmationManager, ConfirmationPolicy};
5484        use tokio::sync::broadcast;
5485
5486        let (event_tx, _) = broadcast::channel(100);
5487        let hitl_policy = ConfirmationPolicy::default();
5488        let confirmation_manager = Arc::new(ConfirmationManager::new(hitl_policy, event_tx));
5489
5490        let permission_policy = PermissionPolicy::new().allow("bash(*)");
5491
5492        let config = AgentConfig {
5493            prompt_slots: SystemPromptSlots {
5494                extra: Some("Test system prompt".to_string()),
5495                ..Default::default()
5496            },
5497            tools: vec![],
5498            max_tool_rounds: 10,
5499            permission_checker: Some(Arc::new(permission_policy)),
5500            confirmation_manager: Some(confirmation_manager),
5501            context_providers: vec![],
5502            planning_mode: PlanningMode::default(),
5503            goal_tracking: false,
5504            hook_engine: None,
5505            skill_registry: None,
5506            ..AgentConfig::default()
5507        };
5508
5509        assert!(config.prompt_slots.build().contains("Test system prompt"));
5510        assert_eq!(config.max_tool_rounds, 10);
5511        assert!(config.permission_checker.is_some());
5512        assert!(config.confirmation_manager.is_some());
5513        assert!(config.context_providers.is_empty());
5514
5515        // Test Debug trait
5516        let debug_str = format!("{:?}", config);
5517        assert!(debug_str.contains("AgentConfig"));
5518        assert!(debug_str.contains("permission_checker: true"));
5519        assert!(debug_str.contains("confirmation_manager: true"));
5520        assert!(debug_str.contains("context_providers: 0"));
5521    }
5522
5523    // ========================================================================
5524    // Context Provider Tests
5525    // ========================================================================
5526
5527    use crate::context::{ContextItem, ContextType};
5528
5529    /// Mock context provider for testing
5530    struct MockContextProvider {
5531        name: String,
5532        items: Vec<ContextItem>,
5533        on_turn_calls: std::sync::Arc<tokio::sync::RwLock<Vec<(String, String, String)>>>,
5534    }
5535
5536    impl MockContextProvider {
5537        fn new(name: &str) -> Self {
5538            Self {
5539                name: name.to_string(),
5540                items: Vec::new(),
5541                on_turn_calls: std::sync::Arc::new(tokio::sync::RwLock::new(Vec::new())),
5542            }
5543        }
5544
5545        fn with_items(mut self, items: Vec<ContextItem>) -> Self {
5546            self.items = items;
5547            self
5548        }
5549    }
5550
5551    #[async_trait::async_trait]
5552    impl ContextProvider for MockContextProvider {
5553        fn name(&self) -> &str {
5554            &self.name
5555        }
5556
5557        async fn query(&self, _query: &ContextQuery) -> anyhow::Result<ContextResult> {
5558            let mut result = ContextResult::new(&self.name);
5559            for item in &self.items {
5560                result.add_item(item.clone());
5561            }
5562            Ok(result)
5563        }
5564
5565        async fn on_turn_complete(
5566            &self,
5567            session_id: &str,
5568            prompt: &str,
5569            response: &str,
5570        ) -> anyhow::Result<()> {
5571            let mut calls = self.on_turn_calls.write().await;
5572            calls.push((
5573                session_id.to_string(),
5574                prompt.to_string(),
5575                response.to_string(),
5576            ));
5577            Ok(())
5578        }
5579    }
5580
5581    #[tokio::test]
5582    async fn test_agent_with_context_provider() {
5583        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5584            "Response using context",
5585        )]));
5586
5587        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5588
5589        let provider =
5590            MockContextProvider::new("test-provider").with_items(vec![ContextItem::new(
5591                "ctx-1",
5592                ContextType::Resource,
5593                "Relevant context here",
5594            )
5595            .with_source("test://docs/example")]);
5596
5597        let config = AgentConfig {
5598            prompt_slots: SystemPromptSlots {
5599                extra: Some("You are helpful.".to_string()),
5600                ..Default::default()
5601            },
5602            context_providers: vec![Arc::new(provider)],
5603            ..Default::default()
5604        };
5605
5606        let agent = AgentLoop::new(
5607            mock_client.clone(),
5608            tool_executor,
5609            test_tool_context(),
5610            config,
5611        );
5612        let result = agent
5613            .execute(&[], "verify context provider output", None)
5614            .await
5615            .unwrap();
5616
5617        assert_eq!(result.text, "Response using context");
5618        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
5619    }
5620
5621    #[tokio::test]
5622    async fn test_agent_context_provider_events() {
5623        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5624            "Answer",
5625        )]));
5626
5627        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5628
5629        let provider =
5630            MockContextProvider::new("event-provider").with_items(vec![ContextItem::new(
5631                "item-1",
5632                ContextType::Memory,
5633                "Memory content",
5634            )
5635            .with_token_count(50)]);
5636
5637        let config = AgentConfig {
5638            context_providers: vec![Arc::new(provider)],
5639            ..Default::default()
5640        };
5641
5642        let (tx, mut rx) = mpsc::channel(100);
5643        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5644        let _result = agent.execute(&[], "Test prompt", Some(tx)).await.unwrap();
5645
5646        // Collect events
5647        let mut events = Vec::new();
5648        while let Ok(event) = rx.try_recv() {
5649            events.push(event);
5650        }
5651
5652        // Should have ContextResolving and ContextResolved events
5653        assert!(
5654            events
5655                .iter()
5656                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
5657            "Should have ContextResolving event"
5658        );
5659        assert!(
5660            events
5661                .iter()
5662                .any(|e| matches!(e, AgentEvent::ContextResolved { .. })),
5663            "Should have ContextResolved event"
5664        );
5665
5666        // Check context resolved values
5667        for event in &events {
5668            if let AgentEvent::ContextResolved {
5669                total_items,
5670                total_tokens,
5671            } = event
5672            {
5673                assert_eq!(*total_items, 1);
5674                assert_eq!(*total_tokens, 50);
5675            }
5676        }
5677    }
5678
5679    #[tokio::test]
5680    async fn test_agent_multiple_context_providers() {
5681        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5682            "Combined response",
5683        )]));
5684
5685        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5686
5687        let provider1 = MockContextProvider::new("provider-1").with_items(vec![ContextItem::new(
5688            "p1-1",
5689            ContextType::Resource,
5690            "Resource from P1",
5691        )
5692        .with_token_count(100)]);
5693
5694        let provider2 = MockContextProvider::new("provider-2").with_items(vec![
5695            ContextItem::new("p2-1", ContextType::Memory, "Memory from P2").with_token_count(50),
5696            ContextItem::new("p2-2", ContextType::Skill, "Skill from P2").with_token_count(75),
5697        ]);
5698
5699        let config = AgentConfig {
5700            prompt_slots: SystemPromptSlots {
5701                extra: Some("Base system prompt.".to_string()),
5702                ..Default::default()
5703            },
5704            context_providers: vec![Arc::new(provider1), Arc::new(provider2)],
5705            ..Default::default()
5706        };
5707
5708        let (tx, mut rx) = mpsc::channel(100);
5709        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5710        let result = agent
5711            .execute(&[], "verify combined context", Some(tx))
5712            .await
5713            .unwrap();
5714
5715        assert_eq!(result.text, "Combined response");
5716
5717        // Check context resolved event has combined totals
5718        while let Ok(event) = rx.try_recv() {
5719            if let AgentEvent::ContextResolved {
5720                total_items,
5721                total_tokens,
5722            } = event
5723            {
5724                assert_eq!(total_items, 3); // 1 + 2
5725                assert_eq!(total_tokens, 225); // 100 + 50 + 75
5726            }
5727        }
5728    }
5729
5730    #[tokio::test]
5731    async fn test_agent_no_context_providers() {
5732        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5733            "No context",
5734        )]));
5735
5736        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5737
5738        // No context providers
5739        let config = AgentConfig::default();
5740
5741        let (tx, mut rx) = mpsc::channel(100);
5742        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5743        let result = agent
5744            .execute(&[], "verify simple prompt", Some(tx))
5745            .await
5746            .unwrap();
5747
5748        assert_eq!(result.text, "No context");
5749
5750        // Should NOT have context events when no providers
5751        let mut events = Vec::new();
5752        while let Ok(event) = rx.try_recv() {
5753            events.push(event);
5754        }
5755
5756        assert!(
5757            !events
5758                .iter()
5759                .any(|e| matches!(e, AgentEvent::ContextResolving { .. })),
5760            "Should NOT have ContextResolving event"
5761        );
5762    }
5763
5764    #[tokio::test]
5765    async fn test_agent_memory_recall_routes_through_context_assembly() {
5766        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5767            "Memory-aware response",
5768        )]));
5769
5770        let memory = crate::memory::AgentMemory::new(Arc::new(a3s_memory::InMemoryStore::new()));
5771        memory
5772            .remember(
5773                a3s_memory::MemoryItem::new(
5774                    "verify focused regression tests caught context regressions.",
5775                )
5776                .with_importance(0.9),
5777            )
5778            .await
5779            .unwrap();
5780
5781        let temp_dir = tempfile::tempdir().unwrap();
5782        let tool_executor = Arc::new(ToolExecutor::new(temp_dir.path().display().to_string()));
5783        let config = AgentConfig {
5784            memory: Some(Arc::new(memory)),
5785            ..Default::default()
5786        };
5787
5788        let (tx, mut rx) = mpsc::channel(100);
5789        let agent = AgentLoop::new(
5790            mock_client,
5791            tool_executor,
5792            ToolContext::new(temp_dir.path().to_path_buf()),
5793            config,
5794        );
5795        let result = agent
5796            .execute(&[], "verify focused regression tests", Some(tx))
5797            .await
5798            .unwrap();
5799
5800        assert_eq!(result.text, "Memory-aware response");
5801
5802        let mut recalled = false;
5803        let mut resolved_items = None;
5804        while let Ok(event) = rx.try_recv() {
5805            match event {
5806                AgentEvent::MemoryRecalled { content, .. } => {
5807                    recalled = content.contains("focused regression tests");
5808                }
5809                AgentEvent::ContextResolved { total_items, .. } => {
5810                    resolved_items = Some(total_items);
5811                }
5812                _ => {}
5813            }
5814        }
5815
5816        assert!(recalled);
5817        assert_eq!(resolved_items, Some(1));
5818    }
5819
5820    #[tokio::test]
5821    async fn test_agent_context_on_turn_complete() {
5822        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5823            "Final response",
5824        )]));
5825
5826        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5827
5828        let provider = Arc::new(MockContextProvider::new("memory-provider"));
5829        let on_turn_calls = provider.on_turn_calls.clone();
5830
5831        let config = AgentConfig {
5832            context_providers: vec![provider],
5833            ..Default::default()
5834        };
5835
5836        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5837
5838        // Execute with session ID
5839        let result = agent
5840            .execute_with_session(&[], "verify user prompt", Some("sess-123"), None, None)
5841            .await
5842            .unwrap();
5843
5844        assert_eq!(result.text, "Final response");
5845
5846        // Check on_turn_complete was called
5847        let calls = on_turn_calls.read().await;
5848        assert_eq!(calls.len(), 1);
5849        assert_eq!(calls[0].0, "sess-123");
5850        assert_eq!(calls[0].1, "verify user prompt");
5851        assert_eq!(calls[0].2, "Final response");
5852    }
5853
5854    #[tokio::test]
5855    async fn test_agent_context_on_turn_complete_no_session() {
5856        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5857            "Response",
5858        )]));
5859
5860        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5861
5862        let provider = Arc::new(MockContextProvider::new("memory-provider"));
5863        let on_turn_calls = provider.on_turn_calls.clone();
5864
5865        let config = AgentConfig {
5866            context_providers: vec![provider],
5867            ..Default::default()
5868        };
5869
5870        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5871
5872        // Execute without session ID (uses execute() which passes None)
5873        let _result = agent.execute(&[], "Prompt", None).await.unwrap();
5874
5875        // on_turn_complete should NOT be called when session_id is None
5876        let calls = on_turn_calls.read().await;
5877        assert!(calls.is_empty());
5878    }
5879
5880    #[tokio::test]
5881    async fn test_agent_build_augmented_system_prompt() {
5882        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response("OK")]));
5883
5884        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5885
5886        let provider = MockContextProvider::new("test").with_items(vec![ContextItem::new(
5887            "doc-1",
5888            ContextType::Resource,
5889            "Auth uses JWT tokens.",
5890        )
5891        .with_source("viking://docs/auth")]);
5892
5893        let config = AgentConfig {
5894            prompt_slots: SystemPromptSlots {
5895                extra: Some("You are helpful.".to_string()),
5896                ..Default::default()
5897            },
5898            context_providers: vec![Arc::new(provider)],
5899            ..Default::default()
5900        };
5901
5902        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
5903
5904        // Test building augmented prompt
5905        let context_results = agent.resolve_context("test", None).await;
5906        let augmented = agent.build_augmented_system_prompt(&context_results);
5907
5908        let augmented_str = augmented.unwrap();
5909        assert!(augmented_str.contains("You are helpful."));
5910        assert!(augmented_str.contains("<context source=\"viking://docs/auth\" type=\"Resource\">"));
5911        assert!(augmented_str.contains("Auth uses JWT tokens."));
5912    }
5913
5914    // ========================================================================
5915    // Agentic Loop Integration Tests
5916    // ========================================================================
5917
5918    /// Helper: collect all events from a channel
5919    async fn collect_events(mut rx: mpsc::Receiver<AgentEvent>) -> Vec<AgentEvent> {
5920        let mut events = Vec::new();
5921        while let Ok(event) = rx.try_recv() {
5922            events.push(event);
5923        }
5924        // Drain remaining
5925        while let Some(event) = rx.recv().await {
5926            events.push(event);
5927        }
5928        events
5929    }
5930
5931    #[tokio::test]
5932    async fn test_agent_multi_turn_tool_chain() {
5933        // LLM calls tool A → sees result → calls tool B → sees result → final answer
5934        let mock_client = Arc::new(MockLlmClient::new(vec![
5935            // Turn 1: call ls
5936            MockLlmClient::tool_call_response(
5937                "t1",
5938                "bash",
5939                serde_json::json!({"command": "echo step1"}),
5940            ),
5941            // Turn 2: call another tool based on first result
5942            MockLlmClient::tool_call_response(
5943                "t2",
5944                "bash",
5945                serde_json::json!({"command": "echo step2"}),
5946            ),
5947            // Turn 3: final answer
5948            MockLlmClient::text_response("Completed both steps: step1 then step2"),
5949        ]));
5950
5951        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5952        let config = AgentConfig::default();
5953
5954        let agent = AgentLoop::new(
5955            mock_client.clone(),
5956            tool_executor,
5957            test_tool_context(),
5958            config,
5959        );
5960        let result = agent.execute(&[], "Run two steps", None).await.unwrap();
5961
5962        assert_eq!(result.text, "Completed both steps: step1 then step2");
5963        assert_eq!(result.tool_calls_count, 2);
5964        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 3);
5965
5966        // Verify message history: user → assistant(tool_use) → user(tool_result) → assistant(tool_use) → user(tool_result) → assistant(text)
5967        assert_eq!(result.messages[0].role, "user");
5968        assert_eq!(result.messages[1].role, "assistant"); // tool call 1
5969        assert_eq!(result.messages[2].role, "user"); // tool result 1 (Anthropic convention)
5970        assert_eq!(result.messages[3].role, "assistant"); // tool call 2
5971        assert_eq!(result.messages[4].role, "user"); // tool result 2
5972        assert_eq!(result.messages[5].role, "assistant"); // final text
5973        assert_eq!(result.messages.len(), 6);
5974    }
5975
5976    #[tokio::test]
5977    async fn test_agent_conversation_history_preserved() {
5978        // Pass existing history, verify it's preserved in output
5979        let existing_history = vec![
5980            Message::user("What is Rust?"),
5981            Message {
5982                role: "assistant".to_string(),
5983                content: vec![ContentBlock::Text {
5984                    text: "Rust is a systems programming language.".to_string(),
5985                }],
5986                reasoning_content: None,
5987            },
5988        ];
5989
5990        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
5991            "Rust was created by Graydon Hoare at Mozilla.",
5992        )]));
5993
5994        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
5995        let agent = AgentLoop::new(
5996            mock_client.clone(),
5997            tool_executor,
5998            test_tool_context(),
5999            AgentConfig {
6000                prompt_slots: SystemPromptSlots {
6001                    style: Some(AgentStyle::GeneralPurpose),
6002                    ..Default::default()
6003                },
6004                ..Default::default()
6005            },
6006        );
6007
6008        let result = agent
6009            .execute(&existing_history, "Who created it?", None)
6010            .await
6011            .unwrap();
6012
6013        // History should contain: old user + old assistant + new user + new assistant
6014        assert_eq!(result.messages.len(), 4);
6015        assert_eq!(result.messages[0].text(), "What is Rust?");
6016        assert_eq!(
6017            result.messages[1].text(),
6018            "Rust is a systems programming language."
6019        );
6020        assert_eq!(result.messages[2].text(), "Who created it?");
6021        assert_eq!(
6022            result.messages[3].text(),
6023            "Rust was created by Graydon Hoare at Mozilla."
6024        );
6025    }
6026
6027    #[tokio::test]
6028    async fn test_agent_event_stream_completeness() {
6029        // Verify full event sequence for a single tool call loop
6030        let mock_client = Arc::new(MockLlmClient::new(vec![
6031            MockLlmClient::tool_call_response(
6032                "t1",
6033                "bash",
6034                serde_json::json!({"command": "echo hi"}),
6035            ),
6036            MockLlmClient::text_response("Done"),
6037        ]));
6038
6039        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6040        let agent = AgentLoop::new(
6041            mock_client,
6042            tool_executor,
6043            test_tool_context(),
6044            AgentConfig {
6045                permission_checker: Some(Arc::new(PermissionPolicy::new().allow("bash(echo:*)"))),
6046                ..Default::default()
6047            },
6048        );
6049
6050        let (tx, rx) = mpsc::channel(100);
6051        let result = agent.execute(&[], "Say hi", Some(tx)).await.unwrap();
6052        assert_eq!(result.text, "Done");
6053
6054        let events = collect_events(rx).await;
6055
6056        // Verify event sequence
6057        let event_types: Vec<&str> = events
6058            .iter()
6059            .map(|e| match e {
6060                AgentEvent::Start { .. } => "Start",
6061                AgentEvent::TurnStart { .. } => "TurnStart",
6062                AgentEvent::TurnEnd { .. } => "TurnEnd",
6063                AgentEvent::ToolEnd { .. } => "ToolEnd",
6064                AgentEvent::End { .. } => "End",
6065                _ => "Other",
6066            })
6067            .collect();
6068
6069        // Mode/context events may precede Start; the execution lifecycle still
6070        // needs a Start before turns and an End as the final event.
6071        let start_index = event_types
6072            .iter()
6073            .position(|t| *t == "Start")
6074            .expect("Start event should be present");
6075        let first_turn_index = event_types
6076            .iter()
6077            .position(|t| *t == "TurnStart")
6078            .expect("TurnStart event should be present");
6079        assert!(start_index < first_turn_index);
6080        assert_eq!(event_types.last(), Some(&"End"));
6081
6082        // Must have 2 TurnStarts (tool call turn + final answer turn)
6083        let turn_starts = event_types.iter().filter(|&&t| t == "TurnStart").count();
6084        assert_eq!(turn_starts, 2);
6085
6086        // Must have 1 ToolEnd
6087        let tool_ends = event_types.iter().filter(|&&t| t == "ToolEnd").count();
6088        assert_eq!(tool_ends, 1);
6089    }
6090
6091    #[tokio::test]
6092    async fn test_agent_multiple_tools_single_turn() {
6093        // LLM returns 2 tool calls in one response
6094        let mock_client = Arc::new(MockLlmClient::new(vec![
6095            LlmResponse {
6096                message: Message {
6097                    role: "assistant".to_string(),
6098                    content: vec![
6099                        ContentBlock::ToolUse {
6100                            id: "t1".to_string(),
6101                            name: "bash".to_string(),
6102                            input: serde_json::json!({"command": "echo first"}),
6103                        },
6104                        ContentBlock::ToolUse {
6105                            id: "t2".to_string(),
6106                            name: "bash".to_string(),
6107                            input: serde_json::json!({"command": "echo second"}),
6108                        },
6109                    ],
6110                    reasoning_content: None,
6111                },
6112                usage: TokenUsage {
6113                    prompt_tokens: 10,
6114                    completion_tokens: 5,
6115                    total_tokens: 15,
6116                    cache_read_tokens: None,
6117                    cache_write_tokens: None,
6118                },
6119                stop_reason: Some("tool_use".to_string()),
6120                meta: None,
6121            },
6122            MockLlmClient::text_response("Both commands ran"),
6123            MockLlmClient::text_response("Both commands ran"),
6124        ]));
6125
6126        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6127        let agent = AgentLoop::new(
6128            mock_client.clone(),
6129            tool_executor,
6130            test_tool_context(),
6131            AgentConfig {
6132                prompt_slots: SystemPromptSlots {
6133                    style: Some(AgentStyle::GeneralPurpose),
6134                    ..Default::default()
6135                },
6136                ..Default::default()
6137            },
6138        );
6139
6140        let result = agent
6141            .execute_loop(
6142                &[],
6143                "run both commands now",
6144                AgentStyle::GeneralPurpose,
6145                None,
6146                None,
6147                &tokio_util::sync::CancellationToken::new(),
6148                true,
6149            )
6150            .await
6151            .unwrap();
6152
6153        assert_eq!(result.text, "Both commands ran");
6154        assert_eq!(result.tool_calls_count, 2);
6155        assert!(
6156            mock_client.call_count.load(Ordering::SeqCst) >= 2,
6157            "expected at least the tool-call turn and final response turn"
6158        );
6159
6160        // Messages: user → assistant(2 tools) → user(tool_result) → user(tool_result) → assistant(text)
6161        assert_eq!(result.messages[0].role, "user");
6162        assert_eq!(result.messages[1].role, "assistant");
6163        assert_eq!(result.messages[2].role, "user"); // tool result 1
6164        assert_eq!(result.messages[3].role, "user"); // tool result 2
6165        assert_eq!(result.messages[4].role, "assistant");
6166    }
6167
6168    #[tokio::test]
6169    async fn test_agent_token_usage_accumulation() {
6170        // Verify usage sums across multiple turns
6171        let mock_client = Arc::new(MockLlmClient::new(vec![
6172            MockLlmClient::tool_call_response(
6173                "t1",
6174                "bash",
6175                serde_json::json!({"command": "echo x"}),
6176            ),
6177            MockLlmClient::text_response("Done"),
6178        ]));
6179
6180        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6181        let agent = AgentLoop::new(
6182            mock_client,
6183            tool_executor,
6184            test_tool_context(),
6185            AgentConfig::default(),
6186        );
6187
6188        let result = agent.execute(&[], "test", None).await.unwrap();
6189
6190        // Each mock response has prompt=10, completion=5, total=15
6191        // 2 LLM calls → 20 prompt, 10 completion, 30 total
6192        assert_eq!(result.usage.prompt_tokens, 20);
6193        assert_eq!(result.usage.completion_tokens, 10);
6194        assert_eq!(result.usage.total_tokens, 30);
6195    }
6196
6197    #[tokio::test]
6198    async fn test_agent_system_prompt_passed() {
6199        // Verify system prompt is used (MockLlmClient captures calls)
6200        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6201            "I am a coding assistant.",
6202        )]));
6203
6204        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6205        let config = AgentConfig {
6206            prompt_slots: SystemPromptSlots {
6207                extra: Some("You are a coding assistant.".to_string()),
6208                ..Default::default()
6209            },
6210            ..Default::default()
6211        };
6212
6213        let agent = AgentLoop::new(
6214            mock_client.clone(),
6215            tool_executor,
6216            test_tool_context(),
6217            config,
6218        );
6219        let result = agent.execute(&[], "What are you?", None).await.unwrap();
6220
6221        assert_eq!(result.text, "I am a coding assistant.");
6222        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 1);
6223    }
6224
6225    #[tokio::test]
6226    async fn test_agent_max_rounds_with_persistent_tool_calls() {
6227        // LLM keeps calling tools forever — should hit max_tool_rounds
6228        let mut responses = Vec::new();
6229        for i in 0..15 {
6230            responses.push(MockLlmClient::tool_call_response(
6231                &format!("t{}", i),
6232                "bash",
6233                serde_json::json!({"command": format!("echo round{}", i)}),
6234            ));
6235        }
6236
6237        let mock_client = Arc::new(MockLlmClient::new(responses));
6238        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6239        let config = AgentConfig {
6240            max_tool_rounds: 5,
6241            ..Default::default()
6242        };
6243
6244        let agent = AgentLoop::new(
6245            mock_client.clone(),
6246            tool_executor,
6247            test_tool_context(),
6248            config,
6249        );
6250        let result = agent.execute(&[], "Loop forever", None).await;
6251
6252        assert!(result.is_err());
6253        let err = result.unwrap_err().to_string();
6254        assert!(err.contains("Max tool rounds (5) exceeded"));
6255    }
6256
6257    #[tokio::test]
6258    async fn test_agent_end_event_contains_final_text() {
6259        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6260            "Final answer here",
6261        )]));
6262
6263        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6264        let agent = AgentLoop::new(
6265            mock_client,
6266            tool_executor,
6267            test_tool_context(),
6268            AgentConfig::default(),
6269        );
6270
6271        let (tx, rx) = mpsc::channel(100);
6272        agent.execute(&[], "test", Some(tx)).await.unwrap();
6273
6274        let events = collect_events(rx).await;
6275        let end_event = events.iter().find(|e| matches!(e, AgentEvent::End { .. }));
6276        assert!(end_event.is_some());
6277
6278        if let AgentEvent::End { text, usage, .. } = end_event.unwrap() {
6279            assert_eq!(text, "Final answer here");
6280            assert_eq!(usage.total_tokens, 15);
6281        }
6282    }
6283}
6284
6285#[cfg(test)]
6286mod extra_agent_tests {
6287    use super::*;
6288    use crate::agent::tests::MockLlmClient;
6289    use crate::queue::SessionQueueConfig;
6290    use crate::tools::ToolExecutor;
6291    use std::path::PathBuf;
6292    use std::sync::atomic::{AtomicUsize, Ordering};
6293
6294    fn test_tool_context() -> ToolContext {
6295        ToolContext::new(PathBuf::from("/tmp"))
6296    }
6297
6298    // ========================================================================
6299    // AgentConfig
6300    // ========================================================================
6301
6302    #[test]
6303    fn test_agent_config_debug() {
6304        let config = AgentConfig {
6305            prompt_slots: SystemPromptSlots {
6306                extra: Some("You are helpful".to_string()),
6307                ..Default::default()
6308            },
6309            tools: vec![],
6310            max_tool_rounds: 10,
6311            permission_checker: None,
6312            confirmation_manager: None,
6313            context_providers: vec![],
6314            planning_mode: PlanningMode::Enabled,
6315            goal_tracking: false,
6316            hook_engine: None,
6317            skill_registry: None,
6318            ..AgentConfig::default()
6319        };
6320        let debug = format!("{:?}", config);
6321        assert!(debug.contains("AgentConfig"));
6322        assert!(debug.contains("planning_mode"));
6323    }
6324
6325    #[test]
6326    fn test_agent_config_default_values() {
6327        let config = AgentConfig::default();
6328        assert_eq!(config.max_tool_rounds, MAX_TOOL_ROUNDS);
6329        assert_eq!(config.planning_mode, PlanningMode::Auto);
6330        assert!(!config.goal_tracking);
6331        assert!(config.context_providers.is_empty());
6332    }
6333
6334    #[test]
6335    fn test_auto_pre_analysis_runs_without_keyword_gate() {
6336        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6337        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6338        let agent = AgentLoop::new(
6339            mock_client,
6340            tool_executor,
6341            test_tool_context(),
6342            AgentConfig::default(),
6343        );
6344
6345        assert!(agent.should_run_pre_analysis());
6346    }
6347
6348    #[test]
6349    fn test_disabled_planning_never_runs_pre_analysis() {
6350        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6351        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6352        let config = AgentConfig {
6353            planning_mode: PlanningMode::Disabled,
6354            ..AgentConfig::default()
6355        };
6356        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6357
6358        assert!(!agent.should_run_pre_analysis());
6359    }
6360
6361    // ========================================================================
6362    // AgentEvent serialization
6363    // ========================================================================
6364
6365    #[test]
6366    fn test_agent_event_serialize_start() {
6367        let event = AgentEvent::Start {
6368            prompt: "Hello".to_string(),
6369        };
6370        let json = serde_json::to_string(&event).unwrap();
6371        assert!(json.contains("agent_start"));
6372        assert!(json.contains("Hello"));
6373    }
6374
6375    #[test]
6376    fn test_agent_event_serialize_text_delta() {
6377        let event = AgentEvent::TextDelta {
6378            text: "chunk".to_string(),
6379        };
6380        let json = serde_json::to_string(&event).unwrap();
6381        assert!(json.contains("text_delta"));
6382    }
6383
6384    #[test]
6385    fn test_agent_event_serialize_tool_start() {
6386        let event = AgentEvent::ToolStart {
6387            id: "t1".to_string(),
6388            name: "bash".to_string(),
6389        };
6390        let json = serde_json::to_string(&event).unwrap();
6391        assert!(json.contains("tool_start"));
6392        assert!(json.contains("bash"));
6393    }
6394
6395    #[test]
6396    fn test_agent_event_serialize_tool_end() {
6397        let event = AgentEvent::ToolEnd {
6398            id: "t1".to_string(),
6399            name: "bash".to_string(),
6400            output: "hello".to_string(),
6401            exit_code: 0,
6402            metadata: None,
6403        };
6404        let json = serde_json::to_string(&event).unwrap();
6405        assert!(json.contains("tool_end"));
6406    }
6407
6408    #[test]
6409    fn test_agent_event_tool_end_has_metadata_field() {
6410        let event = AgentEvent::ToolEnd {
6411            id: "t1".to_string(),
6412            name: "write".to_string(),
6413            output: "Wrote 5 bytes".to_string(),
6414            exit_code: 0,
6415            metadata: Some(
6416                serde_json::json!({ "before": "old", "after": "new", "file_path": "f.txt" }),
6417            ),
6418        };
6419        let json = serde_json::to_string(&event).unwrap();
6420        assert!(json.contains("\"before\""));
6421    }
6422
6423    #[test]
6424    fn test_agent_event_serialize_error() {
6425        let event = AgentEvent::Error {
6426            message: "oops".to_string(),
6427        };
6428        let json = serde_json::to_string(&event).unwrap();
6429        assert!(json.contains("error"));
6430        assert!(json.contains("oops"));
6431    }
6432
6433    #[test]
6434    fn test_agent_event_serialize_confirmation_required() {
6435        let event = AgentEvent::ConfirmationRequired {
6436            tool_id: "t1".to_string(),
6437            tool_name: "bash".to_string(),
6438            args: serde_json::json!({"cmd": "rm"}),
6439            timeout_ms: 30000,
6440        };
6441        let json = serde_json::to_string(&event).unwrap();
6442        assert!(json.contains("confirmation_required"));
6443    }
6444
6445    #[test]
6446    fn test_agent_event_serialize_confirmation_received() {
6447        let event = AgentEvent::ConfirmationReceived {
6448            tool_id: "t1".to_string(),
6449            approved: true,
6450            reason: Some("safe".to_string()),
6451        };
6452        let json = serde_json::to_string(&event).unwrap();
6453        assert!(json.contains("confirmation_received"));
6454    }
6455
6456    #[test]
6457    fn test_agent_event_serialize_confirmation_timeout() {
6458        let event = AgentEvent::ConfirmationTimeout {
6459            tool_id: "t1".to_string(),
6460            action_taken: "rejected".to_string(),
6461        };
6462        let json = serde_json::to_string(&event).unwrap();
6463        assert!(json.contains("confirmation_timeout"));
6464    }
6465
6466    #[test]
6467    fn test_agent_event_serialize_external_task_pending() {
6468        let event = AgentEvent::ExternalTaskPending {
6469            task_id: "task-1".to_string(),
6470            session_id: "sess-1".to_string(),
6471            lane: crate::queue::SessionLane::Execute,
6472            command_type: "bash".to_string(),
6473            payload: serde_json::json!({}),
6474            timeout_ms: 60000,
6475        };
6476        let json = serde_json::to_string(&event).unwrap();
6477        assert!(json.contains("external_task_pending"));
6478    }
6479
6480    #[test]
6481    fn test_agent_event_serialize_external_task_completed() {
6482        let event = AgentEvent::ExternalTaskCompleted {
6483            task_id: "task-1".to_string(),
6484            session_id: "sess-1".to_string(),
6485            success: false,
6486        };
6487        let json = serde_json::to_string(&event).unwrap();
6488        assert!(json.contains("external_task_completed"));
6489    }
6490
6491    #[test]
6492    fn test_agent_event_serialize_permission_denied() {
6493        let event = AgentEvent::PermissionDenied {
6494            tool_id: "t1".to_string(),
6495            tool_name: "bash".to_string(),
6496            args: serde_json::json!({}),
6497            reason: "denied".to_string(),
6498        };
6499        let json = serde_json::to_string(&event).unwrap();
6500        assert!(json.contains("permission_denied"));
6501    }
6502
6503    #[test]
6504    fn test_agent_event_serialize_context_compacted() {
6505        let event = AgentEvent::ContextCompacted {
6506            session_id: "sess-1".to_string(),
6507            before_messages: 100,
6508            after_messages: 20,
6509            percent_before: 0.85,
6510        };
6511        let json = serde_json::to_string(&event).unwrap();
6512        assert!(json.contains("context_compacted"));
6513    }
6514
6515    #[test]
6516    fn test_agent_event_serialize_turn_start() {
6517        let event = AgentEvent::TurnStart { turn: 3 };
6518        let json = serde_json::to_string(&event).unwrap();
6519        assert!(json.contains("turn_start"));
6520    }
6521
6522    #[test]
6523    fn test_agent_event_serialize_turn_end() {
6524        let event = AgentEvent::TurnEnd {
6525            turn: 3,
6526            usage: TokenUsage::default(),
6527        };
6528        let json = serde_json::to_string(&event).unwrap();
6529        assert!(json.contains("turn_end"));
6530    }
6531
6532    #[test]
6533    fn test_agent_event_serialize_end() {
6534        let event = AgentEvent::End {
6535            text: "Done".to_string(),
6536            usage: TokenUsage {
6537                prompt_tokens: 100,
6538                completion_tokens: 50,
6539                total_tokens: 150,
6540                cache_read_tokens: None,
6541                cache_write_tokens: None,
6542            },
6543            verification_summary: Box::new(crate::verification::VerificationSummary::from_reports(
6544                &[],
6545            )),
6546            meta: None,
6547        };
6548        let json = serde_json::to_string(&event).unwrap();
6549        assert!(json.contains("agent_end"));
6550        assert!(json.contains("verification_summary"));
6551    }
6552
6553    // ========================================================================
6554    // AgentResult
6555    // ========================================================================
6556
6557    #[test]
6558    fn test_agent_result_fields() {
6559        let result = AgentResult {
6560            text: "output".to_string(),
6561            messages: vec![Message::user("hello")],
6562            usage: TokenUsage::default(),
6563            tool_calls_count: 3,
6564            verification_reports: Vec::new(),
6565        };
6566        assert_eq!(result.text, "output");
6567        assert_eq!(result.messages.len(), 1);
6568        assert_eq!(result.tool_calls_count, 3);
6569        assert!(result.verification_reports.is_empty());
6570        assert_eq!(
6571            result.verification_summary().status,
6572            crate::verification::VerificationStatus::Skipped
6573        );
6574        assert!(!result.has_pending_verification());
6575    }
6576
6577    #[test]
6578    fn test_collect_verification_report_from_tool_metadata() {
6579        let report = crate::verification::VerificationReport::new(
6580            "program:example",
6581            vec![crate::verification::VerificationCheck::required(
6582                "check:inspect",
6583                "inspect_artifacts",
6584                "Inspect artifacts",
6585            )],
6586        );
6587        let metadata = Some(serde_json::json!({
6588            "verification_report": report.to_value()
6589        }));
6590        let mut reports = Vec::new();
6591
6592        AgentLoop::collect_verification_report(&mut reports, &metadata);
6593
6594        assert_eq!(reports.len(), 1);
6595        assert_eq!(reports[0].subject, "program:example");
6596        assert_eq!(
6597            reports[0].status,
6598            crate::verification::VerificationStatus::NeedsReview
6599        );
6600    }
6601
6602    #[test]
6603    fn test_agent_result_verification_summary() {
6604        let report = crate::verification::VerificationReport::new(
6605            "program:example",
6606            vec![crate::verification::VerificationCheck::required(
6607                "check:inspect",
6608                "inspect_artifacts",
6609                "Inspect artifacts",
6610            )],
6611        );
6612        let result = AgentResult {
6613            text: "output".to_string(),
6614            messages: Vec::new(),
6615            usage: TokenUsage::default(),
6616            tool_calls_count: 1,
6617            verification_reports: vec![report],
6618        };
6619
6620        let summary = result.verification_summary();
6621
6622        assert_eq!(
6623            summary.status,
6624            crate::verification::VerificationStatus::NeedsReview
6625        );
6626        assert_eq!(summary.pending_required_check_count, 1);
6627        assert!(result
6628            .verification_summary_text()
6629            .contains("Verification needs review"));
6630        assert!(result.has_pending_verification());
6631    }
6632
6633    // ========================================================================
    // Additional AgentEvent serialization tests (variants not covered above)
6635    // ========================================================================
6636
6637    #[test]
6638    fn test_agent_event_serialize_context_resolving() {
6639        let event = AgentEvent::ContextResolving {
6640            providers: vec!["provider1".to_string(), "provider2".to_string()],
6641        };
6642        let json = serde_json::to_string(&event).unwrap();
6643        assert!(json.contains("context_resolving"));
6644        assert!(json.contains("provider1"));
6645    }
6646
6647    #[test]
6648    fn test_agent_event_serialize_context_resolved() {
6649        let event = AgentEvent::ContextResolved {
6650            total_items: 5,
6651            total_tokens: 1000,
6652        };
6653        let json = serde_json::to_string(&event).unwrap();
6654        assert!(json.contains("context_resolved"));
6655        assert!(json.contains("1000"));
6656    }
6657
6658    #[test]
6659    fn test_agent_event_serialize_command_dead_lettered() {
6660        let event = AgentEvent::CommandDeadLettered {
6661            command_id: "cmd-1".to_string(),
6662            command_type: "bash".to_string(),
6663            lane: "execute".to_string(),
6664            error: "timeout".to_string(),
6665            attempts: 3,
6666        };
6667        let json = serde_json::to_string(&event).unwrap();
6668        assert!(json.contains("command_dead_lettered"));
6669        assert!(json.contains("cmd-1"));
6670    }
6671
6672    #[test]
6673    fn test_agent_event_serialize_command_retry() {
6674        let event = AgentEvent::CommandRetry {
6675            command_id: "cmd-2".to_string(),
6676            command_type: "read".to_string(),
6677            lane: "query".to_string(),
6678            attempt: 2,
6679            delay_ms: 1000,
6680        };
6681        let json = serde_json::to_string(&event).unwrap();
6682        assert!(json.contains("command_retry"));
6683        assert!(json.contains("cmd-2"));
6684    }
6685
6686    #[test]
6687    fn test_agent_event_serialize_queue_alert() {
6688        let event = AgentEvent::QueueAlert {
6689            level: "warning".to_string(),
6690            alert_type: "depth".to_string(),
6691            message: "Queue depth exceeded".to_string(),
6692        };
6693        let json = serde_json::to_string(&event).unwrap();
6694        assert!(json.contains("queue_alert"));
6695        assert!(json.contains("warning"));
6696    }
6697
6698    #[test]
6699    fn test_agent_event_serialize_task_updated() {
6700        let event = AgentEvent::TaskUpdated {
6701            session_id: "sess-1".to_string(),
6702            tasks: vec![],
6703        };
6704        let json = serde_json::to_string(&event).unwrap();
6705        assert!(json.contains("task_updated"));
6706        assert!(json.contains("sess-1"));
6707    }
6708
6709    #[test]
6710    fn test_agent_event_serialize_memory_stored() {
6711        let event = AgentEvent::MemoryStored {
6712            memory_id: "mem-1".to_string(),
6713            memory_type: "conversation".to_string(),
6714            importance: 0.8,
6715            tags: vec!["important".to_string()],
6716        };
6717        let json = serde_json::to_string(&event).unwrap();
6718        assert!(json.contains("memory_stored"));
6719        assert!(json.contains("mem-1"));
6720    }
6721
6722    #[test]
6723    fn test_agent_event_serialize_memory_recalled() {
6724        let event = AgentEvent::MemoryRecalled {
6725            memory_id: "mem-2".to_string(),
6726            content: "Previous conversation".to_string(),
6727            relevance: 0.9,
6728        };
6729        let json = serde_json::to_string(&event).unwrap();
6730        assert!(json.contains("memory_recalled"));
6731        assert!(json.contains("mem-2"));
6732    }
6733
6734    #[test]
6735    fn test_agent_event_serialize_memories_searched() {
6736        let event = AgentEvent::MemoriesSearched {
6737            query: Some("search term".to_string()),
6738            tags: vec!["tag1".to_string()],
6739            result_count: 5,
6740        };
6741        let json = serde_json::to_string(&event).unwrap();
6742        assert!(json.contains("memories_searched"));
6743        assert!(json.contains("search term"));
6744    }
6745
6746    #[test]
6747    fn test_agent_event_serialize_memory_cleared() {
6748        let event = AgentEvent::MemoryCleared {
6749            tier: "short_term".to_string(),
6750            count: 10,
6751        };
6752        let json = serde_json::to_string(&event).unwrap();
6753        assert!(json.contains("memory_cleared"));
6754        assert!(json.contains("short_term"));
6755    }
6756
6757    #[test]
6758    fn test_agent_event_serialize_subagent_start() {
6759        let event = AgentEvent::SubagentStart {
6760            task_id: "task-1".to_string(),
6761            session_id: "child-sess".to_string(),
6762            parent_session_id: "parent-sess".to_string(),
6763            agent: "explore".to_string(),
6764            description: "Explore codebase".to_string(),
6765        };
6766        let json = serde_json::to_string(&event).unwrap();
6767        assert!(json.contains("subagent_start"));
6768        assert!(json.contains("explore"));
6769    }
6770
6771    #[test]
6772    fn test_agent_event_serialize_subagent_progress() {
6773        let event = AgentEvent::SubagentProgress {
6774            task_id: "task-1".to_string(),
6775            session_id: "child-sess".to_string(),
6776            status: "processing".to_string(),
6777            metadata: serde_json::json!({"progress": 50}),
6778        };
6779        let json = serde_json::to_string(&event).unwrap();
6780        assert!(json.contains("subagent_progress"));
6781        assert!(json.contains("processing"));
6782    }
6783
6784    #[test]
6785    fn test_agent_event_serialize_subagent_end() {
6786        let event = AgentEvent::SubagentEnd {
6787            task_id: "task-1".to_string(),
6788            session_id: "child-sess".to_string(),
6789            agent: "explore".to_string(),
6790            output: "Found 10 files".to_string(),
6791            success: true,
6792        };
6793        let json = serde_json::to_string(&event).unwrap();
6794        assert!(json.contains("subagent_end"));
6795        assert!(json.contains("Found 10 files"));
6796    }
6797
6798    #[test]
6799    fn test_agent_event_serialize_planning_start() {
6800        let event = AgentEvent::PlanningStart {
6801            prompt: "Build a web app".to_string(),
6802        };
6803        let json = serde_json::to_string(&event).unwrap();
6804        assert!(json.contains("planning_start"));
6805        assert!(json.contains("Build a web app"));
6806    }
6807
6808    #[test]
6809    fn test_agent_event_serialize_planning_end() {
6810        use crate::planning::{Complexity, ExecutionPlan};
6811        let plan = ExecutionPlan::new("Test goal".to_string(), Complexity::Simple);
6812        let event = AgentEvent::PlanningEnd {
6813            plan,
6814            estimated_steps: 3,
6815        };
6816        let json = serde_json::to_string(&event).unwrap();
6817        assert!(json.contains("planning_end"));
6818        assert!(json.contains("estimated_steps"));
6819    }
6820
6821    #[test]
6822    fn test_agent_event_serialize_step_start() {
6823        let event = AgentEvent::StepStart {
6824            step_id: "step-1".to_string(),
6825            description: "Initialize project".to_string(),
6826            step_number: 1,
6827            total_steps: 5,
6828        };
6829        let json = serde_json::to_string(&event).unwrap();
6830        assert!(json.contains("step_start"));
6831        assert!(json.contains("Initialize project"));
6832    }
6833
6834    #[test]
6835    fn test_agent_event_serialize_step_end() {
6836        let event = AgentEvent::StepEnd {
6837            step_id: "step-1".to_string(),
6838            status: TaskStatus::Completed,
6839            step_number: 1,
6840            total_steps: 5,
6841        };
6842        let json = serde_json::to_string(&event).unwrap();
6843        assert!(json.contains("step_end"));
6844        assert!(json.contains("step-1"));
6845    }
6846
6847    #[test]
6848    fn test_agent_event_serialize_goal_extracted() {
6849        use crate::planning::AgentGoal;
6850        let goal = AgentGoal::new("Complete the task".to_string());
6851        let event = AgentEvent::GoalExtracted { goal };
6852        let json = serde_json::to_string(&event).unwrap();
6853        assert!(json.contains("goal_extracted"));
6854    }
6855
6856    #[test]
6857    fn test_agent_event_serialize_goal_progress() {
6858        let event = AgentEvent::GoalProgress {
6859            goal: "Build app".to_string(),
6860            progress: 0.5,
6861            completed_steps: 2,
6862            total_steps: 4,
6863        };
6864        let json = serde_json::to_string(&event).unwrap();
6865        assert!(json.contains("goal_progress"));
6866        assert!(json.contains("0.5"));
6867    }
6868
6869    #[test]
6870    fn test_agent_event_serialize_goal_achieved() {
6871        let event = AgentEvent::GoalAchieved {
6872            goal: "Build app".to_string(),
6873            total_steps: 4,
6874            duration_ms: 5000,
6875        };
6876        let json = serde_json::to_string(&event).unwrap();
6877        assert!(json.contains("goal_achieved"));
6878        assert!(json.contains("5000"));
6879    }
6880
6881    #[tokio::test]
6882    async fn test_extract_goal_with_json_response() {
6883        // LlmPlanner expects JSON with "description" and "success_criteria" fields
6884        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6885            r#"{"description": "Build web app", "success_criteria": ["App runs on port 3000", "Has login page"]}"#,
6886        )]));
6887        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6888        let agent = AgentLoop::new(
6889            mock_client,
6890            tool_executor,
6891            test_tool_context(),
6892            AgentConfig::default(),
6893        );
6894
6895        let goal = agent.extract_goal("Build a web app").await.unwrap();
6896        assert_eq!(goal.description, "Build web app");
6897        assert_eq!(goal.success_criteria.len(), 2);
6898        assert_eq!(goal.success_criteria[0], "App runs on port 3000");
6899    }
6900
6901    #[tokio::test]
6902    async fn test_extract_goal_fallback_on_non_json() {
6903        // Non-JSON response triggers fallback: returns the original prompt as goal
6904        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6905            "Some non-JSON response",
6906        )]));
6907        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6908        let agent = AgentLoop::new(
6909            mock_client,
6910            tool_executor,
6911            test_tool_context(),
6912            AgentConfig::default(),
6913        );
6914
6915        let goal = agent.extract_goal("Do something").await.unwrap();
6916        // Fallback uses the original prompt as description
6917        assert_eq!(goal.description, "Do something");
6918        // Fallback adds 2 generic criteria
6919        assert_eq!(goal.success_criteria.len(), 2);
6920    }
6921
6922    #[tokio::test]
6923    async fn test_check_goal_achievement_json_yes() {
6924        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6925            r#"{"achieved": true, "progress": 1.0, "remaining_criteria": []}"#,
6926        )]));
6927        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6928        let agent = AgentLoop::new(
6929            mock_client,
6930            tool_executor,
6931            test_tool_context(),
6932            AgentConfig::default(),
6933        );
6934
6935        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
6936        let achieved = agent
6937            .check_goal_achievement(&goal, "All done")
6938            .await
6939            .unwrap();
6940        assert!(achieved);
6941    }
6942
6943    #[tokio::test]
6944    async fn test_check_goal_achievement_fallback_not_done() {
6945        // Non-JSON response triggers heuristic fallback
6946        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
6947            "invalid json",
6948        )]));
6949        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6950        let agent = AgentLoop::new(
6951            mock_client,
6952            tool_executor,
6953            test_tool_context(),
6954            AgentConfig::default(),
6955        );
6956
6957        let goal = crate::planning::AgentGoal::new("Test goal".to_string());
6958        // "still working" doesn't contain "complete"/"done"/"finished"
6959        let achieved = agent
6960            .check_goal_achievement(&goal, "still working")
6961            .await
6962            .unwrap();
6963        assert!(!achieved);
6964    }
6965
6966    // ========================================================================
6967    // build_augmented_system_prompt Tests
6968    // ========================================================================
6969
6970    #[test]
6971    fn test_build_augmented_system_prompt_empty_context() {
6972        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6973        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6974        let config = AgentConfig {
6975            prompt_slots: SystemPromptSlots {
6976                extra: Some("Base prompt".to_string()),
6977                ..Default::default()
6978            },
6979            ..Default::default()
6980        };
6981        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
6982
6983        let result = agent.build_augmented_system_prompt(&[]);
6984        assert!(result.unwrap().contains("Base prompt"));
6985    }
6986
6987    #[test]
6988    fn test_build_augmented_system_prompt_no_custom_slots() {
6989        let mock_client = Arc::new(MockLlmClient::new(vec![]));
6990        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
6991        let agent = AgentLoop::new(
6992            mock_client,
6993            tool_executor,
6994            test_tool_context(),
6995            AgentConfig::default(),
6996        );
6997
6998        let result = agent.build_augmented_system_prompt(&[]);
6999        // Default slots still produce the default agentic prompt
7000        assert!(result.is_some());
7001        assert!(result.unwrap().contains("Core Behaviour"));
7002    }
7003
7004    #[test]
7005    fn test_project_hint_is_assembled_as_context_item() {
7006        let temp_dir = tempfile::tempdir().unwrap();
7007        std::fs::write(
7008            temp_dir.path().join("Cargo.toml"),
7009            "[package]\nname = \"demo\"\n",
7010        )
7011        .unwrap();
7012
7013        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7014        let tool_executor = Arc::new(ToolExecutor::new(temp_dir.path().display().to_string()));
7015        let agent = AgentLoop::new(
7016            mock_client,
7017            tool_executor,
7018            ToolContext::new(temp_dir.path().to_path_buf()),
7019            AgentConfig::default(),
7020        );
7021
7022        let assembly = agent.assemble_context_results(&[]);
7023        assert_eq!(assembly.items.len(), 1);
7024        assert_eq!(
7025            assembly.items[0].source.as_deref(),
7026            Some("a3s://project-hint")
7027        );
7028        assert!(assembly.items[0].content.contains("Rust"));
7029
7030        let text = agent.build_augmented_system_prompt(&[]).unwrap();
7031        assert!(text.contains("<context source=\"a3s://project-hint\" type=\"Resource\">"));
7032    }
7033
7034    #[test]
7035    fn test_build_augmented_system_prompt_with_context_no_base() {
7036        use crate::context::{ContextItem, ContextResult, ContextType};
7037
7038        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7039        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7040        let agent = AgentLoop::new(
7041            mock_client,
7042            tool_executor,
7043            test_tool_context(),
7044            AgentConfig::default(),
7045        );
7046
7047        let context = vec![ContextResult {
7048            provider: "test".to_string(),
7049            items: vec![ContextItem::new("id1", ContextType::Resource, "Content")],
7050            total_tokens: 10,
7051            truncated: false,
7052        }];
7053
7054        let result = agent.build_augmented_system_prompt(&context);
7055        assert!(result.is_some());
7056        let text = result.unwrap();
7057        assert!(text.contains("<context"));
7058        assert!(text.contains("Content"));
7059    }
7060
7061    // ========================================================================
7062    // AgentResult Clone and Debug
7063    // ========================================================================
7064
7065    #[test]
7066    fn test_agent_result_clone() {
7067        let result = AgentResult {
7068            text: "output".to_string(),
7069            messages: vec![Message::user("hello")],
7070            usage: TokenUsage::default(),
7071            tool_calls_count: 3,
7072            verification_reports: Vec::new(),
7073        };
7074        let cloned = result.clone();
7075        assert_eq!(cloned.text, result.text);
7076        assert_eq!(cloned.tool_calls_count, result.tool_calls_count);
7077    }
7078
7079    #[test]
7080    fn test_agent_result_debug() {
7081        let result = AgentResult {
7082            text: "output".to_string(),
7083            messages: vec![Message::user("hello")],
7084            usage: TokenUsage::default(),
7085            tool_calls_count: 3,
7086            verification_reports: Vec::new(),
7087        };
7088        let debug = format!("{:?}", result);
7089        assert!(debug.contains("AgentResult"));
7090        assert!(debug.contains("output"));
7091    }
7092
7093    // ========================================================================
    // handle_post_execution_metadata Tests (no tests currently defined in this section)
7095    // ========================================================================
7096
7097    // ========================================================================
7098    // ToolCommand adapter tests
7099    // ========================================================================
7100
7101    #[tokio::test]
7102    async fn test_tool_command_command_type() {
7103        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7104        let cmd = ToolCommand {
7105            tool_executor: executor,
7106            tool_name: "read".to_string(),
7107            tool_args: serde_json::json!({"file": "test.rs"}),
7108            skill_registry: None,
7109            tool_context: test_tool_context(),
7110        };
7111        assert_eq!(cmd.command_type(), "read");
7112    }
7113
7114    #[tokio::test]
7115    async fn test_tool_command_payload() {
7116        let executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7117        let args = serde_json::json!({"file": "test.rs", "offset": 10});
7118        let cmd = ToolCommand {
7119            tool_executor: executor,
7120            tool_name: "read".to_string(),
7121            tool_args: args.clone(),
7122            skill_registry: None,
7123            tool_context: test_tool_context(),
7124        };
7125        assert_eq!(cmd.payload(), args);
7126    }
7127
7128    // ========================================================================
7129    // AgentLoop with queue builder tests
7130    // ========================================================================
7131
7132    #[tokio::test(flavor = "multi_thread")]
7133    async fn test_agent_loop_with_queue() {
7134        use tokio::sync::broadcast;
7135
7136        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
7137            "Hello",
7138        )]));
7139        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7140        let config = AgentConfig::default();
7141
7142        let (event_tx, _) = broadcast::channel(100);
7143        let queue = SessionLaneQueue::new("test-session", SessionQueueConfig::default(), event_tx)
7144            .await
7145            .unwrap();
7146
7147        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config)
7148            .with_queue(Arc::new(queue));
7149
7150        assert!(agent.command_queue.is_some());
7151    }
7152
7153    #[tokio::test]
7154    async fn test_agent_loop_without_queue() {
7155        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
7156            "Hello",
7157        )]));
7158        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7159        let config = AgentConfig::default();
7160
7161        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
7162
7163        assert!(agent.command_queue.is_none());
7164    }
7165
7166    // ========================================================================
7167    // Parallel Plan Execution Tests
7168    // ========================================================================
7169
7170    #[tokio::test]
7171    async fn test_execute_plan_parallel_independent() {
7172        use crate::planning::{Complexity, ExecutionPlan, Task};
7173
7174        // 3 independent steps (no dependencies) — should all execute.
7175        // MockLlmClient needs one response per execute_loop call per step.
7176        let mock_client = Arc::new(MockLlmClient::new(vec![
7177            MockLlmClient::text_response("Step 1 done"),
7178            MockLlmClient::text_response("Step 2 done"),
7179            MockLlmClient::text_response("Step 3 done"),
7180        ]));
7181
7182        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7183        let config = AgentConfig::default();
7184        let agent = AgentLoop::new(
7185            mock_client.clone(),
7186            tool_executor,
7187            test_tool_context(),
7188            config,
7189        );
7190
7191        let mut plan = ExecutionPlan::new("Test parallel", Complexity::Simple);
7192        plan.add_step(Task::new("s1", "First step"));
7193        plan.add_step(Task::new("s2", "Second step"));
7194        plan.add_step(Task::new("s3", "Third step"));
7195
7196        let (tx, mut rx) = mpsc::channel(100);
7197        let result = agent
7198            .execute_plan(&[], &plan, Some("test-session"), Some(tx))
7199            .await
7200            .unwrap();
7201
7202        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
7203        assert_eq!(result.usage.total_tokens, 45);
7204
7205        // Verify we received StepStart and StepEnd events for all 3 steps
7206        let mut step_starts = Vec::new();
7207        let mut step_ends = Vec::new();
7208        rx.close();
7209        while let Some(event) = rx.recv().await {
7210            match event {
7211                AgentEvent::StepStart { step_id, .. } => step_starts.push(step_id),
7212                AgentEvent::StepEnd {
7213                    step_id, status, ..
7214                } => {
7215                    assert_eq!(status, TaskStatus::Completed);
7216                    step_ends.push(step_id);
7217                }
7218                _ => {}
7219            }
7220        }
7221        assert_eq!(step_starts.len(), 3);
7222        assert_eq!(step_ends.len(), 3);
7223    }
7224
7225    #[tokio::test]
7226    async fn test_execute_plan_emits_task_list_snapshots() {
7227        use crate::planning::{Complexity, ExecutionPlan, Task};
7228
7229        let mock_client = Arc::new(MockLlmClient::new(vec![
7230            MockLlmClient::text_response("Step 1 done"),
7231            MockLlmClient::text_response("Step 2 done"),
7232        ]));
7233
7234        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7235        let agent = AgentLoop::new(
7236            mock_client,
7237            tool_executor,
7238            test_tool_context(),
7239            AgentConfig::default(),
7240        );
7241
7242        let mut plan = ExecutionPlan::new("Track task list", Complexity::Simple);
7243        plan.add_step(Task::new("s1", "First step"));
7244        plan.add_step(Task::new("s2", "Second step").with_dependencies(vec!["s1".to_string()]));
7245
7246        let (tx, mut rx) = mpsc::channel(100);
7247        let _ = agent
7248            .execute_plan(&[], &plan, Some("task-session"), Some(tx))
7249            .await
7250            .unwrap();
7251
7252        let mut snapshots = Vec::new();
7253        rx.close();
7254        while let Some(event) = rx.recv().await {
7255            if let AgentEvent::TaskUpdated { session_id, tasks } = event {
7256                assert_eq!(session_id, "task-session");
7257                snapshots.push(tasks);
7258            }
7259        }
7260
7261        assert!(
7262            snapshots
7263                .first()
7264                .unwrap()
7265                .iter()
7266                .all(|task| task.status == TaskStatus::Pending),
7267            "initial snapshot should expose the pending task list"
7268        );
7269        assert!(snapshots.iter().any(|tasks| tasks
7270            .iter()
7271            .any(|task| task.id == "s1" && task.status == TaskStatus::InProgress)));
7272        assert!(snapshots.iter().any(|tasks| tasks
7273            .iter()
7274            .any(|task| task.id == "s1" && task.status == TaskStatus::Completed)));
7275        assert!(snapshots
7276            .last()
7277            .unwrap()
7278            .iter()
7279            .all(|task| task.status == TaskStatus::Completed));
7280    }
7281
7282    #[tokio::test]
7283    async fn test_execute_plan_delegates_task_tool_steps() {
7284        use crate::planning::{Complexity, ExecutionPlan, Task};
7285        use crate::subagent::AgentRegistry;
7286        use crate::tools::register_task;
7287
7288        let mock_client = Arc::new(MockLlmClient::new(vec![MockLlmClient::text_response(
7289            "delegated search complete",
7290        )]));
7291        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7292        register_task(
7293            tool_executor.registry(),
7294            mock_client,
7295            Arc::new(AgentRegistry::new()),
7296            "/tmp".to_string(),
7297        );
7298        let agent = AgentLoop::new(
7299            Arc::new(MockLlmClient::new(vec![])),
7300            tool_executor,
7301            test_tool_context(),
7302            AgentConfig::default(),
7303        );
7304
7305        let mut plan = ExecutionPlan::new("Delegate a step", Complexity::Simple);
7306        plan.add_step(Task::new("s1", "Find the relevant docs").with_tool("task"));
7307
7308        let (tx, mut rx) = mpsc::channel(100);
7309        let result = agent
7310            .execute_plan(&[], &plan, Some("task-session"), Some(tx))
7311            .await
7312            .unwrap();
7313
7314        assert_eq!(result.tool_calls_count, 1);
7315        assert!(result.text.contains("delegated search complete"));
7316
7317        let mut saw_task_tool_start = false;
7318        let mut saw_completed_step = false;
7319        rx.close();
7320        while let Some(event) = rx.recv().await {
7321            match event {
7322                AgentEvent::ToolStart { name, .. } if name == "task" => {
7323                    saw_task_tool_start = true;
7324                }
7325                AgentEvent::StepEnd { status, .. } if status == TaskStatus::Completed => {
7326                    saw_completed_step = true;
7327                }
7328                _ => {}
7329            }
7330        }
7331
7332        assert!(saw_task_tool_start);
7333        assert!(saw_completed_step);
7334    }
7335
7336    #[tokio::test]
7337    async fn test_execute_plan_delegates_parallel_task_wave_once() {
7338        use crate::planning::{Complexity, ExecutionPlan, Task};
7339        use crate::subagent::AgentRegistry;
7340        use crate::tools::register_task;
7341
7342        let child_client = Arc::new(MockLlmClient::new(vec![
7343            MockLlmClient::text_response("delegated docs complete"),
7344            MockLlmClient::text_response("delegated tests complete"),
7345        ]));
7346        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7347        register_task(
7348            tool_executor.registry(),
7349            child_client,
7350            Arc::new(AgentRegistry::new()),
7351            "/tmp".to_string(),
7352        );
7353        let agent = AgentLoop::new(
7354            Arc::new(MockLlmClient::new(vec![])),
7355            tool_executor,
7356            test_tool_context(),
7357            AgentConfig::default(),
7358        );
7359
7360        let mut plan = ExecutionPlan::new("Delegate independent wave", Complexity::Medium);
7361        plan.add_step(Task::new("s1", "Find relevant docs").with_tool("task"));
7362        plan.add_step(Task::new("s2", "Run verification tests").with_tool("task"));
7363
7364        let (tx, mut rx) = mpsc::channel(100);
7365        let result = agent
7366            .execute_plan(&[], &plan, Some("parallel-task-session"), Some(tx))
7367            .await
7368            .unwrap();
7369
7370        assert_eq!(
7371            result.tool_calls_count, 1,
7372            "independent delegated wave should be collapsed into one parallel_task call"
7373        );
7374        assert!(result.text.contains("delegated docs complete"));
7375        assert!(result.text.contains("delegated tests complete"));
7376
7377        let mut parallel_task_starts = 0;
7378        let mut completed_steps = Vec::new();
7379        let mut task_snapshots = Vec::new();
7380        rx.close();
7381        while let Some(event) = rx.recv().await {
7382            match event {
7383                AgentEvent::ToolStart { name, .. } if name == "parallel_task" => {
7384                    parallel_task_starts += 1;
7385                }
7386                AgentEvent::StepEnd {
7387                    step_id,
7388                    status: TaskStatus::Completed,
7389                    ..
7390                } => completed_steps.push(step_id),
7391                AgentEvent::TaskUpdated { tasks, .. } => task_snapshots.push(tasks),
7392                _ => {}
7393            }
7394        }
7395
7396        completed_steps.sort();
7397        assert_eq!(parallel_task_starts, 1);
7398        assert_eq!(completed_steps, vec!["s1".to_string(), "s2".to_string()]);
7399        assert!(task_snapshots.iter().any(|tasks| tasks
7400            .iter()
7401            .all(|task| task.status == TaskStatus::InProgress)));
7402        assert!(task_snapshots
7403            .last()
7404            .unwrap()
7405            .iter()
7406            .all(|task| task.status == TaskStatus::Completed));
7407    }
7408
7409    #[tokio::test]
7410    async fn test_execute_plan_respects_dependencies() {
7411        use crate::planning::{Complexity, ExecutionPlan, Task};
7412
7413        // s1 and s2 are independent (wave 1), s3 depends on both (wave 2).
7414        // This requires 3 responses total.
7415        let mock_client = Arc::new(MockLlmClient::new(vec![
7416            MockLlmClient::text_response("Step 1 done"),
7417            MockLlmClient::text_response("Step 2 done"),
7418            MockLlmClient::text_response("Step 3 done"),
7419        ]));
7420
7421        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7422        let config = AgentConfig::default();
7423        let agent = AgentLoop::new(
7424            mock_client.clone(),
7425            tool_executor,
7426            test_tool_context(),
7427            config,
7428        );
7429
7430        let mut plan = ExecutionPlan::new("Test deps", Complexity::Medium);
7431        plan.add_step(Task::new("s1", "Independent A"));
7432        plan.add_step(Task::new("s2", "Independent B"));
7433        plan.add_step(
7434            Task::new("s3", "Depends on A+B")
7435                .with_dependencies(vec!["s1".to_string(), "s2".to_string()]),
7436        );
7437
7438        let (tx, mut rx) = mpsc::channel(100);
7439        let result = agent
7440            .execute_plan(&[], &plan, Some("test-session"), Some(tx))
7441            .await
7442            .unwrap();
7443
7444        // All 3 steps should have been executed (3 * 15 = 45 total tokens)
7445        assert_eq!(result.usage.total_tokens, 45);
7446
7447        // Verify ordering: s3's StepStart must come after s1 and s2's StepEnd
7448        let mut events = Vec::new();
7449        rx.close();
7450        while let Some(event) = rx.recv().await {
7451            match &event {
7452                AgentEvent::StepStart { step_id, .. } => {
7453                    events.push(format!("start:{}", step_id));
7454                }
7455                AgentEvent::StepEnd { step_id, .. } => {
7456                    events.push(format!("end:{}", step_id));
7457                }
7458                _ => {}
7459            }
7460        }
7461
7462        // s3 start must occur after both s1 end and s2 end
7463        let s1_end = events.iter().position(|e| e == "end:s1").unwrap();
7464        let s2_end = events.iter().position(|e| e == "end:s2").unwrap();
7465        let s3_start = events.iter().position(|e| e == "start:s3").unwrap();
7466        assert!(
7467            s3_start > s1_end,
7468            "s3 started before s1 ended: {:?}",
7469            events
7470        );
7471        assert!(
7472            s3_start > s2_end,
7473            "s3 started before s2 ended: {:?}",
7474            events
7475        );
7476
7477        // Final result should reflect step 3 (last sequential step)
7478        assert!(result.text.contains("Step 3 done") || !result.text.is_empty());
7479    }
7480
7481    #[tokio::test]
7482    async fn test_execute_plan_handles_step_failure() {
7483        use crate::planning::{Complexity, ExecutionPlan, Task};
7484
7485        // s1 succeeds, s2 depends on s1 (succeeds), s3 depends on nothing (succeeds),
7486        // s4 depends on a step that will fail (s_fail).
7487        // We simulate failure by providing no responses for s_fail's execute_loop.
7488        //
7489        // Simpler approach: s1 succeeds, s2 depends on s1 (will fail because no
7490        // mock response left), s3 is independent.
7491        // Layout: s1 (independent), s3 (independent) → wave 1 parallel
7492        //         s2 depends on s1 → wave 2
7493        //         s4 depends on s2 → wave 3 (should deadlock since s2 fails)
7494        let mock_client = Arc::new(MockLlmClient::new(vec![
7495            // Wave 1: s1 and s3 execute in parallel
7496            MockLlmClient::text_response("s1 done"),
7497            MockLlmClient::text_response("s3 done"),
7498            // Wave 2: s2 executes — but we give it no response, causing failure
7499            // Actually the MockLlmClient will fail with "No more mock responses"
7500        ]));
7501
7502        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7503        let config = AgentConfig::default();
7504        let agent = AgentLoop::new(
7505            mock_client.clone(),
7506            tool_executor,
7507            test_tool_context(),
7508            config,
7509        );
7510
7511        let mut plan = ExecutionPlan::new("Test failure", Complexity::Medium);
7512        plan.add_step(Task::new("s1", "Independent step"));
7513        plan.add_step(Task::new("s2", "Depends on s1").with_dependencies(vec!["s1".to_string()]));
7514        plan.add_step(Task::new("s3", "Another independent"));
7515        plan.add_step(Task::new("s4", "Depends on s2").with_dependencies(vec!["s2".to_string()]));
7516
7517        let (tx, mut rx) = mpsc::channel(100);
7518        let _result = agent
7519            .execute_plan(&[], &plan, Some("test-session"), Some(tx))
7520            .await
7521            .unwrap();
7522
7523        // s1 and s3 should succeed (wave 1), s2 should fail (wave 2),
7524        // s4 should never execute (deadlock — dep s2 failed, not completed)
7525        let mut completed_steps = Vec::new();
7526        let mut failed_steps = Vec::new();
7527        rx.close();
7528        while let Some(event) = rx.recv().await {
7529            if let AgentEvent::StepEnd {
7530                step_id, status, ..
7531            } = event
7532            {
7533                match status {
7534                    TaskStatus::Completed => completed_steps.push(step_id),
7535                    TaskStatus::Failed => failed_steps.push(step_id),
7536                    _ => {}
7537                }
7538            }
7539        }
7540
7541        assert!(
7542            completed_steps.contains(&"s1".to_string()),
7543            "s1 should complete"
7544        );
7545        assert!(
7546            completed_steps.contains(&"s3".to_string()),
7547            "s3 should complete"
7548        );
7549        assert!(failed_steps.contains(&"s2".to_string()), "s2 should fail");
7550        // s4 should NOT appear in either list — it was never started
7551        assert!(
7552            !completed_steps.contains(&"s4".to_string()),
7553            "s4 should not complete"
7554        );
7555        assert!(
7556            !failed_steps.contains(&"s4".to_string()),
7557            "s4 should not fail (never started)"
7558        );
7559    }
7560
7561    // ========================================================================
7562    // Phase 4: Error Recovery & Resilience Tests
7563    // ========================================================================
7564
7565    #[test]
7566    fn test_agent_config_resilience_defaults() {
7567        let config = AgentConfig::default();
7568        assert_eq!(config.max_parse_retries, 2);
7569        assert_eq!(config.tool_timeout_ms, None);
7570        assert_eq!(config.circuit_breaker_threshold, 3);
7571    }
7572
7573    /// 4.1 — Parse error recovery: bails after max_parse_retries exceeded
7574    #[tokio::test]
7575    async fn test_parse_error_recovery_bails_after_threshold() {
7576        // 3 parse errors with max_parse_retries=2: count reaches 3 > 2 → bail
7577        let mock_client = Arc::new(MockLlmClient::new(vec![
7578            MockLlmClient::tool_call_response(
7579                "c1",
7580                "bash",
7581                serde_json::json!({"__parse_error": "unexpected token at position 5"}),
7582            ),
7583            MockLlmClient::tool_call_response(
7584                "c2",
7585                "bash",
7586                serde_json::json!({"__parse_error": "missing closing brace"}),
7587            ),
7588            MockLlmClient::tool_call_response(
7589                "c3",
7590                "bash",
7591                serde_json::json!({"__parse_error": "still broken"}),
7592            ),
7593            MockLlmClient::text_response("Done"), // never reached
7594        ]));
7595
7596        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7597        let config = AgentConfig {
7598            max_parse_retries: 2,
7599            ..AgentConfig::default()
7600        };
7601        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
7602        let result = agent.execute(&[], "Do something", None).await;
7603        assert!(result.is_err(), "should bail after parse error threshold");
7604        let err = result.unwrap_err().to_string();
7605        assert!(
7606            err.contains("malformed tool arguments"),
7607            "error should mention malformed tool arguments, got: {}",
7608            err
7609        );
7610    }
7611
7612    /// 4.1 — Parse error recovery: counter resets after a valid tool execution
7613    #[tokio::test]
7614    async fn test_parse_error_counter_resets_on_success() {
7615        // 2 parse errors (= max_parse_retries, not yet exceeded)
7616        // Then a valid tool call (resets counter)
7617        // Then final text — should NOT bail
7618        let mock_client = Arc::new(MockLlmClient::new(vec![
7619            MockLlmClient::tool_call_response(
7620                "c1",
7621                "bash",
7622                serde_json::json!({"__parse_error": "bad args"}),
7623            ),
7624            MockLlmClient::tool_call_response(
7625                "c2",
7626                "bash",
7627                serde_json::json!({"__parse_error": "bad args again"}),
7628            ),
7629            // Valid call — resets parse_error_count to 0
7630            MockLlmClient::tool_call_response(
7631                "c3",
7632                "bash",
7633                serde_json::json!({"command": "echo ok"}),
7634            ),
7635            MockLlmClient::text_response("All done"),
7636        ]));
7637
7638        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7639        let config = AgentConfig {
7640            max_parse_retries: 2,
7641            ..AgentConfig::default()
7642        };
7643        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
7644        let result = agent.execute(&[], "Do something", None).await;
7645        assert!(
7646            result.is_ok(),
7647            "should not bail — counter reset after successful tool, got: {:?}",
7648            result.err()
7649        );
7650        assert_eq!(result.unwrap().text, "All done");
7651    }
7652
7653    /// 4.2 — Tool timeout: slow tool produces a timeout error result; session continues
7654    #[tokio::test]
7655    async fn test_tool_timeout_produces_error_result() {
7656        let mock_client = Arc::new(MockLlmClient::new(vec![
7657            MockLlmClient::tool_call_response(
7658                "t1",
7659                "bash",
7660                serde_json::json!({"command": "sleep 10"}),
7661            ),
7662            MockLlmClient::text_response("The command timed out."),
7663        ]));
7664
7665        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7666        let config = AgentConfig {
7667            // 50ms — sleep 10 will never finish
7668            tool_timeout_ms: Some(50),
7669            ..AgentConfig::default()
7670        };
7671        let agent = AgentLoop::new(
7672            mock_client.clone(),
7673            tool_executor,
7674            test_tool_context(),
7675            config,
7676        );
7677        let result = agent.execute(&[], "Run sleep", None).await;
7678        assert!(
7679            result.is_ok(),
7680            "session should continue after tool timeout: {:?}",
7681            result.err()
7682        );
7683        assert_eq!(result.unwrap().text, "The command timed out.");
7684        // LLM called twice: initial request + response after timeout error
7685        assert_eq!(mock_client.call_count.load(Ordering::SeqCst), 2);
7686    }
7687
7688    /// 4.2 — Tool timeout: tool that finishes before the deadline succeeds normally
7689    #[tokio::test]
7690    async fn test_tool_within_timeout_succeeds() {
7691        let mock_client = Arc::new(MockLlmClient::new(vec![
7692            MockLlmClient::tool_call_response(
7693                "t1",
7694                "bash",
7695                serde_json::json!({"command": "echo fast"}),
7696            ),
7697            MockLlmClient::text_response("Command succeeded."),
7698        ]));
7699
7700        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7701        let config = AgentConfig {
7702            tool_timeout_ms: Some(5_000), // 5 s — echo completes in <100ms
7703            ..AgentConfig::default()
7704        };
7705        let agent = AgentLoop::new(mock_client, tool_executor, test_tool_context(), config);
7706        let result = agent.execute(&[], "Run something fast", None).await;
7707        assert!(
7708            result.is_ok(),
7709            "fast tool should succeed: {:?}",
7710            result.err()
7711        );
7712        assert_eq!(result.unwrap().text, "Command succeeded.");
7713    }
7714
7715    /// 4.3 — Circuit breaker: retries non-streaming LLM failures up to threshold
7716    #[tokio::test]
7717    async fn test_circuit_breaker_retries_non_streaming() {
7718        // Empty response list → every call bails with "No more mock responses"
7719        // threshold=2 → tries twice, then bails with circuit-breaker message
7720        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7721
7722        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7723        let config = AgentConfig {
7724            circuit_breaker_threshold: 2,
7725            ..AgentConfig::default()
7726        };
7727        let agent = AgentLoop::new(
7728            mock_client.clone(),
7729            tool_executor,
7730            test_tool_context(),
7731            config,
7732        );
7733        let result = agent.execute(&[], "Hello", None).await;
7734        assert!(result.is_err(), "should fail when LLM always errors");
7735        let err = result.unwrap_err().to_string();
7736        assert!(
7737            err.contains("circuit breaker"),
7738            "error should mention circuit breaker, got: {}",
7739            err
7740        );
7741        assert_eq!(
7742            mock_client.call_count.load(Ordering::SeqCst),
7743            2,
7744            "should make exactly threshold=2 LLM calls"
7745        );
7746    }
7747
7748    /// 4.3 — Circuit breaker: threshold=1 bails on the very first failure
7749    #[tokio::test]
7750    async fn test_circuit_breaker_threshold_one_no_retry() {
7751        let mock_client = Arc::new(MockLlmClient::new(vec![]));
7752
7753        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7754        let config = AgentConfig {
7755            circuit_breaker_threshold: 1,
7756            ..AgentConfig::default()
7757        };
7758        let agent = AgentLoop::new(
7759            mock_client.clone(),
7760            tool_executor,
7761            test_tool_context(),
7762            config,
7763        );
7764        let result = agent.execute(&[], "Hello", None).await;
7765        assert!(result.is_err());
7766        assert_eq!(
7767            mock_client.call_count.load(Ordering::SeqCst),
7768            1,
7769            "with threshold=1 exactly one attempt should be made"
7770        );
7771    }
7772
7773    /// 4.3 — Circuit breaker: succeeds when LLM recovers before hitting threshold
7774    #[tokio::test]
7775    async fn test_circuit_breaker_succeeds_if_llm_recovers() {
7776        // First call fails, second call succeeds; threshold=3 — recovery within threshold
7777        struct FailOnceThenSucceed {
7778            inner: MockLlmClient,
7779            failed_once: std::sync::atomic::AtomicBool,
7780            call_count: AtomicUsize,
7781        }
7782
7783        #[async_trait::async_trait]
7784        impl LlmClient for FailOnceThenSucceed {
7785            async fn complete(
7786                &self,
7787                messages: &[Message],
7788                system: Option<&str>,
7789                tools: &[ToolDefinition],
7790            ) -> Result<LlmResponse> {
7791                self.call_count.fetch_add(1, Ordering::SeqCst);
7792                let already_failed = self
7793                    .failed_once
7794                    .swap(true, std::sync::atomic::Ordering::SeqCst);
7795                if !already_failed {
7796                    anyhow::bail!("transient network error");
7797                }
7798                self.inner.complete(messages, system, tools).await
7799            }
7800
7801            async fn complete_streaming(
7802                &self,
7803                messages: &[Message],
7804                system: Option<&str>,
7805                tools: &[ToolDefinition],
7806                cancel_token: tokio_util::sync::CancellationToken,
7807            ) -> Result<tokio::sync::mpsc::Receiver<crate::llm::StreamEvent>> {
7808                self.inner
7809                    .complete_streaming(messages, system, tools, cancel_token)
7810                    .await
7811            }
7812        }
7813
7814        let mock = Arc::new(FailOnceThenSucceed {
7815            inner: MockLlmClient::new(vec![MockLlmClient::text_response("Recovered!")]),
7816            failed_once: std::sync::atomic::AtomicBool::new(false),
7817            call_count: AtomicUsize::new(0),
7818        });
7819
7820        let tool_executor = Arc::new(ToolExecutor::new("/tmp".to_string()));
7821        let config = AgentConfig {
7822            circuit_breaker_threshold: 3,
7823            ..AgentConfig::default()
7824        };
7825        let agent = AgentLoop::new(mock.clone(), tool_executor, test_tool_context(), config);
7826        let result = agent.execute(&[], "Hello", None).await;
7827        assert!(
7828            result.is_ok(),
7829            "should succeed when LLM recovers within threshold: {:?}",
7830            result.err()
7831        );
7832        assert_eq!(result.unwrap().text, "Recovered!");
7833        assert_eq!(
7834            mock.call_count.load(Ordering::SeqCst),
7835            2,
7836            "should have made exactly 2 calls (1 fail + 1 success)"
7837        );
7838    }
7839
7840    // ── Continuation detection tests ─────────────────────────────────────────
7841
7842    #[test]
7843    fn test_looks_incomplete_empty() {
7844        assert!(AgentLoop::looks_incomplete(""));
7845        assert!(AgentLoop::looks_incomplete("   "));
7846    }
7847
7848    #[test]
7849    fn test_looks_incomplete_trailing_colon() {
7850        assert!(AgentLoop::looks_incomplete("Let me check the file:"));
7851        assert!(AgentLoop::looks_incomplete("Next steps:"));
7852    }
7853
7854    #[test]
7855    fn test_looks_incomplete_ellipsis() {
7856        assert!(AgentLoop::looks_incomplete("Working on it..."));
7857        assert!(AgentLoop::looks_incomplete("Processing…"));
7858    }
7859
7860    #[test]
7861    fn test_looks_incomplete_intent_phrases() {
7862        assert!(AgentLoop::looks_incomplete(
7863            "I'll start by reading the file."
7864        ));
7865        assert!(AgentLoop::looks_incomplete(
7866            "Let me check the configuration."
7867        ));
7868        assert!(AgentLoop::looks_incomplete("I will now run the tests."));
7869        assert!(AgentLoop::looks_incomplete(
7870            "I need to update the Cargo.toml."
7871        ));
7872    }
7873
7874    #[test]
7875    fn test_looks_complete_final_answer() {
7876        // Clear final answers should NOT trigger continuation
7877        assert!(!AgentLoop::looks_incomplete(
7878            "The tests pass. All changes have been applied successfully."
7879        ));
7880        assert!(!AgentLoop::looks_incomplete(
7881            "Done. I've updated the three files and verified the build succeeds."
7882        ));
7883        assert!(!AgentLoop::looks_incomplete("42"));
7884        assert!(!AgentLoop::looks_incomplete("Yes."));
7885    }
7886
7887    #[test]
7888    fn test_looks_incomplete_multiline_complete() {
7889        let text = "Here is the summary:\n\n- Fixed the bug in agent.rs\n- All tests pass\n- Build succeeds";
7890        assert!(!AgentLoop::looks_incomplete(text));
7891    }
7892}
a3s_code_core/agent.rs

a3s_code_core/
agent.rs