Skip to main content

a3s_code_core/
agent.rs

1//! Agent Loop Implementation
2//!
3//! The agent loop handles the core conversation cycle:
4//! 1. User sends a prompt
5//! 2. LLM generates a response (possibly with tool calls)
6//! 3. If tool calls present, execute them and send results back
7//! 4. Repeat until LLM returns without tool calls
8//!
//! This gives the agent its agentic behavior: the LLM can call tools
//! on its own, over multiple rounds, until the task is accomplished.
11
12use crate::context::ContextProvider;
13use crate::hitl::ConfirmationProvider;
14use crate::hooks::HookExecutor;
15#[cfg(test)]
16use crate::llm::LlmResponse;
17use crate::llm::{LlmClient, Message, TokenUsage, ToolDefinition};
18use crate::permissions::{PermissionChecker, PermissionPolicy};
19use crate::planning::{AgentGoal, ExecutionPlan, TaskStatus};
20use crate::prompts::{PlanningMode, SystemPromptSlots};
21use crate::queue::{SessionCommand, SessionQueueConfig};
22use crate::session_lane_queue::SessionLaneQueue;
23use crate::tools::{ToolContext, ToolExecutor};
24use anyhow::Result;
25use async_trait::async_trait;
26use serde::{Deserialize, Serialize};
27use serde_json::Value;
28use std::sync::Arc;
29
30mod completion_runtime;
31mod context_perception;
32mod execution_entry;
33mod execution_mode;
34mod execution_state;
35mod hook_runtime;
36mod llm_turn;
37mod loop_builder;
38mod loop_runtime;
39mod parallel_tool_runtime;
40mod plan_execution;
41mod planning_runtime;
42mod project_context;
43mod prompt_runtime;
44mod queue_forwarder;
45mod telemetry_runtime;
46mod tool_completion_runtime;
47mod tool_execution_runtime;
48mod tool_gate_runtime;
49mod tool_guard_runtime;
50mod tool_memory_runtime;
51mod tool_result_runtime;
52mod tool_turn;
53mod turn_context;
54
/// Maximum number of tool execution rounds before stopping.
///
/// Used as the default value of `AgentConfig::max_tool_rounds`.
pub(crate) const MAX_TOOL_ROUNDS: usize = 50;
57
/// Internal agent loop configuration.
///
/// `Debug` is implemented by hand for this type (rather than derived); the
/// provider/manager fields are summarized there instead of printed in full.
/// Default values for every field are defined in the `Default` impl.
#[derive(Clone)]
pub(crate) struct AgentConfig {
    /// Slot-based system prompt customization.
    ///
    /// Users can customize specific parts (role, guidelines, response style, extra)
    /// without overriding the core agentic capabilities. The default agentic core
    /// (tool usage, autonomous behavior, completion criteria) is always preserved.
    pub prompt_slots: SystemPromptSlots,
    /// Tool definitions carried by this configuration.
    ///
    /// Empty by default; tools are normally provided by the `ToolExecutor`.
    pub tools: Vec<ToolDefinition>,
    /// Maximum number of tool execution rounds before stopping
    /// (default: [`MAX_TOOL_ROUNDS`]).
    pub max_tool_rounds: usize,
    /// Optional security provider for input taint tracking and output sanitization
    pub security_provider: Option<Arc<dyn crate::security::SecurityProvider>>,
    /// Optional permission checker for tool execution control
    pub permission_checker: Option<Arc<dyn PermissionChecker>>,
    /// Serializable permission policy used to build the checker, when available.
    pub permission_policy: Option<PermissionPolicy>,
    /// Optional confirmation manager for HITL (Human-in-the-Loop)
    pub confirmation_manager: Option<Arc<dyn ConfirmationProvider>>,
    /// Serializable confirmation policy used to build the manager, when available.
    pub confirmation_policy: Option<crate::hitl::ConfirmationPolicy>,
    /// Serializable queue configuration used to build the optional command queue.
    pub queue_config: Option<SessionQueueConfig>,
    /// Context providers for augmenting prompts with external context
    pub context_providers: Vec<Arc<dyn ContextProvider>>,
    /// Planning mode — Auto (detect from message), Enabled, or Disabled.
    pub planning_mode: PlanningMode,
    /// Enable goal tracking (default: `false`).
    pub goal_tracking: bool,
    /// Optional hook engine for firing lifecycle events (PreToolUse, PostToolUse, etc.)
    pub hook_engine: Option<Arc<dyn HookExecutor>>,
    /// Optional skill registry for tool permission enforcement.
    ///
    /// Defaults to a registry populated with the built-in skills.
    pub skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    /// Max consecutive malformed-tool-args errors before aborting (default: 2).
    ///
    /// When the LLM returns tool arguments with `__parse_error`, the error is
    /// fed back as a tool result. After this many consecutive parse errors the
    /// loop bails instead of retrying indefinitely.
    pub max_parse_retries: u32,
    /// Per-tool execution timeout in milliseconds (`None` = no timeout).
    ///
    /// When set, each tool execution is wrapped in `tokio::time::timeout`.
    /// A timeout produces an error result sent back to the LLM rather than
    /// crashing the session.
    pub tool_timeout_ms: Option<u64>,
    /// Circuit-breaker threshold: max consecutive LLM API failures before
    /// aborting (default: 3).
    ///
    /// In non-streaming mode, transient LLM failures are retried up to this
    /// many times (with short exponential backoff) before the loop bails.
    /// In streaming mode, any failure is fatal (events cannot be replayed).
    pub circuit_breaker_threshold: u32,
    /// Max consecutive identical tool signatures before aborting (default: 3).
    ///
    /// A tool signature is the exact combination of tool name + compact JSON
    /// arguments. This prevents the agent from getting stuck repeating the same
    /// tool call in a loop, for example repeatedly fetching the same URL.
    pub duplicate_tool_call_threshold: u32,
    /// Enable auto-compaction when context usage exceeds threshold
    /// (default: `false`).
    pub auto_compact: bool,
    /// Context usage percentage threshold to trigger auto-compaction (0.0 - 1.0).
    /// Default: 0.80 (80%).
    pub auto_compact_threshold: f32,
    /// Maximum context window size in tokens (used for auto-compact calculation).
    /// Default: 200_000.
    pub max_context_tokens: usize,
    /// Optional agent memory for auto-remember after tool execution and recall before prompts.
    pub memory: Option<Arc<crate::memory::AgentMemory>>,
    /// Inject a continuation message when the LLM stops calling tools before the
    /// task is complete. Enabled by default. Set to `false` to disable.
    ///
    /// When enabled, if the LLM produces a response with no tool calls but the
    /// response text looks like an intermediate step (not a final answer), the
    /// loop injects [`crate::prompts::CONTINUATION`] as a user message and
    /// continues for up to `max_continuation_turns` additional turns.
    pub continuation_enabled: bool,
    /// Maximum number of continuation injections per execution (default: 3).
    ///
    /// Prevents infinite loops when the LLM repeatedly stops without completing.
    pub max_continuation_turns: u32,
    /// Maximum execution time in milliseconds (`None` = no timeout).
    ///
    /// When set, the entire execution loop is wrapped in a timeout check.
    /// If execution exceeds this duration, the loop bails with an error.
    /// This prevents runaway executions that consume excessive API quota.
    pub max_execution_time_ms: Option<u64>,
}
145
146impl std::fmt::Debug for AgentConfig {
147    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
148        f.debug_struct("AgentConfig")
149            .field("prompt_slots", &self.prompt_slots)
150            .field("tools", &self.tools)
151            .field("max_tool_rounds", &self.max_tool_rounds)
152            .field("security_provider", &self.security_provider.is_some())
153            .field("permission_checker", &self.permission_checker.is_some())
154            .field("permission_policy", &self.permission_policy.is_some())
155            .field("confirmation_manager", &self.confirmation_manager.is_some())
156            .field("confirmation_policy", &self.confirmation_policy.is_some())
157            .field("queue_config", &self.queue_config.is_some())
158            .field("context_providers", &self.context_providers.len())
159            .field("planning_mode", &self.planning_mode)
160            .field("goal_tracking", &self.goal_tracking)
161            .field("hook_engine", &self.hook_engine.is_some())
162            .field(
163                "skill_registry",
164                &self.skill_registry.as_ref().map(|r| r.len()),
165            )
166            .field("max_parse_retries", &self.max_parse_retries)
167            .field("tool_timeout_ms", &self.tool_timeout_ms)
168            .field("circuit_breaker_threshold", &self.circuit_breaker_threshold)
169            .field(
170                "duplicate_tool_call_threshold",
171                &self.duplicate_tool_call_threshold,
172            )
173            .field("auto_compact", &self.auto_compact)
174            .field("auto_compact_threshold", &self.auto_compact_threshold)
175            .field("max_context_tokens", &self.max_context_tokens)
176            .field("continuation_enabled", &self.continuation_enabled)
177            .field("max_continuation_turns", &self.max_continuation_turns)
178            .field("memory", &self.memory.is_some())
179            .finish()
180    }
181}
182
183impl Default for AgentConfig {
184    fn default() -> Self {
185        Self {
186            prompt_slots: SystemPromptSlots::default(),
187            tools: Vec::new(), // Tools are provided by ToolExecutor
188            max_tool_rounds: MAX_TOOL_ROUNDS,
189            security_provider: None,
190            permission_checker: None,
191            permission_policy: None,
192            confirmation_manager: None,
193            confirmation_policy: None,
194            queue_config: None,
195            context_providers: Vec::new(),
196            planning_mode: PlanningMode::default(),
197            goal_tracking: false,
198            hook_engine: None,
199            skill_registry: Some(Arc::new(crate::skills::SkillRegistry::with_builtins())),
200            max_parse_retries: 2,
201            tool_timeout_ms: None,
202            circuit_breaker_threshold: 3,
203            duplicate_tool_call_threshold: 3,
204            auto_compact: false,
205            auto_compact_threshold: 0.80,
206            max_context_tokens: 200_000,
207            memory: None,
208            continuation_enabled: true,
209            max_continuation_turns: 3,
210            max_execution_time_ms: None,
211        }
212    }
213}
214
/// Events emitted during agent execution
///
/// Subscribe via [`crate::AgentSession::stream`].
/// New variants may be added in minor releases — always include a wildcard arm
/// (`_ => {}`) when matching.
///
/// # Serialization
///
/// The enum is internally tagged: each serialized event carries a `type`
/// field holding the variant's `#[serde(rename = "...")]` string (for example
/// `"agent_start"` or `"tool_end"`), alongside the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum AgentEvent {
    /// Agent started processing
    #[serde(rename = "agent_start")]
    Start { prompt: String },

    /// Runtime agent style/mode selected for the current execution.
    #[serde(rename = "agent_mode_changed")]
    AgentModeChanged {
        /// Stable UI/runtime mode label, e.g. "general", "planning", "explore".
        mode: String,
        /// Canonical built-in agent name associated with this mode.
        agent: String,
        /// Human-readable explanation of the selected style.
        description: String,
    },

    /// LLM turn started
    #[serde(rename = "turn_start")]
    TurnStart { turn: usize },

    /// Text delta from streaming
    #[serde(rename = "text_delta")]
    TextDelta { text: String },

    /// Reasoning/thinking delta from streaming (for models like kimi, deepseek)
    #[serde(rename = "reasoning_delta")]
    ReasoningDelta { text: String },

    /// Tool execution started
    #[serde(rename = "tool_start")]
    ToolStart { id: String, name: String },

    /// Tool input delta from streaming (partial JSON arguments)
    #[serde(rename = "tool_input_delta")]
    ToolInputDelta { delta: String },

    /// Tool execution completed
    #[serde(rename = "tool_end")]
    ToolEnd {
        id: String,
        name: String,
        output: String,
        exit_code: i32,
        /// Optional tool metadata; left out of the serialized event when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        metadata: Option<serde_json::Value>,
        /// Structured discriminant set by tools that mapped their failure
        /// into a typed [`ToolErrorKind`](crate::tools::ToolErrorKind)
        /// (e.g. `edit` / `patch` on a `WorkspaceError::VersionConflict`).
        /// `None` on success or untyped failure.
        #[serde(skip_serializing_if = "Option::is_none")]
        error_kind: Option<crate::tools::ToolErrorKind>,
    },

    /// Intermediate tool output (streaming delta)
    #[serde(rename = "tool_output_delta")]
    ToolOutputDelta {
        id: String,
        name: String,
        delta: String,
    },

    /// LLM turn completed
    #[serde(rename = "turn_end")]
    TurnEnd { turn: usize, usage: TokenUsage },

    /// Agent completed
    #[serde(rename = "agent_end")]
    End {
        text: String,
        usage: TokenUsage,
        /// Verification summary for the run. Stored boxed — presumably to keep
        /// the enum itself small (NOTE(review): confirm intent).
        verification_summary: Box<crate::verification::VerificationSummary>,
        /// Optional LLM response metadata; left out of the serialized event
        /// when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        meta: Option<crate::llm::LlmResponseMeta>,
    },

    /// Error occurred
    #[serde(rename = "error")]
    Error { message: String },

    /// Tool execution requires confirmation (HITL)
    #[serde(rename = "confirmation_required")]
    ConfirmationRequired {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        timeout_ms: u64,
    },

    /// Confirmation received from user (HITL)
    #[serde(rename = "confirmation_received")]
    ConfirmationReceived {
        tool_id: String,
        approved: bool,
        reason: Option<String>,
    },

    /// Confirmation timed out (HITL)
    #[serde(rename = "confirmation_timeout")]
    ConfirmationTimeout {
        tool_id: String,
        action_taken: String, // "rejected" or "auto_approved"
    },

    /// External task pending (needs SDK processing)
    #[serde(rename = "external_task_pending")]
    ExternalTaskPending {
        task_id: String,
        session_id: String,
        lane: crate::queue::SessionLane,
        command_type: String,
        payload: serde_json::Value,
        timeout_ms: u64,
    },

    /// External task completed
    #[serde(rename = "external_task_completed")]
    ExternalTaskCompleted {
        task_id: String,
        session_id: String,
        success: bool,
    },

    /// Tool execution denied by permission policy
    #[serde(rename = "permission_denied")]
    PermissionDenied {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        reason: String,
    },

    /// Context resolution started
    #[serde(rename = "context_resolving")]
    ContextResolving { providers: Vec<String> },

    /// Context resolution completed
    #[serde(rename = "context_resolved")]
    ContextResolved {
        total_items: usize,
        total_tokens: usize,
    },

    // ========================================================================
    // a3s-lane integration events
    // ========================================================================
    /// Command moved to dead letter queue after exhausting retries
    #[serde(rename = "command_dead_lettered")]
    CommandDeadLettered {
        command_id: String,
        command_type: String,
        lane: String,
        error: String,
        attempts: u32,
    },

    /// Command retry attempt
    #[serde(rename = "command_retry")]
    CommandRetry {
        command_id: String,
        command_type: String,
        lane: String,
        attempt: u32,
        delay_ms: u64,
    },

    /// Queue alert (depth warning, latency alert, etc.)
    #[serde(rename = "queue_alert")]
    QueueAlert {
        level: String,
        alert_type: String,
        message: String,
    },

    // ========================================================================
    // Task tracking events
    // ========================================================================
    /// Task list updated
    #[serde(rename = "task_updated")]
    TaskUpdated {
        session_id: String,
        tasks: Vec<crate::planning::Task>,
    },

    // ========================================================================
    // Memory System events (Phase 3)
    // ========================================================================
    /// Memory stored
    #[serde(rename = "memory_stored")]
    MemoryStored {
        memory_id: String,
        memory_type: String,
        importance: f32,
        tags: Vec<String>,
    },

    /// Memory recalled
    #[serde(rename = "memory_recalled")]
    MemoryRecalled {
        memory_id: String,
        content: String,
        relevance: f32,
    },

    /// Memories searched
    #[serde(rename = "memories_searched")]
    MemoriesSearched {
        query: Option<String>,
        tags: Vec<String>,
        result_count: usize,
    },

    /// Memory cleared
    #[serde(rename = "memory_cleared")]
    MemoryCleared {
        tier: String, // "long_term", "short_term", "working"
        count: u64,
    },

    // ========================================================================
    // Subagent events
    // ========================================================================
    /// Subagent task started
    #[serde(rename = "subagent_start")]
    SubagentStart {
        /// Unique task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Parent session ID
        parent_session_id: String,
        /// Agent type (e.g., "explore", "general")
        agent: String,
        /// Short description of the task
        description: String,
    },

    /// Subagent task progress update
    #[serde(rename = "subagent_progress")]
    SubagentProgress {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Progress status message
        status: String,
        /// Additional metadata
        metadata: serde_json::Value,
    },

    /// Subagent task completed
    #[serde(rename = "subagent_end")]
    SubagentEnd {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Agent type
        agent: String,
        /// Task output/result
        output: String,
        /// Whether the task succeeded
        success: bool,
    },

    // ========================================================================
    // Planning and Goal Tracking Events (Phase 1)
    // ========================================================================
    /// Planning phase started
    #[serde(rename = "planning_start")]
    PlanningStart { prompt: String },

    /// Planning phase completed
    #[serde(rename = "planning_end")]
    PlanningEnd {
        plan: ExecutionPlan,
        estimated_steps: usize,
    },

    /// Step execution started
    #[serde(rename = "step_start")]
    StepStart {
        step_id: String,
        description: String,
        step_number: usize,
        total_steps: usize,
    },

    /// Step execution completed
    #[serde(rename = "step_end")]
    StepEnd {
        step_id: String,
        status: TaskStatus,
        step_number: usize,
        total_steps: usize,
    },

    /// Goal extracted from prompt
    #[serde(rename = "goal_extracted")]
    GoalExtracted { goal: AgentGoal },

    /// Goal progress update
    #[serde(rename = "goal_progress")]
    GoalProgress {
        goal: String,
        progress: f32,
        completed_steps: usize,
        total_steps: usize,
    },

    /// Goal achieved
    #[serde(rename = "goal_achieved")]
    GoalAchieved {
        goal: String,
        total_steps: usize,
        duration_ms: i64,
    },

    // ========================================================================
    // Context Compaction events
    // ========================================================================
    /// Context automatically compacted due to high usage
    #[serde(rename = "context_compacted")]
    ContextCompacted {
        session_id: String,
        before_messages: usize,
        after_messages: usize,
        percent_before: f32,
    },

    // ========================================================================
    // Persistence events
    // ========================================================================
    /// Session persistence failed — SDK clients should handle this
    #[serde(rename = "persistence_failed")]
    PersistenceFailed {
        session_id: String,
        operation: String,
        error: String,
    },
}
563
/// Result of agent execution
#[derive(Debug, Clone)]
pub struct AgentResult {
    /// Response text produced by the run (presumably the final assistant
    /// text — confirm against the loop that builds this value).
    pub text: String,
    /// Messages associated with the run (presumably the full conversation
    /// history — confirm against the caller).
    pub messages: Vec<Message>,
    /// Token usage reported for the run.
    pub usage: TokenUsage,
    /// Number of tool calls counted during the run.
    pub tool_calls_count: usize,
    /// Verification reports; these feed [`AgentResult::verification_summary`].
    pub verification_reports: Vec<crate::verification::VerificationReport>,
}
573
574impl AgentResult {
575    pub fn verification_summary(&self) -> crate::verification::VerificationSummary {
576        crate::verification::VerificationSummary::from_reports(&self.verification_reports)
577    }
578
579    pub fn verification_summary_text(&self) -> String {
580        crate::verification::format_verification_summary(&self.verification_summary())
581    }
582
583    pub fn has_pending_verification(&self) -> bool {
584        matches!(
585            self.verification_summary().status,
586            crate::verification::VerificationStatus::NeedsReview
587        )
588    }
589}
590
591// ============================================================================
592// ToolCommand — bridges ToolExecutor to SessionCommand for queue submission
593// ============================================================================
594
/// Adapter that implements `SessionCommand` for tool execution via the queue.
///
/// Wraps a `ToolExecutor` call so it can be submitted to `SessionLaneQueue`.
pub struct ToolCommand {
    /// Executor that runs the tool when the command is executed.
    tool_executor: Arc<ToolExecutor>,
    /// Name of the tool to run; also reported as the command type.
    tool_name: String,
    /// Arguments passed to the tool; also reported as the command payload.
    tool_args: Value,
    /// Context handed to the executor together with the name and arguments.
    tool_context: ToolContext,
    /// Optional skill registry consulted before execution to enforce
    /// instruction-skill tool restrictions.
    skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
}
605
606impl ToolCommand {
607    /// Create a new ToolCommand
608    pub fn new(
609        tool_executor: Arc<ToolExecutor>,
610        tool_name: String,
611        tool_args: Value,
612        tool_context: ToolContext,
613        skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
614    ) -> Self {
615        Self {
616            tool_executor,
617            tool_name,
618            tool_args,
619            tool_context,
620            skill_registry,
621        }
622    }
623}
624
625#[async_trait]
626impl SessionCommand for ToolCommand {
627    async fn execute(&self) -> Result<Value> {
628        // Check skill-based tool permissions
629        if let Some(registry) = &self.skill_registry {
630            let instruction_skills = registry.by_kind(crate::skills::SkillKind::Instruction);
631
632            // If there are instruction skills with tool restrictions, check permissions
633            let has_restrictions = instruction_skills.iter().any(|s| s.allowed_tools.is_some());
634
635            if has_restrictions {
636                let mut allowed = false;
637
638                for skill in &instruction_skills {
639                    if skill.is_tool_allowed(&self.tool_name) {
640                        allowed = true;
641                        break;
642                    }
643                }
644
645                if !allowed {
646                    return Err(anyhow::anyhow!(
647                        "Tool '{}' is not allowed by any active skill. Active skills restrict tools to their allowed-tools lists.",
648                        self.tool_name
649                    ));
650                }
651            }
652        }
653
654        // Execute the tool
655        let result = self
656            .tool_executor
657            .execute_with_context(&self.tool_name, &self.tool_args, &self.tool_context)
658            .await?;
659        Ok(serde_json::json!({
660            "output": result.output,
661            "exit_code": result.exit_code,
662            "metadata": result.metadata,
663        }))
664    }
665
666    fn command_type(&self) -> &str {
667        &self.tool_name
668    }
669
670    fn payload(&self) -> Value {
671        self.tool_args.clone()
672    }
673}
674
675// ============================================================================
676// AgentLoop
677// ============================================================================
678
/// Internal agent loop executor.
///
/// Holds the collaborators and configuration for running the loop. The
/// struct is `Clone`; the client, executor and queue sit behind `Arc`.
#[derive(Clone)]
pub(crate) struct AgentLoop {
    /// LLM client (trait object).
    llm_client: Arc<dyn LlmClient>,
    /// Tool executor.
    tool_executor: Arc<ToolExecutor>,
    /// Tool context (presumably passed along with each tool execution —
    /// confirm in the runtime submodules).
    tool_context: ToolContext,
    /// Loop configuration.
    config: AgentConfig,
    /// Optional lane queue for priority-based tool execution
    command_queue: Option<Arc<SessionLaneQueue>>,
}
689
690#[cfg(test)]
691pub(crate) mod tests;
692
693#[cfg(test)]
694mod extra_agent_tests;