a3s_code_core/
agent.rs

//! Agent Loop Implementation
//!
//! The agent loop handles the core conversation cycle:
//! 1. User sends a prompt
//! 2. LLM generates a response (possibly with tool calls)
//! 3. If tool calls are present, execute them and send the results back
//! 4. Repeat until the LLM returns without tool calls
//!
//! This cycle is what makes the agent "agentic": the LLM can call tools
//! over multiple rounds to accomplish a task.
11
12use crate::context::ContextProvider;
13use crate::hitl::ConfirmationProvider;
14use crate::hooks::HookExecutor;
15#[cfg(test)]
16use crate::llm::LlmResponse;
17use crate::llm::{LlmClient, Message, TokenUsage, ToolDefinition};
18use crate::permissions::{PermissionChecker, PermissionPolicy};
19use crate::planning::{AgentGoal, ExecutionPlan, TaskStatus};
20use crate::prompts::{PlanningMode, SystemPromptSlots};
21use crate::queue::{SessionCommand, SessionQueueConfig};
22use crate::session_lane_queue::SessionLaneQueue;
23use crate::subagent::AgentRegistry;
24use crate::tools::{ToolContext, ToolExecutor};
25use anyhow::Result;
26use async_trait::async_trait;
27use serde::{Deserialize, Serialize};
28use serde_json::Value;
29use std::sync::Arc;
30
31mod auto_delegation;
32mod completion_runtime;
33mod context_perception;
34mod execution_entry;
35mod execution_mode;
36mod execution_state;
37mod hook_runtime;
38mod llm_turn;
39mod loop_builder;
40mod loop_runtime;
41mod parallel_tool_runtime;
42mod plan_execution;
43mod planning_runtime;
44mod project_context;
45mod prompt_runtime;
46mod queue_forwarder;
47mod telemetry_runtime;
48mod tool_completion_runtime;
49mod tool_execution_runtime;
50mod tool_gate_runtime;
51mod tool_guard_runtime;
52mod tool_memory_runtime;
53mod tool_result_runtime;
54mod tool_turn;
55mod turn_context;
56
/// Maximum number of tool execution rounds before stopping
///
/// Used as the default for `AgentConfig::max_tool_rounds`.
pub(crate) const MAX_TOOL_ROUNDS: usize = 50;
/// Default for `AgentConfig::max_parallel_tasks`, i.e. the default cap on how
/// many sibling branches/tools run concurrently in bounded parallel fan-out paths.
pub(crate) const DEFAULT_MAX_PARALLEL_TASKS: usize = 8;
60
/// Internal agent loop configuration.
///
/// `Debug` is implemented by hand for this type: providers, policies and the
/// memory handle are reported as presence flags or lengths rather than in full.
/// Default values for every field come from the `Default` impl.
#[derive(Clone)]
pub(crate) struct AgentConfig {
    /// Slot-based system prompt customization.
    ///
    /// Users can customize specific parts (role, guidelines, response style, extra)
    /// without overriding the core agentic capabilities. The default agentic core
    /// (tool usage, autonomous behavior, completion criteria) is always preserved.
    pub prompt_slots: SystemPromptSlots,
    /// Tool definitions for the loop.
    ///
    /// Empty by default; the `Default` impl notes that tools are provided by
    /// `ToolExecutor`.
    pub tools: Vec<ToolDefinition>,
    /// Maximum number of tool execution rounds before stopping
    /// (default: [`MAX_TOOL_ROUNDS`]).
    pub max_tool_rounds: usize,
    /// Optional security provider for input taint tracking and output sanitization
    pub security_provider: Option<Arc<dyn crate::security::SecurityProvider>>,
    /// Optional permission checker for tool execution control
    pub permission_checker: Option<Arc<dyn PermissionChecker>>,
    /// Serializable permission policy used to build the checker, when available.
    pub permission_policy: Option<PermissionPolicy>,
    /// Optional confirmation manager for HITL (Human-in-the-Loop)
    pub confirmation_manager: Option<Arc<dyn ConfirmationProvider>>,
    /// Serializable confirmation policy used to build the manager, when available.
    pub confirmation_policy: Option<crate::hitl::ConfirmationPolicy>,
    /// Serializable queue configuration used to build the optional command queue.
    pub queue_config: Option<SessionQueueConfig>,
    /// Context providers for augmenting prompts with external context
    pub context_providers: Vec<Arc<dyn ContextProvider>>,
    /// Planning mode — Auto (detect from message), Enabled, or Disabled.
    pub planning_mode: PlanningMode,
    /// Enable goal tracking (default: `false`).
    pub goal_tracking: bool,
    /// Optional hook engine for firing lifecycle events (PreToolUse, PostToolUse, etc.)
    pub hook_engine: Option<Arc<dyn HookExecutor>>,
    /// Optional skill registry for tool permission enforcement
    ///
    /// The default configuration installs `SkillRegistry::with_builtins()`.
    pub skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
    /// Max consecutive malformed-tool-args errors before aborting (default: 2).
    ///
    /// When the LLM returns tool arguments with `__parse_error`, the error is
    /// fed back as a tool result. After this many consecutive parse errors the
    /// loop bails instead of retrying indefinitely.
    pub max_parse_retries: u32,
    /// Per-tool execution timeout in milliseconds (`None` = no timeout).
    ///
    /// When set, each tool execution is wrapped in `tokio::time::timeout`.
    /// A timeout produces an error result sent back to the LLM rather than
    /// crashing the session.
    pub tool_timeout_ms: Option<u64>,
    /// Maximum number of sibling branches/tools to run concurrently in bounded
    /// parallel fan-out paths (default: [`DEFAULT_MAX_PARALLEL_TASKS`]).
    pub max_parallel_tasks: usize,
    /// Runtime-driven automatic child-agent delegation.
    pub auto_delegation: crate::config::AutoDelegationConfig,
    /// Available child agents for automatic delegation.
    pub agent_registry: Option<Arc<AgentRegistry>>,
    /// Circuit-breaker threshold: max consecutive LLM API failures before
    /// aborting (default: 3).
    ///
    /// In non-streaming mode, transient LLM failures are retried up to this
    /// many times (with short exponential backoff) before the loop bails.
    /// In streaming mode, any failure is fatal (events cannot be replayed).
    pub circuit_breaker_threshold: u32,
    /// Max consecutive identical tool signatures before aborting (default: 3).
    ///
    /// A tool signature is the exact combination of tool name + compact JSON
    /// arguments. This prevents the agent from getting stuck repeating the same
    /// tool call in a loop, for example repeatedly fetching the same URL.
    pub duplicate_tool_call_threshold: u32,
    /// Enable auto-compaction when context usage exceeds threshold
    /// (default: `false`).
    pub auto_compact: bool,
    /// Context usage percentage threshold to trigger auto-compaction (0.0 - 1.0).
    /// Default: 0.80 (80%).
    pub auto_compact_threshold: f32,
    /// Maximum context window size in tokens (used for auto-compact calculation).
    /// Default: 200_000.
    pub max_context_tokens: usize,
    /// Optional agent memory for auto-remember after tool execution and recall before prompts.
    pub memory: Option<Arc<crate::memory::AgentMemory>>,
    /// Inject a continuation message when the LLM stops calling tools before the
    /// task is complete. Enabled by default. Set to `false` to disable.
    ///
    /// When enabled, if the LLM produces a response with no tool calls but the
    /// response text looks like an intermediate step (not a final answer), the
    /// loop injects [`crate::prompts::CONTINUATION`] as a user message and
    /// continues for up to `max_continuation_turns` additional turns.
    pub continuation_enabled: bool,
    /// Maximum number of continuation injections per execution (default: 3).
    ///
    /// Prevents infinite loops when the LLM repeatedly stops without completing.
    pub max_continuation_turns: u32,
    /// Maximum execution time in milliseconds (`None` = no timeout).
    ///
    /// When set, the entire execution loop is wrapped in a timeout check.
    /// If execution exceeds this duration, the loop bails with an error.
    /// This prevents runaway executions that consume excessive API quota.
    pub max_execution_time_ms: Option<u64>,
}
155
156impl std::fmt::Debug for AgentConfig {
157    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
158        f.debug_struct("AgentConfig")
159            .field("prompt_slots", &self.prompt_slots)
160            .field("tools", &self.tools)
161            .field("max_tool_rounds", &self.max_tool_rounds)
162            .field("security_provider", &self.security_provider.is_some())
163            .field("permission_checker", &self.permission_checker.is_some())
164            .field("permission_policy", &self.permission_policy.is_some())
165            .field("confirmation_manager", &self.confirmation_manager.is_some())
166            .field("confirmation_policy", &self.confirmation_policy.is_some())
167            .field("queue_config", &self.queue_config.is_some())
168            .field("context_providers", &self.context_providers.len())
169            .field("planning_mode", &self.planning_mode)
170            .field("goal_tracking", &self.goal_tracking)
171            .field("hook_engine", &self.hook_engine.is_some())
172            .field(
173                "skill_registry",
174                &self.skill_registry.as_ref().map(|r| r.len()),
175            )
176            .field("max_parse_retries", &self.max_parse_retries)
177            .field("tool_timeout_ms", &self.tool_timeout_ms)
178            .field("max_parallel_tasks", &self.max_parallel_tasks)
179            .field("auto_delegation", &self.auto_delegation)
180            .field(
181                "agent_registry",
182                &self.agent_registry.as_ref().map(|registry| registry.len()),
183            )
184            .field("circuit_breaker_threshold", &self.circuit_breaker_threshold)
185            .field(
186                "duplicate_tool_call_threshold",
187                &self.duplicate_tool_call_threshold,
188            )
189            .field("auto_compact", &self.auto_compact)
190            .field("auto_compact_threshold", &self.auto_compact_threshold)
191            .field("max_context_tokens", &self.max_context_tokens)
192            .field("continuation_enabled", &self.continuation_enabled)
193            .field("max_continuation_turns", &self.max_continuation_turns)
194            .field("memory", &self.memory.is_some())
195            .finish()
196    }
197}
198
199impl Default for AgentConfig {
200    fn default() -> Self {
201        Self {
202            prompt_slots: SystemPromptSlots::default(),
203            tools: Vec::new(), // Tools are provided by ToolExecutor
204            max_tool_rounds: MAX_TOOL_ROUNDS,
205            security_provider: None,
206            permission_checker: None,
207            permission_policy: None,
208            confirmation_manager: None,
209            confirmation_policy: None,
210            queue_config: None,
211            context_providers: Vec::new(),
212            planning_mode: PlanningMode::default(),
213            goal_tracking: false,
214            hook_engine: None,
215            skill_registry: Some(Arc::new(crate::skills::SkillRegistry::with_builtins())),
216            max_parse_retries: 2,
217            tool_timeout_ms: None,
218            max_parallel_tasks: DEFAULT_MAX_PARALLEL_TASKS,
219            auto_delegation: crate::config::AutoDelegationConfig::default(),
220            agent_registry: None,
221            circuit_breaker_threshold: 3,
222            duplicate_tool_call_threshold: 3,
223            auto_compact: false,
224            auto_compact_threshold: 0.80,
225            max_context_tokens: 200_000,
226            memory: None,
227            continuation_enabled: true,
228            max_continuation_turns: 3,
229            max_execution_time_ms: None,
230        }
231    }
232}
233
/// Events emitted during agent execution
///
/// Subscribe via [`crate::AgentSession::stream`].
/// New variants may be added in minor releases — always include a wildcard arm
/// (`_ => {}`) when matching.
///
/// Serialized with serde's internally tagged representation: each event is an
/// object whose `type` field holds the variant's renamed tag (for example
/// `agent_start` or `tool_end`), next to the variant's own fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type")]
#[non_exhaustive]
pub enum AgentEvent {
    /// Agent started processing
    #[serde(rename = "agent_start")]
    Start { prompt: String },

    /// Runtime agent style/mode selected for the current execution.
    #[serde(rename = "agent_mode_changed")]
    AgentModeChanged {
        /// Stable UI/runtime mode label, e.g. "general", "planning", "explore".
        mode: String,
        /// Canonical built-in agent name associated with this mode.
        agent: String,
        /// Human-readable explanation of the selected style.
        description: String,
    },

    /// LLM turn started
    #[serde(rename = "turn_start")]
    TurnStart { turn: usize },

    /// Text delta from streaming
    #[serde(rename = "text_delta")]
    TextDelta { text: String },

    /// Reasoning/thinking delta from streaming (for models like kimi, deepseek)
    #[serde(rename = "reasoning_delta")]
    ReasoningDelta { text: String },

    /// Tool execution started
    #[serde(rename = "tool_start")]
    ToolStart { id: String, name: String },

    /// Tool input delta from streaming (partial JSON arguments)
    #[serde(rename = "tool_input_delta")]
    ToolInputDelta { delta: String },

    /// Tool execution completed
    #[serde(rename = "tool_end")]
    ToolEnd {
        id: String,
        name: String,
        output: String,
        exit_code: i32,
        /// Optional tool metadata; omitted from the serialized event when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        metadata: Option<serde_json::Value>,
        /// Structured discriminant set by tools that mapped their failure
        /// into a typed [`ToolErrorKind`](crate::tools::ToolErrorKind)
        /// (e.g. `edit` / `patch` on a `WorkspaceError::VersionConflict`).
        /// `None` on success or untyped failure.
        /// Omitted from the serialized event when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        error_kind: Option<crate::tools::ToolErrorKind>,
    },

    /// Intermediate tool output (streaming delta)
    #[serde(rename = "tool_output_delta")]
    ToolOutputDelta {
        id: String,
        name: String,
        delta: String,
    },

    /// LLM turn completed
    #[serde(rename = "turn_end")]
    TurnEnd { turn: usize, usage: TokenUsage },

    /// Agent completed
    #[serde(rename = "agent_end")]
    End {
        text: String,
        usage: TokenUsage,
        verification_summary: Box<crate::verification::VerificationSummary>,
        /// Optional LLM response metadata; omitted from the serialized event
        /// when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        meta: Option<crate::llm::LlmResponseMeta>,
    },

    /// Error occurred
    #[serde(rename = "error")]
    Error { message: String },

    /// Tool execution requires confirmation (HITL)
    #[serde(rename = "confirmation_required")]
    ConfirmationRequired {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        timeout_ms: u64,
    },

    /// Confirmation received from user (HITL)
    #[serde(rename = "confirmation_received")]
    ConfirmationReceived {
        tool_id: String,
        approved: bool,
        reason: Option<String>,
    },

    /// Confirmation timed out (HITL)
    #[serde(rename = "confirmation_timeout")]
    ConfirmationTimeout {
        tool_id: String,
        /// Action applied on timeout: "rejected" or "auto_approved".
        action_taken: String,
    },

    /// External task pending (needs SDK processing)
    #[serde(rename = "external_task_pending")]
    ExternalTaskPending {
        task_id: String,
        session_id: String,
        lane: crate::queue::SessionLane,
        command_type: String,
        payload: serde_json::Value,
        timeout_ms: u64,
    },

    /// External task completed
    #[serde(rename = "external_task_completed")]
    ExternalTaskCompleted {
        task_id: String,
        session_id: String,
        success: bool,
    },

    /// Tool execution denied by permission policy
    #[serde(rename = "permission_denied")]
    PermissionDenied {
        tool_id: String,
        tool_name: String,
        args: serde_json::Value,
        reason: String,
    },

    /// Context resolution started
    #[serde(rename = "context_resolving")]
    ContextResolving { providers: Vec<String> },

    /// Context resolution completed
    #[serde(rename = "context_resolved")]
    ContextResolved {
        total_items: usize,
        total_tokens: usize,
    },

    // ========================================================================
    // a3s-lane integration events
    // ========================================================================
    /// Command moved to dead letter queue after exhausting retries
    #[serde(rename = "command_dead_lettered")]
    CommandDeadLettered {
        command_id: String,
        command_type: String,
        lane: String,
        error: String,
        attempts: u32,
    },

    /// Command retry attempt
    #[serde(rename = "command_retry")]
    CommandRetry {
        command_id: String,
        command_type: String,
        lane: String,
        attempt: u32,
        delay_ms: u64,
    },

    /// Queue alert (depth warning, latency alert, etc.)
    #[serde(rename = "queue_alert")]
    QueueAlert {
        level: String,
        alert_type: String,
        message: String,
    },

    // ========================================================================
    // Task tracking events
    // ========================================================================
    /// Task list updated
    #[serde(rename = "task_updated")]
    TaskUpdated {
        session_id: String,
        tasks: Vec<crate::planning::Task>,
    },

    // ========================================================================
    // Memory System events (Phase 3)
    // ========================================================================
    /// Memory stored
    #[serde(rename = "memory_stored")]
    MemoryStored {
        memory_id: String,
        memory_type: String,
        importance: f32,
        tags: Vec<String>,
    },

    /// Memory recalled
    #[serde(rename = "memory_recalled")]
    MemoryRecalled {
        memory_id: String,
        content: String,
        relevance: f32,
    },

    /// Memories searched
    #[serde(rename = "memories_searched")]
    MemoriesSearched {
        query: Option<String>,
        tags: Vec<String>,
        result_count: usize,
    },

    /// Memory cleared
    #[serde(rename = "memory_cleared")]
    MemoryCleared {
        /// Memory tier that was cleared: "long_term", "short_term" or "working".
        tier: String,
        count: u64,
    },

    // ========================================================================
    // Subagent events
    // ========================================================================
    /// Subagent task started
    #[serde(rename = "subagent_start")]
    SubagentStart {
        /// Unique task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Parent session ID
        parent_session_id: String,
        /// Agent type (e.g., "explore", "general")
        agent: String,
        /// Short description of the task
        description: String,
    },

    /// Subagent task progress update
    #[serde(rename = "subagent_progress")]
    SubagentProgress {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Progress status message
        status: String,
        /// Additional metadata
        metadata: serde_json::Value,
    },

    /// Subagent task completed
    #[serde(rename = "subagent_end")]
    SubagentEnd {
        /// Task identifier
        task_id: String,
        /// Child session ID
        session_id: String,
        /// Agent type
        agent: String,
        /// Task output/result
        output: String,
        /// Whether the task succeeded
        success: bool,
    },

    // ========================================================================
    // Planning and Goal Tracking Events (Phase 1)
    // ========================================================================
    /// Planning phase started
    #[serde(rename = "planning_start")]
    PlanningStart { prompt: String },

    /// Planning phase completed
    #[serde(rename = "planning_end")]
    PlanningEnd {
        plan: ExecutionPlan,
        estimated_steps: usize,
    },

    /// Step execution started
    #[serde(rename = "step_start")]
    StepStart {
        step_id: String,
        description: String,
        step_number: usize,
        total_steps: usize,
    },

    /// Step execution completed
    #[serde(rename = "step_end")]
    StepEnd {
        step_id: String,
        status: TaskStatus,
        step_number: usize,
        total_steps: usize,
    },

    /// Goal extracted from prompt
    #[serde(rename = "goal_extracted")]
    GoalExtracted { goal: AgentGoal },

    /// Goal progress update
    #[serde(rename = "goal_progress")]
    GoalProgress {
        goal: String,
        progress: f32,
        completed_steps: usize,
        total_steps: usize,
    },

    /// Goal achieved
    #[serde(rename = "goal_achieved")]
    GoalAchieved {
        goal: String,
        total_steps: usize,
        duration_ms: i64,
    },

    // ========================================================================
    // Context Compaction events
    // ========================================================================
    /// Context automatically compacted due to high usage
    #[serde(rename = "context_compacted")]
    ContextCompacted {
        session_id: String,
        before_messages: usize,
        after_messages: usize,
        percent_before: f32,
    },

    // ========================================================================
    // Persistence events
    // ========================================================================
    /// Session persistence failed — SDK clients should handle this
    #[serde(rename = "persistence_failed")]
    PersistenceFailed {
        session_id: String,
        operation: String,
        error: String,
    },
}
582
/// Result of agent execution
#[derive(Debug, Clone)]
pub struct AgentResult {
    /// Response text of the execution.
    /// NOTE(review): presumably the final assistant text — confirm against the loop.
    pub text: String,
    /// Conversation messages associated with this execution.
    /// NOTE(review): whether this is the full history or only new messages is
    /// not visible here — confirm.
    pub messages: Vec<Message>,
    /// Token usage reported for the execution.
    pub usage: TokenUsage,
    /// Number of tool calls counted during the execution.
    pub tool_calls_count: usize,
    /// Verification reports; `verification_summary()` aggregates these.
    pub verification_reports: Vec<crate::verification::VerificationReport>,
}
592
593impl AgentResult {
594    pub fn verification_summary(&self) -> crate::verification::VerificationSummary {
595        crate::verification::VerificationSummary::from_reports(&self.verification_reports)
596    }
597
598    pub fn verification_summary_text(&self) -> String {
599        crate::verification::format_verification_summary(&self.verification_summary())
600    }
601
602    pub fn has_pending_verification(&self) -> bool {
603        matches!(
604            self.verification_summary().status,
605            crate::verification::VerificationStatus::NeedsReview
606        )
607    }
608}
609
610// ============================================================================
611// ToolCommand — bridges ToolExecutor to SessionCommand for queue submission
612// ============================================================================
613
/// Adapter that implements `SessionCommand` for tool execution via the queue.
///
/// Wraps a `ToolExecutor` call so it can be submitted to `SessionLaneQueue`.
pub struct ToolCommand {
    /// Executor that runs the tool when the command is executed.
    tool_executor: Arc<ToolExecutor>,
    /// Name of the tool to run; also reported as the command type.
    tool_name: String,
    /// JSON arguments passed to the tool; also reported as the command payload.
    tool_args: Value,
    /// Context handed to the executor together with the name and arguments.
    tool_context: ToolContext,
    /// Optional skill registry consulted for tool restrictions before the tool runs.
    skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
}
624
625impl ToolCommand {
626    /// Create a new ToolCommand
627    pub fn new(
628        tool_executor: Arc<ToolExecutor>,
629        tool_name: String,
630        tool_args: Value,
631        tool_context: ToolContext,
632        skill_registry: Option<Arc<crate::skills::SkillRegistry>>,
633    ) -> Self {
634        Self {
635            tool_executor,
636            tool_name,
637            tool_args,
638            tool_context,
639            skill_registry,
640        }
641    }
642}
643
644#[async_trait]
645impl SessionCommand for ToolCommand {
646    async fn execute(&self) -> Result<Value> {
647        // Check skill-based tool permissions
648        if let Some(registry) = &self.skill_registry {
649            // If there are instruction skills with tool restrictions, check permissions
650            let restricting_skills = registry.global_tool_restricting_skills();
651
652            if !restricting_skills.is_empty() {
653                let mut allowed = false;
654
655                for skill in &restricting_skills {
656                    if skill.is_tool_allowed(&self.tool_name) {
657                        allowed = true;
658                        break;
659                    }
660                }
661
662                if !allowed {
663                    return Err(anyhow::anyhow!(
664                        "Tool '{}' is not allowed by any active skill. Active skills restrict tools to their allowed-tools lists.",
665                        self.tool_name
666                    ));
667                }
668            }
669        }
670
671        // Execute the tool
672        let result = self
673            .tool_executor
674            .execute_with_context(&self.tool_name, &self.tool_args, &self.tool_context)
675            .await?;
676        Ok(serde_json::json!({
677            "output": result.output,
678            "exit_code": result.exit_code,
679            "metadata": result.metadata,
680        }))
681    }
682
683    fn command_type(&self) -> &str {
684        &self.tool_name
685    }
686
687    fn payload(&self) -> Value {
688        self.tool_args.clone()
689    }
690}
691
692// ============================================================================
693// AgentLoop
694// ============================================================================
695
/// Internal agent loop executor.
///
/// Bundles the LLM client, the tool executor with its context, the loop
/// configuration and an optional command queue. Only the data layout is
/// declared here.
/// NOTE(review): the behaviour presumably lives in the private submodules
/// declared near the top of this file — confirm.
#[derive(Clone)]
pub(crate) struct AgentLoop {
    /// Shared LLM client handle.
    llm_client: Arc<dyn LlmClient>,
    /// Shared tool executor handle.
    tool_executor: Arc<ToolExecutor>,
    /// Tool context owned by this loop.
    /// NOTE(review): presumably passed to every tool execution — confirm.
    tool_context: ToolContext,
    /// Loop configuration.
    config: AgentConfig,
    /// Optional lane queue for priority-based tool execution
    command_queue: Option<Arc<SessionLaneQueue>>,
}
706
707#[cfg(test)]
708pub(crate) mod tests;
709
710#[cfg(test)]
711mod extra_agent_tests;
a3s_code_core/agent.rs

a3s_code_core/
agent.rs